summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-block12
-rw-r--r--Documentation/DMA-API.txt29
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst8
-rw-r--r--Documentation/block/biodoc.txt88
-rw-r--r--Documentation/block/cfq-iosched.txt291
-rw-r--r--Documentation/block/queue-sysfs.txt29
-rw-r--r--Documentation/devicetree/bindings/mmc/arasan,sdhci.txt4
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt1
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-am654.txt36
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-omap.txt2
-rw-r--r--Documentation/devicetree/bindings/mmc/tmio_mmc.txt4
-rw-r--r--Documentation/devicetree/bindings/ufs/cdns,ufshc.txt31
-rw-r--r--Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt6
-rw-r--r--Documentation/features/io/sg-chain/arch-support.txt33
-rw-r--r--Documentation/nvdimm/security.txt141
-rw-r--r--Documentation/scsi/scsi-parameters.txt5
-rw-r--r--Documentation/scsi/scsi_mid_low_api.txt2
-rw-r--r--Documentation/security/keys/trusted-encrypted.rst6
-rw-r--r--Documentation/sh/new-machine.txt8
-rw-r--r--Documentation/x86/x86_64/boot-options.txt5
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/alpha/Kconfig2
-rw-r--r--arch/alpha/include/asm/dma-mapping.h2
-rw-r--r--arch/alpha/kernel/pci_iommu.c16
-rw-r--r--arch/arc/Kconfig2
-rw-r--r--arch/arc/mm/cache.c2
-rw-r--r--arch/arc/mm/dma.c2
-rw-r--r--arch/arm/Kconfig4
-rw-r--r--arch/arm/common/dmabounce.c12
-rw-r--r--arch/arm/include/asm/dma-iommu.h2
-rw-r--r--arch/arm/include/asm/dma-mapping.h2
-rw-r--r--arch/arm/mach-ep93xx/simone.c14
-rw-r--r--arch/arm/mach-ep93xx/vision_ep9307.c17
-rw-r--r--arch/arm/mach-imx/mach-pcm043.c17
-rw-r--r--arch/arm/mach-pxa/balloon3.c3
-rw-r--r--arch/arm/mach-pxa/cm-x270.c18
-rw-r--r--arch/arm/mach-pxa/cm-x300.c18
-rw-r--r--arch/arm/mach-pxa/colibri-evalboard.c40
-rw-r--r--arch/arm/mach-pxa/colibri-pxa270-income.c19
-rw-r--r--arch/arm/mach-pxa/corgi.c20
-rw-r--r--arch/arm/mach-pxa/csb726.c19
-rw-r--r--arch/arm/mach-pxa/em-x270.c34
-rw-r--r--arch/arm/mach-pxa/gumstix.c3
-rw-r--r--arch/arm/mach-pxa/idp.c3
-rw-r--r--arch/arm/mach-pxa/littleton.c18
-rw-r--r--arch/arm/mach-pxa/lubbock.c3
-rw-r--r--arch/arm/mach-pxa/magician.c26
-rw-r--r--arch/arm/mach-pxa/mainstone.c3
-rw-r--r--arch/arm/mach-pxa/mioa701.c21
-rw-r--r--arch/arm/mach-pxa/mxm8x10.c19
-rw-r--r--arch/arm/mach-pxa/palm27x.c10
-rw-r--r--arch/arm/mach-pxa/palm27x.h8
-rw-r--r--arch/arm/mach-pxa/palmld.c28
-rw-r--r--arch/arm/mach-pxa/palmt5.c16
-rw-r--r--arch/arm/mach-pxa/palmtc.c19
-rw-r--r--arch/arm/mach-pxa/palmte2.c18
-rw-r--r--arch/arm/mach-pxa/palmtreo.c31
-rw-r--r--arch/arm/mach-pxa/palmtx.c16
-rw-r--r--arch/arm/mach-pxa/palmz72.c16
-rw-r--r--arch/arm/mach-pxa/pcm990-baseboard.c3
-rw-r--r--arch/arm/mach-pxa/poodle.c15
-rw-r--r--arch/arm/mach-pxa/raumfeld.c3
-rw-r--r--arch/arm/mach-pxa/spitz.c16
-rw-r--r--arch/arm/mach-pxa/stargate2.c3
-rw-r--r--arch/arm/mach-pxa/tosa.c18
-rw-r--r--arch/arm/mach-pxa/trizeps4.c3
-rw-r--r--arch/arm/mach-pxa/vpac270.c16
-rw-r--r--arch/arm/mach-pxa/z2.c14
-rw-r--r--arch/arm/mach-pxa/zeus.c15
-rw-r--r--arch/arm/mach-pxa/zylonite.c57
-rw-r--r--arch/arm/mach-pxa/zylonite_pxa300.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-at2440evb.c14
-rw-r--r--arch/arm/mach-s3c24xx/mach-h1940.c15
-rw-r--r--arch/arm/mach-s3c24xx/mach-mini2440.c15
-rw-r--r--arch/arm/mach-s3c24xx/mach-n30.c15
-rw-r--r--arch/arm/mach-s3c24xx/mach-rx1950.c15
-rw-r--r--arch/arm/mm/dma-mapping-nommu.c14
-rw-r--r--arch/arm/mm/dma-mapping.c39
-rw-r--r--arch/arm64/Kconfig3
-rw-r--r--arch/arm64/include/asm/dma-mapping.h8
-rw-r--r--arch/arm64/mm/dma-mapping.c286
-rw-r--r--arch/c6x/Kconfig1
-rw-r--r--arch/c6x/mm/dma-coherent.c5
-rw-r--r--arch/csky/Kconfig3
-rw-r--r--arch/csky/mm/dma-mapping.c142
-rw-r--r--arch/h8300/Kconfig1
-rw-r--r--arch/hexagon/Kconfig1
-rw-r--r--arch/ia64/Kconfig4
-rw-r--r--arch/ia64/hp/common/hwsw_iommu.c2
-rw-r--r--arch/ia64/hp/common/sba_iommu.c87
-rw-r--r--arch/ia64/hp/sim/simscsi.c2
-rw-r--r--arch/ia64/kernel/dma-mapping.c21
-rw-r--r--arch/ia64/mm/init.c19
-rw-r--r--arch/ia64/sn/pci/pci_dma.c8
-rw-r--r--arch/m68k/Kconfig1
-rw-r--r--arch/m68k/kernel/dma.c2
-rw-r--r--arch/microblaze/Kconfig1
-rw-r--r--arch/microblaze/mm/consistent.c2
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/mips/include/asm/dma-mapping.h4
-rw-r--r--arch/mips/include/asm/jazzdma.h6
-rw-r--r--arch/mips/include/asm/mach-jz4740/jz4740_mmc.h4
-rw-r--r--arch/mips/include/asm/mach-rc32434/rb.h6
-rw-r--r--arch/mips/jazz/jazzdma.c16
-rw-r--r--arch/mips/jz4740/board-qi_lb60.c18
-rw-r--r--arch/mips/rb532/devices.c12
-rw-r--r--arch/nds32/Kconfig1
-rw-r--r--arch/nios2/Kconfig1
-rw-r--r--arch/openrisc/Kconfig1
-rw-r--r--arch/openrisc/kernel/dma.c2
-rw-r--r--arch/parisc/Kconfig2
-rw-r--r--arch/parisc/kernel/pci-dma.c4
-rw-r--r--arch/parisc/kernel/setup.c4
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h1
-rw-r--r--arch/powerpc/include/asm/iommu.h4
-rw-r--r--arch/powerpc/kernel/dma-iommu.c6
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c17
-rw-r--r--arch/powerpc/kernel/iommu.c28
-rw-r--r--arch/powerpc/platforms/cell/iommu.c1
-rw-r--r--arch/powerpc/platforms/pseries/vio.c3
-rw-r--r--arch/riscv/Kconfig1
-rw-r--r--arch/riscv/include/asm/dma-mapping.h15
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/pci/pci_dma.c20
-rw-r--r--arch/sh/Kconfig1
-rw-r--r--arch/sh/boards/mach-dreamcast/Makefile4
-rw-r--r--arch/sh/boards/mach-dreamcast/rtc.c45
-rw-r--r--arch/sh/boards/mach-dreamcast/setup.c1
-rw-r--r--arch/sh/boards/mach-ecovec24/setup.c43
-rw-r--r--arch/sh/boards/mach-sh03/Makefile3
-rw-r--r--arch/sh/boards/mach-sh03/rtc.c51
-rw-r--r--arch/sh/boards/mach-sh03/setup.c9
-rw-r--r--arch/sh/boards/of-generic.c8
-rw-r--r--arch/sh/configs/dreamcast_defconfig2
-rw-r--r--arch/sh/configs/sh03_defconfig2
-rw-r--r--arch/sh/include/asm/rtc.h3
-rw-r--r--arch/sh/include/mach-dreamcast/mach/sysasic.h1
-rw-r--r--arch/sh/kernel/time.c74
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/sparc/include/asm/dma-mapping.h8
-rw-r--r--arch/sparc/include/asm/dma.h48
-rw-r--r--arch/sparc/include/asm/leon.h9
-rw-r--r--arch/sparc/include/asm/pci.h53
-rw-r--r--arch/sparc/include/asm/pci_32.h41
-rw-r--r--arch/sparc/include/asm/pci_64.h52
-rw-r--r--arch/sparc/kernel/iommu.c12
-rw-r--r--arch/sparc/kernel/iommu_common.h2
-rw-r--r--arch/sparc/kernel/ioport.c243
-rw-r--r--arch/sparc/kernel/pci_sun4v.c14
-rw-r--r--arch/sparc/mm/io-unit.c80
-rw-r--r--arch/sparc/mm/iommu.c160
-rw-r--r--arch/unicore32/Kconfig1
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/kernel/amd_gart_64.c63
-rw-r--r--arch/x86/kernel/pci-calgary_64.c30
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb.c4
-rw-r--r--arch/x86/mm/mem_encrypt.c7
-rw-r--r--arch/x86/pci/sta2x11-fixup.c1
-rw-r--r--arch/xtensa/Kconfig3
-rw-r--r--arch/xtensa/kernel/pci-dma.c2
-rw-r--r--block/Kconfig6
-rw-r--r--block/Kconfig.iosched61
-rw-r--r--block/Makefile5
-rw-r--r--block/bfq-cgroup.c6
-rw-r--r--block/bfq-iosched.c21
-rw-r--r--block/bio-integrity.c2
-rw-r--r--block/bio.c202
-rw-r--r--block/blk-cgroup.c272
-rw-r--r--block/blk-core.c2066
-rw-r--r--block/blk-exec.c20
-rw-r--r--block/blk-flush.c188
-rw-r--r--block/blk-ioc.c54
-rw-r--r--block/blk-iolatency.c75
-rw-r--r--block/blk-merge.c71
-rw-r--r--block/blk-mq-cpumap.c19
-rw-r--r--block/blk-mq-debugfs.c147
-rw-r--r--block/blk-mq-debugfs.h17
-rw-r--r--block/blk-mq-pci.c10
-rw-r--r--block/blk-mq-rdma.c8
-rw-r--r--block/blk-mq-sched.c82
-rw-r--r--block/blk-mq-sched.h25
-rw-r--r--block/blk-mq-sysfs.c35
-rw-r--r--block/blk-mq-tag.c41
-rw-r--r--block/blk-mq-virtio.c8
-rw-r--r--block/blk-mq.c757
-rw-r--r--block/blk-mq.h70
-rw-r--r--block/blk-pm.c20
-rw-r--r--block/blk-pm.h6
-rw-r--r--block/blk-rq-qos.c154
-rw-r--r--block/blk-rq-qos.h96
-rw-r--r--block/blk-settings.c68
-rw-r--r--block/blk-softirq.c27
-rw-r--r--block/blk-stat.c4
-rw-r--r--block/blk-stat.h5
-rw-r--r--block/blk-sysfs.c112
-rw-r--r--block/blk-tag.c378
-rw-r--r--block/blk-throttle.c39
-rw-r--r--block/blk-timeout.c117
-rw-r--r--block/blk-wbt.c176
-rw-r--r--block/blk-zoned.c2
-rw-r--r--block/blk.h188
-rw-r--r--block/bounce.c3
-rw-r--r--block/bsg-lib.c146
-rw-r--r--block/bsg.c2
-rw-r--r--block/cfq-iosched.c4916
-rw-r--r--block/deadline-iosched.c560
-rw-r--r--block/elevator.c477
-rw-r--r--block/genhd.c63
-rw-r--r--block/kyber-iosched.c37
-rw-r--r--block/mq-deadline.c15
-rw-r--r--block/noop-iosched.c124
-rw-r--r--block/partition-generic.c18
-rw-r--r--drivers/acpi/nfit/Kconfig11
-rw-r--r--drivers/acpi/nfit/Makefile1
-rw-r--r--drivers/acpi/nfit/core.c103
-rw-r--r--drivers/acpi/nfit/intel.c388
-rw-r--r--drivers/acpi/nfit/intel.h76
-rw-r--r--drivers/acpi/nfit/nfit.h24
-rw-r--r--drivers/acpi/scan.c5
-rw-r--r--drivers/ata/libata-eh.c4
-rw-r--r--drivers/ata/pata_palmld.c83
-rw-r--r--drivers/ata/pata_pxa.c1
-rw-r--r--drivers/ata/pata_rb532_cf.c45
-rw-r--r--drivers/ata/sata_highbank.c37
-rw-r--r--drivers/ata/sata_rcar.c4
-rw-r--r--drivers/base/platform.c34
-rw-r--r--drivers/block/aoe/aoe.h4
-rw-r--r--drivers/block/aoe/aoeblk.c1
-rw-r--r--drivers/block/aoe/aoecmd.c27
-rw-r--r--drivers/block/aoe/aoedev.c11
-rw-r--r--drivers/block/aoe/aoemain.c2
-rw-r--r--drivers/block/ataflop.c26
-rw-r--r--drivers/block/drbd/drbd_main.c2
-rw-r--r--drivers/block/floppy.c6
-rw-r--r--drivers/block/loop.c415
-rw-r--r--drivers/block/loop.h1
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c226
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h48
-rw-r--r--drivers/block/nbd.c3
-rw-r--r--drivers/block/null_blk.h1
-rw-r--r--drivers/block/null_blk_main.c21
-rw-r--r--drivers/block/null_blk_zoned.c27
-rw-r--r--drivers/block/paride/pd.c30
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/skd_main.c16
-rw-r--r--drivers/block/sunvdc.c153
-rw-r--r--drivers/block/sx8.c434
-rw-r--r--drivers/block/umem.c3
-rw-r--r--drivers/block/virtio_blk.c17
-rw-r--r--drivers/firewire/sbp2.c1
-rw-r--r--drivers/gpio/gpio-pca953x.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c2
-rw-r--r--drivers/ide/ide-atapi.c27
-rw-r--r--drivers/ide/ide-cd.c179
-rw-r--r--drivers/ide/ide-devsets.c4
-rw-r--r--drivers/ide/ide-disk.c15
-rw-r--r--drivers/ide/ide-eh.c2
-rw-r--r--drivers/ide/ide-floppy.c2
-rw-r--r--drivers/ide/ide-io.c112
-rw-r--r--drivers/ide/ide-park.c8
-rw-r--r--drivers/ide/ide-pm.c46
-rw-r--r--drivers/ide/ide-probe.c69
-rw-r--r--drivers/ide/ide-tape.c2
-rw-r--r--drivers/ide/ide-taskfile.c2
-rw-r--r--drivers/infiniband/core/Makefile2
-rw-r--r--drivers/infiniband/core/agent.c4
-rw-r--r--drivers/infiniband/core/cache.c17
-rw-r--r--drivers/infiniband/core/cm.c6
-rw-r--r--drivers/infiniband/core/cma.c2
-rw-r--r--drivers/infiniband/core/cma_configfs.c3
-rw-r--r--drivers/infiniband/core/cma_priv.h28
-rw-r--r--drivers/infiniband/core/core_priv.h47
-rw-r--r--drivers/infiniband/core/cq.c8
-rw-r--r--drivers/infiniband/core/device.c166
-rw-r--r--drivers/infiniband/core/fmr_pool.c8
-rw-r--r--drivers/infiniband/core/iwcm.c12
-rw-r--r--drivers/infiniband/core/mad.c22
-rw-r--r--drivers/infiniband/core/mad_rmpp.c11
-rw-r--r--drivers/infiniband/core/nldev.c34
-rw-r--r--drivers/infiniband/core/opa_smi.h4
-rw-r--r--drivers/infiniband/core/rdma_core.c54
-rw-r--r--drivers/infiniband/core/rdma_core.h79
-rw-r--r--drivers/infiniband/core/restrack.c51
-rw-r--r--drivers/infiniband/core/sa_query.c5
-rw-r--r--drivers/infiniband/core/security.c8
-rw-r--r--drivers/infiniband/core/smi.h4
-rw-r--r--drivers/infiniband/core/sysfs.c28
-rw-r--r--drivers/infiniband/core/ucm.c2
-rw-r--r--drivers/infiniband/core/user_mad.c244
-rw-r--r--drivers/infiniband/core/uverbs.h86
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c2066
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c59
-rw-r--r--drivers/infiniband/core/uverbs_main.c242
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c118
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c20
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c23
-rw-r--r--drivers/infiniband/core/uverbs_std_types_device.c224
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c17
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c31
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c76
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c514
-rw-r--r--drivers/infiniband/core/verbs.c194
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c17
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h3
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c128
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c34
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h59
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h1
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c10
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h6
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c3
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c66
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c74
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c7
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile1
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c47
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h4
-rw-r--r--drivers/infiniband/hw/hfi1/common.h19
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c49
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c72
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h35
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c31
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h5
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c20
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c32
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c10
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h1
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c48
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.h13
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c2
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c33
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c5
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c28
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h2
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c4
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c18
-rw-r--r--drivers/infiniband/hw/hns/Makefile2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h97
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c41
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c13
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c574
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h142
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c185
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c141
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c37
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c457
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c127
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c6
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c28
-rw-r--r--drivers/infiniband/hw/mlx4/main.c205
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h5
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c31
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c10
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c12
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c4
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c19
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h2
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c102
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c233
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c73
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c18
-rw-r--r--drivers/infiniband/hw/mlx5/main.c360
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h52
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c42
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c125
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c447
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c24
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h9
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c7
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c158
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c22
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c29
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c71
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c92
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c19
-rw-r--r--drivers/infiniband/hw/qedr/main.c103
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c25
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h4
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c13
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c63
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c14
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c82
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c8
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h4
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c5
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.h3
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.c3
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c9
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c299
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.c9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.h3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c16
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c30
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c11
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c33
-rw-r--r--drivers/infiniband/sw/rxe/rxe_sysfs.c18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c129
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c4
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c1
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c9
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c8
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c8
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c160
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h20
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c291
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h44
-rw-r--r--drivers/iommu/amd_iommu.c31
-rw-r--r--drivers/iommu/dma-iommu.c23
-rw-r--r--drivers/iommu/intel-iommu.c26
-rw-r--r--drivers/lightnvm/core.c25
-rw-r--r--drivers/lightnvm/pblk-core.c77
-rw-r--r--drivers/lightnvm/pblk-init.c103
-rw-r--r--drivers/lightnvm/pblk-map.c63
-rw-r--r--drivers/lightnvm/pblk-rb.c5
-rw-r--r--drivers/lightnvm/pblk-read.c66
-rw-r--r--drivers/lightnvm/pblk-recovery.c46
-rw-r--r--drivers/lightnvm/pblk-rl.c5
-rw-r--r--drivers/lightnvm/pblk-sysfs.c7
-rw-r--r--drivers/lightnvm/pblk-write.c64
-rw-r--r--drivers/lightnvm/pblk.h43
-rw-r--r--drivers/md/bcache/bcache.h20
-rw-r--r--drivers/md/bcache/btree.c5
-rw-r--r--drivers/md/bcache/btree.h18
-rw-r--r--drivers/md/bcache/debug.c3
-rw-r--r--drivers/md/bcache/journal.c2
-rw-r--r--drivers/md/bcache/request.c6
-rw-r--r--drivers/md/bcache/super.c48
-rw-r--r--drivers/md/bcache/sysfs.c61
-rw-r--r--drivers/md/bcache/writeback.c30
-rw-r--r--drivers/md/bcache/writeback.h12
-rw-r--r--drivers/md/dm-bufio.c12
-rw-r--r--drivers/md/dm-core.h5
-rw-r--r--drivers/md/dm-crypt.c17
-rw-r--r--drivers/md/dm-delay.c2
-rw-r--r--drivers/md/dm-flakey.c35
-rw-r--r--drivers/md/dm-integrity.c2
-rw-r--r--drivers/md/dm-kcopyd.c19
-rw-r--r--drivers/md/dm-linear.c2
-rw-r--r--drivers/md/dm-mpath.c6
-rw-r--r--drivers/md/dm-raid.c3
-rw-r--r--drivers/md/dm-raid1.c3
-rw-r--r--drivers/md/dm-rq.c25
-rw-r--r--drivers/md/dm-snap.c22
-rw-r--r--drivers/md/dm-table.c7
-rw-r--r--drivers/md/dm-unstripe.c2
-rw-r--r--drivers/md/dm-verity-target.c9
-rw-r--r--drivers/md/dm-writecache.c2
-rw-r--r--drivers/md/dm.c125
-rw-r--r--drivers/md/md.c7
-rw-r--r--drivers/md/raid0.c2
-rw-r--r--drivers/memstick/core/memstick.c3
-rw-r--r--drivers/memstick/core/ms_block.c109
-rw-r--r--drivers/memstick/core/ms_block.h1
-rw-r--r--drivers/memstick/core/mspro_block.c121
-rw-r--r--drivers/memstick/host/rtsx_usb_ms.c170
-rw-r--r--drivers/message/fusion/mptfc.c1
-rw-r--r--drivers/message/fusion/mptsas.c1
-rw-r--r--drivers/message/fusion/mptspi.c1
-rw-r--r--drivers/misc/Makefile2
-rw-r--r--drivers/misc/cardreader/Kconfig11
-rw-r--r--drivers/misc/cardreader/Makefile4
-rw-r--r--drivers/misc/cardreader/alcor_pci.c371
-rw-r--r--drivers/misc/cardreader/rtsx_usb.c8
-rw-r--r--drivers/misc/mic/host/mic_boot.c2
-rw-r--r--drivers/mmc/core/block.c28
-rw-r--r--drivers/mmc/core/card.h6
-rw-r--r--drivers/mmc/core/core.c19
-rw-r--r--drivers/mmc/core/core.h2
-rw-r--r--drivers/mmc/core/mmc.c9
-rw-r--r--drivers/mmc/core/mmc_ops.c93
-rw-r--r--drivers/mmc/core/mmc_ops.h3
-rw-r--r--drivers/mmc/core/mmc_test.c14
-rw-r--r--drivers/mmc/core/queue.c110
-rw-r--r--drivers/mmc/core/queue.h4
-rw-r--r--drivers/mmc/core/slot-gpio.c97
-rw-r--r--drivers/mmc/host/Kconfig28
-rw-r--r--drivers/mmc/host/Makefile2
-rw-r--r--drivers/mmc/host/alcor.c1162
-rw-r--r--drivers/mmc/host/atmel-mci.c34
-rw-r--r--drivers/mmc/host/bcm2835.c71
-rw-r--r--drivers/mmc/host/dw_mmc-bluefield.c11
-rw-r--r--drivers/mmc/host/jz4740_mmc.c202
-rw-r--r--drivers/mmc/host/meson-gx-mmc.c100
-rw-r--r--drivers/mmc/host/meson-mx-sdio.c6
-rw-r--r--drivers/mmc/host/mmc_spi.c27
-rw-r--r--drivers/mmc/host/mmci.c11
-rw-r--r--drivers/mmc/host/mmci.h2
-rw-r--r--drivers/mmc/host/mtk-sd.c53
-rw-r--r--drivers/mmc/host/of_mmc_spi.c34
-rw-r--r--drivers/mmc/host/omap_hsmmc.c14
-rw-r--r--drivers/mmc/host/pxamci.c68
-rw-r--r--drivers/mmc/host/renesas_sdhi_core.c50
-rw-r--r--drivers/mmc/host/renesas_sdhi_internal_dmac.c28
-rw-r--r--drivers/mmc/host/renesas_sdhi_sys_dmac.c20
-rw-r--r--drivers/mmc/host/rtsx_usb_sdmmc.c44
-rw-r--r--drivers/mmc/host/s3cmci.c59
-rw-r--r--drivers/mmc/host/sdhci-acpi.c7
-rw-r--r--drivers/mmc/host/sdhci-cadence.c2
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c26
-rw-r--r--drivers/mmc/host/sdhci-esdhc.h24
-rw-r--r--drivers/mmc/host/sdhci-msm.c126
-rw-r--r--drivers/mmc/host/sdhci-of-arasan.c46
-rw-r--r--drivers/mmc/host/sdhci-of-esdhc.c151
-rw-r--r--drivers/mmc/host/sdhci-omap.c130
-rw-r--r--drivers/mmc/host/sdhci-pci-core.c19
-rw-r--r--drivers/mmc/host/sdhci-xenon-phy.c10
-rw-r--r--drivers/mmc/host/sdhci-xenon.c10
-rw-r--r--drivers/mmc/host/sdhci.c106
-rw-r--r--drivers/mmc/host/sdhci.h13
-rw-r--r--drivers/mmc/host/sdhci_am654.c374
-rw-r--r--drivers/mmc/host/tmio_mmc.h6
-rw-r--r--drivers/mmc/host/tmio_mmc_core.c41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c8
-rw-r--r--drivers/net/wireless/ath/ath6kl/cfg80211.c2
-rw-r--r--drivers/net/wireless/ath/ath6kl/common.h2
-rw-r--r--drivers/net/wireless/ath/ath6kl/wmi.c6
-rw-r--r--drivers/net/wireless/ath/ath6kl/wmi.h6
-rw-r--r--drivers/nvdimm/Kconfig5
-rw-r--r--drivers/nvdimm/Makefile1
-rw-r--r--drivers/nvdimm/bus.c33
-rw-r--r--drivers/nvdimm/dimm.c16
-rw-r--r--drivers/nvdimm/dimm_devs.c210
-rw-r--r--drivers/nvdimm/label.c7
-rw-r--r--drivers/nvdimm/namespace_devs.c3
-rw-r--r--drivers/nvdimm/nd-core.h57
-rw-r--r--drivers/nvdimm/nd.h8
-rw-r--r--drivers/nvdimm/pmem.c2
-rw-r--r--drivers/nvdimm/region_devs.c5
-rw-r--r--drivers/nvdimm/security.c454
-rw-r--r--drivers/nvme/host/Kconfig15
-rw-r--r--drivers/nvme/host/Makefile3
-rw-r--r--drivers/nvme/host/core.c191
-rw-r--r--drivers/nvme/host/fabrics.c61
-rw-r--r--drivers/nvme/host/fabrics.h17
-rw-r--r--drivers/nvme/host/fc.c43
-rw-r--r--drivers/nvme/host/lightnvm.c33
-rw-r--r--drivers/nvme/host/multipath.c20
-rw-r--r--drivers/nvme/host/nvme.h24
-rw-r--r--drivers/nvme/host/pci.c518
-rw-r--r--drivers/nvme/host/rdma.c119
-rw-r--r--drivers/nvme/host/tcp.c2278
-rw-r--r--drivers/nvme/host/trace.c3
-rw-r--r--drivers/nvme/host/trace.h27
-rw-r--r--drivers/nvme/target/Kconfig10
-rw-r--r--drivers/nvme/target/Makefile2
-rw-r--r--drivers/nvme/target/admin-cmd.c146
-rw-r--r--drivers/nvme/target/configfs.c43
-rw-r--r--drivers/nvme/target/core.c220
-rw-r--r--drivers/nvme/target/discovery.c139
-rw-r--r--drivers/nvme/target/fabrics-cmd.c64
-rw-r--r--drivers/nvme/target/fc.c66
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c89
-rw-r--r--drivers/nvme/target/io-cmd-file.c165
-rw-r--r--drivers/nvme/target/loop.c2
-rw-r--r--drivers/nvme/target/nvmet.h68
-rw-r--r--drivers/nvme/target/rdma.c12
-rw-r--r--drivers/nvme/target/tcp.c1737
-rw-r--r--drivers/parisc/ccio-dma.c10
-rw-r--r--drivers/parisc/sba_iommu.c10
-rw-r--r--drivers/pci/controller/vmd.c48
-rw-r--r--drivers/pci/pci-driver.c4
-rw-r--r--drivers/s390/block/dasd_ioctl.c22
-rw-r--r--drivers/s390/scsi/zfcp_aux.c82
-rw-r--r--drivers/s390/scsi/zfcp_dbf.c25
-rw-r--r--drivers/s390/scsi/zfcp_dbf.h14
-rw-r--r--drivers/s390/scsi/zfcp_def.h113
-rw-r--r--drivers/s390/scsi/zfcp_erp.c345
-rw-r--r--drivers/s390/scsi/zfcp_ext.h9
-rw-r--r--drivers/s390/scsi/zfcp_fc.c48
-rw-r--r--drivers/s390/scsi/zfcp_fc.h21
-rw-r--r--drivers/s390/scsi/zfcp_fsf.c51
-rw-r--r--drivers/s390/scsi/zfcp_fsf.h4
-rw-r--r--drivers/s390/scsi/zfcp_qdio.c11
-rw-r--r--drivers/s390/scsi/zfcp_qdio.h9
-rw-r--r--drivers/s390/scsi/zfcp_reqlist.h2
-rw-r--r--drivers/s390/scsi/zfcp_scsi.c15
-rw-r--r--drivers/scsi/3w-9xxx.c1
-rw-r--r--drivers/scsi/3w-sas.c1
-rw-r--r--drivers/scsi/3w-xxxx.c3
-rw-r--r--drivers/scsi/53c700.c1
-rw-r--r--drivers/scsi/BusLogic.c2
-rw-r--r--drivers/scsi/Kconfig12
-rw-r--r--drivers/scsi/a100u2w.c1
-rw-r--r--drivers/scsi/a2091.c2
-rw-r--r--drivers/scsi/a3000.c1
-rw-r--r--drivers/scsi/aacraid/aachba.c5
-rw-r--r--drivers/scsi/aacraid/aacraid.h5
-rw-r--r--drivers/scsi/aacraid/commctrl.c5
-rw-r--r--drivers/scsi/aacraid/commsup.c19
-rw-r--r--drivers/scsi/aacraid/dpcsup.c19
-rw-r--r--drivers/scsi/aacraid/linit.c4
-rw-r--r--drivers/scsi/aacraid/src.c2
-rw-r--r--drivers/scsi/advansys.c12
-rw-r--r--drivers/scsi/aha152x.c2
-rw-r--r--drivers/scsi/aha1542.c127
-rw-r--r--drivers/scsi/aha1740.c1
-rw-r--r--drivers/scsi/aic7xxx/aic79xx_osm.c1
-rw-r--r--drivers/scsi/aic7xxx/aic7xxx_osm.c1
-rw-r--r--drivers/scsi/aic94xx/aic94xx_hwi.c3
-rw-r--r--drivers/scsi/aic94xx/aic94xx_init.c1
-rw-r--r--drivers/scsi/arcmsr/arcmsr_hba.c9
-rw-r--r--drivers/scsi/arm/acornscsi.c2
-rw-r--r--drivers/scsi/arm/arxescsi.c2
-rw-r--r--drivers/scsi/arm/cumana_1.c2
-rw-r--r--drivers/scsi/arm/cumana_2.c1
-rw-r--r--drivers/scsi/arm/eesox.c1
-rw-r--r--drivers/scsi/arm/oak.c2
-rw-r--r--drivers/scsi/arm/powertec.c1
-rw-r--r--drivers/scsi/atari_scsi.c2
-rw-r--r--drivers/scsi/atp870u.c1
-rw-r--r--drivers/scsi/be2iscsi/be_main.c9
-rw-r--r--drivers/scsi/bfa/bfa_ioc.c4
-rw-r--r--drivers/scsi/bfa/bfad.c18
-rw-r--r--drivers/scsi/bfa/bfad_im.c2
-rw-r--r--drivers/scsi/bnx2fc/bnx2fc_fcoe.c1
-rw-r--r--drivers/scsi/bnx2i/bnx2i_hwi.c10
-rw-r--r--drivers/scsi/bnx2i/bnx2i_iscsi.c1
-rw-r--r--drivers/scsi/csiostor/csio_init.c3
-rw-r--r--drivers/scsi/csiostor/csio_scsi.c10
-rw-r--r--drivers/scsi/cxgbi/cxgb3i/cxgb3i.c2
-rw-r--r--drivers/scsi/cxgbi/cxgb4i/Kconfig4
-rw-r--r--drivers/scsi/cxgbi/cxgb4i/cxgb4i.c2
-rw-r--r--drivers/scsi/cxlflash/main.c7
-rw-r--r--drivers/scsi/dc395x.c2
-rw-r--r--drivers/scsi/device_handler/scsi_dh_alua.c21
-rw-r--r--drivers/scsi/device_handler/scsi_dh_emc.c8
-rw-r--r--drivers/scsi/device_handler/scsi_dh_hp_sw.c7
-rw-r--r--drivers/scsi/device_handler/scsi_dh_rdac.c7
-rw-r--r--drivers/scsi/dmx3191d.c2
-rw-r--r--drivers/scsi/dpt_i2o.c13
-rw-r--r--drivers/scsi/esas2r/esas2r_init.c49
-rw-r--r--drivers/scsi/esas2r/esas2r_main.c1
-rw-r--r--drivers/scsi/esp_scsi.c1
-rw-r--r--drivers/scsi/fcoe/fcoe.c5
-rw-r--r--drivers/scsi/fnic/fnic_main.c1
-rw-r--r--drivers/scsi/fnic/fnic_scsi.c4
-rw-r--r--drivers/scsi/fnic/fnic_trace.c3
-rw-r--r--drivers/scsi/g_NCR5380.c2
-rw-r--r--drivers/scsi/gdth.c1
-rw-r--r--drivers/scsi/gvp11.c2
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas.h13
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c203
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v1_hw.c25
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v2_hw.c74
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v3_hw.c322
-rw-r--r--drivers/scsi/hosts.c35
-rw-r--r--drivers/scsi/hpsa.c6
-rw-r--r--drivers/scsi/hptiop.c11
-rw-r--r--drivers/scsi/ibmvscsi/ibmvfc.c1
-rw-r--r--drivers/scsi/ibmvscsi/ibmvscsi.c1
-rw-r--r--drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c8
-rw-r--r--drivers/scsi/imm.c1
-rw-r--r--drivers/scsi/initio.c3
-rw-r--r--drivers/scsi/ipr.c1
-rw-r--r--drivers/scsi/ips.c10
-rw-r--r--drivers/scsi/ips.h9
-rw-r--r--drivers/scsi/isci/init.c20
-rw-r--r--drivers/scsi/isci/phy.c1
-rw-r--r--drivers/scsi/isci/remote_device.c4
-rw-r--r--drivers/scsi/isci/remote_node_context.c4
-rw-r--r--drivers/scsi/isci/request.c2
-rw-r--r--drivers/scsi/iscsi_tcp.c6
-rw-r--r--drivers/scsi/libfc/fc_rport.c3
-rw-r--r--drivers/scsi/libiscsi.c10
-rw-r--r--drivers/scsi/libiscsi_tcp.c4
-rw-r--r--drivers/scsi/libsas/Makefile3
-rw-r--r--drivers/scsi/libsas/sas_ata.c31
-rw-r--r--drivers/scsi/libsas/sas_discover.c33
-rw-r--r--drivers/scsi/libsas/sas_dump.c63
-rw-r--r--drivers/scsi/libsas/sas_dump.h29
-rw-r--r--drivers/scsi/libsas/sas_event.c1
-rw-r--r--drivers/scsi/libsas/sas_expander.c236
-rw-r--r--drivers/scsi/libsas/sas_init.c10
-rw-r--r--drivers/scsi/libsas/sas_internal.h16
-rw-r--r--drivers/scsi/libsas/sas_phy.c8
-rw-r--r--drivers/scsi/libsas/sas_port.c23
-rw-r--r--drivers/scsi/libsas/sas_scsi_host.c133
-rw-r--r--drivers/scsi/libsas/sas_task.c10
-rw-r--r--drivers/scsi/lpfc/lpfc.h29
-rw-r--r--drivers/scsi/lpfc/lpfc_attr.c230
-rw-r--r--drivers/scsi/lpfc/lpfc_bsg.c200
-rw-r--r--drivers/scsi/lpfc/lpfc_bsg.h38
-rw-r--r--drivers/scsi/lpfc/lpfc_crtn.h5
-rw-r--r--drivers/scsi/lpfc/lpfc_ct.c261
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_disc.h4
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c468
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c224
-rw-r--r--drivers/scsi/lpfc/lpfc_hw.h1
-rw-r--r--drivers/scsi/lpfc/lpfc_hw4.h80
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c273
-rw-r--r--drivers/scsi/lpfc/lpfc_mbox.c44
-rw-r--r--drivers/scsi/lpfc/lpfc_mem.c6
-rw-r--r--drivers/scsi/lpfc/lpfc_nportdisc.c81
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c147
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.h4
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c309
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.h6
-rw-r--r--drivers/scsi/lpfc/lpfc_sli4.h14
-rw-r--r--drivers/scsi/lpfc/lpfc_version.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_vport.c4
-rw-r--r--drivers/scsi/mac53c94.c2
-rw-r--r--drivers/scsi/mac_esp.c2
-rw-r--r--drivers/scsi/mac_scsi.c2
-rw-r--r--drivers/scsi/megaraid.c1
-rw-r--r--drivers/scsi/megaraid/megaraid_mbox.c10
-rw-r--r--drivers/scsi/megaraid/megaraid_mm.c3
-rw-r--r--drivers/scsi/megaraid/megaraid_sas.h74
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c447
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fp.c24
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fusion.c470
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fusion.h26
-rw-r--r--drivers/scsi/mesh.c2
-rw-r--r--drivers/scsi/mpt3sas/mpi/mpi2.h17
-rw-r--r--drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h94
-rw-r--r--drivers/scsi/mpt3sas/mpi/mpi2_image.h506
-rw-r--r--drivers/scsi/mpt3sas/mpi/mpi2_init.h2
-rw-r--r--drivers/scsi/mpt3sas/mpi/mpi2_ioc.h359
-rw-r--r--drivers/scsi/mpt3sas/mpi/mpi2_pci.h11
-rw-r--r--drivers/scsi/mpt3sas/mpi/mpi2_raid.h2
-rw-r--r--drivers/scsi/mpt3sas/mpi/mpi2_sas.h2
-rw-r--r--drivers/scsi/mpt3sas/mpi/mpi2_tool.h72
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c145
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.h14
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_config.c29
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_ctl.c21
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c72
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_transport.c91
-rw-r--r--drivers/scsi/mvme147.c1
-rw-r--r--drivers/scsi/mvsas/mv_init.c1
-rw-r--r--drivers/scsi/mvumi.c3
-rw-r--r--drivers/scsi/myrb.c3
-rw-r--r--drivers/scsi/ncr53c8xx.c1
-rw-r--r--drivers/scsi/nsp32.c2
-rw-r--r--drivers/scsi/osd/osd_initiator.c4
-rw-r--r--drivers/scsi/osst.c2
-rw-r--r--drivers/scsi/pcmcia/nsp_cs.c2
-rw-r--r--drivers/scsi/pcmcia/qlogic_stub.c2
-rw-r--r--drivers/scsi/pcmcia/sym53c500_cs.c1
-rw-r--r--drivers/scsi/pm8001/pm8001_init.c1
-rw-r--r--drivers/scsi/pmcraid.c117
-rw-r--r--drivers/scsi/ppa.c1
-rw-r--r--drivers/scsi/ps3rom.c1
-rw-r--r--drivers/scsi/qedf/qedf_main.c4
-rw-r--r--drivers/scsi/qedi/qedi.h7
-rw-r--r--drivers/scsi/qedi/qedi_iscsi.c1
-rw-r--r--drivers/scsi/qedi/qedi_main.c90
-rw-r--r--drivers/scsi/qedi/qedi_version.h4
-rw-r--r--drivers/scsi/qla1280.c53
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c10
-rw-r--r--drivers/scsi/qla2xxx/qla_mid.c1
-rw-r--r--drivers/scsi/qla2xxx/qla_nvme.c28
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c154
-rw-r--r--drivers/scsi/qla2xxx/qla_target.c22
-rw-r--r--drivers/scsi/qla2xxx/qla_target.h8
-rw-r--r--drivers/scsi/qla2xxx/qla_version.h2
-rw-r--r--drivers/scsi/qla2xxx/tcm_qla2xxx.c64
-rw-r--r--drivers/scsi/qla2xxx/tcm_qla2xxx.h3
-rw-r--r--drivers/scsi/qla4xxx/ql4_os.c11
-rw-r--r--drivers/scsi/qlogicfas.c2
-rw-r--r--drivers/scsi/qlogicpti.c1
-rw-r--r--drivers/scsi/scsi.c5
-rw-r--r--drivers/scsi/scsi_debug.c11
-rw-r--r--drivers/scsi/scsi_error.c24
-rw-r--r--drivers/scsi/scsi_lib.c812
-rw-r--r--drivers/scsi/scsi_priv.h1
-rw-r--r--drivers/scsi/scsi_scan.c10
-rw-r--r--drivers/scsi/scsi_sysfs.c8
-rw-r--r--drivers/scsi/scsi_transport_fc.c71
-rw-r--r--drivers/scsi/scsi_transport_iscsi.c41
-rw-r--r--drivers/scsi/scsi_transport_sas.c10
-rw-r--r--drivers/scsi/sd.c85
-rw-r--r--drivers/scsi/sd.h6
-rw-r--r--drivers/scsi/sd_zbc.c10
-rw-r--r--drivers/scsi/sg.c2
-rw-r--r--drivers/scsi/sgiwd93.c2
-rw-r--r--drivers/scsi/smartpqi/smartpqi.h216
-rw-r--r--drivers/scsi/smartpqi/smartpqi_init.c1561
-rw-r--r--drivers/scsi/smartpqi/smartpqi_sas_transport.c164
-rw-r--r--drivers/scsi/smartpqi/smartpqi_sis.c15
-rw-r--r--drivers/scsi/smartpqi/smartpqi_sis.h1
-rw-r--r--drivers/scsi/snic/snic_main.c1
-rw-r--r--drivers/scsi/snic/snic_trc.c3
-rw-r--r--drivers/scsi/sr.c12
-rw-r--r--drivers/scsi/st.c2
-rw-r--r--drivers/scsi/stex.c18
-rw-r--r--drivers/scsi/storvsc_drv.c1
-rw-r--r--drivers/scsi/sun3_scsi.c2
-rw-r--r--drivers/scsi/sun_esp.c7
-rw-r--r--drivers/scsi/sym53c8xx_2/sym_glue.c5
-rw-r--r--drivers/scsi/ufs/Kconfig8
-rw-r--r--drivers/scsi/ufs/Makefile1
-rw-r--r--drivers/scsi/ufs/cdns-pltfrm.c148
-rw-r--r--drivers/scsi/ufs/ufs.h18
-rw-r--r--drivers/scsi/ufs/ufs_bsg.c4
-rw-r--r--drivers/scsi/ufs/ufshcd.c104
-rw-r--r--drivers/scsi/ufs/ufshcd.h2
-rw-r--r--drivers/scsi/virtio_scsi.c56
-rw-r--r--drivers/scsi/vmw_pvscsi.c1
-rw-r--r--drivers/scsi/wd719x.c136
-rw-r--r--drivers/scsi/wd719x.h1
-rw-r--r--drivers/scsi/xen-scsifront.c3
-rw-r--r--drivers/staging/rts5208/rtsx.c6
-rw-r--r--drivers/staging/unisys/visorhba/visorhba_main.c1
-rw-r--r--drivers/target/iscsi/iscsi_target.c11
-rw-r--r--drivers/target/iscsi/iscsi_target_configfs.c11
-rw-r--r--drivers/target/iscsi/iscsi_target_erl1.c28
-rw-r--r--drivers/target/iscsi/iscsi_target_util.c12
-rw-r--r--drivers/target/loopback/tcm_loop.c10
-rw-r--r--drivers/target/sbp/sbp_target.c8
-rw-r--r--drivers/target/target_core_alua.c6
-rw-r--r--drivers/target/target_core_configfs.c157
-rw-r--r--drivers/target/target_core_device.c111
-rw-r--r--drivers/target/target_core_fabric_configfs.c2
-rw-r--r--drivers/target/target_core_internal.h2
-rw-r--r--drivers/target/target_core_pr.c90
-rw-r--r--drivers/target/target_core_pscsi.c52
-rw-r--r--drivers/target/target_core_spc.c28
-rw-r--r--drivers/target/target_core_stat.c34
-rw-r--r--drivers/target/target_core_tmr.c56
-rw-r--r--drivers/target/target_core_tpg.c23
-rw-r--r--drivers/target/target_core_transport.c416
-rw-r--r--drivers/target/target_core_ua.c4
-rw-r--r--drivers/target/target_core_user.c2
-rw-r--r--drivers/target/target_core_xcopy.c13
-rw-r--r--drivers/target/tcm_fc/tfc_conf.c8
-rw-r--r--drivers/usb/gadget/function/f_tcm.c8
-rw-r--r--drivers/usb/image/microtek.c1
-rw-r--r--drivers/usb/storage/scsiglue.c7
-rw-r--r--drivers/usb/storage/uas.c1
-rw-r--r--drivers/vhost/scsi.c8
-rw-r--r--drivers/xen/swiotlb-xen.c36
-rw-r--r--drivers/xen/xen-scsiback.c8
-rw-r--r--fs/aio.c291
-rw-r--r--fs/block_dev.c50
-rw-r--r--fs/buffer.c10
-rw-r--r--fs/cifs/smbdirect.c2
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/eventpoll.c52
-rw-r--r--fs/ext4/page-io.c2
-rw-r--r--fs/inode.c4
-rw-r--r--fs/iomap.c16
-rw-r--r--fs/select.c360
-rw-r--r--include/asm-generic/dma-mapping.h2
-rw-r--r--include/linux/alcor_pci.h286
-rw-r--r--include/linux/bio.h29
-rw-r--r--include/linux/blk-cgroup.h227
-rw-r--r--include/linux/blk-mq-pci.h4
-rw-r--r--include/linux/blk-mq-rdma.h2
-rw-r--r--include/linux/blk-mq-virtio.h4
-rw-r--r--include/linux/blk-mq.h83
-rw-r--r--include/linux/blk_types.h24
-rw-r--r--include/linux/blkdev.h256
-rw-r--r--include/linux/bsg-lib.h6
-rw-r--r--include/linux/cgroup.h2
-rw-r--r--include/linux/compat.h26
-rw-r--r--include/linux/dma-debug.h34
-rw-r--r--include/linux/dma-direct.h19
-rw-r--r--include/linux/dma-iommu.h1
-rw-r--r--include/linux/dma-mapping.h350
-rw-r--r--include/linux/dma-noncoherent.h7
-rw-r--r--include/linux/elevator.h94
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/futex.h8
-rw-r--r--include/linux/genhd.h57
-rw-r--r--include/linux/ide.h14
-rw-r--r--include/linux/init.h1
-rw-r--r--include/linux/ioprio.h13
-rw-r--r--include/linux/key.h3
-rw-r--r--include/linux/libata.h2
-rw-r--r--include/linux/libnvdimm.h76
-rw-r--r--include/linux/lightnvm.h3
-rw-r--r--include/linux/mfd/tmio.h9
-rw-r--r--include/linux/mlx5/driver.h4
-rw-r--r--include/linux/mlx5/mlx5_ifc.h76
-rw-r--r--include/linux/mmc/host.h5
-rw-r--r--include/linux/mmc/slot-gpio.h5
-rw-r--r--include/linux/nvme-fc-driver.h17
-rw-r--r--include/linux/nvme-tcp.h189
-rw-r--r--include/linux/nvme.h73
-rw-r--r--include/linux/platform_data/mmc-esdhc-imx.h4
-rw-r--r--include/linux/platform_data/mmc-pxamci.h4
-rw-r--r--include/linux/platform_data/mmc-s3cmci.h4
-rw-r--r--include/linux/sbitmap.h89
-rw-r--r--include/linux/scatterlist.h6
-rw-r--r--include/linux/signal.h4
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/socket.h9
-rw-r--r--include/linux/spi/mmc_spi.h15
-rw-r--r--include/linux/swiotlb.h77
-rw-r--r--include/linux/syscalls.h29
-rw-r--r--include/linux/time32.h25
-rw-r--r--include/linux/timekeeping.h14
-rw-r--r--include/linux/timekeeping32.h15
-rw-r--r--include/linux/uio.h5
-rw-r--r--include/linux/writeback.h5
-rw-r--r--include/rdma/ib_fmr_pool.h2
-rw-r--r--include/rdma/ib_mad.h10
-rw-r--r--include/rdma/ib_verbs.h584
-rw-r--r--include/rdma/rdma_vt.h7
-rw-r--r--include/rdma/restrack.h17
-rw-r--r--include/rdma/uverbs_ioctl.h251
-rw-r--r--include/rdma/uverbs_named_ioctl.h13
-rw-r--r--include/rdma/uverbs_std_types.h73
-rw-r--r--include/scsi/scsi_cmnd.h6
-rw-r--r--include/scsi/scsi_dh.h2
-rw-r--r--include/scsi/scsi_driver.h3
-rw-r--r--include/scsi/scsi_host.h38
-rw-r--r--include/scsi/scsi_tcq.h14
-rw-r--r--include/scsi/srp.h26
-rw-r--r--include/target/target_core_base.h25
-rw-r--r--include/target/target_core_fabric.h25
-rw-r--r--include/trace/events/bcache.h27
-rw-r--r--include/trace/events/iscsi.h107
-rw-r--r--include/uapi/linux/aio_abi.h2
-rw-r--r--include/uapi/linux/mmc/ioctl.h5
-rw-r--r--include/uapi/rdma/hfi/hfi1_user.h6
-rw-r--r--include/uapi/rdma/hns-abi.h6
-rw-r--r--include/uapi/rdma/ib_user_ioctl_cmds.h84
-rw-r--r--include/uapi/rdma/ib_user_ioctl_verbs.h16
-rw-r--r--include/uapi/rdma/ib_user_verbs.h18
-rw-r--r--include/uapi/rdma/mlx5-abi.h2
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h1
-rw-r--r--include/uapi/rdma/rdma_netlink.h3
-rw-r--r--init/do_mounts_initrd.c3
-rw-r--r--init/initramfs.c6
-rw-r--r--init/main.c12
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/cgroup/cgroup.c48
-rw-r--r--kernel/dma/Kconfig14
-rw-r--r--kernel/dma/Makefile5
-rw-r--r--kernel/dma/debug.c259
-rw-r--r--kernel/dma/direct.c222
-rw-r--r--kernel/dma/dummy.c39
-rw-r--r--kernel/dma/mapping.c223
-rw-r--r--kernel/dma/remap.c256
-rw-r--r--kernel/dma/swiotlb.c253
-rw-r--r--kernel/dma/virt.c2
-rw-r--r--kernel/futex.c207
-rw-r--r--kernel/futex_compat.c202
-rw-r--r--kernel/pid.c6
-rw-r--r--kernel/signal.c143
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/time/ntp.c10
-rw-r--r--kernel/time/time.c36
-rw-r--r--kernel/time/timekeeping.c12
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--lib/Kconfig2
-rw-r--r--lib/iov_iter.c19
-rw-r--r--lib/sbitmap.c170
-rw-r--r--lib/scatterlist.c2
-rw-r--r--mm/page_io.c9
-rw-r--r--net/compat.c34
-rw-r--r--net/core/datagram.c159
-rw-r--r--net/rds/ib.c4
-rw-r--r--net/socket.c62
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c2
-rw-r--r--security/keys/encrypted-keys/encrypted.c29
-rw-r--r--security/keys/internal.h2
-rw-r--r--security/keys/process_keys.c1
-rw-r--r--tools/testing/nvdimm/Kbuild3
-rw-r--r--tools/testing/nvdimm/dimm_devs.c41
-rw-r--r--tools/testing/nvdimm/test/nfit.c321
1001 files changed, 34728 insertions, 27081 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index dea212db9df3..7710d4022b19 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -244,7 +244,7 @@ Description:
What: /sys/block/<disk>/queue/zoned
Date: September 2016
-Contact: Damien Le Moal <damien.lemoal@hgst.com>
+Contact: Damien Le Moal <damien.lemoal@wdc.com>
Description:
zoned indicates if the device is a zoned block device
and the zone model of the device if it is indeed zoned.
@@ -259,6 +259,14 @@ Description:
zone commands, they will be treated as regular block
devices and zoned will report "none".
+What: /sys/block/<disk>/queue/nr_zones
+Date: November 2018
+Contact: Damien Le Moal <damien.lemoal@wdc.com>
+Description:
+ nr_zones indicates the total number of zones of a zoned block
+ device ("host-aware" or "host-managed" zone model). For regular
+ block devices, the value is always 0.
+
What: /sys/block/<disk>/queue/chunk_sectors
Date: September 2016
Contact: Hannes Reinecke <hare@suse.com>
@@ -268,6 +276,6 @@ Description:
indicates the size in 512B sectors of the RAID volume
stripe segment. For a zoned block device, either
host-aware or host-managed, chunk_sectors indicates the
- size of 512B sectors of the zones of the device, with
+ size in 512B sectors of the zones of the device, with
the eventual exception of the last zone of the device
which may be smaller.
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index ac66ae2509a9..e133ccd60228 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -60,15 +60,6 @@ the returned memory, like GFP_DMA).
::
- void *
- dma_zalloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
-
-Wraps dma_alloc_coherent() and also zeroes the returned memory if the
-allocation attempt succeeded.
-
-::
-
void
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_handle)
@@ -717,12 +708,15 @@ dma-api/num_errors The number in this file shows how many
dma-api/min_free_entries This read-only file can be read to get the
minimum number of free dma_debug_entries the
allocator has ever seen. If this value goes
- down to zero the code will disable itself
- because it is not longer reliable.
+ down to zero the code will attempt to increase
+ nr_total_entries to compensate.
dma-api/num_free_entries The current number of free dma_debug_entries
in the allocator.
+dma-api/nr_total_entries The total number of dma_debug_entries in the
+ allocator, both free and used.
+
dma-api/driver-filter You can write a name of a driver into this file
to limit the debug output to requests from that
particular driver. Write an empty string to
@@ -742,10 +736,15 @@ driver filter at boot time. The debug code will only print errors for that
driver afterwards. This filter can be disabled or changed later using debugfs.
When the code disables itself at runtime this is most likely because it ran
-out of dma_debug_entries. These entries are preallocated at boot. The number
-of preallocated entries is defined per architecture. If it is too low for you
-boot with 'dma_debug_entries=<your_desired_number>' to overwrite the
-architectural default.
+out of dma_debug_entries and was unable to allocate more on-demand. 65536
+entries are preallocated at boot - if this is too low for you boot with
+'dma_debug_entries=<your_desired_number>' to overwrite the default. Note
+that the code allocates entries in batches, so the exact number of
+preallocated entries may be greater than the actual number requested. The
+code will print to the kernel log each time it has dynamically allocated
+as many entries as were initially preallocated. This is to indicate that a
+larger preallocation size may be appropriate, or if it happens continually
+that a driver may be leaking mappings.
::
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 476722b7b636..baf19bf28385 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1879,8 +1879,10 @@ following two functions.
wbc_init_bio(@wbc, @bio)
Should be called for each bio carrying writeback data and
- associates the bio with the inode's owner cgroup. Can be
- called anytime between bio allocation and submission.
+ associates the bio with the inode's owner cgroup and the
+ corresponding request queue. This must be called after
+ a queue (device) has been associated with the bio and
+ before submission.
wbc_account_io(@wbc, @page, @bytes)
Should be called for each data segment being written out.
@@ -1899,7 +1901,7 @@ the configuration, the bio may be executed at a lower priority and if
the writeback session is holding shared resources, e.g. a journal
entry, may lead to priority inversion. There is no one easy solution
for the problem. Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_blkcg()
+cases by skipping wbc_init_bio() and using bio_associate_blkg()
directly.
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 207eca58efaa..ac18b488cb5e 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -65,7 +65,6 @@ Description of Contents:
3.2.3 I/O completion
3.2.4 Implications for drivers that do not interpret bios (don't handle
multiple segments)
- 3.2.5 Request command tagging
3.3 I/O submission
4. The I/O scheduler
5. Scalability related changes
@@ -708,93 +707,6 @@ is crossed on completion of a transfer. (The end*request* functions should
be used if only if the request has come down from block/bio path, not for
direct access requests which only specify rq->buffer without a valid rq->bio)
-3.2.5 Generic request command tagging
-
-3.2.5.1 Tag helpers
-
-Block now offers some simple generic functionality to help support command
-queueing (typically known as tagged command queueing), ie manage more than
-one outstanding command on a queue at any given time.
-
- blk_queue_init_tags(struct request_queue *q, int depth)
-
- Initialize internal command tagging structures for a maximum
- depth of 'depth'.
-
- blk_queue_free_tags((struct request_queue *q)
-
- Teardown tag info associated with the queue. This will be done
- automatically by block if blk_queue_cleanup() is called on a queue
- that is using tagging.
-
-The above are initialization and exit management, the main helpers during
-normal operations are:
-
- blk_queue_start_tag(struct request_queue *q, struct request *rq)
-
- Start tagged operation for this request. A free tag number between
- 0 and 'depth' is assigned to the request (rq->tag holds this number),
- and 'rq' is added to the internal tag management. If the maximum depth
- for this queue is already achieved (or if the tag wasn't started for
- some other reason), 1 is returned. Otherwise 0 is returned.
-
- blk_queue_end_tag(struct request_queue *q, struct request *rq)
-
- End tagged operation on this request. 'rq' is removed from the internal
- book keeping structures.
-
-To minimize struct request and queue overhead, the tag helpers utilize some
-of the same request members that are used for normal request queue management.
-This means that a request cannot both be an active tag and be on the queue
-list at the same time. blk_queue_start_tag() will remove the request, but
-the driver must remember to call blk_queue_end_tag() before signalling
-completion of the request to the block layer. This means ending tag
-operations before calling end_that_request_last()! For an example of a user
-of these helpers, see the IDE tagged command queueing support.
-
-3.2.5.2 Tag info
-
-Some block functions exist to query current tag status or to go from a
-tag number to the associated request. These are, in no particular order:
-
- blk_queue_tagged(q)
-
- Returns 1 if the queue 'q' is using tagging, 0 if not.
-
- blk_queue_tag_request(q, tag)
-
- Returns a pointer to the request associated with tag 'tag'.
-
- blk_queue_tag_depth(q)
-
- Return current queue depth.
-
- blk_queue_tag_queue(q)
-
- Returns 1 if the queue can accept a new queued command, 0 if we are
- at the maximum depth already.
-
- blk_queue_rq_tagged(rq)
-
- Returns 1 if the request 'rq' is tagged.
-
-3.2.5.2 Internal structure
-
-Internally, block manages tags in the blk_queue_tag structure:
-
- struct blk_queue_tag {
- struct request **tag_index; /* array or pointers to rq */
- unsigned long *tag_map; /* bitmap of free tags */
- struct list_head busy_list; /* fifo list of busy tags */
- int busy; /* queue depth */
- int max_depth; /* max queue depth */
- };
-
-Most of the above is simple and straight forward, however busy_list may need
-a bit of explaining. Normally we don't care too much about request ordering,
-but in the event of any barrier requests in the tag queue we need to ensure
-that requests are restarted in the order they were queue.
-
3.3 I/O Submission
The routine submit_bio() is used to submit a single io. Higher level i/o
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt
deleted file mode 100644
index 895bd3813115..000000000000
--- a/Documentation/block/cfq-iosched.txt
+++ /dev/null
@@ -1,291 +0,0 @@
-CFQ (Complete Fairness Queueing)
-===============================
-
-The main aim of CFQ scheduler is to provide a fair allocation of the disk
-I/O bandwidth for all the processes which requests an I/O operation.
-
-CFQ maintains the per process queue for the processes which request I/O
-operation(synchronous requests). In case of asynchronous requests, all the
-requests from all the processes are batched together according to their
-process's I/O priority.
-
-CFQ ioscheduler tunables
-========================
-
-slice_idle
-----------
-This specifies how long CFQ should idle for next request on certain cfq queues
-(for sequential workloads) and service trees (for random workloads) before
-queue is expired and CFQ selects next queue to dispatch from.
-
-By default slice_idle is a non-zero value. That means by default we idle on
-queues/service trees. This can be very helpful on highly seeky media like
-single spindle SATA/SAS disks where we can cut down on overall number of
-seeks and see improved throughput.
-
-Setting slice_idle to 0 will remove all the idling on queues/service tree
-level and one should see an overall improved throughput on faster storage
-devices like multiple SATA/SAS disks in hardware RAID configuration. The down
-side is that isolation provided from WRITES also goes down and notion of
-IO priority becomes weaker.
-
-So depending on storage and workload, it might be useful to set slice_idle=0.
-In general I think for SATA/SAS disks and software RAID of SATA/SAS disks
-keeping slice_idle enabled should be useful. For any configurations where
-there are multiple spindles behind single LUN (Host based hardware RAID
-controller or for storage arrays), setting slice_idle=0 might end up in better
-throughput and acceptable latencies.
-
-back_seek_max
--------------
-This specifies, given in Kbytes, the maximum "distance" for backward seeking.
-The distance is the amount of space from the current head location to the
-sectors that are backward in terms of distance.
-
-This parameter allows the scheduler to anticipate requests in the "backward"
-direction and consider them as being the "next" if they are within this
-distance from the current head location.
-
-back_seek_penalty
------------------
-This parameter is used to compute the cost of backward seeking. If the
-backward distance of request is just 1/back_seek_penalty from a "front"
-request, then the seeking cost of two requests is considered equivalent.
-
-So scheduler will not bias toward one or the other request (otherwise scheduler
-will bias toward front request). Default value of back_seek_penalty is 2.
-
-fifo_expire_async
------------------
-This parameter is used to set the timeout of asynchronous requests. Default
-value of this is 248ms.
-
-fifo_expire_sync
-----------------
-This parameter is used to set the timeout of synchronous requests. Default
-value of this is 124ms. In case to favor synchronous requests over asynchronous
-one, this value should be decreased relative to fifo_expire_async.
-
-group_idle
------------
-This parameter forces idling at the CFQ group level instead of CFQ
-queue level. This was introduced after a bottleneck was observed
-in higher end storage due to idle on sequential queue and allow dispatch
-from a single queue. The idea with this parameter is that it can be run with
-slice_idle=0 and group_idle=8, so that idling does not happen on individual
-queues in the group but happens overall on the group and thus still keeps the
-IO controller working.
-Not idling on individual queues in the group will dispatch requests from
-multiple queues in the group at the same time and achieve higher throughput
-on higher end storage.
-
-Default value for this parameter is 8ms.
-
-low_latency
------------
-This parameter is used to enable/disable the low latency mode of the CFQ
-scheduler. If enabled, CFQ tries to recompute the slice time for each process
-based on the target_latency set for the system. This favors fairness over
-throughput. Disabling low latency (setting it to 0) ignores target latency,
-allowing each process in the system to get a full time slice.
-
-By default low latency mode is enabled.
-
-target_latency
---------------
-This parameter is used to calculate the time slice for a process if cfq's
-latency mode is enabled. It will ensure that sync requests have an estimated
-latency. But if sequential workload is higher(e.g. sequential read),
-then to meet the latency constraints, throughput may decrease because of less
-time for each process to issue I/O request before the cfq queue is switched.
-
-Though this can be overcome by disabling the latency_mode, it may increase
-the read latency for some applications. This parameter allows for changing
-target_latency through the sysfs interface which can provide the balanced
-throughput and read latency.
-
-Default value for target_latency is 300ms.
-
-slice_async
------------
-This parameter is same as of slice_sync but for asynchronous queue. The
-default value is 40ms.
-
-slice_async_rq
---------------
-This parameter is used to limit the dispatching of asynchronous request to
-device request queue in queue's slice time. The maximum number of request that
-are allowed to be dispatched also depends upon the io priority. Default value
-for this is 2.
-
-slice_sync
-----------
-When a queue is selected for execution, the queues IO requests are only
-executed for a certain amount of time(time_slice) before switching to another
-queue. This parameter is used to calculate the time slice of synchronous
-queue.
-
-time_slice is computed using the below equation:-
-time_slice = slice_sync + (slice_sync/5 * (4 - prio)). To increase the
-time_slice of synchronous queue, increase the value of slice_sync. Default
-value is 100ms.
-
-quantum
--------
-This specifies the number of request dispatched to the device queue. In a
-queue's time slice, a request will not be dispatched if the number of request
-in the device exceeds this parameter. This parameter is used for synchronous
-request.
-
-In case of storage with several disk, this setting can limit the parallel
-processing of request. Therefore, increasing the value can improve the
-performance although this can cause the latency of some I/O to increase due
-to more number of requests.
-
-CFQ Group scheduling
-====================
-
-CFQ supports blkio cgroup and has "blkio." prefixed files in each
-blkio cgroup directory. It is weight-based and there are four knobs
-for configuration - weight[_device] and leaf_weight[_device].
-Internal cgroup nodes (the ones with children) can also have tasks in
-them, so the former two configure how much proportion the cgroup as a
-whole is entitled to at its parent's level while the latter two
-configure how much proportion the tasks in the cgroup have compared to
-its direct children.
-
-Another way to think about it is assuming that each internal node has
-an implicit leaf child node which hosts all the tasks whose weight is
-configured by leaf_weight[_device]. Let's assume a blkio hierarchy
-composed of five cgroups - root, A, B, AA and AB - with the following
-weights where the names represent the hierarchy.
-
- weight leaf_weight
- root : 125 125
- A : 500 750
- B : 250 500
- AA : 500 500
- AB : 1000 500
-
-root never has a parent making its weight is meaningless. For backward
-compatibility, weight is always kept in sync with leaf_weight. B, AA
-and AB have no child and thus its tasks have no children cgroup to
-compete with. They always get 100% of what the cgroup won at the
-parent level. Considering only the weights which matter, the hierarchy
-looks like the following.
-
- root
- / | \
- A B leaf
- 500 250 125
- / | \
- AA AB leaf
- 500 1000 750
-
-If all cgroups have active IOs and competing with each other, disk
-time will be distributed like the following.
-
-Distribution below root. The total active weight at this level is
-A:500 + B:250 + C:125 = 875.
-
- root-leaf : 125 / 875 =~ 14%
- A : 500 / 875 =~ 57%
- B(-leaf) : 250 / 875 =~ 28%
-
-A has children and further distributes its 57% among the children and
-the implicit leaf node. The total active weight at this level is
-AA:500 + AB:1000 + A-leaf:750 = 2250.
-
- A-leaf : ( 750 / 2250) * A =~ 19%
- AA(-leaf) : ( 500 / 2250) * A =~ 12%
- AB(-leaf) : (1000 / 2250) * A =~ 25%
-
-CFQ IOPS Mode for group scheduling
-===================================
-Basic CFQ design is to provide priority based time slices. Higher priority
-process gets bigger time slice and lower priority process gets smaller time
-slice. Measuring time becomes harder if storage is fast and supports NCQ and
-it would be better to dispatch multiple requests from multiple cfq queues in
-request queue at a time. In such scenario, it is not possible to measure time
-consumed by single queue accurately.
-
-What is possible though is to measure number of requests dispatched from a
-single queue and also allow dispatch from multiple cfq queue at the same time.
-This effectively becomes the fairness in terms of IOPS (IO operations per
-second).
-
-If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches
-to IOPS mode and starts providing fairness in terms of number of requests
-dispatched. Note that this mode switching takes effect only for group
-scheduling. For non-cgroup users nothing should change.
-
-CFQ IO scheduler Idling Theory
-===============================
-Idling on a queue is primarily about waiting for the next request to come
-on same queue after completion of a request. In this process CFQ will not
-dispatch requests from other cfq queues even if requests are pending there.
-
-The rationale behind idling is that it can cut down on number of seeks
-on rotational media. For example, if a process is doing dependent
-sequential reads (next read will come on only after completion of previous
-one), then not dispatching request from other queue should help as we
-did not move the disk head and kept on dispatching sequential IO from
-one queue.
-
-CFQ has following service trees and various queues are put on these trees.
-
- sync-idle sync-noidle async
-
-All cfq queues doing synchronous sequential IO go on to sync-idle tree.
-On this tree we idle on each queue individually.
-
-All synchronous non-sequential queues go on sync-noidle tree. Also any
-synchronous write request which is not marked with REQ_IDLE goes on this
-service tree. On this tree we do not idle on individual queues instead idle
-on the whole group of queues or the tree. So if there are 4 queues waiting
-for IO to dispatch we will idle only once last queue has dispatched the IO
-and there is no more IO on this service tree.
-
-All async writes go on async service tree. There is no idling on async
-queues.
-
-CFQ has some optimizations for SSDs and if it detects a non-rotational
-media which can support higher queue depth (multiple requests at in
-flight at a time), then it cuts down on idling of individual queues and
-all the queues move to sync-noidle tree and only tree idle remains. This
-tree idling provides isolation with buffered write queues on async tree.
-
-FAQ
-===
-Q1. Why to idle at all on queues not marked with REQ_IDLE.
-
-A1. We only do tree idle (all queues on sync-noidle tree) on queues not marked
- with REQ_IDLE. This helps in providing isolation with all the sync-idle
- queues. Otherwise in presence of many sequential readers, other
- synchronous IO might not get fair share of disk.
-
- For example, if there are 10 sequential readers doing IO and they get
- 100ms each. If a !REQ_IDLE request comes in, it will be scheduled
- roughly after 1 second. If after completion of !REQ_IDLE request we
- do not idle, and after a couple of milli seconds a another !REQ_IDLE
- request comes in, again it will be scheduled after 1second. Repeat it
- and notice how a workload can lose its disk share and suffer due to
- multiple sequential readers.
-
- fsync can generate dependent IO where bunch of data is written in the
- context of fsync, and later some journaling data is written. Journaling
- data comes in only after fsync has finished its IO (atleast for ext4
- that seemed to be the case). Now if one decides not to idle on fsync
- thread due to !REQ_IDLE, then next journaling write will not get
- scheduled for another second. A process doing small fsync, will suffer
- badly in presence of multiple sequential readers.
-
- Hence doing tree idling on threads using !REQ_IDLE flag on requests
- provides isolation from multiple sequential readers and at the same
- time we do not idle on individual threads.
-
-Q2. When to specify REQ_IDLE
-A2. I would think whenever one is doing synchronous write and expecting
- more writes to be dispatched from same context soon, should be able
- to specify REQ_IDLE on writes and that probably should work well for
- most of the cases.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index 2c1e67058fd3..39e286d7afc9 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -64,7 +64,7 @@ guess, the kernel will put the process issuing IO to sleep for an amount
of time, before entering a classic poll loop. This mode might be a
little slower than pure classic polling, but it will be more efficient.
If set to a value larger than 0, the kernel will put the process issuing
-IO to sleep for this amont of microseconds before entering classic
+IO to sleep for this amount of microseconds before entering classic
polling.
iostats (RW)
@@ -194,4 +194,31 @@ blk-throttle makes decision based on the samplings. Lower time means cgroups
have more smooth throughput, but higher CPU overhead. This exists only when
CONFIG_BLK_DEV_THROTTLING_LOW is enabled.
+zoned (RO)
+----------
+This indicates if the device is a zoned block device and the zone model of the
+device if it is indeed zoned. The possible values indicated by zoned are
+"none" for regular block devices and "host-aware" or "host-managed" for zoned
+block devices. The characteristics of host-aware and host-managed zoned block
+devices are described in the ZBC (Zoned Block Commands) and ZAC
+(Zoned Device ATA Command Set) standards. These standards also define the
+"drive-managed" zone model. However, since drive-managed zoned block devices
+do not support zone commands, they will be treated as regular block devices
+and zoned will report "none".
+
+nr_zones (RO)
+-------------
+For zoned block devices (zoned attribute indicating "host-managed" or
+"host-aware"), this indicates the total number of zones of the device.
+This is always 0 for regular block devices.
+
+chunk_sectors (RO)
+------------------
+This has different meaning depending on the type of the block device.
+For a RAID device (dm-raid), chunk_sectors indicates the size in 512B sectors
+of the RAID volume stripe segment. For a zoned block device, either host-aware
+or host-managed, chunk_sectors indicates the size in 512B sectors of the zones
+of the device, with the eventual exception of the last zone of the device which
+may be smaller.
+
Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt b/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
index e2effe17f05e..1edbb049cccb 100644
--- a/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
+++ b/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
@@ -16,6 +16,10 @@ Required Properties:
- "rockchip,rk3399-sdhci-5.1", "arasan,sdhci-5.1": rk3399 eMMC PHY
For this device it is strongly suggested to include arasan,soc-ctl-syscon.
- "ti,am654-sdhci-5.1", "arasan,sdhci-5.1": TI AM654 MMC PHY
+ Note: This binding has been deprecated and moved to [5].
+
+ [5] Documentation/devicetree/bindings/mmc/sdhci-am654.txt
+
- reg: From mmc bindings: Register location and length.
- clocks: From clock bindings: Handles to clock inputs.
- clock-names: From clock bindings: Tuple including "clk_xin" and "clk_ahb"
diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
index 3e29050ec769..9201a7d8d7b0 100644
--- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
+++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
@@ -16,6 +16,7 @@ Required properties:
"fsl,imx6sl-usdhc"
"fsl,imx6sx-usdhc"
"fsl,imx7d-usdhc"
+ "fsl,imx8qxp-usdhc"
Optional properties:
- fsl,wp-controller : Indicate to use controller internal write protection
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-am654.txt b/Documentation/devicetree/bindings/mmc/sdhci-am654.txt
new file mode 100644
index 000000000000..15dbbbace27e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-am654.txt
@@ -0,0 +1,36 @@
+Device Tree Bindings for the SDHCI Controllers present on TI's AM654 SOCs
+
+The bindings follow the mmc[1], clock[2] and interrupt[3] bindings.
+Only deviations are documented here.
+
+ [1] Documentation/devicetree/bindings/mmc/mmc.txt
+ [2] Documentation/devicetree/bindings/clock/clock-bindings.txt
+ [3] Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+
+Required Properties:
+ - compatible: should be "ti,am654-sdhci-5.1"
+ - reg: Must be two entries.
+ - The first should be the sdhci register space
+ - The second should the subsystem/phy register space
+ - clocks: Handles to the clock inputs.
+ - clock-names: Tuple including "clk_xin" and "clk_ahb"
+ - interrupts: Interrupt specifiers
+ - ti,otap-del-sel: Output Tap Delay select
+ - ti,trm-icp: DLL trim select
+ - ti,driver-strength-ohm: driver strength in ohms.
+ Valid values are 33, 40, 50, 66 and 100 ohms.
+
+Example:
+
+ sdhci0: sdhci@4f80000 {
+ compatible = "ti,am654-sdhci-5.1";
+ reg = <0x0 0x4f80000 0x0 0x260>, <0x0 0x4f90000 0x0 0x134>;
+ power-domains = <&k3_pds 47>;
+ clocks = <&k3_clks 47 0>, <&k3_clks 47 1>;
+ clock-names = "clk_ahb", "clk_xin";
+ interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>;
+ sdhci-caps-mask = <0x80000007 0x0>;
+ mmc-ddr-1_8v;
+ ti,otap-del-sel = <0x2>;
+ ti,trm-icp = <0x8>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
index 502b3b851ebb..da4edb146a98 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -4,15 +4,28 @@ This file documents differences between the core properties in mmc.txt
and the properties used by the sdhci-msm driver.
Required properties:
-- compatible: Should contain:
+- compatible: Should contain a SoC-specific string and a IP version string:
+ version strings:
"qcom,sdhci-msm-v4" for sdcc versions less than 5.0
- "qcom,sdhci-msm-v5" for sdcc versions >= 5.0
+ "qcom,sdhci-msm-v5" for sdcc version 5.0
For SDCC version 5.0.0, MCI registers are removed from SDCC
interface and some registers are moved to HC. New compatible
string is added to support this change - "qcom,sdhci-msm-v5".
+ full compatible strings with SoC and version:
+ "qcom,apq8084-sdhci", "qcom,sdhci-msm-v4"
+ "qcom,msm8974-sdhci", "qcom,sdhci-msm-v4"
+ "qcom,msm8916-sdhci", "qcom,sdhci-msm-v4"
+ "qcom,msm8992-sdhci", "qcom,sdhci-msm-v4"
+ "qcom,msm8996-sdhci", "qcom,sdhci-msm-v4"
+ "qcom,sdm845-sdhci", "qcom,sdhci-msm-v5"
+ "qcom,qcs404-sdhci", "qcom,sdhci-msm-v5"
+ NOTE that some old device tree files may be floating around that only
+ have the string "qcom,sdhci-msm-v4" without the SoC compatible string
+ but doing that should be considered a deprecated practice.
+
- reg: Base address and length of the register in the following order:
- Host controller register map (required)
- - SD Core register map (required)
+ - SD Core register map (required for msm-v4 and below)
- interrupts: Should contain an interrupt-specifiers for the interrupts:
- Host controller interrupt (required)
- pinctrl-names: Should contain only one value - "default".
@@ -29,7 +42,7 @@ Required properties:
Example:
sdhc_1: sdhci@f9824900 {
- compatible = "qcom,sdhci-msm-v4";
+ compatible = "qcom,msm8974-sdhci", "qcom,sdhci-msm-v4";
reg = <0xf9824900 0x11c>, <0xf9824000 0x800>;
interrupts = <0 123 0>;
bus-width = <8>;
@@ -46,7 +59,7 @@ Example:
};
sdhc_2: sdhci@f98a4900 {
- compatible = "qcom,sdhci-msm-v4";
+ compatible = "qcom,msm8974-sdhci", "qcom,sdhci-msm-v4";
reg = <0xf98a4900 0x11c>, <0xf98a4000 0x800>;
interrupts = <0 125 0>;
bus-width = <4>;
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
index 393848c2138e..72c4dec7e1db 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
@@ -2,6 +2,8 @@
Refer to mmc.txt for standard MMC bindings.
+For UHS devices which require tuning, the device tree should have a "cpu_thermal" node which maps to the appropriate thermal zone. This is used to get the temperature of the zone during tuning.
+
Required properties:
- compatible: Should be "ti,dra7-sdhci" for DRA7 and DRA72 controllers
Should be "ti,k2g-sdhci" for K2G
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
index 27f2eab2981d..2b4f17ca9087 100644
--- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
@@ -13,12 +13,14 @@ Required properties:
- compatible: should contain one or more of the following:
"renesas,sdhi-sh73a0" - SDHI IP on SH73A0 SoC
"renesas,sdhi-r7s72100" - SDHI IP on R7S72100 SoC
+ "renesas,sdhi-r7s9210" - SDHI IP on R7S9210 SoC
"renesas,sdhi-r8a73a4" - SDHI IP on R8A73A4 SoC
"renesas,sdhi-r8a7740" - SDHI IP on R8A7740 SoC
"renesas,sdhi-r8a7743" - SDHI IP on R8A7743 SoC
"renesas,sdhi-r8a7744" - SDHI IP on R8A7744 SoC
"renesas,sdhi-r8a7745" - SDHI IP on R8A7745 SoC
"renesas,sdhi-r8a774a1" - SDHI IP on R8A774A1 SoC
+ "renesas,sdhi-r8a774c0" - SDHI IP on R8A774C0 SoC
"renesas,sdhi-r8a77470" - SDHI IP on R8A77470 SoC
"renesas,sdhi-mmc-r8a77470" - SDHI/MMC IP on R8A77470 SoC
"renesas,sdhi-r8a7778" - SDHI IP on R8A7778 SoC
@@ -56,7 +58,7 @@ Required properties:
"core" and "cd". If the controller only has 1 clock, naming is not
required.
Devices which have more than 1 clock are listed below:
- 2: R7S72100
+ 2: R7S72100, R7S9210
Optional properties:
- pinctrl-names: should be "default", "state_uhs"
diff --git a/Documentation/devicetree/bindings/ufs/cdns,ufshc.txt b/Documentation/devicetree/bindings/ufs/cdns,ufshc.txt
new file mode 100644
index 000000000000..a04a4989ec7f
--- /dev/null
+++ b/Documentation/devicetree/bindings/ufs/cdns,ufshc.txt
@@ -0,0 +1,31 @@
+* Cadence Universal Flash Storage (UFS) Controller
+
+UFS nodes are defined to describe on-chip UFS host controllers.
+Each UFS controller instance should have its own node.
+Please see the ufshcd-pltfrm.txt for a list of all available properties.
+
+Required properties:
+- compatible : Compatible list, contains the following controller:
+ "cdns,ufshc"
+ complemented with the JEDEC version:
+ "jedec,ufs-2.0"
+
+- reg : Address and length of the UFS register set.
+- interrupts : One interrupt mapping.
+- freq-table-hz : Clock frequency table.
+ See the ufshcd-pltfrm.txt for details.
+- clocks : List of phandle and clock specifier pairs.
+- clock-names : List of clock input name strings sorted in the same
+ order as the clocks property. "core_clk" is mandatory.
+ Depending on a type of a PHY,
+ the "phy_clk" clock can also be added, if needed.
+
+Example:
+ ufs@fd030000 {
+ compatible = "cdns,ufshc", "jedec,ufs-2.0";
+ reg = <0xfd030000 0x10000>;
+ interrupts = <0 1 IRQ_TYPE_LEVEL_HIGH>;
+ freq-table-hz = <0 0>, <0 0>;
+ clocks = <&ufs_core_clk>, <&ufs_phy_clk>;
+ clock-names = "core_clk", "phy_clk";
+ };
diff --git a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
index 2df00524bd21..8cf59452c675 100644
--- a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
+++ b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
@@ -33,6 +33,12 @@ Optional properties:
- clocks : List of phandle and clock specifier pairs
- clock-names : List of clock input name strings sorted in the same
order as the clocks property.
+ "ref_clk" indicates reference clock frequency.
+ UFS host supplies reference clock to UFS device and UFS device
+ specification allows host to provide one of the 4 frequencies (19.2 MHz,
+ 26 MHz, 38.4 MHz, 52MHz) for reference clock. This "ref_clk" entry is
+ parsed and used to update the reference clock setting in device.
+ Defaults to 26 MHz(as per specification) if not specified by host.
- freq-table-hz : Array of <min max> operating frequencies stored in the same
order as the clocks property. If this property is not
defined or a value in the array is "0" then it is assumed
diff --git a/Documentation/features/io/sg-chain/arch-support.txt b/Documentation/features/io/sg-chain/arch-support.txt
deleted file mode 100644
index 6554f0372c3f..000000000000
--- a/Documentation/features/io/sg-chain/arch-support.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-#
-# Feature name: sg-chain
-# Kconfig: ARCH_HAS_SG_CHAIN
-# description: arch supports chained scatter-gather lists
-#
- -----------------------
- | arch |status|
- -----------------------
- | alpha: | TODO |
- | arc: | ok |
- | arm: | ok |
- | arm64: | ok |
- | c6x: | TODO |
- | h8300: | TODO |
- | hexagon: | TODO |
- | ia64: | ok |
- | m68k: | TODO |
- | microblaze: | TODO |
- | mips: | TODO |
- | nds32: | TODO |
- | nios2: | TODO |
- | openrisc: | TODO |
- | parisc: | TODO |
- | powerpc: | ok |
- | riscv: | TODO |
- | s390: | ok |
- | sh: | TODO |
- | sparc: | ok |
- | um: | TODO |
- | unicore32: | TODO |
- | x86: | ok |
- | xtensa: | TODO |
- -----------------------
diff --git a/Documentation/nvdimm/security.txt b/Documentation/nvdimm/security.txt
new file mode 100644
index 000000000000..4c36c05ca98e
--- /dev/null
+++ b/Documentation/nvdimm/security.txt
@@ -0,0 +1,141 @@
+NVDIMM SECURITY
+===============
+
+1. Introduction
+---------------
+
+With the introduction of Intel Device Specific Methods (DSM) v1.8
+specification [1], security DSMs are introduced. The spec added the following
+security DSMs: "get security state", "set passphrase", "disable passphrase",
+"unlock unit", "freeze lock", "secure erase", and "overwrite". A security_ops
+data structure has been added to struct dimm in order to support the security
+operations and generic APIs are exposed to allow vendor neutral operations.
+
+2. Sysfs Interface
+------------------
+The "security" sysfs attribute is provided in the nvdimm sysfs directory. For
+example:
+/sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/security
+
+The "show" attribute of that attribute will display the security state for
+that DIMM. The following states are available: disabled, unlocked, locked,
+frozen, and overwrite. If security is not supported, the sysfs attribute
+will not be visible.
+
+The "store" attribute takes several commands when it is being written to
+in order to support some of the security functionalities:
+update <old_keyid> <new_keyid> - enable or update passphrase.
+disable <keyid> - disable enabled security and remove key.
+freeze - freeze changing of security states.
+erase <keyid> - delete existing user encryption key.
+overwrite <keyid> - wipe the entire nvdimm.
+master_update <keyid> <new_keyid> - enable or update master passphrase.
+master_erase <keyid> - delete existing user encryption key.
+
+3. Key Management
+-----------------
+
+The key is associated to the payload by the DIMM id. For example:
+# cat /sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/nfit/id
+8089-a2-1740-00000133
+The DIMM id would be provided along with the key payload (passphrase) to
+the kernel.
+
+The security keys are managed on the basis of a single key per DIMM. The
+key "passphrase" is expected to be 32bytes long. This is similar to the ATA
+security specification [2]. A key is initially acquired via the request_key()
+kernel API call during nvdimm unlock. It is up to the user to make sure that
+all the keys are in the kernel user keyring for unlock.
+
+A nvdimm encrypted-key of format enc32 has the description format of:
+nvdimm:<bus-provider-specific-unique-id>
+
+See file ``Documentation/security/keys/trusted-encrypted.rst`` for creating
+encrypted-keys of enc32 format. TPM usage with a master trusted key is
+preferred for sealing the encrypted-keys.
+
+4. Unlocking
+------------
+When the DIMMs are being enumerated by the kernel, the kernel will attempt to
+retrieve the key from the kernel user keyring. This is the only time
+a locked DIMM can be unlocked. Once unlocked, the DIMM will remain unlocked
+until reboot. Typically an entity (i.e. shell script) will inject all the
+relevant encrypted-keys into the kernel user keyring during the initramfs phase.
+This provides the unlock function access to all the related keys that contain
+the passphrase for the respective nvdimms. It is also recommended that the
+keys are injected before libnvdimm is loaded by modprobe.
+
+5. Update
+---------
+When doing an update, it is expected that the existing key is removed from
+the kernel user keyring and reinjected as different (old) key. It's irrelevant
+what the key description is for the old key since we are only interested in the
+keyid when doing the update operation. It is also expected that the new key
+is injected with the description format described from earlier in this
+document. The update command written to the sysfs attribute will be with
+the format:
+update <old keyid> <new keyid>
+
+If there is no old keyid due to a security enabling, then a 0 should be
+passed in.
+
+6. Freeze
+---------
+The freeze operation does not require any keys. The security config can be
+frozen by a user with root privelege.
+
+7. Disable
+----------
+The security disable command format is:
+disable <keyid>
+
+An key with the current passphrase payload that is tied to the nvdimm should be
+in the kernel user keyring.
+
+8. Secure Erase
+---------------
+The command format for doing a secure erase is:
+erase <keyid>
+
+An key with the current passphrase payload that is tied to the nvdimm should be
+in the kernel user keyring.
+
+9. Overwrite
+------------
+The command format for doing an overwrite is:
+overwrite <keyid>
+
+Overwrite can be done without a key if security is not enabled. A key serial
+of 0 can be passed in to indicate no key.
+
+The sysfs attribute "security" can be polled to wait on overwrite completion.
+Overwrite can last tens of minutes or more depending on nvdimm size.
+
+An encrypted-key with the current user passphrase that is tied to the nvdimm
+should be injected and its keyid should be passed in via sysfs.
+
+10. Master Update
+-----------------
+The command format for doing a master update is:
+update <old keyid> <new keyid>
+
+The operating mechanism for master update is identical to update except the
+master passphrase key is passed to the kernel. The master passphrase key
+is just another encrypted-key.
+
+This command is only available when security is disabled.
+
+11. Master Erase
+----------------
+The command format for doing a master erase is:
+master_erase <current keyid>
+
+This command has the same operating mechanism as erase except the master
+passphrase key is passed to the kernel. The master passphrase key is just
+another encrypted-key.
+
+This command is only available when the master security is enabled, indicated
+by the extended security status.
+
+[1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf
+[2]: http://www.t13.org/documents/UploadedDocuments/docs2006/e05179r4-ACS-SecurityClarifications.pdf
diff --git a/Documentation/scsi/scsi-parameters.txt b/Documentation/scsi/scsi-parameters.txt
index 92999d4e0cb8..25a4b4cf04a6 100644
--- a/Documentation/scsi/scsi-parameters.txt
+++ b/Documentation/scsi/scsi-parameters.txt
@@ -97,11 +97,6 @@ parameters may be changed at runtime by the command
allowing boot to proceed. none ignores them, expecting
user space to do the scan.
- scsi_mod.use_blk_mq=
- [SCSI] use blk-mq I/O path by default
- See SCSI_MQ_DEFAULT in drivers/scsi/Kconfig.
- Format: <y/n>
-
sim710= [SCSI,HW]
See header of drivers/scsi/sim710.c.
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index 177c031763c0..c1dd4939f4ae 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -1098,8 +1098,6 @@ of interest:
unchecked_isa_dma - 1=>only use bottom 16 MB of ram (ISA DMA addressing
restriction), 0=>can use full 32 bit (or better) DMA
address space
- use_clustering - 1=>SCSI commands in mid level's queue can be merged,
- 0=>disallow SCSI command merging
no_async_abort - 1=>Asynchronous aborts are not supported
0=>Timed-out commands will be aborted asynchronously
hostt - pointer to driver's struct scsi_host_template from which
diff --git a/Documentation/security/keys/trusted-encrypted.rst b/Documentation/security/keys/trusted-encrypted.rst
index 3bb24e09a332..e8a1c35cd277 100644
--- a/Documentation/security/keys/trusted-encrypted.rst
+++ b/Documentation/security/keys/trusted-encrypted.rst
@@ -76,7 +76,7 @@ Usage::
Where::
- format:= 'default | ecryptfs'
+ format:= 'default | ecryptfs | enc32'
key-type:= 'trusted' | 'user'
@@ -173,3 +173,7 @@ are anticipated. In particular the new format 'ecryptfs' has been defined in
in order to use encrypted keys to mount an eCryptfs filesystem. More details
about the usage can be found in the file
``Documentation/security/keys/ecryptfs.rst``.
+
+Another new format 'enc32' has been defined in order to support encrypted keys
+with payload size of 32 bytes. This will initially be used for nvdimm security
+but may expand to other usages that require 32 bytes payload.
diff --git a/Documentation/sh/new-machine.txt b/Documentation/sh/new-machine.txt
index f0354164cb0e..e0961a66130b 100644
--- a/Documentation/sh/new-machine.txt
+++ b/Documentation/sh/new-machine.txt
@@ -116,7 +116,6 @@ might look something like:
* arch/sh/boards/vapor/setup.c - Setup code for imaginary board
*/
#include <linux/init.h>
-#include <asm/rtc.h> /* for board_time_init() */
const char *get_system_type(void)
{
@@ -132,13 +131,6 @@ int __init platform_setup(void)
* this board.
*/
- /*
- * Presume all FooTech boards have the same broken timer,
- * and also presume that we've defined foo_timer_init to
- * do something useful.
- */
- board_time_init = foo_timer_init;
-
/* Start-up imaginary PCI ... */
/* And whatever else ... */
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index ad6d2a80cf05..abc53886655e 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -209,7 +209,7 @@ IOMMU (input/output memory management unit)
mapping with memory protection, etc.
Kernel boot message: "PCI-DMA: Using Calgary IOMMU"
- iommu=[<size>][,noagp][,off][,force][,noforce][,leak[=<nr_of_leak_pages>]
+ iommu=[<size>][,noagp][,off][,force][,noforce]
[,memaper[=<order>]][,merge][,fullflush][,nomerge]
[,noaperture][,calgary]
@@ -228,9 +228,6 @@ IOMMU (input/output memory management unit)
allowed Overwrite iommu off workarounds for specific chipsets.
fullflush Flush IOMMU on each allocation (default).
nofullflush Don't use IOMMU fullflush.
- leak Turn on simple iommu leak tracing (only when
- CONFIG_IOMMU_LEAK is on). Default number of leak pages
- is 20.
memaper[=<order>] Allocate an own aperture over RAM with size 32MB<<order.
(default: order=1, i.e. 64MB)
merge Do scatter-gather (SG) merging. Implies "force"
diff --git a/MAINTAINERS b/MAINTAINERS
index 7a9804a891fd..784c78a4ae7e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3688,6 +3688,8 @@ F: drivers/net/ethernet/cisco/enic/
CISCO VIC LOW LATENCY NIC DRIVER
M: Christian Benvenuti <benve@cisco.com>
+M: Nelson Escobar <neescoba@cisco.com>
+M: Parvi Kaustubhi <pkaustub@cisco.com>
S: Supported
F: drivers/infiniband/hw/usnic/
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 5b4f88363453..5da6ff54b3e7 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -5,6 +5,7 @@ config ALPHA
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_NO_PREEMPT
+ select ARCH_NO_SG_CHAIN
select ARCH_USE_CMPXCHG_LOCKREF
select HAVE_AOUT
select HAVE_IDE
@@ -202,7 +203,6 @@ config ALPHA_EIGER
config ALPHA_JENSEN
bool "Jensen"
depends on BROKEN
- select DMA_DIRECT_OPS
help
DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one
of the first-generation Alpha systems. A number of these systems
diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h
index 8beeafd4f68e..0ee6a5c99b16 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -7,7 +7,7 @@ extern const struct dma_map_ops alpha_pci_ops;
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
#ifdef CONFIG_ALPHA_JENSEN
- return &dma_direct_ops;
+ return NULL;
#else
return &alpha_pci_ops;
#endif
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 46e08e0d9181..aa0f50d0f823 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -291,7 +291,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
use direct_map above, it now must be considered an error. */
if (! alpha_mv.mv_pci_tbi) {
printk_once(KERN_WARNING "pci_map_single: no HW sg\n");
- return 0;
+ return DMA_MAPPING_ERROR;
}
arena = hose->sg_pci;
@@ -307,7 +307,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
if (dma_ofs < 0) {
printk(KERN_WARNING "pci_map_single failed: "
"could not allocate dma page tables\n");
- return 0;
+ return DMA_MAPPING_ERROR;
}
paddr &= PAGE_MASK;
@@ -443,7 +443,7 @@ static void *alpha_pci_alloc_coherent(struct device *dev, size_t size,
gfp &= ~GFP_DMA;
try_again:
- cpu_addr = (void *)__get_free_pages(gfp, order);
+ cpu_addr = (void *)__get_free_pages(gfp | __GFP_ZERO, order);
if (! cpu_addr) {
printk(KERN_INFO "pci_alloc_consistent: "
"get_free_pages failed from %pf\n",
@@ -455,7 +455,7 @@ try_again:
memset(cpu_addr, 0, size);
*dma_addrp = pci_map_single_1(pdev, cpu_addr, size, 0);
- if (*dma_addrp == 0) {
+ if (*dma_addrp == DMA_MAPPING_ERROR) {
free_pages((unsigned long)cpu_addr, order);
if (alpha_mv.mv_pci_tbi || (gfp & GFP_DMA))
return NULL;
@@ -671,7 +671,7 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg,
sg->dma_address
= pci_map_single_1(pdev, SG_ENT_VIRT_ADDRESS(sg),
sg->length, dac_allowed);
- return sg->dma_address != 0;
+ return sg->dma_address != DMA_MAPPING_ERROR;
}
start = sg;
@@ -935,11 +935,6 @@ iommu_unbind(struct pci_iommu_arena *arena, long pg_start, long pg_count)
return 0;
}
-static int alpha_pci_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == 0;
-}
-
const struct dma_map_ops alpha_pci_ops = {
.alloc = alpha_pci_alloc_coherent,
.free = alpha_pci_free_coherent,
@@ -947,7 +942,6 @@ const struct dma_map_ops alpha_pci_ops = {
.unmap_page = alpha_pci_unmap_page,
.map_sg = alpha_pci_map_sg,
.unmap_sg = alpha_pci_unmap_sg,
- .mapping_error = alpha_pci_mapping_error,
.dma_supported = alpha_pci_supported,
};
EXPORT_SYMBOL(alpha_pci_ops);
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index dadb494d83fd..ca8e26d045a9 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -13,12 +13,10 @@ config ARC
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
- select ARCH_HAS_SG_CHAIN
select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
- select DMA_DIRECT_OPS
select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
select GENERIC_CLOCKEVENTS
select GENERIC_FIND_FIRST_BIT
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index cf9619d4efb4..4135abec3fb0 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -1294,7 +1294,7 @@ void __init arc_cache_init_master(void)
/*
* In case of IOC (say IOC+SLC case), pointers above could still be set
* but end up not being relevant as the first function in chain is not
- * called at all for @dma_direct_ops
+ * called at all for devices using coherent DMA.
* arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*()
*/
}
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index db203ff69ccf..1525ac00fd02 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -33,7 +33,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
*/
BUG_ON(gfp & __GFP_HIGHMEM);
- page = alloc_pages(gfp, order);
+ page = alloc_pages(gfp | __GFP_ZERO, order);
if (!page)
return NULL;
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 5c0305585a0a..2196aac0e45c 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -19,6 +19,7 @@ config ARM
select ARCH_HAVE_CUSTOM_GPIO_H
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
@@ -29,7 +30,7 @@ config ARM
select CLONE_BACKWARDS
select CPU_PM if (SUSPEND || CPU_IDLE)
select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
- select DMA_DIRECT_OPS if !MMU
+ select DMA_REMAP if MMU
select EDAC_SUPPORT
select EDAC_ATOMIC_SCRUB
select GENERIC_ALLOCATOR
@@ -118,7 +119,6 @@ config ARM
<http://www.arm.linux.org.uk/>.
config ARM_HAS_SG_CHAIN
- select ARCH_HAS_SG_CHAIN
bool
config ARM_DMA_USE_IOMMU
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 9a92de63426f..5ba4622030ca 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -257,7 +257,7 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size,
if (buf == NULL) {
dev_err(dev, "%s: unable to map unsafe buffer %p!\n",
__func__, ptr);
- return ARM_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
@@ -327,7 +327,7 @@ static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page,
ret = needs_bounce(dev, dma_addr, size);
if (ret < 0)
- return ARM_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
if (ret == 0) {
arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir);
@@ -336,7 +336,7 @@ static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page,
if (PageHighMem(page)) {
dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n");
- return ARM_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
return map_single(dev, page_address(page) + offset, size, dir, attrs);
@@ -453,11 +453,6 @@ static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
return arm_dma_ops.dma_supported(dev, dma_mask);
}
-static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return arm_dma_ops.mapping_error(dev, dma_addr);
-}
-
static const struct dma_map_ops dmabounce_ops = {
.alloc = arm_dma_alloc,
.free = arm_dma_free,
@@ -472,7 +467,6 @@ static const struct dma_map_ops dmabounce_ops = {
.sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
.sync_sg_for_device = arm_dma_sync_sg_for_device,
.dma_supported = dmabounce_dma_supported,
- .mapping_error = dmabounce_mapping_error,
};
static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev,
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
index 6821f1249300..772f48ef84b7 100644
--- a/arch/arm/include/asm/dma-iommu.h
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -9,8 +9,6 @@
#include <linux/dma-debug.h>
#include <linux/kref.h>
-#define ARM_MAPPING_ERROR (~(dma_addr_t)0x0)
-
struct dma_iommu_mapping {
/* iommu specific data */
struct iommu_domain *domain;
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 965b7c846ecb..31d3b96f0f4b 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -18,7 +18,7 @@ extern const struct dma_map_ops arm_coherent_dma_ops;
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
- return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : &dma_direct_ops;
+ return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : NULL;
}
#ifdef __arch_page_to_dma
diff --git a/arch/arm/mach-ep93xx/simone.c b/arch/arm/mach-ep93xx/simone.c
index 41aa57581356..80ccb984d521 100644
--- a/arch/arm/mach-ep93xx/simone.c
+++ b/arch/arm/mach-ep93xx/simone.c
@@ -25,6 +25,7 @@
#include <linux/platform_data/video-ep93xx.h>
#include <linux/platform_data/spi-ep93xx.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <mach/hardware.h>
#include <mach/gpio-ep93xx.h>
@@ -45,9 +46,15 @@ static struct ep93xxfb_mach_info __initdata simone_fb_info = {
static struct mmc_spi_platform_data simone_mmc_spi_data = {
.detect_delay = 500,
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .flags = MMC_SPI_USE_CD_GPIO,
- .cd_gpio = EP93XX_GPIO_LINE_EGPIO0,
- .cd_debounce = 1,
+};
+
+static struct gpiod_lookup_table simone_mmc_spi_gpio_table = {
+ .dev_id = "mmc_spi.0", /* "mmc_spi" @ CS0 */
+ .table = {
+ /* Card detect */
+ GPIO_LOOKUP_IDX("A", 0, NULL, 0, GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct spi_board_info simone_spi_devices[] __initdata = {
@@ -105,6 +112,7 @@ static void __init simone_init_machine(void)
ep93xx_register_fb(&simone_fb_info);
ep93xx_register_i2c(simone_i2c_board_info,
ARRAY_SIZE(simone_i2c_board_info));
+ gpiod_add_lookup_table(&simone_mmc_spi_gpio_table);
ep93xx_register_spi(&simone_spi_info, simone_spi_devices,
ARRAY_SIZE(simone_spi_devices));
simone_register_audio();
diff --git a/arch/arm/mach-ep93xx/vision_ep9307.c b/arch/arm/mach-ep93xx/vision_ep9307.c
index 5a0b6187990a..767ee64628dc 100644
--- a/arch/arm/mach-ep93xx/vision_ep9307.c
+++ b/arch/arm/mach-ep93xx/vision_ep9307.c
@@ -18,6 +18,7 @@
#include <linux/platform_device.h>
#include <linux/irq.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/fb.h>
#include <linux/io.h>
#include <linux/mtd/partitions.h>
@@ -202,13 +203,20 @@ static struct mmc_spi_platform_data vision_spi_mmc_data = {
.detect_delay = 100,
.powerup_msecs = 100,
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .flags = MMC_SPI_USE_CD_GPIO | MMC_SPI_USE_RO_GPIO,
- .cd_gpio = EP93XX_GPIO_LINE_EGPIO15,
- .cd_debounce = 1,
- .ro_gpio = EP93XX_GPIO_LINE_F(0),
.caps2 = MMC_CAP2_RO_ACTIVE_HIGH,
};
+static struct gpiod_lookup_table vision_spi_mmc_gpio_table = {
+ .dev_id = "mmc_spi.2", /* "mmc_spi @ CS2 */
+ .table = {
+ /* Card detect */
+ GPIO_LOOKUP_IDX("B", 7, NULL, 0, GPIO_ACTIVE_LOW),
+ /* Write protect */
+ GPIO_LOOKUP_IDX("F", 0, NULL, 1, GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
/*************************************************************************
* SPI Bus
*************************************************************************/
@@ -286,6 +294,7 @@ static void __init vision_init_machine(void)
ep93xx_register_i2c(vision_i2c_info,
ARRAY_SIZE(vision_i2c_info));
+ gpiod_add_lookup_table(&vision_spi_mmc_gpio_table);
ep93xx_register_spi(&vision_spi_master, vision_spi_board_info,
ARRAY_SIZE(vision_spi_board_info));
vision_register_i2s();
diff --git a/arch/arm/mach-imx/mach-pcm043.c b/arch/arm/mach-imx/mach-pcm043.c
index e595e5368676..46ba3348e8f0 100644
--- a/arch/arm/mach-imx/mach-pcm043.c
+++ b/arch/arm/mach-imx/mach-pcm043.c
@@ -20,6 +20,7 @@
#include <linux/mtd/plat-ram.h>
#include <linux/memory.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/smc911x.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
@@ -214,8 +215,6 @@ static const iomux_v3_cfg_t pcm043_pads[] __initconst = {
#define AC97_GPIO_TXFS IMX_GPIO_NR(2, 31)
#define AC97_GPIO_TXD IMX_GPIO_NR(2, 28)
#define AC97_GPIO_RESET IMX_GPIO_NR(2, 0)
-#define SD1_GPIO_WP IMX_GPIO_NR(2, 23)
-#define SD1_GPIO_CD IMX_GPIO_NR(2, 24)
static void pcm043_ac97_warm_reset(struct snd_ac97 *ac97)
{
@@ -341,12 +340,21 @@ static int __init pcm043_otg_mode(char *options)
__setup("otg_mode=", pcm043_otg_mode);
static struct esdhc_platform_data sd1_pdata = {
- .wp_gpio = SD1_GPIO_WP,
- .cd_gpio = SD1_GPIO_CD,
.wp_type = ESDHC_WP_GPIO,
.cd_type = ESDHC_CD_GPIO,
};
+static struct gpiod_lookup_table sd1_gpio_table = {
+ .dev_id = "sdhci-esdhc-imx35.0",
+ .table = {
+ /* Card detect: bank 2 offset 24 */
+ GPIO_LOOKUP("imx35-gpio.2", 24, "cd", GPIO_ACTIVE_LOW),
+ /* Write protect: bank 2 offset 23 */
+ GPIO_LOOKUP("imx35-gpio.2", 23, "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
/*
* Board specific initialization.
*/
@@ -391,6 +399,7 @@ static void __init pcm043_late_init(void)
{
imx35_add_imx_ssi(0, &pcm043_ssi_pdata);
+ gpiod_add_lookup_table(&sd1_gpio_table);
imx35_add_sdhci_esdhc_imx(0, &sd1_pdata);
}
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index c52c081eb6d9..4bcbd3d55b36 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -290,9 +290,6 @@ static unsigned long balloon3_mmc_pin_config[] __initdata = {
static struct pxamci_platform_data balloon3_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
.detect_delay_ms = 200,
};
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index be4a66166d61..f7081a50dc67 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -12,6 +12,7 @@
#include <linux/platform_device.h>
#include <linux/irq.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/delay.h>
#include <linux/platform_data/rtc-v3020.h>
@@ -288,14 +289,23 @@ static inline void cmx270_init_ohci(void) {}
#if defined(CONFIG_MMC) || defined(CONFIG_MMC_MODULE)
static struct pxamci_platform_data cmx270_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
- .gpio_card_detect = GPIO83_MMC_IRQ,
- .gpio_card_ro = -1,
- .gpio_power = GPIO105_MMC_POWER,
- .gpio_power_invert = 1,
+};
+
+static struct gpiod_lookup_table cmx270_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ /* Card detect on GPIO 83 */
+ GPIO_LOOKUP("gpio-pxa", GPIO83_MMC_IRQ, "cd", GPIO_ACTIVE_LOW),
+ /* Power on GPIO 105 */
+ GPIO_LOOKUP("gpio-pxa", GPIO105_MMC_POWER,
+ "power", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static void __init cmx270_init_mmc(void)
{
+ gpiod_add_lookup_table(&cmx270_mci_gpio_table);
pxa_set_mci_info(&cmx270_mci_platform_data);
}
#else
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index c5c0ab8ac9f9..109fab292f94 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -459,9 +459,17 @@ static inline void cm_x300_init_nand(void) {}
static struct pxamci_platform_data cm_x300_mci_platform_data = {
.detect_delay_ms = 200,
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
- .gpio_card_detect = GPIO82_MMC_IRQ,
- .gpio_card_ro = GPIO85_MMC_WP,
- .gpio_power = -1,
+};
+
+static struct gpiod_lookup_table cm_x300_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ /* Card detect on GPIO 82 */
+ GPIO_LOOKUP("gpio-pxa", GPIO82_MMC_IRQ, "cd", GPIO_ACTIVE_LOW),
+ /* Write protect on GPIO 85 */
+ GPIO_LOOKUP("gpio-pxa", GPIO85_MMC_WP, "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
};
/* The second MMC slot of CM-X300 is hardwired to Libertas card and has
@@ -482,13 +490,11 @@ static struct pxamci_platform_data cm_x300_mci2_platform_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
.init = cm_x300_mci2_init,
.exit = cm_x300_mci2_exit,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
static void __init cm_x300_init_mmc(void)
{
+ gpiod_add_lookup_table(&cm_x300_mci_gpio_table);
pxa_set_mci_info(&cm_x300_mci_platform_data);
pxa3xx_set_mci2_info(&cm_x300_mci2_platform_data);
}
diff --git a/arch/arm/mach-pxa/colibri-evalboard.c b/arch/arm/mach-pxa/colibri-evalboard.c
index 10e2278b7a28..2ccdef5de138 100644
--- a/arch/arm/mach-pxa/colibri-evalboard.c
+++ b/arch/arm/mach-pxa/colibri-evalboard.c
@@ -14,7 +14,7 @@
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/interrupt.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <asm/mach-types.h>
#include <mach/hardware.h>
#include <asm/mach/arch.h>
@@ -37,22 +37,44 @@
#if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE)
static struct pxamci_platform_data colibri_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .gpio_power = -1,
- .gpio_card_ro = -1,
.detect_delay_ms = 200,
};
+static struct gpiod_lookup_table colibri_pxa270_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO0_COLIBRI_PXA270_SD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
+static struct gpiod_lookup_table colibri_pxa300_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO13_COLIBRI_PXA300_SD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
+static struct gpiod_lookup_table colibri_pxa320_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO28_COLIBRI_PXA320_SD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init colibri_mmc_init(void)
{
if (machine_is_colibri()) /* PXA270 Colibri */
- colibri_mci_platform_data.gpio_card_detect =
- GPIO0_COLIBRI_PXA270_SD_DETECT;
+ gpiod_add_lookup_table(&colibri_pxa270_mci_gpio_table);
if (machine_is_colibri300()) /* PXA300 Colibri */
- colibri_mci_platform_data.gpio_card_detect =
- GPIO13_COLIBRI_PXA300_SD_DETECT;
+ gpiod_add_lookup_table(&colibri_pxa300_mci_gpio_table);
else /* PXA320 Colibri */
- colibri_mci_platform_data.gpio_card_detect =
- GPIO28_COLIBRI_PXA320_SD_DETECT;
+ gpiod_add_lookup_table(&colibri_pxa320_mci_gpio_table);
pxa_set_mci_info(&colibri_mci_platform_data);
}
diff --git a/arch/arm/mach-pxa/colibri-pxa270-income.c b/arch/arm/mach-pxa/colibri-pxa270-income.c
index 3ccf2a95569b..d203dd30cdd0 100644
--- a/arch/arm/mach-pxa/colibri-pxa270-income.c
+++ b/arch/arm/mach-pxa/colibri-pxa270-income.c
@@ -14,7 +14,7 @@
#include <linux/bitops.h>
#include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/leds.h>
@@ -51,14 +51,25 @@
#if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE)
static struct pxamci_platform_data income_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .gpio_power = -1,
- .gpio_card_detect = GPIO0_INCOME_SD_DETECT,
- .gpio_card_ro = GPIO0_INCOME_SD_RO,
.detect_delay_ms = 200,
};
+static struct gpiod_lookup_table income_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ /* Card detect on GPIO 0 */
+ GPIO_LOOKUP("gpio-pxa", GPIO0_INCOME_SD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ /* Write protect on GPIO 1 */
+ GPIO_LOOKUP("gpio-pxa", GPIO0_INCOME_SD_RO,
+ "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init income_mmc_init(void)
{
+ gpiod_add_lookup_table(&income_mci_gpio_table);
pxa_set_mci_info(&income_mci_platform_data);
}
#else
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 9a5a35e90769..c9732cace5e3 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -24,6 +24,7 @@
#include <linux/mtd/physmap.h>
#include <linux/pm.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/backlight.h>
#include <linux/i2c.h>
#include <linux/platform_data/i2c-pxa.h>
@@ -493,11 +494,23 @@ static struct platform_device corgi_audio_device = {
static struct pxamci_platform_data corgi_mci_platform_data = {
.detect_delay_ms = 250,
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
- .gpio_card_detect = CORGI_GPIO_nSD_DETECT,
- .gpio_card_ro = CORGI_GPIO_nSD_WP,
- .gpio_power = CORGI_GPIO_SD_PWR,
};
+static struct gpiod_lookup_table corgi_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ /* Card detect on GPIO 9 */
+ GPIO_LOOKUP("gpio-pxa", CORGI_GPIO_nSD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ /* Write protect on GPIO 7 */
+ GPIO_LOOKUP("gpio-pxa", CORGI_GPIO_nSD_WP,
+ "wp", GPIO_ACTIVE_LOW),
+ /* Power on GPIO 33 */
+ GPIO_LOOKUP("gpio-pxa", CORGI_GPIO_SD_PWR,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
/*
* Irda
@@ -731,6 +744,7 @@ static void __init corgi_init(void)
corgi_init_spi();
pxa_set_udc_info(&udc_info);
+ gpiod_add_lookup_table(&corgi_mci_gpio_table);
pxa_set_mci_info(&corgi_mci_platform_data);
pxa_set_ficp_info(&corgi_ficp_platform_data);
pxa_set_i2c_info(NULL);
diff --git a/arch/arm/mach-pxa/csb726.c b/arch/arm/mach-pxa/csb726.c
index 271aedae7542..e26e7e60a169 100644
--- a/arch/arm/mach-pxa/csb726.c
+++ b/arch/arm/mach-pxa/csb726.c
@@ -11,7 +11,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/platform_device.h>
#include <linux/mtd/physmap.h>
#include <linux/mtd/partitions.h>
@@ -129,9 +129,19 @@ static struct pxamci_platform_data csb726_mci = {
.detect_delay_ms = 500,
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
/* FIXME setpower */
- .gpio_card_detect = CSB726_GPIO_MMC_DETECT,
- .gpio_card_ro = CSB726_GPIO_MMC_RO,
- .gpio_power = -1,
+};
+
+static struct gpiod_lookup_table csb726_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ /* Card detect on GPIO 100 */
+ GPIO_LOOKUP("gpio-pxa", CSB726_GPIO_MMC_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ /* Write protect on GPIO 101 */
+ GPIO_LOOKUP("gpio-pxa", CSB726_GPIO_MMC_RO,
+ "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct pxaohci_platform_data csb726_ohci_platform_data = {
@@ -264,6 +274,7 @@ static void __init csb726_init(void)
pxa_set_stuart_info(NULL);
pxa_set_i2c_info(NULL);
pxa27x_set_i2c_power_info(NULL);
+ gpiod_add_lookup_table(&csb726_mci_gpio_table);
pxa_set_mci_info(&csb726_mci);
pxa_set_ohci_info(&csb726_ohci_platform_data);
pxa_set_ac97_info(NULL);
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 67e37df637f5..32c1edeb3f14 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -20,6 +20,7 @@
#include <linux/input.h>
#include <linux/gpio_keys.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/mfd/da903x.h>
#include <linux/regulator/machine.h>
#include <linux/regulator/fixed.h>
@@ -546,6 +547,15 @@ static inline void em_x270_init_ohci(void) {}
#if defined(CONFIG_MMC) || defined(CONFIG_MMC_MODULE)
static struct regulator *em_x270_sdio_ldo;
+static struct gpiod_lookup_table em_x270_mci_wp_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ /* Write protect on GPIO 95 */
+ GPIO_LOOKUP("gpio-pxa", GPIO95_MMC_WP, "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static int em_x270_mci_init(struct device *dev,
irq_handler_t em_x270_detect_int,
void *data)
@@ -567,15 +577,7 @@ static int em_x270_mci_init(struct device *dev,
goto err_irq;
}
- if (machine_is_em_x270()) {
- err = gpio_request(GPIO95_MMC_WP, "MMC WP");
- if (err) {
- dev_err(dev, "can't request MMC write protect: %d\n",
- err);
- goto err_gpio_wp;
- }
- gpio_direction_input(GPIO95_MMC_WP);
- } else {
+ if (!machine_is_em_x270()) {
err = gpio_request(GPIO38_SD_PWEN, "sdio power");
if (err) {
dev_err(dev, "can't request MMC power control : %d\n",
@@ -615,17 +617,10 @@ static void em_x270_mci_exit(struct device *dev, void *data)
free_irq(gpio_to_irq(mmc_cd), data);
regulator_put(em_x270_sdio_ldo);
- if (machine_is_em_x270())
- gpio_free(GPIO95_MMC_WP);
- else
+ if (!machine_is_em_x270())
gpio_free(GPIO38_SD_PWEN);
}
-static int em_x270_mci_get_ro(struct device *dev)
-{
- return gpio_get_value(GPIO95_MMC_WP);
-}
-
static struct pxamci_platform_data em_x270_mci_platform_data = {
.detect_delay_ms = 250,
.ocr_mask = MMC_VDD_20_21|MMC_VDD_21_22|MMC_VDD_22_23|
@@ -635,15 +630,12 @@ static struct pxamci_platform_data em_x270_mci_platform_data = {
.init = em_x270_mci_init,
.setpower = em_x270_mci_setpower,
.exit = em_x270_mci_exit,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
static void __init em_x270_init_mmc(void)
{
if (machine_is_em_x270())
- em_x270_mci_platform_data.get_ro = em_x270_mci_get_ro;
+ gpiod_add_lookup_table(&em_x270_mci_wp_gpio_table);
pxa_set_mci_info(&em_x270_mci_platform_data);
}
diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c
index 9c5b2fb054f9..4764acca5480 100644
--- a/arch/arm/mach-pxa/gumstix.c
+++ b/arch/arm/mach-pxa/gumstix.c
@@ -90,9 +90,6 @@ static struct platform_device *devices[] __initdata = {
#ifdef CONFIG_MMC_PXA
static struct pxamci_platform_data gumstix_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
static void __init gumstix_mmc_init(void)
diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index 88e0068f92a8..7bfc246a1d75 100644
--- a/arch/arm/mach-pxa/idp.c
+++ b/arch/arm/mach-pxa/idp.c
@@ -160,9 +160,6 @@ static struct pxafb_mach_info sharp_lm8v31 = {
static struct pxamci_platform_data idp_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
static void __init idp_init(void)
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index 9e132b3e48c6..8e0b60a33026 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -20,7 +20,7 @@
#include <linux/delay.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/spi/spi.h>
#include <linux/spi/pxa2xx_spi.h>
#include <linux/smc91x.h>
@@ -51,8 +51,6 @@
#include "generic.h"
-#define GPIO_MMC1_CARD_DETECT mfp_to_gpio(MFP_PIN_GPIO15)
-
/* Littleton MFP configurations */
static mfp_cfg_t littleton_mfp_cfg[] __initdata = {
/* LCD */
@@ -278,13 +276,21 @@ static inline void littleton_init_keypad(void) {}
static struct pxamci_platform_data littleton_mci_platform_data = {
.detect_delay_ms = 200,
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .gpio_card_detect = GPIO_MMC1_CARD_DETECT,
- .gpio_card_ro = -1,
- .gpio_power = -1,
+};
+
+static struct gpiod_lookup_table littleton_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ /* Card detect on MFP (gpio-pxa) GPIO 15 */
+ GPIO_LOOKUP("gpio-pxa", MFP_PIN_GPIO15,
+ "cd", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static void __init littleton_init_mmc(void)
{
+ gpiod_add_lookup_table(&littleton_mci_gpio_table);
pxa_set_mci_info(&littleton_mci_platform_data);
}
#else
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index fe2ef9b78602..c576e8462043 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -440,9 +440,6 @@ static struct pxamci_platform_data lubbock_mci_platform_data = {
.init = lubbock_mci_init,
.get_ro = lubbock_mci_get_ro,
.exit = lubbock_mci_exit,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
static void lubbock_irda_transceiver_mode(struct device *dev, int mode)
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index 14c0f80bc9e7..08b079653c3f 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -775,12 +775,31 @@ static struct pxamci_platform_data magician_mci_info = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
.init = magician_mci_init,
.exit = magician_mci_exit,
- .gpio_card_detect = -1,
- .gpio_card_ro = EGPIO_MAGICIAN_nSD_READONLY,
.gpio_card_ro_invert = 1,
- .gpio_power = EGPIO_MAGICIAN_SD_POWER,
};
+/*
+ * Write protect on EGPIO register 5 index 4, this is on the second HTC
+ * EGPIO chip which starts at register 4, so we need offset 8+4=12 on that
+ * particular chip.
+ */
+#define EGPIO_MAGICIAN_nSD_READONLY_OFFSET 12
+/*
+ * Power on EGPIO register 2 index 0, so this is on the first HTC EGPIO chip
+ * starting at register 0 so we need offset 2*8+0 = 16 on that chip.
+ */
+#define EGPIO_MAGICIAN_nSD_POWER_OFFSET 16
+
+static struct gpiod_lookup_table magician_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("htc-egpio-1", EGPIO_MAGICIAN_nSD_READONLY_OFFSET,
+ "wp", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("htc-egpio-0", EGPIO_MAGICIAN_nSD_POWER_OFFSET,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
/*
* USB OHCI
@@ -979,6 +998,7 @@ static void __init magician_init(void)
i2c_register_board_info(1,
ARRAY_AND_SIZE(magician_pwr_i2c_board_info));
+ gpiod_add_lookup_table(&magician_mci_gpio_table);
pxa_set_mci_info(&magician_mci_info);
pxa_set_ohci_info(&magician_ohci_info);
pxa_set_udc_info(&magician_udc_info);
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index afd62a94fdbf..9e39fc2ad2d9 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -361,9 +361,6 @@ static struct pxamci_platform_data mainstone_mci_platform_data = {
.init = mainstone_mci_init,
.setpower = mainstone_mci_setpower,
.exit = mainstone_mci_exit,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
static void mainstone_irda_transceiver_mode(struct device *dev, int mode)
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index 04dc78d0809f..d0fa5c72622d 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -31,6 +31,7 @@
#include <linux/rtc.h>
#include <linux/leds.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/pda_power.h>
@@ -397,9 +398,22 @@ struct gpio_vbus_mach_info gpio_vbus_data = {
static struct pxamci_platform_data mioa701_mci_info = {
.detect_delay_ms = 250,
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .gpio_card_detect = GPIO15_SDIO_INSERT,
- .gpio_card_ro = GPIO78_SDIO_RO,
- .gpio_power = GPIO91_SDIO_EN,
+};
+
+static struct gpiod_lookup_table mioa701_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ /* Card detect on GPIO 15 */
+ GPIO_LOOKUP("gpio-pxa", GPIO15_SDIO_INSERT,
+ "cd", GPIO_ACTIVE_LOW),
+ /* Write protect on GPIO 78 */
+ GPIO_LOOKUP("gpio-pxa", GPIO78_SDIO_RO,
+ "wp", GPIO_ACTIVE_LOW),
+ /* Power on GPIO 91 */
+ GPIO_LOOKUP("gpio-pxa", GPIO91_SDIO_EN,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
};
/* FlashRAM */
@@ -743,6 +757,7 @@ static void __init mioa701_machine_init(void)
pr_err("MioA701: Failed to request GPIOs: %d", rc);
bootstrap_init();
pxa_set_fb_info(NULL, &mioa701_pxafb_info);
+ gpiod_add_lookup_table(&mioa701_mci_gpio_table);
pxa_set_mci_info(&mioa701_mci_info);
pxa_set_keypad_info(&mioa701_keypad_info);
pxa_set_udc_info(&mioa701_udc_info);
diff --git a/arch/arm/mach-pxa/mxm8x10.c b/arch/arm/mach-pxa/mxm8x10.c
index 616b22397d73..e4248a3a8dfc 100644
--- a/arch/arm/mach-pxa/mxm8x10.c
+++ b/arch/arm/mach-pxa/mxm8x10.c
@@ -21,7 +21,7 @@
#include <linux/serial_8250.h>
#include <linux/dm9000.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/platform_data/i2c-pxa.h>
#include <linux/platform_data/mtd-nand-pxa3xx.h>
@@ -326,13 +326,24 @@ static mfp_cfg_t mfp_cfg[] __initdata = {
static struct pxamci_platform_data mxm_8x10_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
.detect_delay_ms = 10,
- .gpio_card_detect = MXM_8X10_SD_nCD,
- .gpio_card_ro = MXM_8X10_SD_WP,
- .gpio_power = -1
+};
+
+static struct gpiod_lookup_table mxm_8x10_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ /* Card detect on GPIO 72 */
+ GPIO_LOOKUP("gpio-pxa", MXM_8X10_SD_nCD,
+ "cd", GPIO_ACTIVE_LOW),
+ /* Write protect on GPIO 84 */
+ GPIO_LOOKUP("gpio-pxa", MXM_8X10_SD_WP,
+ "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
};
void __init mxm_8x10_mmc_init(void)
{
+ gpiod_add_lookup_table(&mxm_8x10_mci_gpio_table);
pxa_set_mci_info(&mxm_8x10_mci_platform_data);
}
#endif
diff --git a/arch/arm/mach-pxa/palm27x.c b/arch/arm/mach-pxa/palm27x.c
index 1efe9bcf07fa..b94c45f65215 100644
--- a/arch/arm/mach-pxa/palm27x.c
+++ b/arch/arm/mach-pxa/palm27x.c
@@ -49,14 +49,10 @@ static struct pxamci_platform_data palm27x_mci_platform_data = {
.detect_delay_ms = 200,
};
-void __init palm27x_mmc_init(int detect, int ro, int power,
- int power_inverted)
+void __init palm27x_mmc_init(struct gpiod_lookup_table *gtable)
{
- palm27x_mci_platform_data.gpio_card_detect = detect;
- palm27x_mci_platform_data.gpio_card_ro = ro;
- palm27x_mci_platform_data.gpio_power = power;
- palm27x_mci_platform_data.gpio_power_invert = power_inverted;
-
+ if (gtable)
+ gpiod_add_lookup_table(gtable);
pxa_set_mci_info(&palm27x_mci_platform_data);
}
#endif
diff --git a/arch/arm/mach-pxa/palm27x.h b/arch/arm/mach-pxa/palm27x.h
index d4eac3d6ffb5..cd071f876132 100644
--- a/arch/arm/mach-pxa/palm27x.h
+++ b/arch/arm/mach-pxa/palm27x.h
@@ -12,12 +12,12 @@
#ifndef __INCLUDE_MACH_PALM27X__
#define __INCLUDE_MACH_PALM27X__
+#include <linux/gpio/machine.h>
+
#if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE)
-extern void __init palm27x_mmc_init(int detect, int ro, int power,
- int power_inverted);
+extern void __init palm27x_mmc_init(struct gpiod_lookup_table *gtable);
#else
-static inline void palm27x_mmc_init(int detect, int ro, int power,
- int power_inverted)
+static inline void palm27x_mmc_init(struct gpiod_lookup_table *gtable)
{}
#endif
diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index 980f2847f5b5..bf2b0cfc86df 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -288,8 +288,20 @@ static struct platform_device palmld_ide_device = {
.id = -1,
};
+static struct gpiod_lookup_table palmld_ide_gpio_table = {
+ .dev_id = "pata_palmld",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMLD_IDE_PWEN,
+ "power", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMLD_IDE_RESET,
+ "reset", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init palmld_ide_init(void)
{
+ gpiod_add_lookup_table(&palmld_ide_gpio_table);
platform_device_register(&palmld_ide_device);
}
#else
@@ -320,6 +332,19 @@ static void __init palmld_map_io(void)
iotable_init(palmld_io_desc, ARRAY_SIZE(palmld_io_desc));
}
+static struct gpiod_lookup_table palmld_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMLD_SD_DETECT_N,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMLD_SD_READONLY,
+ "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMLD_SD_POWER,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void __init palmld_init(void)
{
pxa2xx_mfp_config(ARRAY_AND_SIZE(palmld_pin_config));
@@ -327,8 +352,7 @@ static void __init palmld_init(void)
pxa_set_btuart_info(NULL);
pxa_set_stuart_info(NULL);
- palm27x_mmc_init(GPIO_NR_PALMLD_SD_DETECT_N, GPIO_NR_PALMLD_SD_READONLY,
- GPIO_NR_PALMLD_SD_POWER, 0);
+ palm27x_mmc_init(&palmld_mci_gpio_table);
palm27x_pm_init(PALMLD_STR_BASE);
palm27x_lcd_init(-1, &palm_320x480_lcd_mode);
palm27x_irda_init(GPIO_NR_PALMLD_IR_DISABLE);
diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c
index 876144aa3564..8811f11f670e 100644
--- a/arch/arm/mach-pxa/palmt5.c
+++ b/arch/arm/mach-pxa/palmt5.c
@@ -182,6 +182,19 @@ static void __init palmt5_reserve(void)
memblock_reserve(0xa0200000, 0x1000);
}
+static struct gpiod_lookup_table palmt5_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMT5_SD_DETECT_N,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMT5_SD_READONLY,
+ "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMT5_SD_POWER,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void __init palmt5_init(void)
{
pxa2xx_mfp_config(ARRAY_AND_SIZE(palmt5_pin_config));
@@ -189,8 +202,7 @@ static void __init palmt5_init(void)
pxa_set_btuart_info(NULL);
pxa_set_stuart_info(NULL);
- palm27x_mmc_init(GPIO_NR_PALMT5_SD_DETECT_N, GPIO_NR_PALMT5_SD_READONLY,
- GPIO_NR_PALMT5_SD_POWER, 0);
+ palm27x_mmc_init(&palmt5_mci_gpio_table);
palm27x_pm_init(PALMT5_STR_BASE);
palm27x_lcd_init(-1, &palm_320x480_lcd_mode);
palm27x_udc_init(GPIO_NR_PALMT5_USB_DETECT_N,
diff --git a/arch/arm/mach-pxa/palmtc.c b/arch/arm/mach-pxa/palmtc.c
index 18946594a7c8..7ce4fc287115 100644
--- a/arch/arm/mach-pxa/palmtc.c
+++ b/arch/arm/mach-pxa/palmtc.c
@@ -20,7 +20,7 @@
#include <linux/input.h>
#include <linux/pwm.h>
#include <linux/pwm_backlight.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/input/matrix_keypad.h>
#include <linux/ucb1400.h>
#include <linux/power_supply.h>
@@ -120,14 +120,25 @@ static unsigned long palmtc_pin_config[] __initdata = {
#if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE)
static struct pxamci_platform_data palmtc_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .gpio_power = GPIO_NR_PALMTC_SD_POWER,
- .gpio_card_ro = GPIO_NR_PALMTC_SD_READONLY,
- .gpio_card_detect = GPIO_NR_PALMTC_SD_DETECT_N,
.detect_delay_ms = 200,
};
+static struct gpiod_lookup_table palmtc_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTC_SD_DETECT_N,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTC_SD_READONLY,
+ "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTC_SD_POWER,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void __init palmtc_mmc_init(void)
{
+ gpiod_add_lookup_table(&palmtc_mci_gpio_table);
pxa_set_mci_info(&palmtc_mci_platform_data);
}
#else
diff --git a/arch/arm/mach-pxa/palmte2.c b/arch/arm/mach-pxa/palmte2.c
index 36b46141a28b..e830005af8d0 100644
--- a/arch/arm/mach-pxa/palmte2.c
+++ b/arch/arm/mach-pxa/palmte2.c
@@ -19,6 +19,7 @@
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/gpio_keys.h>
+#include <linux/gpio/machine.h>
#include <linux/input.h>
#include <linux/pda_power.h>
#include <linux/pwm.h>
@@ -101,9 +102,19 @@ static unsigned long palmte2_pin_config[] __initdata = {
******************************************************************************/
static struct pxamci_platform_data palmte2_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .gpio_card_detect = GPIO_NR_PALMTE2_SD_DETECT_N,
- .gpio_card_ro = GPIO_NR_PALMTE2_SD_READONLY,
- .gpio_power = GPIO_NR_PALMTE2_SD_POWER,
+};
+
+static struct gpiod_lookup_table palmte2_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTE2_SD_DETECT_N,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTE2_SD_READONLY,
+ "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTE2_SD_POWER,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
};
#if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE)
@@ -354,6 +365,7 @@ static void __init palmte2_init(void)
pxa_set_stuart_info(NULL);
pxa_set_fb_info(NULL, &palmte2_lcd_screen);
+ gpiod_add_lookup_table(&palmte2_mci_gpio_table);
pxa_set_mci_info(&palmte2_mci_platform_data);
palmte2_udc_init();
pxa_set_ac97_info(&palmte2_ac97_pdata);
diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c
index b66b0b11d717..70f1a8a3aa94 100644
--- a/arch/arm/mach-pxa/palmtreo.c
+++ b/arch/arm/mach-pxa/palmtreo.c
@@ -480,23 +480,46 @@ void __init treo680_gpio_init(void)
gpio_free(GPIO_NR_TREO680_LCD_EN_N);
}
+static struct gpiod_lookup_table treo680_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_TREO_SD_DETECT_N,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_TREO680_SD_READONLY,
+ "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_TREO680_SD_POWER,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void __init treo680_init(void)
{
pxa2xx_mfp_config(ARRAY_AND_SIZE(treo680_pin_config));
palmphone_common_init();
treo680_gpio_init();
- palm27x_mmc_init(GPIO_NR_TREO_SD_DETECT_N, GPIO_NR_TREO680_SD_READONLY,
- GPIO_NR_TREO680_SD_POWER, 0);
+ palm27x_mmc_init(&treo680_mci_gpio_table);
}
#endif
#ifdef CONFIG_MACH_CENTRO
+
+static struct gpiod_lookup_table centro685_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_TREO_SD_DETECT_N,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_CENTRO_SD_POWER,
+ "power", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init centro_init(void)
{
pxa2xx_mfp_config(ARRAY_AND_SIZE(centro685_pin_config));
palmphone_common_init();
- palm27x_mmc_init(GPIO_NR_TREO_SD_DETECT_N, -1,
- GPIO_NR_CENTRO_SD_POWER, 1);
+ palm27x_mmc_init(&centro685_mci_gpio_table);
}
#endif
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index 1d06a8e91d8f..ef71bf2abb47 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -337,6 +337,19 @@ static void __init palmtx_map_io(void)
iotable_init(palmtx_io_desc, ARRAY_SIZE(palmtx_io_desc));
}
+static struct gpiod_lookup_table palmtx_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTX_SD_DETECT_N,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTX_SD_READONLY,
+ "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTX_SD_POWER,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void __init palmtx_init(void)
{
pxa2xx_mfp_config(ARRAY_AND_SIZE(palmtx_pin_config));
@@ -344,8 +357,7 @@ static void __init palmtx_init(void)
pxa_set_btuart_info(NULL);
pxa_set_stuart_info(NULL);
- palm27x_mmc_init(GPIO_NR_PALMTX_SD_DETECT_N, GPIO_NR_PALMTX_SD_READONLY,
- GPIO_NR_PALMTX_SD_POWER, 0);
+ palm27x_mmc_init(&palmtx_mci_gpio_table);
palm27x_pm_init(PALMTX_STR_BASE);
palm27x_lcd_init(-1, &palm_320x480_lcd_mode);
palm27x_udc_init(GPIO_NR_PALMTX_USB_DETECT_N,
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index 4d475f6f4a77..ea1c7b2ed8d4 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -386,6 +386,19 @@ static void __init palmz72_camera_init(void)
static inline void palmz72_camera_init(void) {}
#endif
+static struct gpiod_lookup_table palmz72_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMZ72_SD_DETECT_N,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMZ72_SD_RO,
+ "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMZ72_SD_POWER_N,
+ "power", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
/******************************************************************************
* Machine init
******************************************************************************/
@@ -396,8 +409,7 @@ static void __init palmz72_init(void)
pxa_set_btuart_info(NULL);
pxa_set_stuart_info(NULL);
- palm27x_mmc_init(GPIO_NR_PALMZ72_SD_DETECT_N, GPIO_NR_PALMZ72_SD_RO,
- GPIO_NR_PALMZ72_SD_POWER_N, 1);
+ palm27x_mmc_init(&palmz72_mci_gpio_table);
palm27x_lcd_init(-1, &palm_320x320_lcd_mode);
palm27x_udc_init(GPIO_NR_PALMZ72_USB_DETECT_N,
GPIO_NR_PALMZ72_USB_PULLUP, 0);
diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c
index 973568d4b9ec..be19e3a4eacc 100644
--- a/arch/arm/mach-pxa/pcm990-baseboard.c
+++ b/arch/arm/mach-pxa/pcm990-baseboard.c
@@ -370,9 +370,6 @@ static struct pxamci_platform_data pcm990_mci_platform_data = {
.init = pcm990_mci_init,
.setpower = pcm990_mci_setpower,
.exit = pcm990_mci_exit,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
static struct pxaohci_platform_data pcm990_ohci_platform_data = {
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 1adde1251e2b..c2a43d4cfd3e 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -23,6 +23,7 @@
#include <linux/delay.h>
#include <linux/mtd/physmap.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/i2c.h>
#include <linux/platform_data/i2c-pxa.h>
#include <linux/regulator/machine.h>
@@ -288,11 +289,18 @@ static struct pxamci_platform_data poodle_mci_platform_data = {
.init = poodle_mci_init,
.setpower = poodle_mci_setpower,
.exit = poodle_mci_exit,
- .gpio_card_detect = POODLE_GPIO_nSD_DETECT,
- .gpio_card_ro = POODLE_GPIO_nSD_WP,
- .gpio_power = -1,
};
+static struct gpiod_lookup_table poodle_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", POODLE_GPIO_nSD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", POODLE_GPIO_nSD_WP,
+ "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
/*
* Irda
@@ -439,6 +447,7 @@ static void __init poodle_init(void)
pxa_set_fb_info(&poodle_locomo_device.dev, &poodle_fb_info);
pxa_set_udc_info(&udc_info);
+ gpiod_add_lookup_table(&poodle_mci_gpio_table);
pxa_set_mci_info(&poodle_mci_platform_data);
pxa_set_ficp_info(&poodle_ficp_platform_data);
pxa_set_i2c_info(NULL);
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index bd3c23ad6ce6..e1db072756f2 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -749,9 +749,6 @@ static struct pxamci_platform_data raumfeld_mci_platform_data = {
.init = raumfeld_mci_init,
.exit = raumfeld_mci_exit,
.detect_delay_ms = 200,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
/*
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 5d50025492b7..306818e2cf54 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -18,6 +18,7 @@
#include <linux/delay.h>
#include <linux/gpio_keys.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/leds.h>
#include <linux/i2c.h>
#include <linux/platform_data/i2c-pxa.h>
@@ -615,13 +616,22 @@ static struct pxamci_platform_data spitz_mci_platform_data = {
.detect_delay_ms = 250,
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
.setpower = spitz_mci_setpower,
- .gpio_card_detect = SPITZ_GPIO_nSD_DETECT,
- .gpio_card_ro = SPITZ_GPIO_nSD_WP,
- .gpio_power = -1,
+};
+
+static struct gpiod_lookup_table spitz_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", SPITZ_GPIO_nSD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", SPITZ_GPIO_nSD_WP,
+ "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static void __init spitz_mmc_init(void)
{
+ gpiod_add_lookup_table(&spitz_mci_gpio_table);
pxa_set_mci_info(&spitz_mci_platform_data);
}
#else
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index bbea5fa9a140..e0d6c872270a 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -436,9 +436,6 @@ static int imote2_mci_get_ro(struct device *dev)
static struct pxamci_platform_data imote2_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, /* default anyway */
.get_ro = imote2_mci_get_ro,
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
static struct gpio_led imote2_led_pins[] = {
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index cb5cd8e78c94..e8a93c088c35 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -31,6 +31,7 @@
#include <linux/gpio_keys.h>
#include <linux/input.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/power/gpio-charger.h>
#include <linux/spi/spi.h>
#include <linux/spi/pxa2xx_spi.h>
@@ -291,9 +292,19 @@ static struct pxamci_platform_data tosa_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
.init = tosa_mci_init,
.exit = tosa_mci_exit,
- .gpio_card_detect = TOSA_GPIO_nSD_DETECT,
- .gpio_card_ro = TOSA_GPIO_SD_WP,
- .gpio_power = TOSA_GPIO_PWR_ON,
+};
+
+static struct gpiod_lookup_table tosa_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_nSD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_SD_WP,
+ "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_PWR_ON,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
};
/*
@@ -908,6 +919,7 @@ static void __init tosa_init(void)
/* enable batt_fault */
PMCR = 0x01;
+ gpiod_add_lookup_table(&tosa_mci_gpio_table);
pxa_set_mci_info(&tosa_mci_platform_data);
pxa_set_ficp_info(&tosa_ficp_platform_data);
pxa_set_i2c_info(NULL);
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index 55b8c501b6fc..c76f1daecfc9 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -355,9 +355,6 @@ static struct pxamci_platform_data trizeps4_mci_platform_data = {
.exit = trizeps4_mci_exit,
.get_ro = NULL, /* write-protection not supported */
.setpower = NULL, /* power-switching not supported */
- .gpio_card_detect = -1,
- .gpio_card_ro = -1,
- .gpio_power = -1,
};
/****************************************************************************
diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c
index f65dfb6e20e2..829284406fa3 100644
--- a/arch/arm/mach-pxa/vpac270.c
+++ b/arch/arm/mach-pxa/vpac270.c
@@ -17,6 +17,7 @@
#include <linux/input.h>
#include <linux/leds.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/usb/gpio_vbus.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
@@ -240,14 +241,23 @@ static void __init vpac270_onenand_init(void) {}
#if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE)
static struct pxamci_platform_data vpac270_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .gpio_power = -1,
- .gpio_card_detect = GPIO53_VPAC270_SD_DETECT_N,
- .gpio_card_ro = GPIO52_VPAC270_SD_READONLY,
.detect_delay_ms = 200,
};
+static struct gpiod_lookup_table vpac270_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO53_VPAC270_SD_DETECT_N,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", GPIO52_VPAC270_SD_READONLY,
+ "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init vpac270_mmc_init(void)
{
+ gpiod_add_lookup_table(&vpac270_mci_gpio_table);
pxa_set_mci_info(&vpac270_mci_platform_data);
}
#else
diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c
index 6fffcfc4621e..e2353e75bb28 100644
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -27,6 +27,7 @@
#include <linux/power_supply.h>
#include <linux/mtd/physmap.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/gpio_keys.h>
#include <linux/delay.h>
#include <linux/regulator/machine.h>
@@ -290,14 +291,21 @@ static inline void z2_lcd_init(void) {}
#if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE)
static struct pxamci_platform_data z2_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
- .gpio_card_detect = GPIO96_ZIPITZ2_SD_DETECT,
- .gpio_power = -1,
- .gpio_card_ro = -1,
.detect_delay_ms = 200,
};
+static struct gpiod_lookup_table z2_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO96_ZIPITZ2_SD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init z2_mmc_init(void)
{
+ gpiod_add_lookup_table(&z2_mci_gpio_table);
pxa_set_mci_info(&z2_mci_platform_data);
}
#else
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index d53ea12fc766..897ef59fbe0c 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -663,10 +663,18 @@ static struct pxafb_mach_info zeus_fb_info = {
static struct pxamci_platform_data zeus_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
.detect_delay_ms = 250,
- .gpio_card_detect = ZEUS_MMC_CD_GPIO,
- .gpio_card_ro = ZEUS_MMC_WP_GPIO,
.gpio_card_ro_invert = 1,
- .gpio_power = -1
+};
+
+static struct gpiod_lookup_table zeus_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", ZEUS_MMC_CD_GPIO,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("gpio-pxa", ZEUS_MMC_WP_GPIO,
+ "wp", GPIO_ACTIVE_HIGH),
+ { },
+ },
};
/*
@@ -883,6 +891,7 @@ static void __init zeus_init(void)
else
pxa_set_fb_info(NULL, &zeus_fb_info);
+ gpiod_add_lookup_table(&zeus_mci_gpio_table);
pxa_set_mci_info(&zeus_mci_platform_data);
pxa_set_udc_info(&zeus_udc_info);
pxa_set_ac97_info(&zeus_ac97_info);
diff --git a/arch/arm/mach-pxa/zylonite.c b/arch/arm/mach-pxa/zylonite.c
index 52e70a5c1281..1f88d7bae849 100644
--- a/arch/arm/mach-pxa/zylonite.c
+++ b/arch/arm/mach-pxa/zylonite.c
@@ -19,7 +19,7 @@
#include <linux/leds.h>
#include <linux/init.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/pwm.h>
#include <linux/pwm_backlight.h>
#include <linux/smc91x.h>
@@ -227,33 +227,68 @@ static inline void zylonite_init_lcd(void) {}
static struct pxamci_platform_data zylonite_mci_platform_data = {
.detect_delay_ms= 200,
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
- .gpio_card_detect = EXT_GPIO(0),
- .gpio_card_ro = EXT_GPIO(2),
- .gpio_power = -1,
+};
+
+#define PCA9539A_MCI_CD 0
+#define PCA9539A_MCI1_CD 1
+#define PCA9539A_MCI_WP 2
+#define PCA9539A_MCI1_WP 3
+#define PCA9539A_MCI3_CD 30
+#define PCA9539A_MCI3_WP 31
+
+static struct gpiod_lookup_table zylonite_mci_gpio_table = {
+ .dev_id = "pxa2xx-mci.0",
+ .table = {
+ GPIO_LOOKUP("i2c-pca9539-a", PCA9539A_MCI_CD,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("i2c-pca9539-a", PCA9539A_MCI_WP,
+ "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct pxamci_platform_data zylonite_mci2_platform_data = {
.detect_delay_ms= 200,
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
- .gpio_card_detect = EXT_GPIO(1),
- .gpio_card_ro = EXT_GPIO(3),
- .gpio_power = -1,
+};
+
+static struct gpiod_lookup_table zylonite_mci2_gpio_table = {
+ .dev_id = "pxa2xx-mci.1",
+ .table = {
+ GPIO_LOOKUP("i2c-pca9539-a", PCA9539A_MCI1_CD,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("i2c-pca9539-a", PCA9539A_MCI1_WP,
+ "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct pxamci_platform_data zylonite_mci3_platform_data = {
.detect_delay_ms= 200,
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
- .gpio_card_detect = EXT_GPIO(30),
- .gpio_card_ro = EXT_GPIO(31),
- .gpio_power = -1,
+};
+
+static struct gpiod_lookup_table zylonite_mci3_gpio_table = {
+ .dev_id = "pxa2xx-mci.2",
+ .table = {
+ GPIO_LOOKUP("i2c-pca9539-a", PCA9539A_MCI3_CD,
+ "cd", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("i2c-pca9539-a", PCA9539A_MCI3_WP,
+ "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static void __init zylonite_init_mmc(void)
{
+ gpiod_add_lookup_table(&zylonite_mci_gpio_table);
pxa_set_mci_info(&zylonite_mci_platform_data);
+ gpiod_add_lookup_table(&zylonite_mci2_gpio_table);
pxa3xx_set_mci2_info(&zylonite_mci2_platform_data);
- if (cpu_is_pxa310())
+ if (cpu_is_pxa310()) {
+ gpiod_add_lookup_table(&zylonite_mci3_gpio_table);
pxa3xx_set_mci3_info(&zylonite_mci3_platform_data);
+ }
}
#else
static inline void zylonite_init_mmc(void) {}
diff --git a/arch/arm/mach-pxa/zylonite_pxa300.c b/arch/arm/mach-pxa/zylonite_pxa300.c
index 0ff4e218080f..8f930a9dd0fd 100644
--- a/arch/arm/mach-pxa/zylonite_pxa300.c
+++ b/arch/arm/mach-pxa/zylonite_pxa300.c
@@ -230,11 +230,13 @@ static struct pca953x_platform_data gpio_exp[] = {
static struct i2c_board_info zylonite_i2c_board_info[] = {
{
.type = "pca9539",
+ .dev_name = "pca9539-a",
.addr = 0x74,
.platform_data = &gpio_exp[0],
.irq = PXA_GPIO_TO_IRQ(18),
}, {
.type = "pca9539",
+ .dev_name = "pca9539-b",
.addr = 0x75,
.platform_data = &gpio_exp[1],
.irq = PXA_GPIO_TO_IRQ(19),
diff --git a/arch/arm/mach-s3c24xx/mach-at2440evb.c b/arch/arm/mach-s3c24xx/mach-at2440evb.c
index 68a4fa94257a..58c5ef3cf1d7 100644
--- a/arch/arm/mach-s3c24xx/mach-at2440evb.c
+++ b/arch/arm/mach-s3c24xx/mach-at2440evb.c
@@ -9,7 +9,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/timer.h>
@@ -136,7 +136,16 @@ static struct platform_device at2440evb_device_eth = {
};
static struct s3c24xx_mci_pdata at2440evb_mci_pdata __initdata = {
- .gpio_detect = S3C2410_GPG(10),
+ /* Intentionally left blank */
+};
+
+static struct gpiod_lookup_table at2440evb_mci_gpio_table = {
+ .dev_id = "s3c2410-sdi",
+ .table = {
+ /* Card detect S3C2410_GPG(10) */
+ GPIO_LOOKUP("GPG", 10, "cd", GPIO_ACTIVE_LOW),
+ { },
+ },
};
/* 7" LCD panel */
@@ -200,6 +209,7 @@ static void __init at2440evb_init_time(void)
static void __init at2440evb_init(void)
{
s3c24xx_fb_set_platdata(&at2440evb_fb_info);
+ gpiod_add_lookup_table(&at2440evb_mci_gpio_table);
s3c24xx_mci_set_platdata(&at2440evb_mci_pdata);
s3c_nand_set_platdata(&at2440evb_nand_info);
s3c_i2c0_set_platdata(NULL);
diff --git a/arch/arm/mach-s3c24xx/mach-h1940.c b/arch/arm/mach-s3c24xx/mach-h1940.c
index e064c73a57d3..74d6b68e91c7 100644
--- a/arch/arm/mach-s3c24xx/mach-h1940.c
+++ b/arch/arm/mach-s3c24xx/mach-h1940.c
@@ -18,6 +18,7 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
#include <linux/pwm.h>
@@ -459,12 +460,21 @@ static void h1940_set_mmc_power(unsigned char power_mode, unsigned short vdd)
}
static struct s3c24xx_mci_pdata h1940_mmc_cfg __initdata = {
- .gpio_detect = S3C2410_GPF(5),
- .gpio_wprotect = S3C2410_GPH(8),
.set_power = h1940_set_mmc_power,
.ocr_avail = MMC_VDD_32_33,
};
+static struct gpiod_lookup_table h1940_mmc_gpio_table = {
+ .dev_id = "s3c2410-sdi",
+ .table = {
+ /* Card detect S3C2410_GPF(5) */
+ GPIO_LOOKUP("GPF", 5, "cd", GPIO_ACTIVE_LOW),
+ /* Write protect S3C2410_GPH(8) */
+ GPIO_LOOKUP("GPH", 8, "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static struct pwm_lookup h1940_pwm_lookup[] = {
PWM_LOOKUP("samsung-pwm", 0, "pwm-backlight", NULL, 36296,
PWM_POLARITY_NORMAL),
@@ -680,6 +690,7 @@ static void __init h1940_init(void)
u32 tmp;
s3c24xx_fb_set_platdata(&h1940_fb_info);
+ gpiod_add_lookup_table(&h1940_mmc_gpio_table);
s3c24xx_mci_set_platdata(&h1940_mmc_cfg);
s3c24xx_udc_set_platdata(&h1940_udc_cfg);
s3c24xx_ts_set_platdata(&h1940_ts_cfg);
diff --git a/arch/arm/mach-s3c24xx/mach-mini2440.c b/arch/arm/mach-s3c24xx/mach-mini2440.c
index 50d67d760efd..9035f868fb34 100644
--- a/arch/arm/mach-s3c24xx/mach-mini2440.c
+++ b/arch/arm/mach-s3c24xx/mach-mini2440.c
@@ -15,6 +15,7 @@
#include <linux/timer.h>
#include <linux/init.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/input.h>
#include <linux/io.h>
#include <linux/serial_core.h>
@@ -234,13 +235,22 @@ static struct s3c2410fb_mach_info mini2440_fb_info __initdata = {
/* MMC/SD */
static struct s3c24xx_mci_pdata mini2440_mmc_cfg __initdata = {
- .gpio_detect = S3C2410_GPG(8),
- .gpio_wprotect = S3C2410_GPH(8),
.wprotect_invert = 1,
.set_power = NULL,
.ocr_avail = MMC_VDD_32_33|MMC_VDD_33_34,
};
+static struct gpiod_lookup_table mini2440_mmc_gpio_table = {
+ .dev_id = "s3c2410-sdi",
+ .table = {
+ /* Card detect S3C2410_GPG(8) */
+ GPIO_LOOKUP("GPG", 8, "cd", GPIO_ACTIVE_LOW),
+ /* Write protect S3C2410_GPH(8) */
+ GPIO_LOOKUP("GPH", 8, "wp", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
/* NAND Flash on MINI2440 board */
static struct mtd_partition mini2440_default_nand_part[] __initdata = {
@@ -696,6 +706,7 @@ static void __init mini2440_init(void)
}
s3c24xx_udc_set_platdata(&mini2440_udc_cfg);
+ gpiod_add_lookup_table(&mini2440_mmc_gpio_table);
s3c24xx_mci_set_platdata(&mini2440_mmc_cfg);
s3c_nand_set_platdata(&mini2440_nand_info);
s3c_i2c0_set_platdata(NULL);
diff --git a/arch/arm/mach-s3c24xx/mach-n30.c b/arch/arm/mach-s3c24xx/mach-n30.c
index eec51fadb14a..d856f23939af 100644
--- a/arch/arm/mach-s3c24xx/mach-n30.c
+++ b/arch/arm/mach-s3c24xx/mach-n30.c
@@ -17,6 +17,7 @@
#include <linux/gpio_keys.h>
#include <linux/init.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/input.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
@@ -350,12 +351,21 @@ static void n30_sdi_set_power(unsigned char power_mode, unsigned short vdd)
}
static struct s3c24xx_mci_pdata n30_mci_cfg __initdata = {
- .gpio_detect = S3C2410_GPF(1),
- .gpio_wprotect = S3C2410_GPG(10),
.ocr_avail = MMC_VDD_32_33,
.set_power = n30_sdi_set_power,
};
+static struct gpiod_lookup_table n30_mci_gpio_table = {
+ .dev_id = "s3c2410-sdi",
+ .table = {
+ /* Card detect S3C2410_GPF(1) */
+ GPIO_LOOKUP("GPF", 1, "cd", GPIO_ACTIVE_LOW),
+ /* Write protect S3C2410_GPG(10) */
+ GPIO_LOOKUP("GPG", 10, "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static struct platform_device *n30_devices[] __initdata = {
&s3c_device_lcd,
&s3c_device_wdt,
@@ -549,6 +559,7 @@ static void __init n30_init(void)
s3c24xx_fb_set_platdata(&n30_fb_info);
s3c24xx_udc_set_platdata(&n30_udc_cfg);
+ gpiod_add_lookup_table(&n30_mci_gpio_table);
s3c24xx_mci_set_platdata(&n30_mci_cfg);
s3c_i2c0_set_platdata(&n30_i2ccfg);
diff --git a/arch/arm/mach-s3c24xx/mach-rx1950.c b/arch/arm/mach-s3c24xx/mach-rx1950.c
index 7f5a18fa305b..29f9b345a531 100644
--- a/arch/arm/mach-s3c24xx/mach-rx1950.c
+++ b/arch/arm/mach-s3c24xx/mach-rx1950.c
@@ -14,6 +14,7 @@
#include <linux/timer.h>
#include <linux/init.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/platform_device.h>
#include <linux/serial_core.h>
#include <linux/serial_s3c.h>
@@ -558,12 +559,21 @@ static void rx1950_set_mmc_power(unsigned char power_mode, unsigned short vdd)
}
static struct s3c24xx_mci_pdata rx1950_mmc_cfg __initdata = {
- .gpio_detect = S3C2410_GPF(5),
- .gpio_wprotect = S3C2410_GPH(8),
.set_power = rx1950_set_mmc_power,
.ocr_avail = MMC_VDD_32_33,
};
+static struct gpiod_lookup_table rx1950_mmc_gpio_table = {
+ .dev_id = "s3c2410-sdi",
+ .table = {
+ /* Card detect S3C2410_GPF(5) */
+ GPIO_LOOKUP("GPF", 5, "cd", GPIO_ACTIVE_LOW),
+ /* Write protect S3C2410_GPH(8) */
+ GPIO_LOOKUP("GPH", 8, "wp", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static struct mtd_partition rx1950_nand_part[] = {
[0] = {
.name = "Boot0",
@@ -762,6 +772,7 @@ static void __init rx1950_init_machine(void)
s3c24xx_fb_set_platdata(&rx1950_lcd_cfg);
s3c24xx_udc_set_platdata(&rx1950_udc_cfg);
s3c24xx_ts_set_platdata(&rx1950_ts_cfg);
+ gpiod_add_lookup_table(&rx1950_mmc_gpio_table);
s3c24xx_mci_set_platdata(&rx1950_mmc_cfg);
s3c_i2c0_set_platdata(NULL);
s3c_nand_set_platdata(&rx1950_nand_info);
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index 712416ecd8e6..f304b10e23a4 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -22,7 +22,7 @@
#include "dma.h"
/*
- * dma_direct_ops is used if
+ * The generic direct mapping code is used if
* - MMU/MPU is off
* - cpu is v7m w/o cache support
* - device is coherent
@@ -209,16 +209,9 @@ const struct dma_map_ops arm_nommu_dma_ops = {
};
EXPORT_SYMBOL(arm_nommu_dma_ops);
-static const struct dma_map_ops *arm_nommu_get_dma_map_ops(bool coherent)
-{
- return coherent ? &dma_direct_ops : &arm_nommu_dma_ops;
-}
-
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{
- const struct dma_map_ops *dma_ops;
-
if (IS_ENABLED(CONFIG_CPU_V7M)) {
/*
* Cache support for v7m is optional, so can be treated as
@@ -234,7 +227,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev->archdata.dma_coherent = (get_cr() & CR_M) ? coherent : true;
}
- dma_ops = arm_nommu_get_dma_map_ops(dev->archdata.dma_coherent);
-
- set_dma_ops(dev, dma_ops);
+ if (!dev->archdata.dma_coherent)
+ set_dma_ops(dev, &arm_nommu_dma_ops);
}
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 78de138aa66d..f1e2922e447c 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -179,11 +179,6 @@ static void arm_dma_sync_single_for_device(struct device *dev,
__dma_page_cpu_to_dev(page, offset, size, dir);
}
-static int arm_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == ARM_MAPPING_ERROR;
-}
-
const struct dma_map_ops arm_dma_ops = {
.alloc = arm_dma_alloc,
.free = arm_dma_free,
@@ -197,7 +192,6 @@ const struct dma_map_ops arm_dma_ops = {
.sync_single_for_device = arm_dma_sync_single_for_device,
.sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
.sync_sg_for_device = arm_dma_sync_sg_for_device,
- .mapping_error = arm_dma_mapping_error,
.dma_supported = arm_dma_supported,
};
EXPORT_SYMBOL(arm_dma_ops);
@@ -217,7 +211,6 @@ const struct dma_map_ops arm_coherent_dma_ops = {
.get_sgtable = arm_dma_get_sgtable,
.map_page = arm_coherent_dma_map_page,
.map_sg = arm_dma_map_sg,
- .mapping_error = arm_dma_mapping_error,
.dma_supported = arm_dma_supported,
};
EXPORT_SYMBOL(arm_coherent_dma_ops);
@@ -774,7 +767,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
gfp &= ~(__GFP_COMP);
args.gfp = gfp;
- *handle = ARM_MAPPING_ERROR;
+ *handle = DMA_MAPPING_ERROR;
allowblock = gfpflags_allow_blocking(gfp);
cma = allowblock ? dev_get_cma_area(dev) : false;
@@ -1217,7 +1210,7 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
if (i == mapping->nr_bitmaps) {
if (extend_iommu_mapping(mapping)) {
spin_unlock_irqrestore(&mapping->lock, flags);
- return ARM_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
start = bitmap_find_next_zero_area(mapping->bitmaps[i],
@@ -1225,7 +1218,7 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
if (start > mapping->bits) {
spin_unlock_irqrestore(&mapping->lock, flags);
- return ARM_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
bitmap_set(mapping->bitmaps[i], start, count);
@@ -1409,7 +1402,7 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
int i;
dma_addr = __alloc_iova(mapping, size);
- if (dma_addr == ARM_MAPPING_ERROR)
+ if (dma_addr == DMA_MAPPING_ERROR)
return dma_addr;
iova = dma_addr;
@@ -1436,7 +1429,7 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
fail:
iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
__free_iova(mapping, dma_addr, size);
- return ARM_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
@@ -1497,7 +1490,7 @@ static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
return NULL;
*handle = __iommu_create_mapping(dev, &page, size, attrs);
- if (*handle == ARM_MAPPING_ERROR)
+ if (*handle == DMA_MAPPING_ERROR)
goto err_mapping;
return addr;
@@ -1525,7 +1518,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
struct page **pages;
void *addr = NULL;
- *handle = ARM_MAPPING_ERROR;
+ *handle = DMA_MAPPING_ERROR;
size = PAGE_ALIGN(size);
if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp))
@@ -1546,7 +1539,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
return NULL;
*handle = __iommu_create_mapping(dev, pages, size, attrs);
- if (*handle == ARM_MAPPING_ERROR)
+ if (*handle == DMA_MAPPING_ERROR)
goto err_buffer;
if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
@@ -1696,10 +1689,10 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
int prot;
size = PAGE_ALIGN(size);
- *handle = ARM_MAPPING_ERROR;
+ *handle = DMA_MAPPING_ERROR;
iova_base = iova = __alloc_iova(mapping, size);
- if (iova == ARM_MAPPING_ERROR)
+ if (iova == DMA_MAPPING_ERROR)
return -ENOMEM;
for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
@@ -1739,7 +1732,7 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
for (i = 1; i < nents; i++) {
s = sg_next(s);
- s->dma_address = ARM_MAPPING_ERROR;
+ s->dma_address = DMA_MAPPING_ERROR;
s->dma_length = 0;
if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
@@ -1914,7 +1907,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p
int ret, prot, len = PAGE_ALIGN(size + offset);
dma_addr = __alloc_iova(mapping, len);
- if (dma_addr == ARM_MAPPING_ERROR)
+ if (dma_addr == DMA_MAPPING_ERROR)
return dma_addr;
prot = __dma_info_to_prot(dir, attrs);
@@ -1926,7 +1919,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p
return dma_addr + offset;
fail:
__free_iova(mapping, dma_addr, len);
- return ARM_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
/**
@@ -2020,7 +2013,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev,
size_t len = PAGE_ALIGN(size + offset);
dma_addr = __alloc_iova(mapping, len);
- if (dma_addr == ARM_MAPPING_ERROR)
+ if (dma_addr == DMA_MAPPING_ERROR)
return dma_addr;
prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;
@@ -2032,7 +2025,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev,
return dma_addr + offset;
fail:
__free_iova(mapping, dma_addr, len);
- return ARM_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
/**
@@ -2105,7 +2098,6 @@ const struct dma_map_ops iommu_ops = {
.map_resource = arm_iommu_map_resource,
.unmap_resource = arm_iommu_unmap_resource,
- .mapping_error = arm_dma_mapping_error,
.dma_supported = arm_dma_supported,
};
@@ -2124,7 +2116,6 @@ const struct dma_map_ops iommu_coherent_ops = {
.map_resource = arm_iommu_map_resource,
.unmap_resource = arm_iommu_unmap_resource,
- .mapping_error = arm_dma_mapping_error,
.dma_supported = arm_dma_supported,
};
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f967101862f5..86b18c1bd33c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -23,7 +23,6 @@ config ARM64
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_MEMORY
- select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
@@ -81,7 +80,7 @@ config ARM64
select CPU_PM if (SUSPEND || CPU_IDLE)
select CRC32
select DCACHE_WORD_ACCESS
- select DMA_DIRECT_OPS
+ select DMA_DIRECT_REMAP
select EDAC_SUPPORT
select FRAME_POINTER
select GENERIC_ALLOCATOR
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index c41f3fb1446c..95dbf3ef735a 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -24,15 +24,9 @@
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
-extern const struct dma_map_ops dummy_dma_ops;
-
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
- /*
- * We expect no ISA devices, and all other DMA masters are expected to
- * have someone call arch_setup_dma_ops at device creation time.
- */
- return &dummy_dma_ops;
+ return NULL;
}
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a53704406099..fb0908456a1f 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -33,113 +33,6 @@
#include <asm/cacheflush.h>
-static struct gen_pool *atomic_pool __ro_after_init;
-
-#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
-static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
-
-static int __init early_coherent_pool(char *p)
-{
- atomic_pool_size = memparse(p, &p);
- return 0;
-}
-early_param("coherent_pool", early_coherent_pool);
-
-static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
-{
- unsigned long val;
- void *ptr = NULL;
-
- if (!atomic_pool) {
- WARN(1, "coherent pool not initialised!\n");
- return NULL;
- }
-
- val = gen_pool_alloc(atomic_pool, size);
- if (val) {
- phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
-
- *ret_page = phys_to_page(phys);
- ptr = (void *)val;
- memset(ptr, 0, size);
- }
-
- return ptr;
-}
-
-static bool __in_atomic_pool(void *start, size_t size)
-{
- return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
-}
-
-static int __free_from_pool(void *start, size_t size)
-{
- if (!__in_atomic_pool(start, size))
- return 0;
-
- gen_pool_free(atomic_pool, (unsigned long)start, size);
-
- return 1;
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t flags, unsigned long attrs)
-{
- struct page *page;
- void *ptr, *coherent_ptr;
- pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
-
- size = PAGE_ALIGN(size);
-
- if (!gfpflags_allow_blocking(flags)) {
- struct page *page = NULL;
- void *addr = __alloc_from_pool(size, &page, flags);
-
- if (addr)
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
-
- return addr;
- }
-
- ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
- if (!ptr)
- goto no_mem;
-
- /* remove any dirty cache lines on the kernel alias */
- __dma_flush_area(ptr, size);
-
- /* create a coherent mapping */
- page = virt_to_page(ptr);
- coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
- prot, __builtin_return_address(0));
- if (!coherent_ptr)
- goto no_map;
-
- return coherent_ptr;
-
-no_map:
- dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
-no_mem:
- return NULL;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) {
- void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
-
- vunmap(vaddr);
- dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
- }
-}
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
- dma_addr_t dma_addr)
-{
- return __phys_to_pfn(dma_to_phys(dev, dma_addr));
-}
-
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs)
{
@@ -160,6 +53,11 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
__dma_unmap_area(phys_to_virt(paddr), size, dir);
}
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+ __dma_flush_area(page_address(page), size);
+}
+
#ifdef CONFIG_IOMMU_DMA
static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
struct page *page, size_t size)
@@ -191,167 +89,13 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
}
#endif /* CONFIG_IOMMU_DMA */
-static int __init atomic_pool_init(void)
-{
- pgprot_t prot = __pgprot(PROT_NORMAL_NC);
- unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
- struct page *page;
- void *addr;
- unsigned int pool_size_order = get_order(atomic_pool_size);
-
- if (dev_get_cma_area(NULL))
- page = dma_alloc_from_contiguous(NULL, nr_pages,
- pool_size_order, false);
- else
- page = alloc_pages(GFP_DMA32, pool_size_order);
-
- if (page) {
- int ret;
- void *page_addr = page_address(page);
-
- memset(page_addr, 0, atomic_pool_size);
- __dma_flush_area(page_addr, atomic_pool_size);
-
- atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
- if (!atomic_pool)
- goto free_page;
-
- addr = dma_common_contiguous_remap(page, atomic_pool_size,
- VM_USERMAP, prot, atomic_pool_init);
-
- if (!addr)
- goto destroy_genpool;
-
- ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
- page_to_phys(page),
- atomic_pool_size, -1);
- if (ret)
- goto remove_mapping;
-
- gen_pool_set_algo(atomic_pool,
- gen_pool_first_fit_order_align,
- NULL);
-
- pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
- atomic_pool_size / 1024);
- return 0;
- }
- goto out;
-
-remove_mapping:
- dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
-destroy_genpool:
- gen_pool_destroy(atomic_pool);
- atomic_pool = NULL;
-free_page:
- if (!dma_release_from_contiguous(NULL, page, nr_pages))
- __free_pages(page, pool_size_order);
-out:
- pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
- atomic_pool_size / 1024);
- return -ENOMEM;
-}
-
-/********************************************
- * The following APIs are for dummy DMA ops *
- ********************************************/
-
-static void *__dummy_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs)
-{
- return NULL;
-}
-
-static void __dummy_free(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
-}
-
-static int __dummy_mmap(struct device *dev,
- struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
-{
- return -ENXIO;
-}
-
-static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- return 0;
-}
-
-static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
-}
-
-static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
- int nelems, enum dma_data_direction dir,
- unsigned long attrs)
-{
- return 0;
-}
-
-static void __dummy_unmap_sg(struct device *dev,
- struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
-}
-
-static void __dummy_sync_single(struct device *dev,
- dma_addr_t dev_addr, size_t size,
- enum dma_data_direction dir)
-{
-}
-
-static void __dummy_sync_sg(struct device *dev,
- struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir)
-{
-}
-
-static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-{
- return 1;
-}
-
-static int __dummy_dma_supported(struct device *hwdev, u64 mask)
-{
- return 0;
-}
-
-const struct dma_map_ops dummy_dma_ops = {
- .alloc = __dummy_alloc,
- .free = __dummy_free,
- .mmap = __dummy_mmap,
- .map_page = __dummy_map_page,
- .unmap_page = __dummy_unmap_page,
- .map_sg = __dummy_map_sg,
- .unmap_sg = __dummy_unmap_sg,
- .sync_single_for_cpu = __dummy_sync_single,
- .sync_single_for_device = __dummy_sync_single,
- .sync_sg_for_cpu = __dummy_sync_sg,
- .sync_sg_for_device = __dummy_sync_sg,
- .mapping_error = __dummy_mapping_error,
- .dma_supported = __dummy_dma_supported,
-};
-EXPORT_SYMBOL(dummy_dma_ops);
-
static int __init arm64_dma_init(void)
{
WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
TAINT_CPU_OUT_OF_SPEC,
"ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
ARCH_DMA_MINALIGN, cache_line_size());
-
- return atomic_pool_init();
+ return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
}
arch_initcall(arm64_dma_init);
@@ -397,17 +141,17 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
page = alloc_pages(gfp, get_order(size));
addr = page ? page_address(page) : NULL;
} else {
- addr = __alloc_from_pool(size, &page, gfp);
+ addr = dma_alloc_from_pool(size, &page, gfp);
}
if (!addr)
return NULL;
*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
- if (iommu_dma_mapping_error(dev, *handle)) {
+ if (*handle == DMA_MAPPING_ERROR) {
if (coherent)
__free_pages(page, get_order(size));
else
- __free_from_pool(addr, size);
+ dma_free_from_pool(addr, size);
addr = NULL;
}
} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
@@ -420,7 +164,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
return NULL;
*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
- if (iommu_dma_mapping_error(dev, *handle)) {
+ if (*handle == DMA_MAPPING_ERROR) {
dma_release_from_contiguous(dev, page,
size >> PAGE_SHIFT);
return NULL;
@@ -471,9 +215,9 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
* coherent devices.
* Hence how dodgy the below logic looks...
*/
- if (__in_atomic_pool(cpu_addr, size)) {
+ if (dma_in_atomic_pool(cpu_addr, size)) {
iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
- __free_from_pool(cpu_addr, size);
+ dma_free_from_pool(cpu_addr, size);
} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
struct page *page = vmalloc_to_page(cpu_addr);
@@ -580,7 +324,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- !iommu_dma_mapping_error(dev, dev_addr))
+ dev_addr != DMA_MAPPING_ERROR)
__dma_map_area(page_address(page) + offset, size, dir);
return dev_addr;
@@ -663,7 +407,6 @@ static const struct dma_map_ops iommu_dma_ops = {
.sync_sg_for_device = __iommu_sync_sg_for_device,
.map_resource = iommu_dma_map_resource,
.unmap_resource = iommu_dma_unmap_resource,
- .mapping_error = iommu_dma_mapping_error,
};
static int __init __iommu_dma_init(void)
@@ -719,9 +462,6 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{
- if (!dev->dma_ops)
- dev->dma_ops = &swiotlb_dma_ops;
-
dev->dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 84420109113d..456e154674d1 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -9,7 +9,6 @@ config C6X
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select CLKDEV_LOOKUP
- select DMA_DIRECT_OPS
select GENERIC_ATOMIC64
select GENERIC_IRQ_SHOW
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c
index 01305c787201..75b79571732c 100644
--- a/arch/c6x/mm/dma-coherent.c
+++ b/arch/c6x/mm/dma-coherent.c
@@ -78,6 +78,7 @@ static void __free_dma_pages(u32 addr, int order)
void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
gfp_t gfp, unsigned long attrs)
{
+ void *ret;
u32 paddr;
int order;
@@ -94,7 +95,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (!paddr)
return NULL;
- return phys_to_virt(paddr);
+ ret = phys_to_virt(paddr);
+ memset(ret, 0, 1 << order);
+ return ret;
}
/*
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index cb64f8dacd08..37bed8aadf95 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -7,8 +7,7 @@ config CSKY
select COMMON_CLK
select CLKSRC_MMIO
select CLKSRC_OF
- select DMA_DIRECT_OPS
- select DMA_NONCOHERENT_OPS
+ select DMA_DIRECT_REMAP
select IRQ_DOMAIN
select HANDLE_DOMAIN_IRQ
select DW_APB_TIMER_OF
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index 85437b21e045..80783bb71c5c 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -14,73 +14,13 @@
#include <linux/version.h>
#include <asm/cache.h>
-static struct gen_pool *atomic_pool;
-static size_t atomic_pool_size __initdata = SZ_256K;
-
-static int __init early_coherent_pool(char *p)
-{
- atomic_pool_size = memparse(p, &p);
- return 0;
-}
-early_param("coherent_pool", early_coherent_pool);
-
static int __init atomic_pool_init(void)
{
- struct page *page;
- size_t size = atomic_pool_size;
- void *ptr;
- int ret;
-
- atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
- if (!atomic_pool)
- BUG();
-
- page = alloc_pages(GFP_KERNEL | GFP_DMA, get_order(size));
- if (!page)
- BUG();
-
- ptr = dma_common_contiguous_remap(page, size, VM_ALLOC,
- pgprot_noncached(PAGE_KERNEL),
- __builtin_return_address(0));
- if (!ptr)
- BUG();
-
- ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
- page_to_phys(page), atomic_pool_size, -1);
- if (ret)
- BUG();
-
- gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
-
- pr_info("DMA: preallocated %zu KiB pool for atomic coherent pool\n",
- atomic_pool_size / 1024);
-
- pr_info("DMA: vaddr: 0x%x phy: 0x%lx,\n", (unsigned int)ptr,
- page_to_phys(page));
-
- return 0;
+ return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
}
postcore_initcall(atomic_pool_init);
-static void *csky_dma_alloc_atomic(struct device *dev, size_t size,
- dma_addr_t *dma_handle)
-{
- unsigned long addr;
-
- addr = gen_pool_alloc(atomic_pool, size);
- if (addr)
- *dma_handle = gen_pool_virt_to_phys(atomic_pool, addr);
-
- return (void *)addr;
-}
-
-static void csky_dma_free_atomic(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- gen_pool_free(atomic_pool, (unsigned long)vaddr, size);
-}
-
-static void __dma_clear_buffer(struct page *page, size_t size)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
if (PageHighMem(page)) {
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -107,84 +47,6 @@ static void __dma_clear_buffer(struct page *page, size_t size)
}
}
-static void *csky_dma_alloc_nonatomic(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- unsigned long attrs)
-{
- void *vaddr;
- struct page *page;
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- if (DMA_ATTR_NON_CONSISTENT & attrs) {
- pr_err("csky %s can't support DMA_ATTR_NON_CONSISTENT.\n", __func__);
- return NULL;
- }
-
- if (IS_ENABLED(CONFIG_DMA_CMA))
- page = dma_alloc_from_contiguous(dev, count, get_order(size),
- gfp);
- else
- page = alloc_pages(gfp, get_order(size));
-
- if (!page) {
- pr_err("csky %s no more free pages.\n", __func__);
- return NULL;
- }
-
- *dma_handle = page_to_phys(page);
-
- __dma_clear_buffer(page, size);
-
- if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
- return page;
-
- vaddr = dma_common_contiguous_remap(page, PAGE_ALIGN(size), VM_USERMAP,
- pgprot_noncached(PAGE_KERNEL), __builtin_return_address(0));
- if (!vaddr)
- BUG();
-
- return vaddr;
-}
-
-static void csky_dma_free_nonatomic(
- struct device *dev,
- size_t size,
- void *vaddr,
- dma_addr_t dma_handle,
- unsigned long attrs
- )
-{
- struct page *page = phys_to_page(dma_handle);
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- if ((unsigned int)vaddr >= VMALLOC_START)
- dma_common_free_remap(vaddr, size, VM_USERMAP);
-
- if (IS_ENABLED(CONFIG_DMA_CMA))
- dma_release_from_contiguous(dev, page, count);
- else
- __free_pages(page, get_order(size));
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- if (gfpflags_allow_blocking(gfp))
- return csky_dma_alloc_nonatomic(dev, size, dma_handle, gfp,
- attrs);
- else
- return csky_dma_alloc_atomic(dev, size, dma_handle);
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- if (!addr_in_gen_pool(atomic_pool, (unsigned int) vaddr, size))
- csky_dma_free_nonatomic(dev, size, vaddr, dma_handle, attrs);
- else
- csky_dma_free_atomic(dev, size, vaddr, dma_handle, attrs);
-}
-
static inline void cache_op(phys_addr_t paddr, size_t size,
void (*fn)(unsigned long start, unsigned long end))
{
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index d19c6b16cd5d..6472a0685470 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -22,7 +22,6 @@ config H8300
select HAVE_ARCH_KGDB
select HAVE_ARCH_HASH
select CPU_NO_EFFICIENT_FFS
- select DMA_DIRECT_OPS
config CPU_BIG_ENDIAN
def_bool y
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 2b688af379e6..d71036c598de 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -31,7 +31,6 @@ config HEXAGON
select GENERIC_CLOCKEVENTS_BROADCAST
select MODULES_USE_ELF_RELA
select GENERIC_CPU_DEVICES
- select DMA_DIRECT_OPS
---help---
Qualcomm Hexagon is a processor architecture designed for high
performance and low power across a wide variety of applications.
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 36773def6920..cbf6c67c7166 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,8 +28,8 @@ config IA64
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_VIRT_CPU_ACCOUNTING
- select ARCH_HAS_DMA_MARK_CLEAN
- select ARCH_HAS_SG_CHAIN
+ select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
select VIRT_TO_BUS
select ARCH_DISCARD_MEMBLOCK
select GENERIC_IRQ_PROBE
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 58969039bed2..8840ed97712f 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -38,7 +38,7 @@ static inline int use_swiotlb(struct device *dev)
const struct dma_map_ops *hwsw_dma_get_ops(struct device *dev)
{
if (use_swiotlb(dev))
- return &swiotlb_dma_ops;
+ return NULL;
return &sba_dma_ops;
}
EXPORT_SYMBOL(hwsw_dma_get_ops);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index e8a93b07283e..5a361e51cb1e 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -907,11 +907,12 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
}
/**
- * sba_map_single_attrs - map one buffer and return IOVA for DMA
+ * sba_map_page - map one buffer and return IOVA for DMA
* @dev: instance of PCI owned by the driver that's asking.
- * @addr: driver buffer to map.
- * @size: number of bytes to map in driver buffer.
- * @dir: R/W or both.
+ * @page: page to map
+ * @poff: offset into page
+ * @size: number of bytes to map
+ * @dir: dma direction
* @attrs: optional dma attributes
*
* See Documentation/DMA-API-HOWTO.txt
@@ -944,7 +945,7 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page,
** Device is bit capable of DMA'ing to the buffer...
** just return the PCI address of ptr
*/
- DBG_BYPASS("sba_map_single_attrs() bypass mask/addr: "
+ DBG_BYPASS("sba_map_page() bypass mask/addr: "
"0x%lx/0x%lx\n",
to_pci_dev(dev)->dma_mask, pci_addr);
return pci_addr;
@@ -966,14 +967,14 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page,
#ifdef ASSERT_PDIR_SANITY
spin_lock_irqsave(&ioc->res_lock, flags);
- if (sba_check_pdir(ioc,"Check before sba_map_single_attrs()"))
+ if (sba_check_pdir(ioc,"Check before sba_map_page()"))
panic("Sanity check failed");
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
pide = sba_alloc_range(ioc, dev, size);
if (pide < 0)
- return 0;
+ return DMA_MAPPING_ERROR;
iovp = (dma_addr_t) pide << iovp_shift;
@@ -997,20 +998,12 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page,
/* form complete address */
#ifdef ASSERT_PDIR_SANITY
spin_lock_irqsave(&ioc->res_lock, flags);
- sba_check_pdir(ioc,"Check after sba_map_single_attrs()");
+ sba_check_pdir(ioc,"Check after sba_map_page()");
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
return SBA_IOVA(ioc, iovp, offset);
}
-static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- return sba_map_page(dev, virt_to_page(addr),
- (unsigned long)addr & ~PAGE_MASK, size, dir, attrs);
-}
-
#ifdef ENABLE_MARK_CLEAN
static SBA_INLINE void
sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
@@ -1036,7 +1029,7 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
#endif
/**
- * sba_unmap_single_attrs - unmap one IOVA and free resources
+ * sba_unmap_page - unmap one IOVA and free resources
* @dev: instance of PCI owned by the driver that's asking.
* @iova: IOVA of driver buffer previously mapped.
* @size: number of bytes mapped in driver buffer.
@@ -1063,7 +1056,7 @@ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
/*
** Address does not fall w/in IOVA, must be bypassing
*/
- DBG_BYPASS("sba_unmap_single_attrs() bypass addr: 0x%lx\n",
+ DBG_BYPASS("sba_unmap_page() bypass addr: 0x%lx\n",
iova);
#ifdef ENABLE_MARK_CLEAN
@@ -1114,12 +1107,6 @@ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
#endif /* DELAYED_RESOURCE_CNT == 0 */
}
-void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
-{
- sba_unmap_page(dev, iova, size, dir, attrs);
-}
-
/**
* sba_alloc_coherent - allocate/map shared mem for DMA
* @dev: instance of PCI owned by the driver that's asking.
@@ -1132,30 +1119,24 @@ static void *
sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flags, unsigned long attrs)
{
+ struct page *page;
struct ioc *ioc;
+ int node = -1;
void *addr;
ioc = GET_IOC(dev);
ASSERT(ioc);
-
#ifdef CONFIG_NUMA
- {
- struct page *page;
-
- page = alloc_pages_node(ioc->node, flags, get_order(size));
- if (unlikely(!page))
- return NULL;
-
- addr = page_address(page);
- }
-#else
- addr = (void *) __get_free_pages(flags, get_order(size));
+ node = ioc->node;
#endif
- if (unlikely(!addr))
+
+ page = alloc_pages_node(node, flags, get_order(size));
+ if (unlikely(!page))
return NULL;
+ addr = page_address(page);
memset(addr, 0, size);
- *dma_handle = virt_to_phys(addr);
+ *dma_handle = page_to_phys(page);
#ifdef ALLOW_IOV_BYPASS
ASSERT(dev->coherent_dma_mask);
@@ -1174,9 +1155,10 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
* If device can't bypass or bypass is disabled, pass the 32bit fake
* device to map single to get an iova mapping.
*/
- *dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr,
- size, 0, 0);
-
+ *dma_handle = sba_map_page(&ioc->sac_only_dev->dev, page, 0, size,
+ DMA_BIDIRECTIONAL, 0);
+ if (dma_mapping_error(dev, *dma_handle))
+ return NULL;
return addr;
}
@@ -1193,7 +1175,7 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
static void sba_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
- sba_unmap_single_attrs(dev, dma_handle, size, 0, 0);
+ sba_unmap_page(dev, dma_handle, size, 0, 0);
free_pages((unsigned long) vaddr, get_order(size));
}
@@ -1483,7 +1465,10 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
/* Fast path single entry scatterlists. */
if (nents == 1) {
sglist->dma_length = sglist->length;
- sglist->dma_address = sba_map_single_attrs(dev, sba_sg_address(sglist), sglist->length, dir, attrs);
+ sglist->dma_address = sba_map_page(dev, sg_page(sglist),
+ sglist->offset, sglist->length, dir, attrs);
+ if (dma_mapping_error(dev, sglist->dma_address))
+ return 0;
return 1;
}
@@ -1572,8 +1557,8 @@ static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
while (nents && sglist->dma_length) {
- sba_unmap_single_attrs(dev, sglist->dma_address,
- sglist->dma_length, dir, attrs);
+ sba_unmap_page(dev, sglist->dma_address, sglist->dma_length,
+ dir, attrs);
sglist = sg_next(sglist);
nents--;
}
@@ -2080,8 +2065,6 @@ static int __init acpi_sba_ioc_init_acpi(void)
/* This has to run before acpi_scan_init(). */
arch_initcall(acpi_sba_ioc_init_acpi);
-extern const struct dma_map_ops swiotlb_dma_ops;
-
static int __init
sba_init(void)
{
@@ -2095,7 +2078,7 @@ sba_init(void)
* a successful kdump kernel boot is to use the swiotlb.
*/
if (is_kdump_kernel()) {
- dma_ops = &swiotlb_dma_ops;
+ dma_ops = NULL;
if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
panic("Unable to initialize software I/O TLB:"
" Try machvec=dig boot option");
@@ -2117,7 +2100,7 @@ sba_init(void)
* If we didn't find something sba_iommu can claim, we
* need to setup the swiotlb and switch to the dig machvec.
*/
- dma_ops = &swiotlb_dma_ops;
+ dma_ops = NULL;
if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
panic("Unable to find SBA IOMMU or initialize "
"software I/O TLB: Try machvec=dig boot option");
@@ -2170,11 +2153,6 @@ static int sba_dma_supported (struct device *dev, u64 mask)
return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL);
}
-static int sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return 0;
-}
-
__setup("nosbagart", nosbagart);
static int __init
@@ -2208,7 +2186,6 @@ const struct dma_map_ops sba_dma_ops = {
.map_sg = sba_map_sg_attrs,
.unmap_sg = sba_unmap_sg_attrs,
.dma_supported = sba_dma_supported,
- .mapping_error = sba_dma_mapping_error,
};
void sba_dma_init(void)
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index 7e1426e76d96..f86844fc0725 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -347,7 +347,7 @@ static struct scsi_host_template driver_template = {
.sg_tablesize = SG_ALL,
.max_sectors = 1024,
.cmd_per_lun = SIMSCSI_REQ_QUEUE_LEN,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
};
static int __init
diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
index 7a471d8d67d4..ad7d9963de34 100644
--- a/arch/ia64/kernel/dma-mapping.c
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/export.h>
@@ -16,9 +16,26 @@ const struct dma_map_ops *dma_get_ops(struct device *dev)
EXPORT_SYMBOL(dma_get_ops);
#ifdef CONFIG_SWIOTLB
+void *arch_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+{
+ return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
+}
+
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_addr, unsigned long attrs)
+{
+ dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
+}
+
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+ dma_addr_t dma_addr)
+{
+ return page_to_pfn(virt_to_page(cpu_addr));
+}
+
void __init swiotlb_dma_init(void)
{
- dma_ops = &swiotlb_dma_ops;
swiotlb_init(1);
}
#endif
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 904fe55e10fc..055382622f07 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/dma-noncoherent.h>
#include <linux/efi.h>
#include <linux/elf.h>
#include <linux/memblock.h>
@@ -71,18 +72,14 @@ __ia64_sync_icache_dcache (pte_t pte)
* DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
* flush them when they get mapped into an executable vm-area.
*/
-void
-dma_mark_clean(void *addr, size_t size)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- unsigned long pg_addr, end;
-
- pg_addr = PAGE_ALIGN((unsigned long) addr);
- end = (unsigned long) addr + size;
- while (pg_addr + PAGE_SIZE <= end) {
- struct page *page = virt_to_page(pg_addr);
- set_bit(PG_arch_1, &page->flags);
- pg_addr += PAGE_SIZE;
- }
+ unsigned long pfn = PHYS_PFN(paddr);
+
+ do {
+ set_bit(PG_arch_1, &pfn_to_page(pfn)->flags);
+ } while (++pfn <= PHYS_PFN(paddr + size - 1));
}
inline void
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 4ce4ee4ef9f2..b7d42e4edc1f 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -196,7 +196,7 @@ static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page,
if (!dma_addr) {
printk(KERN_ERR "%s: out of ATEs\n", __func__);
- return 0;
+ return DMA_MAPPING_ERROR;
}
return dma_addr;
}
@@ -314,11 +314,6 @@ static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl,
return nhwentries;
}
-static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return 0;
-}
-
static u64 sn_dma_get_required_mask(struct device *dev)
{
return DMA_BIT_MASK(64);
@@ -441,7 +436,6 @@ static struct dma_map_ops sn_dma_ops = {
.unmap_page = sn_dma_unmap_page,
.map_sg = sn_dma_map_sg,
.unmap_sg = sn_dma_unmap_sg,
- .mapping_error = sn_dma_mapping_error,
.dma_supported = sn_dma_supported,
.get_required_mask = sn_dma_get_required_mask,
};
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 1bc9f1ba759a..8a5868e9a3a0 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -26,7 +26,6 @@ config M68K
select MODULES_USE_ELF_RELA
select OLD_SIGSUSPEND3
select OLD_SIGACTION
- select DMA_DIRECT_OPS if HAS_DMA
select ARCH_DISCARD_MEMBLOCK
config CPU_BIG_ENDIAN
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index e99993c57d6b..b4aa853051bd 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -32,7 +32,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
size = PAGE_ALIGN(size);
order = get_order(size);
- page = alloc_pages(flag, order);
+ page = alloc_pages(flag | __GFP_ZERO, order);
if (!page)
return NULL;
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index effed2efd306..eda9e2315ef5 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -12,7 +12,6 @@ config MICROBLAZE
select TIMER_OF
select CLONE_BACKWARDS3
select COMMON_CLK
- select DMA_DIRECT_OPS
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_DEVICES
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index 45e0a1aa9357..3002cbca3059 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -81,7 +81,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
size = PAGE_ALIGN(size);
order = get_order(size);
- vaddr = __get_free_pages(gfp, order);
+ vaddr = __get_free_pages(gfp | __GFP_ZERO, order);
if (!vaddr)
return NULL;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index e49b5a0c8585..63183a8454d6 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -18,7 +18,6 @@ config MIPS
select CLONE_BACKWARDS
select CPU_NO_EFFICIENT_FFS if (TARGET_ISA_REV < 1)
select CPU_PM if CPU_IDLE
- select DMA_DIRECT_OPS
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CLOCKEVENTS
select GENERIC_CMOS_UPDATE
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index b4c477eb46ce..20dfaad3a55d 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -10,10 +10,8 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
#if defined(CONFIG_MACH_JAZZ)
return &jazz_dma_ops;
-#elif defined(CONFIG_SWIOTLB)
- return &swiotlb_dma_ops;
#else
- return &dma_direct_ops;
+ return NULL;
#endif
}
diff --git a/arch/mips/include/asm/jazzdma.h b/arch/mips/include/asm/jazzdma.h
index d913439c738c..d13f940022d5 100644
--- a/arch/mips/include/asm/jazzdma.h
+++ b/arch/mips/include/asm/jazzdma.h
@@ -40,12 +40,6 @@ extern int vdma_get_enable(int channel);
#define VDMA_OFFSET(a) ((unsigned int)(a) & (VDMA_PAGESIZE-1))
/*
- * error code returned by vdma_alloc()
- * (See also arch/mips/kernel/jazzdma.c)
- */
-#define VDMA_ERROR 0xffffffff
-
-/*
* VDMA pagetable entry description
*/
typedef volatile struct VDMA_PGTBL_ENTRY {
diff --git a/arch/mips/include/asm/mach-jz4740/jz4740_mmc.h b/arch/mips/include/asm/mach-jz4740/jz4740_mmc.h
index e9cc62cfac99..9a7de47c7c79 100644
--- a/arch/mips/include/asm/mach-jz4740/jz4740_mmc.h
+++ b/arch/mips/include/asm/mach-jz4740/jz4740_mmc.h
@@ -3,12 +3,8 @@
#define __LINUX_MMC_JZ4740_MMC
struct jz4740_mmc_platform_data {
- int gpio_power;
- int gpio_card_detect;
- int gpio_read_only;
unsigned card_detect_active_low:1;
unsigned read_only_active_low:1;
- unsigned power_active_low:1;
unsigned data_1bit:1;
};
diff --git a/arch/mips/include/asm/mach-rc32434/rb.h b/arch/mips/include/asm/mach-rc32434/rb.h
index aac8ce8902e7..5dfd4d66d6fc 100644
--- a/arch/mips/include/asm/mach-rc32434/rb.h
+++ b/arch/mips/include/asm/mach-rc32434/rb.h
@@ -71,12 +71,6 @@ struct korina_device {
struct net_device *dev;
};
-struct cf_device {
- int gpio_pin;
- void *dev;
- struct gendisk *gd;
-};
-
struct mpmc_device {
unsigned char state;
spinlock_t lock;
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index 4c41ed0a637e..6256d35dbf4d 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -104,12 +104,12 @@ unsigned long vdma_alloc(unsigned long paddr, unsigned long size)
if (vdma_debug)
printk("vdma_alloc: Invalid physical address: %08lx\n",
paddr);
- return VDMA_ERROR; /* invalid physical address */
+ return DMA_MAPPING_ERROR; /* invalid physical address */
}
if (size > 0x400000 || size == 0) {
if (vdma_debug)
printk("vdma_alloc: Invalid size: %08lx\n", size);
- return VDMA_ERROR; /* invalid physical address */
+ return DMA_MAPPING_ERROR; /* invalid physical address */
}
spin_lock_irqsave(&vdma_lock, flags);
@@ -123,7 +123,7 @@ unsigned long vdma_alloc(unsigned long paddr, unsigned long size)
first < VDMA_PGTBL_ENTRIES) first++;
if (first + pages > VDMA_PGTBL_ENTRIES) { /* nothing free */
spin_unlock_irqrestore(&vdma_lock, flags);
- return VDMA_ERROR;
+ return DMA_MAPPING_ERROR;
}
last = first + 1;
@@ -569,7 +569,7 @@ static void *jazz_dma_alloc(struct device *dev, size_t size,
return NULL;
*dma_handle = vdma_alloc(virt_to_phys(ret), size);
- if (*dma_handle == VDMA_ERROR) {
+ if (*dma_handle == DMA_MAPPING_ERROR) {
dma_direct_free_pages(dev, size, ret, *dma_handle, attrs);
return NULL;
}
@@ -620,7 +620,7 @@ static int jazz_dma_map_sg(struct device *dev, struct scatterlist *sglist,
arch_sync_dma_for_device(dev, sg_phys(sg), sg->length,
dir);
sg->dma_address = vdma_alloc(sg_phys(sg), sg->length);
- if (sg->dma_address == VDMA_ERROR)
+ if (sg->dma_address == DMA_MAPPING_ERROR)
return 0;
sg_dma_len(sg) = sg->length;
}
@@ -674,11 +674,6 @@ static void jazz_dma_sync_sg_for_cpu(struct device *dev,
arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
}
-static int jazz_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == VDMA_ERROR;
-}
-
const struct dma_map_ops jazz_dma_ops = {
.alloc = jazz_dma_alloc,
.free = jazz_dma_free,
@@ -692,6 +687,5 @@ const struct dma_map_ops jazz_dma_ops = {
.sync_sg_for_device = jazz_dma_sync_sg_for_device,
.dma_supported = dma_direct_supported,
.cache_sync = arch_dma_cache_sync,
- .mapping_error = jazz_dma_mapping_error,
};
EXPORT_SYMBOL(jazz_dma_ops);
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index af0c8ace0141..6718efb400f4 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -43,9 +43,6 @@
#include "clock.h"
/* GPIOs */
-#define QI_LB60_GPIO_SD_CD JZ_GPIO_PORTD(0)
-#define QI_LB60_GPIO_SD_VCC_EN_N JZ_GPIO_PORTD(2)
-
#define QI_LB60_GPIO_KEYOUT(x) (JZ_GPIO_PORTC(10) + (x))
#define QI_LB60_GPIO_KEYIN(x) (JZ_GPIO_PORTD(18) + (x))
#define QI_LB60_GPIO_KEYIN8 JZ_GPIO_PORTD(26)
@@ -386,10 +383,16 @@ static struct platform_device qi_lb60_gpio_keys = {
};
static struct jz4740_mmc_platform_data qi_lb60_mmc_pdata = {
- .gpio_card_detect = QI_LB60_GPIO_SD_CD,
- .gpio_read_only = -1,
- .gpio_power = QI_LB60_GPIO_SD_VCC_EN_N,
- .power_active_low = 1,
+ /* Intentionally left blank */
+};
+
+static struct gpiod_lookup_table qi_lb60_mmc_gpio_table = {
+ .dev_id = "jz4740-mmc.0",
+ .table = {
+ GPIO_LOOKUP("GPIOD", 0, "cd", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("GPIOD", 2, "power", GPIO_ACTIVE_LOW),
+ { },
+ },
};
/* beeper */
@@ -500,6 +503,7 @@ static int __init qi_lb60_init_platform_devices(void)
gpiod_add_lookup_table(&qi_lb60_audio_gpio_table);
gpiod_add_lookup_table(&qi_lb60_nand_gpio_table);
gpiod_add_lookup_table(&qi_lb60_spigpio_gpio_table);
+ gpiod_add_lookup_table(&qi_lb60_mmc_gpio_table);
spi_register_board_info(qi_lb60_spi_board_info,
ARRAY_SIZE(qi_lb60_spi_board_info));
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index 2b23ad640f39..828d8cc3a5df 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -23,6 +23,7 @@
#include <linux/mtd/platnand.h>
#include <linux/mtd/mtd.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/gpio_keys.h>
#include <linux/input.h>
#include <linux/serial_8250.h>
@@ -127,14 +128,18 @@ static struct resource cf_slot0_res[] = {
}
};
-static struct cf_device cf_slot0_data = {
- .gpio_pin = CF_GPIO_NUM
+static struct gpiod_lookup_table cf_slot0_gpio_table = {
+ .dev_id = "pata-rb532-cf",
+ .table = {
+ GPIO_LOOKUP("gpio0", CF_GPIO_NUM,
+ NULL, GPIO_ACTIVE_HIGH),
+ { },
+ },
};
static struct platform_device cf_slot0 = {
.id = -1,
.name = "pata-rb532-cf",
- .dev.platform_data = &cf_slot0_data,
.resource = cf_slot0_res,
.num_resources = ARRAY_SIZE(cf_slot0_res),
};
@@ -305,6 +310,7 @@ static int __init plat_setup_devices(void)
dev_set_drvdata(&korina_dev0.dev, &korina_dev0_data);
+ gpiod_add_lookup_table(&cf_slot0_gpio_table);
return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs));
}
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7a04adacb2f0..1af6bbae7220 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -11,7 +11,6 @@ config NDS32
select CLKSRC_MMIO
select CLONE_BACKWARDS
select COMMON_CLK
- select DMA_DIRECT_OPS
select GENERIC_ATOMIC64
select GENERIC_CPU_DEVICES
select GENERIC_CLOCKEVENTS
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 7e95506e957a..f6c4b0f49997 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -4,7 +4,6 @@ config NIOS2
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_SWAP
- select DMA_DIRECT_OPS
select TIMER_OF
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 285f7d05c8ed..d0feebad5a8f 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -7,7 +7,6 @@
config OPENRISC
def_bool y
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
- select DMA_DIRECT_OPS
select OF
select OF_EARLY_FLATTREE
select IRQ_DOMAIN
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 159336adfa2f..f79457cb3741 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -89,7 +89,7 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
.mm = &init_mm
};
- page = alloc_pages_exact(size, gfp);
+ page = alloc_pages_exact(size, gfp | __GFP_ZERO);
if (!page)
return NULL;
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 92a339ee28b3..6e1b71da0e71 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -11,6 +11,7 @@ config PARISC
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_NO_SG_CHAIN
select ARCH_SUPPORTS_MEMORY_FAILURE
select RTC_CLASS
select RTC_DRV_GENERIC
@@ -184,7 +185,6 @@ config PA11
depends on PA7000 || PA7100LC || PA7200 || PA7300LC
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
- select DMA_DIRECT_OPS
select DMA_NONCOHERENT_CACHE_SYNC
config PREFETCH
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 04c48f1ef3fb..239162355b58 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -404,7 +404,7 @@ static void *pcxl_dma_alloc(struct device *dev, size_t size,
order = get_order(size);
size = 1 << (order + PAGE_SHIFT);
vaddr = pcxl_alloc_range(size);
- paddr = __get_free_pages(flag, order);
+ paddr = __get_free_pages(flag | __GFP_ZERO, order);
flush_kernel_dcache_range(paddr, size);
paddr = __pa(paddr);
map_uncached_pages(vaddr, size, paddr);
@@ -429,7 +429,7 @@ static void *pcx_dma_alloc(struct device *dev, size_t size,
if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0)
return NULL;
- addr = (void *)__get_free_pages(flag, get_order(size));
+ addr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size));
if (addr)
*dma_handle = (dma_addr_t)virt_to_phys(addr);
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 2b108ee3b217..f2cf86ac279b 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -99,10 +99,6 @@ void __init dma_ops_init(void)
case pcxl2:
pa7300lc_init();
- case pcxl: /* falls through */
- case pcxs:
- case pcxt:
- hppa_dma_ops = &dma_direct_ops;
break;
default:
break;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a1e858e42ada..50f27a656051 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -139,7 +139,6 @@ config PPC
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
- select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index dacd0f93f2b2..ebf66809f2d3 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -103,7 +103,6 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off)
}
#define HAVE_ARCH_DMA_SET_MASK 1
-extern int dma_set_mask(struct device *dev, u64 dma_mask);
extern u64 __dma_get_required_mask(struct device *dev);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index e847ff69cb2b..17524d222a7b 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -143,8 +143,6 @@ struct scatterlist;
#ifdef CONFIG_PPC64
-#define IOMMU_MAPPING_ERROR (~(dma_addr_t)0x0)
-
static inline void set_iommu_table_base(struct device *dev,
struct iommu_table *base)
{
@@ -239,8 +237,6 @@ static inline void iommu_del_device(struct device *dev)
}
#endif /* !CONFIG_IOMMU_API */
-int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr);
-
#else
static inline void *get_iommu_table_base(struct device *dev)
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 2ca6cfaebf65..9c9bcaae2f75 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -105,11 +105,6 @@ static u64 dma_iommu_get_required_mask(struct device *dev)
return mask;
}
-int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == IOMMU_MAPPING_ERROR;
-}
-
struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
@@ -120,5 +115,4 @@ struct dma_map_ops dma_iommu_ops = {
.map_page = dma_iommu_map_page,
.unmap_page = dma_iommu_unmap_page,
.get_required_mask = dma_iommu_get_required_mask,
- .mapping_error = dma_iommu_mapping_error,
};
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 678811abccfc..7d5fc9751622 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -50,16 +50,15 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
.alloc = __dma_nommu_alloc_coherent,
.free = __dma_nommu_free_coherent,
.mmap = dma_nommu_mmap_coherent,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
+ .map_sg = dma_direct_map_sg,
+ .unmap_sg = dma_direct_unmap_sg,
.dma_supported = swiotlb_dma_supported,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = dma_direct_mapping_error,
+ .map_page = dma_direct_map_page,
+ .unmap_page = dma_direct_unmap_page,
+ .sync_single_for_cpu = dma_direct_sync_single_for_cpu,
+ .sync_single_for_device = dma_direct_sync_single_for_device,
+ .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
+ .sync_sg_for_device = dma_direct_sync_sg_for_device,
.get_required_mask = swiotlb_powerpc_get_required,
};
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 9d5d109f15c0..d0625480b59e 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -198,11 +198,11 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (unlikely(npages == 0)) {
if (printk_ratelimit())
WARN_ON(1);
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
if (should_fail_iommu(dev))
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
/*
* We don't need to disable preemption here because any CPU can
@@ -278,7 +278,7 @@ again:
} else {
/* Give up */
spin_unlock_irqrestore(&(pool->lock), flags);
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
}
@@ -310,13 +310,13 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
unsigned long attrs)
{
unsigned long entry;
- dma_addr_t ret = IOMMU_MAPPING_ERROR;
+ dma_addr_t ret = DMA_MAPPING_ERROR;
int build_fail;
entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
- if (unlikely(entry == IOMMU_MAPPING_ERROR))
- return IOMMU_MAPPING_ERROR;
+ if (unlikely(entry == DMA_MAPPING_ERROR))
+ return DMA_MAPPING_ERROR;
entry += tbl->it_offset; /* Offset into real TCE table */
ret = entry << tbl->it_page_shift; /* Set the return dma address */
@@ -328,12 +328,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
/* tbl->it_ops->set() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
- * IOMMU_MAPPING_ERROR. For all other errors the functionality is
+ * DMA_MAPPING_ERROR. For all other errors the functionality is
* not altered.
*/
if (unlikely(build_fail)) {
__iommu_free(tbl, ret, npages);
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
/* Flush/invalidate TLB caches if necessary */
@@ -478,7 +478,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
/* Handle failure */
- if (unlikely(entry == IOMMU_MAPPING_ERROR)) {
+ if (unlikely(entry == DMA_MAPPING_ERROR)) {
if (!(attrs & DMA_ATTR_NO_WARN) &&
printk_ratelimit())
dev_info(dev, "iommu_alloc failed, tbl %p "
@@ -545,7 +545,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
*/
if (outcount < incount) {
outs = sg_next(outs);
- outs->dma_address = IOMMU_MAPPING_ERROR;
+ outs->dma_address = DMA_MAPPING_ERROR;
outs->dma_length = 0;
}
@@ -563,7 +563,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
npages = iommu_num_pages(s->dma_address, s->dma_length,
IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, vaddr, npages);
- s->dma_address = IOMMU_MAPPING_ERROR;
+ s->dma_address = DMA_MAPPING_ERROR;
s->dma_length = 0;
}
if (s == outs)
@@ -777,7 +777,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
unsigned long mask, enum dma_data_direction direction,
unsigned long attrs)
{
- dma_addr_t dma_handle = IOMMU_MAPPING_ERROR;
+ dma_addr_t dma_handle = DMA_MAPPING_ERROR;
void *vaddr;
unsigned long uaddr;
unsigned int npages, align;
@@ -797,7 +797,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
mask >> tbl->it_page_shift, align,
attrs);
- if (dma_handle == IOMMU_MAPPING_ERROR) {
+ if (dma_handle == DMA_MAPPING_ERROR) {
if (!(attrs & DMA_ATTR_NO_WARN) &&
printk_ratelimit()) {
dev_info(dev, "iommu_alloc failed, tbl %p "
@@ -869,7 +869,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
mask >> tbl->it_page_shift, io_order, 0);
- if (mapping == IOMMU_MAPPING_ERROR) {
+ if (mapping == DMA_MAPPING_ERROR) {
free_pages((unsigned long)ret, order);
return NULL;
}
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 12352a58072a..af2a3c15e0ec 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -654,7 +654,6 @@ static const struct dma_map_ops dma_iommu_fixed_ops = {
.dma_supported = dma_suported_and_switch,
.map_page = dma_fixed_map_page,
.unmap_page = dma_fixed_unmap_page,
- .mapping_error = dma_iommu_mapping_error,
};
static void cell_dma_dev_setup(struct device *dev)
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 93cc9eec6601..1fad4649735b 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -519,7 +519,7 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
{
struct vio_dev *viodev = to_vio_dev(dev);
struct iommu_table *tbl;
- dma_addr_t ret = IOMMU_MAPPING_ERROR;
+ dma_addr_t ret = DMA_MAPPING_ERROR;
tbl = get_iommu_table_base(dev);
if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
@@ -625,7 +625,6 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
.unmap_page = vio_dma_iommu_unmap_page,
.dma_supported = vio_dma_iommu_dma_supported,
.get_required_mask = vio_dma_get_required_mask,
- .mapping_error = dma_iommu_mapping_error,
};
/**
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index ee833e6f5ccb..106539bb914e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -19,7 +19,6 @@ config RISCV
select ARCH_WANT_FRAME_POINTERS
select CLONE_BACKWARDS
select COMMON_CLK
- select DMA_DIRECT_OPS
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_DEVICES
select GENERIC_IRQ_SHOW
diff --git a/arch/riscv/include/asm/dma-mapping.h b/arch/riscv/include/asm/dma-mapping.h
deleted file mode 100644
index 8facc1c8fa05..000000000000
--- a/arch/riscv/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#ifndef _RISCV_ASM_DMA_MAPPING_H
-#define _RISCV_ASM_DMA_MAPPING_H 1
-
-#ifdef CONFIG_SWIOTLB
-#include <linux/swiotlb.h>
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
- return &swiotlb_dma_ops;
-}
-#else
-#include <asm-generic/dma-mapping.h>
-#endif /* CONFIG_SWIOTLB */
-
-#endif /* _RISCV_ASM_DMA_MAPPING_H */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5173366af8f3..21d271d04ca6 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -73,7 +73,6 @@ config S390
select ARCH_HAS_KCOV
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_MEMORY
- select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -140,7 +139,6 @@ config S390
select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
- select DMA_DIRECT_OPS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index d387a0fbdd7e..9e52d1527f71 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -15,8 +15,6 @@
#include <linux/pci.h>
#include <asm/pci_dma.h>
-#define S390_MAPPING_ERROR (~(dma_addr_t) 0x0)
-
static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;
@@ -301,7 +299,7 @@ static dma_addr_t dma_alloc_address(struct device *dev, int size)
out_error:
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
- return S390_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
@@ -349,7 +347,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
/* This rounds up number of pages based on size and offset */
nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
dma_addr = dma_alloc_address(dev, nr_pages);
- if (dma_addr == S390_MAPPING_ERROR) {
+ if (dma_addr == DMA_MAPPING_ERROR) {
ret = -ENOSPC;
goto out_err;
}
@@ -372,7 +370,7 @@ out_free:
out_err:
zpci_err("map error:\n");
zpci_err_dma(ret, pa);
- return S390_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
@@ -406,7 +404,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
dma_addr_t map;
size = PAGE_ALIGN(size);
- page = alloc_pages(flag, get_order(size));
+ page = alloc_pages(flag | __GFP_ZERO, get_order(size));
if (!page)
return NULL;
@@ -449,7 +447,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
int ret;
dma_addr_base = dma_alloc_address(dev, nr_pages);
- if (dma_addr_base == S390_MAPPING_ERROR)
+ if (dma_addr_base == DMA_MAPPING_ERROR)
return -ENOMEM;
dma_addr = dma_addr_base;
@@ -496,7 +494,7 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
for (i = 1; i < nr_elements; i++) {
s = sg_next(s);
- s->dma_address = S390_MAPPING_ERROR;
+ s->dma_address = DMA_MAPPING_ERROR;
s->dma_length = 0;
if (s->offset || (size & ~PAGE_MASK) ||
@@ -546,11 +544,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
}
}
-static int s390_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == S390_MAPPING_ERROR;
-}
-
int zpci_dma_init_device(struct zpci_dev *zdev)
{
int rc;
@@ -675,7 +668,6 @@ const struct dma_map_ops s390_pci_dma_ops = {
.unmap_sg = s390_dma_unmap_sg,
.map_page = s390_dma_map_pages,
.unmap_page = s390_dma_unmap_pages,
- .mapping_error = s390_mapping_error,
/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index f82a4da7adf3..10fd4e9c454b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -7,7 +7,6 @@ config SUPERH
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select HAVE_PATA_PLATFORM
select CLKDEV_LOOKUP
- select DMA_DIRECT_OPS
select HAVE_IDE if HAS_IOPORT_MAP
select HAVE_MEMBLOCK_NODE_MAP
select ARCH_DISCARD_MEMBLOCK
diff --git a/arch/sh/boards/mach-dreamcast/Makefile b/arch/sh/boards/mach-dreamcast/Makefile
index 8692cb312ace..37b2452206aa 100644
--- a/arch/sh/boards/mach-dreamcast/Makefile
+++ b/arch/sh/boards/mach-dreamcast/Makefile
@@ -3,5 +3,5 @@
# Makefile for the Sega Dreamcast specific parts of the kernel
#
-obj-y := setup.o irq.o rtc.o
-
+obj-y := setup.o irq.o
+obj-$(CONFIG_RTC_DRV_GENERIC) += rtc.o
diff --git a/arch/sh/boards/mach-dreamcast/rtc.c b/arch/sh/boards/mach-dreamcast/rtc.c
index e468dcce1927..7873cd27e4e0 100644
--- a/arch/sh/boards/mach-dreamcast/rtc.c
+++ b/arch/sh/boards/mach-dreamcast/rtc.c
@@ -9,8 +9,9 @@
*/
#include <linux/time.h>
-#include <asm/rtc.h>
-#include <asm/io.h>
+#include <linux/rtc.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
/* The AICA RTC has an Epoch of 1/1/1950, so we must subtract 20 years (in
seconds) to get the standard Unix Epoch when getting the time, and add
@@ -24,13 +25,15 @@
/**
* aica_rtc_gettimeofday - Get the time from the AICA RTC
- * @ts: pointer to resulting timespec
+ * @dev: the RTC device (ignored)
+ * @tm: pointer to resulting RTC time structure
*
* Grabs the current RTC seconds counter and adjusts it to the Unix Epoch.
*/
-static void aica_rtc_gettimeofday(struct timespec *ts)
+static int aica_rtc_gettimeofday(struct device *dev, struct rtc_time *tm)
{
unsigned long val1, val2;
+ time64_t t;
do {
val1 = ((__raw_readl(AICA_RTC_SECS_H) & 0xffff) << 16) |
@@ -40,22 +43,26 @@ static void aica_rtc_gettimeofday(struct timespec *ts)
(__raw_readl(AICA_RTC_SECS_L) & 0xffff);
} while (val1 != val2);
- ts->tv_sec = val1 - TWENTY_YEARS;
+ /* normalize to 1970..2106 time range */
+ t = (u32)(val1 - TWENTY_YEARS);
- /* Can't get nanoseconds with just a seconds counter. */
- ts->tv_nsec = 0;
+ rtc_time64_to_tm(t, tm);
+
+ return 0;
}
/**
* aica_rtc_settimeofday - Set the AICA RTC to the current time
- * @secs: contains the time_t to set
+ * @dev: the RTC device (ignored)
+ * @tm: pointer to new RTC time structure
*
* Adjusts the given @tv to the AICA Epoch and sets the RTC seconds counter.
*/
-static int aica_rtc_settimeofday(const time_t secs)
+static int aica_rtc_settimeofday(struct device *dev, struct rtc_time *tm)
{
unsigned long val1, val2;
- unsigned long adj = secs + TWENTY_YEARS;
+ time64_t secs = rtc_tm_to_time64(tm);
+ u32 adj = secs + TWENTY_YEARS;
do {
__raw_writel((adj & 0xffff0000) >> 16, AICA_RTC_SECS_H);
@@ -71,9 +78,19 @@ static int aica_rtc_settimeofday(const time_t secs)
return 0;
}
-void aica_time_init(void)
+static const struct rtc_class_ops rtc_generic_ops = {
+ .read_time = aica_rtc_gettimeofday,
+ .set_time = aica_rtc_settimeofday,
+};
+
+static int __init aica_time_init(void)
{
- rtc_sh_get_time = aica_rtc_gettimeofday;
- rtc_sh_set_time = aica_rtc_settimeofday;
-}
+ struct platform_device *pdev;
+
+ pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+ &rtc_generic_ops,
+ sizeof(rtc_generic_ops));
+ return PTR_ERR_OR_ZERO(pdev);
+}
+arch_initcall(aica_time_init);
diff --git a/arch/sh/boards/mach-dreamcast/setup.c b/arch/sh/boards/mach-dreamcast/setup.c
index 54bbdb32f2d3..2d966c1c2cc1 100644
--- a/arch/sh/boards/mach-dreamcast/setup.c
+++ b/arch/sh/boards/mach-dreamcast/setup.c
@@ -29,7 +29,6 @@
static void __init dreamcast_setup(char **cmdline_p)
{
- board_time_init = aica_time_init;
}
static struct sh_machine_vector mv_dreamcast __initmv = {
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 46467f82bf2c..22b4106b8084 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -693,13 +693,20 @@ static struct gpiod_lookup_table sdhi0_power_gpiod_table = {
},
};
+static struct gpiod_lookup_table sdhi0_gpio_table = {
+ .dev_id = "sh_mobile_sdhi.0",
+ .table = {
+ /* Card detect */
+ GPIO_LOOKUP("sh7724_pfc", GPIO_PTY7, "cd", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static struct tmio_mmc_data sdhi0_info = {
.chan_priv_tx = (void *)SHDMA_SLAVE_SDHI0_TX,
.chan_priv_rx = (void *)SHDMA_SLAVE_SDHI0_RX,
.capabilities = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
MMC_CAP_NEEDS_POLL,
- .flags = TMIO_MMC_USE_GPIO_CD,
- .cd_gpio = GPIO_PTY7,
};
static struct resource sdhi0_resources[] = {
@@ -732,8 +739,15 @@ static struct tmio_mmc_data sdhi1_info = {
.chan_priv_rx = (void *)SHDMA_SLAVE_SDHI1_RX,
.capabilities = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
MMC_CAP_NEEDS_POLL,
- .flags = TMIO_MMC_USE_GPIO_CD,
- .cd_gpio = GPIO_PTW7,
+};
+
+static struct gpiod_lookup_table sdhi1_gpio_table = {
+ .dev_id = "sh_mobile_sdhi.1",
+ .table = {
+ /* Card detect */
+ GPIO_LOOKUP("sh7724_pfc", GPIO_PTW7, "cd", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct resource sdhi1_resources[] = {
@@ -773,9 +787,19 @@ static struct mmc_spi_platform_data mmc_spi_info = {
.caps2 = MMC_CAP2_RO_ACTIVE_HIGH,
.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, /* 3.3V only */
.setpower = mmc_spi_setpower,
- .flags = MMC_SPI_USE_CD_GPIO | MMC_SPI_USE_RO_GPIO,
- .cd_gpio = GPIO_PTY7,
- .ro_gpio = GPIO_PTY6,
+};
+
+static struct gpiod_lookup_table mmc_spi_gpio_table = {
+ .dev_id = "mmc_spi.0", /* device "mmc_spi" @ CS0 */
+ .table = {
+ /* Card detect */
+ GPIO_LOOKUP_IDX("sh7724_pfc", GPIO_PTY7, NULL, 0,
+ GPIO_ACTIVE_LOW),
+ /* Write protect */
+ GPIO_LOOKUP_IDX("sh7724_pfc", GPIO_PTY6, NULL, 1,
+ GPIO_ACTIVE_HIGH),
+ { },
+ },
};
static struct spi_board_info spi_bus[] = {
@@ -1279,6 +1303,7 @@ static int __init arch_setup(void)
gpio_request(GPIO_PTB6, NULL); /* 3.3V power control */
gpio_direction_output(GPIO_PTB6, 0); /* disable power by default */
+ gpiod_add_lookup_table(&mmc_spi_gpio_table);
spi_register_board_info(spi_bus, ARRAY_SIZE(spi_bus));
#endif
@@ -1431,6 +1456,10 @@ static int __init arch_setup(void)
gpiod_add_lookup_table(&cn12_power_gpiod_table);
#if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE)
gpiod_add_lookup_table(&sdhi0_power_gpiod_table);
+ gpiod_add_lookup_table(&sdhi0_gpio_table);
+#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE)
+ gpiod_add_lookup_table(&sdhi1_gpio_table);
+#endif
#endif
return platform_add_devices(ecovec_devices,
diff --git a/arch/sh/boards/mach-sh03/Makefile b/arch/sh/boards/mach-sh03/Makefile
index ab52d5a2481a..f89c25c6a39c 100644
--- a/arch/sh/boards/mach-sh03/Makefile
+++ b/arch/sh/boards/mach-sh03/Makefile
@@ -3,4 +3,5 @@
# Makefile for the Interface (CTP/PCI-SH03) specific parts of the kernel
#
-obj-y := setup.o rtc.o
+obj-y := setup.o
+obj-$(CONFIG_RTC_DRV_GENERIC) += rtc.o
diff --git a/arch/sh/boards/mach-sh03/rtc.c b/arch/sh/boards/mach-sh03/rtc.c
index dc3d50e3b7a2..8b23ed7c201c 100644
--- a/arch/sh/boards/mach-sh03/rtc.c
+++ b/arch/sh/boards/mach-sh03/rtc.c
@@ -13,8 +13,9 @@
#include <linux/bcd.h>
#include <linux/rtc.h>
#include <linux/spinlock.h>
-#include <asm/io.h>
-#include <asm/rtc.h>
+#include <linux/io.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
#define RTC_BASE 0xb0000000
#define RTC_SEC1 (RTC_BASE + 0)
@@ -38,7 +39,7 @@
static DEFINE_SPINLOCK(sh03_rtc_lock);
-unsigned long get_cmos_time(void)
+static int sh03_rtc_gettimeofday(struct device *dev, struct rtc_time *tm)
{
unsigned int year, mon, day, hour, min, sec;
@@ -75,17 +76,18 @@ unsigned long get_cmos_time(void)
}
spin_unlock(&sh03_rtc_lock);
- return mktime(year, mon, day, hour, min, sec);
-}
-void sh03_rtc_gettimeofday(struct timespec *tv)
-{
+ tm->tm_sec = sec;
+ tm->tm_min = min;
+ tm->tm_hour = hour;
+ tm->tm_mday = day;
+ tm->tm_mon = mon;
+ tm->tm_year = year - 1900;
- tv->tv_sec = get_cmos_time();
- tv->tv_nsec = 0;
+ return 0;
}
-static int set_rtc_mmss(unsigned long nowtime)
+static int set_rtc_mmss(struct rtc_time *tm)
{
int retval = 0;
int real_seconds, real_minutes, cmos_minutes;
@@ -97,8 +99,8 @@ static int set_rtc_mmss(unsigned long nowtime)
if (!(__raw_readb(RTC_CTL) & RTC_BUSY))
break;
cmos_minutes = (__raw_readb(RTC_MIN1) & 0xf) + (__raw_readb(RTC_MIN10) & 0xf) * 10;
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
+ real_seconds = tm->tm_sec;
+ real_minutes = tm->tm_min;
if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
real_minutes += 30; /* correct for half hour time zone */
real_minutes %= 60;
@@ -112,22 +114,31 @@ static int set_rtc_mmss(unsigned long nowtime)
printk_once(KERN_NOTICE
"set_rtc_mmss: can't update from %d to %d\n",
cmos_minutes, real_minutes);
- retval = -1;
+ retval = -EINVAL;
}
spin_unlock(&sh03_rtc_lock);
return retval;
}
-int sh03_rtc_settimeofday(const time_t secs)
+int sh03_rtc_settimeofday(struct device *dev, struct rtc_time *tm)
{
- unsigned long nowtime = secs;
-
- return set_rtc_mmss(nowtime);
+ return set_rtc_mmss(tm);
}
-void sh03_time_init(void)
+static const struct rtc_class_ops rtc_generic_ops = {
+ .read_time = sh03_rtc_gettimeofday,
+ .set_time = sh03_rtc_settimeofday,
+};
+
+static int __init sh03_time_init(void)
{
- rtc_sh_get_time = sh03_rtc_gettimeofday;
- rtc_sh_set_time = sh03_rtc_settimeofday;
+ struct platform_device *pdev;
+
+ pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+ &rtc_generic_ops,
+ sizeof(rtc_generic_ops));
+
+ return PTR_ERR_OR_ZERO(pdev);
}
+arch_initcall(sh03_time_init);
diff --git a/arch/sh/boards/mach-sh03/setup.c b/arch/sh/boards/mach-sh03/setup.c
index 85e7059a77e9..3901b6031ad5 100644
--- a/arch/sh/boards/mach-sh03/setup.c
+++ b/arch/sh/boards/mach-sh03/setup.c
@@ -22,14 +22,6 @@ static void __init init_sh03_IRQ(void)
plat_irq_setup_pins(IRQ_MODE_IRQ);
}
-/* arch/sh/boards/sh03/rtc.c */
-void sh03_time_init(void);
-
-static void __init sh03_setup(char **cmdline_p)
-{
- board_time_init = sh03_time_init;
-}
-
static struct resource cf_ide_resources[] = {
[0] = {
.start = 0x1f0,
@@ -101,6 +93,5 @@ device_initcall(sh03_devices_setup);
static struct sh_machine_vector mv_sh03 __initmv = {
.mv_name = "Interface (CTP/PCI-SH03)",
- .mv_setup = sh03_setup,
.mv_init_irq = init_sh03_IRQ,
};
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index c24970e8790e..958f46da3a79 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -114,18 +114,10 @@ static void __init sh_of_mem_reserve(void)
early_init_fdt_scan_reserved_mem();
}
-static void __init sh_of_time_init(void)
-{
- pr_info("SH generic board support: scanning for clocksource devices\n");
- timer_probe();
-}
-
static void __init sh_of_setup(char **cmdline_p)
{
struct device_node *root;
- board_time_init = sh_of_time_init;
-
sh_mv.mv_name = "Unknown SH model";
root = of_find_node_by_path("/");
if (root) {
diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig
index 3f08dc54480b..1d27666c029f 100644
--- a/arch/sh/configs/dreamcast_defconfig
+++ b/arch/sh/configs/dreamcast_defconfig
@@ -70,3 +70,5 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=y
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 2156223405a1..489ffdfb1517 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -130,3 +130,5 @@ CONFIG_CRYPTO_SHA1=y
CONFIG_CRYPTO_DEFLATE=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_CCITT=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=y
diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h
index c63555ee1255..69dbae2949b0 100644
--- a/arch/sh/include/asm/rtc.h
+++ b/arch/sh/include/asm/rtc.h
@@ -3,9 +3,6 @@
#define _ASM_RTC_H
void time_init(void);
-extern void (*board_time_init)(void);
-extern void (*rtc_sh_get_time)(struct timespec *);
-extern int (*rtc_sh_set_time)(const time_t);
#define RTC_CAP_4_DIGIT_YEAR (1 << 0)
diff --git a/arch/sh/include/mach-dreamcast/mach/sysasic.h b/arch/sh/include/mach-dreamcast/mach/sysasic.h
index da10aeff22f3..ed69ce7f2030 100644
--- a/arch/sh/include/mach-dreamcast/mach/sysasic.h
+++ b/arch/sh/include/mach-dreamcast/mach/sysasic.h
@@ -41,7 +41,6 @@
/* arch/sh/boards/mach-dreamcast/irq.c */
extern int systemasic_irq_demux(int);
extern void systemasic_irq_init(void);
-extern void aica_time_init(void);
#endif /* __ASM_SH_DREAMCAST_SYSASIC_H */
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 6742d6e3af17..e16b2cd269a3 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -19,77 +19,6 @@
#include <asm/clock.h>
#include <asm/rtc.h>
-/* Dummy RTC ops */
-static void null_rtc_get_time(struct timespec *tv)
-{
- tv->tv_sec = mktime(2000, 1, 1, 0, 0, 0);
- tv->tv_nsec = 0;
-}
-
-static int null_rtc_set_time(const time_t secs)
-{
- return 0;
-}
-
-void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
-int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
-
-void read_persistent_clock(struct timespec *ts)
-{
- rtc_sh_get_time(ts);
-}
-
-#ifdef CONFIG_GENERIC_CMOS_UPDATE
-int update_persistent_clock(struct timespec now)
-{
- return rtc_sh_set_time(now.tv_sec);
-}
-#endif
-
-static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
-{
- struct timespec tv;
-
- rtc_sh_get_time(&tv);
- rtc_time_to_tm(tv.tv_sec, tm);
- return 0;
-}
-
-static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
-{
- unsigned long secs;
-
- rtc_tm_to_time(tm, &secs);
- if ((rtc_sh_set_time == null_rtc_set_time) ||
- (rtc_sh_set_time(secs) < 0))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
-static const struct rtc_class_ops rtc_generic_ops = {
- .read_time = rtc_generic_get_time,
- .set_time = rtc_generic_set_time,
-};
-
-static int __init rtc_generic_init(void)
-{
- struct platform_device *pdev;
-
- if (rtc_sh_get_time == null_rtc_get_time)
- return -ENODEV;
-
- pdev = platform_device_register_data(NULL, "rtc-generic", -1,
- &rtc_generic_ops,
- sizeof(rtc_generic_ops));
-
-
- return PTR_ERR_OR_ZERO(pdev);
-}
-device_initcall(rtc_generic_init);
-
-void (*board_time_init)(void);
-
static void __init sh_late_time_init(void)
{
/*
@@ -107,8 +36,7 @@ static void __init sh_late_time_init(void)
void __init time_init(void)
{
- if (board_time_init)
- board_time_init();
+ timer_probe();
clk_init();
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 490b2c95c212..f5bb9ded1d18 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -40,7 +40,6 @@ config SPARC
select MODULES_USE_ELF_RELA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
- select ARCH_HAS_SG_CHAIN
select CPU_NO_EFFICIENT_FFS
select LOCKDEP_SMALL if LOCKDEP
select NEED_DMA_MAP_STATE
@@ -49,7 +48,6 @@ config SPARC
config SPARC32
def_bool !64BIT
select ARCH_HAS_SYNC_DMA_FOR_CPU
- select DMA_DIRECT_OPS
select GENERIC_ATOMIC64
select CLZ_TAB
select HAVE_UID16
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h
index b0bb2fcaf1c9..ed32845bd2d2 100644
--- a/arch/sparc/include/asm/dma-mapping.h
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -2,9 +2,7 @@
#ifndef ___ASM_SPARC_DMA_MAPPING_H
#define ___ASM_SPARC_DMA_MAPPING_H
-#include <linux/scatterlist.h>
-#include <linux/mm.h>
-#include <linux/dma-debug.h>
+#include <asm/cpu_type.h>
extern const struct dma_map_ops *dma_ops;
@@ -14,11 +12,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
#ifdef CONFIG_SPARC_LEON
if (sparc_cpu_model == sparc_leon)
- return &dma_direct_ops;
+ return NULL;
#endif
#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
if (bus == &pci_bus_type)
- return &dma_direct_ops;
+ return NULL;
#endif
return dma_ops;
}
diff --git a/arch/sparc/include/asm/dma.h b/arch/sparc/include/asm/dma.h
index a1d7c86917c6..462e7c794a09 100644
--- a/arch/sparc/include/asm/dma.h
+++ b/arch/sparc/include/asm/dma.h
@@ -91,54 +91,10 @@ extern int isa_dma_bridge_buggy;
#endif
#ifdef CONFIG_SPARC32
-
-/* Routines for data transfer buffers. */
struct device;
-struct scatterlist;
-
-struct sparc32_dma_ops {
- __u32 (*get_scsi_one)(struct device *, char *, unsigned long);
- void (*get_scsi_sgl)(struct device *, struct scatterlist *, int);
- void (*release_scsi_one)(struct device *, __u32, unsigned long);
- void (*release_scsi_sgl)(struct device *, struct scatterlist *,int);
-#ifdef CONFIG_SBUS
- int (*map_dma_area)(struct device *, dma_addr_t *, unsigned long, unsigned long, int);
- void (*unmap_dma_area)(struct device *, unsigned long, int);
-#endif
-};
-extern const struct sparc32_dma_ops *sparc32_dma_ops;
-
-#define mmu_get_scsi_one(dev,vaddr,len) \
- sparc32_dma_ops->get_scsi_one(dev, vaddr, len)
-#define mmu_get_scsi_sgl(dev,sg,sz) \
- sparc32_dma_ops->get_scsi_sgl(dev, sg, sz)
-#define mmu_release_scsi_one(dev,vaddr,len) \
- sparc32_dma_ops->release_scsi_one(dev, vaddr,len)
-#define mmu_release_scsi_sgl(dev,sg,sz) \
- sparc32_dma_ops->release_scsi_sgl(dev, sg, sz)
-
-#ifdef CONFIG_SBUS
-/*
- * mmu_map/unmap are provided by iommu/iounit; Invalid to call on IIep.
- *
- * The mmu_map_dma_area establishes two mappings in one go.
- * These mappings point to pages normally mapped at 'va' (linear address).
- * First mapping is for CPU visible address at 'a', uncached.
- * This is an alias, but it works because it is an uncached mapping.
- * Second mapping is for device visible address, or "bus" address.
- * The bus address is returned at '*pba'.
- *
- * These functions seem distinct, but are hard to split.
- * On sun4m, page attributes depend on the CPU type, so we have to
- * know if we are mapping RAM or I/O, so it has to be an additional argument
- * to a separate mapping function for CPU visible mappings.
- */
-#define sbus_map_dma_area(dev,pba,va,a,len) \
- sparc32_dma_ops->map_dma_area(dev, pba, va, a, len)
-#define sbus_unmap_dma_area(dev,ba,len) \
- sparc32_dma_ops->unmap_dma_area(dev, ba, len)
-#endif /* CONFIG_SBUS */
+unsigned long sparc_dma_alloc_resource(struct device *dev, size_t len);
+bool sparc_dma_free_resource(void *cpu_addr, size_t size);
#endif
#endif /* !(_ASM_SPARC_DMA_H) */
diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
index 8c01f0f6b1ed..c1e05e4ab9e3 100644
--- a/arch/sparc/include/asm/leon.h
+++ b/arch/sparc/include/asm/leon.h
@@ -254,4 +254,13 @@ extern int leon_ipi_irq;
#define _pfn_valid(pfn) ((pfn < last_valid_pfn) && (pfn >= PFN(phys_base)))
#define _SRMMU_PTE_PMASK_LEON 0xffffffff
+/*
+ * On LEON PCI Memory space is mapped 1:1 with physical address space.
+ *
+ * I/O space is located at low 64Kbytes in PCI I/O space. The I/O addresses
+ * are converted into CPU addresses to virtual addresses that are mapped with
+ * MMU to the PCI Host PCI I/O space window which are translated to the low
+ * 64Kbytes by the Host controller.
+ */
+
#endif
diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h
index cad79a6ce0e4..cfec79bb1831 100644
--- a/arch/sparc/include/asm/pci.h
+++ b/arch/sparc/include/asm/pci.h
@@ -1,9 +1,54 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ___ASM_SPARC_PCI_H
#define ___ASM_SPARC_PCI_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/pci_64.h>
+
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+ * already-configured bus numbers - to be used for buggy BIOSes
+ * or architectures with incomplete PCI setup by the loader.
+ */
+#define pcibios_assign_all_busses() 0
+
+#define PCIBIOS_MIN_IO 0UL
+#define PCIBIOS_MIN_MEM 0UL
+
+#define PCI_IRQ_NONE 0xffffffff
+
+
+#ifdef CONFIG_SPARC64
+
+/* PCI IOMMU mapping bypass support. */
+
+/* PCI 64-bit addressing works for all slots on all controller
+ * types on sparc64. However, it requires that the device
+ * can drive enough of the 64 bits.
+ */
+#define PCI64_REQUIRED_MASK (~(u64)0)
+#define PCI64_ADDR_BASE 0xfffc000000000000UL
+
+/* Return the index of the PCI controller for device PDEV. */
+int pci_domain_nr(struct pci_bus *bus);
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+ return 1;
+}
+
+/* Platform support for /proc/bus/pci/X/Y mmap()s. */
+#define HAVE_PCI_MMAP
+#define arch_can_pci_mmap_io() 1
+#define HAVE_ARCH_PCI_GET_UNMAPPED_AREA
+#define get_pci_unmapped_area get_fb_unmapped_area
+
+#define HAVE_ARCH_PCI_RESOURCE_TO_USER
+#endif /* CONFIG_SPARC64 */
+
+#if defined(CONFIG_SPARC64) || defined(CONFIG_LEON_PCI)
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+ return PCI_IRQ_NONE;
+}
#else
-#include <asm/pci_32.h>
-#endif
+#include <asm-generic/pci.h>
#endif
+
+#endif /* ___ASM_SPARC_PCI_H */
diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h
deleted file mode 100644
index cfc0ee9476c6..000000000000
--- a/arch/sparc/include/asm/pci_32.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __SPARC_PCI_H
-#define __SPARC_PCI_H
-
-#ifdef __KERNEL__
-
-#include <linux/dma-mapping.h>
-
-/* Can be used to override the logic in pci_scan_bus for skipping
- * already-configured bus numbers - to be used for buggy BIOSes
- * or architectures with incomplete PCI setup by the loader.
- */
-#define pcibios_assign_all_busses() 0
-
-#define PCIBIOS_MIN_IO 0UL
-#define PCIBIOS_MIN_MEM 0UL
-
-#define PCI_IRQ_NONE 0xffffffff
-
-#endif /* __KERNEL__ */
-
-#ifndef CONFIG_LEON_PCI
-/* generic pci stuff */
-#include <asm-generic/pci.h>
-#else
-/*
- * On LEON PCI Memory space is mapped 1:1 with physical address space.
- *
- * I/O space is located at low 64Kbytes in PCI I/O space. The I/O addresses
- * are converted into CPU addresses to virtual addresses that are mapped with
- * MMU to the PCI Host PCI I/O space window which are translated to the low
- * 64Kbytes by the Host controller.
- */
-
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
- return PCI_IRQ_NONE;
-}
-#endif
-
-#endif /* __SPARC_PCI_H */
diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h
deleted file mode 100644
index fac77813402c..000000000000
--- a/arch/sparc/include/asm/pci_64.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __SPARC64_PCI_H
-#define __SPARC64_PCI_H
-
-#ifdef __KERNEL__
-
-#include <linux/dma-mapping.h>
-
-/* Can be used to override the logic in pci_scan_bus for skipping
- * already-configured bus numbers - to be used for buggy BIOSes
- * or architectures with incomplete PCI setup by the loader.
- */
-#define pcibios_assign_all_busses() 0
-
-#define PCIBIOS_MIN_IO 0UL
-#define PCIBIOS_MIN_MEM 0UL
-
-#define PCI_IRQ_NONE 0xffffffff
-
-/* PCI IOMMU mapping bypass support. */
-
-/* PCI 64-bit addressing works for all slots on all controller
- * types on sparc64. However, it requires that the device
- * can drive enough of the 64 bits.
- */
-#define PCI64_REQUIRED_MASK (~(u64)0)
-#define PCI64_ADDR_BASE 0xfffc000000000000UL
-
-/* Return the index of the PCI controller for device PDEV. */
-
-int pci_domain_nr(struct pci_bus *bus);
-static inline int pci_proc_domain(struct pci_bus *bus)
-{
- return 1;
-}
-
-/* Platform support for /proc/bus/pci/X/Y mmap()s. */
-
-#define HAVE_PCI_MMAP
-#define arch_can_pci_mmap_io() 1
-#define HAVE_ARCH_PCI_GET_UNMAPPED_AREA
-#define get_pci_unmapped_area get_fb_unmapped_area
-
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
- return PCI_IRQ_NONE;
-}
-
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-#endif /* __KERNEL__ */
-
-#endif /* __SPARC64_PCI_H */
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 05eb016fc41b..b1a09080e8da 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -314,7 +314,7 @@ bad:
bad_no_ctx:
if (printk_ratelimit())
WARN_ON(1);
- return SPARC_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
static void strbuf_flush(struct strbuf *strbuf, struct iommu *iommu,
@@ -547,7 +547,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
if (outcount < incount) {
outs = sg_next(outs);
- outs->dma_address = SPARC_MAPPING_ERROR;
+ outs->dma_address = DMA_MAPPING_ERROR;
outs->dma_length = 0;
}
@@ -573,7 +573,7 @@ iommu_map_failed:
iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
IOMMU_ERROR_CODE);
- s->dma_address = SPARC_MAPPING_ERROR;
+ s->dma_address = DMA_MAPPING_ERROR;
s->dma_length = 0;
}
if (s == outs)
@@ -741,11 +741,6 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
spin_unlock_irqrestore(&iommu->lock, flags);
}
-static int dma_4u_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == SPARC_MAPPING_ERROR;
-}
-
static int dma_4u_supported(struct device *dev, u64 device_mask)
{
struct iommu *iommu = dev->archdata.iommu;
@@ -771,7 +766,6 @@ static const struct dma_map_ops sun4u_dma_ops = {
.sync_single_for_cpu = dma_4u_sync_single_for_cpu,
.sync_sg_for_cpu = dma_4u_sync_sg_for_cpu,
.dma_supported = dma_4u_supported,
- .mapping_error = dma_4u_mapping_error,
};
const struct dma_map_ops *dma_ops = &sun4u_dma_ops;
diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h
index e3c02ba32500..d62ed9c5682d 100644
--- a/arch/sparc/kernel/iommu_common.h
+++ b/arch/sparc/kernel/iommu_common.h
@@ -48,6 +48,4 @@ static inline int is_span_boundary(unsigned long entry,
return iommu_is_span_boundary(entry, nr, shift, boundary_size);
}
-#define SPARC_MAPPING_ERROR (~(dma_addr_t)0x0)
-
#endif /* _IOMMU_COMMON_H */
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index aeaad04fdd14..f89603855f1e 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -52,8 +52,6 @@
#include <asm/io-unit.h>
#include <asm/leon.h>
-const struct sparc32_dma_ops *sparc32_dma_ops;
-
/* This function must make sure that caches and memory are coherent after DMA
* On LEON systems without cache snooping it flushes the entire D-CACHE.
*/
@@ -247,178 +245,60 @@ static void _sparc_free_io(struct resource *res)
release_resource(res);
}
-#ifdef CONFIG_SBUS
-
-void sbus_set_sbus64(struct device *dev, int x)
-{
- printk("sbus_set_sbus64: unsupported\n");
-}
-EXPORT_SYMBOL(sbus_set_sbus64);
-
-/*
- * Allocate a chunk of memory suitable for DMA.
- * Typically devices use them for control blocks.
- * CPU may access them without any explicit flushing.
- */
-static void *sbus_alloc_coherent(struct device *dev, size_t len,
- dma_addr_t *dma_addrp, gfp_t gfp,
- unsigned long attrs)
+unsigned long sparc_dma_alloc_resource(struct device *dev, size_t len)
{
- struct platform_device *op = to_platform_device(dev);
- unsigned long len_total = PAGE_ALIGN(len);
- unsigned long va;
struct resource *res;
- int order;
-
- /* XXX why are some lengths signed, others unsigned? */
- if (len <= 0) {
- return NULL;
- }
- /* XXX So what is maxphys for us and how do drivers know it? */
- if (len > 256*1024) { /* __get_free_pages() limit */
- return NULL;
- }
-
- order = get_order(len_total);
- va = __get_free_pages(gfp, order);
- if (va == 0)
- goto err_nopages;
- if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL)
- goto err_nomem;
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return 0;
+ res->name = dev->of_node->full_name;
- if (allocate_resource(&_sparc_dvma, res, len_total,
- _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) {
- printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total);
- goto err_nova;
+ if (allocate_resource(&_sparc_dvma, res, len, _sparc_dvma.start,
+ _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) {
+ printk("%s: cannot occupy 0x%zx", __func__, len);
+ kfree(res);
+ return 0;
}
- // XXX The sbus_map_dma_area does this for us below, see comments.
- // srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total);
- /*
- * XXX That's where sdev would be used. Currently we load
- * all iommu tables with the same translations.
- */
- if (sbus_map_dma_area(dev, dma_addrp, va, res->start, len_total) != 0)
- goto err_noiommu;
-
- res->name = op->dev.of_node->full_name;
-
- return (void *)(unsigned long)res->start;
-
-err_noiommu:
- release_resource(res);
-err_nova:
- kfree(res);
-err_nomem:
- free_pages(va, order);
-err_nopages:
- return NULL;
+ return res->start;
}
-static void sbus_free_coherent(struct device *dev, size_t n, void *p,
- dma_addr_t ba, unsigned long attrs)
+bool sparc_dma_free_resource(void *cpu_addr, size_t size)
{
+ unsigned long addr = (unsigned long)cpu_addr;
struct resource *res;
- struct page *pgv;
- if ((res = lookup_resource(&_sparc_dvma,
- (unsigned long)p)) == NULL) {
- printk("sbus_free_consistent: cannot free %p\n", p);
- return;
+ res = lookup_resource(&_sparc_dvma, addr);
+ if (!res) {
+ printk("%s: cannot free %p\n", __func__, cpu_addr);
+ return false;
}
- if (((unsigned long)p & (PAGE_SIZE-1)) != 0) {
- printk("sbus_free_consistent: unaligned va %p\n", p);
- return;
+ if ((addr & (PAGE_SIZE - 1)) != 0) {
+ printk("%s: unaligned va %p\n", __func__, cpu_addr);
+ return false;
}
- n = PAGE_ALIGN(n);
- if (resource_size(res) != n) {
- printk("sbus_free_consistent: region 0x%lx asked 0x%zx\n",
- (long)resource_size(res), n);
- return;
+ size = PAGE_ALIGN(size);
+ if (resource_size(res) != size) {
+ printk("%s: region 0x%lx asked 0x%zx\n",
+ __func__, (long)resource_size(res), size);
+ return false;
}
release_resource(res);
kfree(res);
-
- pgv = virt_to_page(p);
- sbus_unmap_dma_area(dev, ba, n);
-
- __free_pages(pgv, get_order(n));
-}
-
-/*
- * Map a chunk of memory so that devices can see it.
- * CPU view of this memory may be inconsistent with
- * a device view and explicit flushing is necessary.
- */
-static dma_addr_t sbus_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t len,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- void *va = page_address(page) + offset;
-
- /* XXX why are some lengths signed, others unsigned? */
- if (len <= 0) {
- return 0;
- }
- /* XXX So what is maxphys for us and how do drivers know it? */
- if (len > 256*1024) { /* __get_free_pages() limit */
- return 0;
- }
- return mmu_get_scsi_one(dev, va, len);
-}
-
-static void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n,
- enum dma_data_direction dir, unsigned long attrs)
-{
- mmu_release_scsi_one(dev, ba, n);
-}
-
-static int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n,
- enum dma_data_direction dir, unsigned long attrs)
-{
- mmu_get_scsi_sgl(dev, sg, n);
- return n;
-}
-
-static void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n,
- enum dma_data_direction dir, unsigned long attrs)
-{
- mmu_release_scsi_sgl(dev, sg, n);
-}
-
-static void sbus_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int n, enum dma_data_direction dir)
-{
- BUG();
+ return true;
}
-static void sbus_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int n, enum dma_data_direction dir)
-{
- BUG();
-}
+#ifdef CONFIG_SBUS
-static int sbus_dma_supported(struct device *dev, u64 mask)
+void sbus_set_sbus64(struct device *dev, int x)
{
- return 0;
+ printk("sbus_set_sbus64: unsupported\n");
}
-
-static const struct dma_map_ops sbus_dma_ops = {
- .alloc = sbus_alloc_coherent,
- .free = sbus_free_coherent,
- .map_page = sbus_map_page,
- .unmap_page = sbus_unmap_page,
- .map_sg = sbus_map_sg,
- .unmap_sg = sbus_unmap_sg,
- .sync_sg_for_cpu = sbus_sync_sg_for_cpu,
- .sync_sg_for_device = sbus_sync_sg_for_device,
- .dma_supported = sbus_dma_supported,
-};
+EXPORT_SYMBOL(sbus_set_sbus64);
static int __init sparc_register_ioport(void)
{
@@ -438,45 +318,30 @@ arch_initcall(sparc_register_ioport);
void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
- unsigned long len_total = PAGE_ALIGN(size);
+ unsigned long addr;
void *va;
- struct resource *res;
- int order;
- if (size == 0) {
+ if (!size || size > 256 * 1024) /* __get_free_pages() limit */
return NULL;
- }
- if (size > 256*1024) { /* __get_free_pages() limit */
- return NULL;
- }
- order = get_order(len_total);
- va = (void *) __get_free_pages(gfp, order);
- if (va == NULL) {
- printk("%s: no %ld pages\n", __func__, len_total>>PAGE_SHIFT);
- goto err_nopages;
+ size = PAGE_ALIGN(size);
+ va = (void *) __get_free_pages(gfp | __GFP_ZERO, get_order(size));
+ if (!va) {
+ printk("%s: no %zd pages\n", __func__, size >> PAGE_SHIFT);
+ return NULL;
}
- if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
- printk("%s: no core\n", __func__);
+ addr = sparc_dma_alloc_resource(dev, size);
+ if (!addr)
goto err_nomem;
- }
- if (allocate_resource(&_sparc_dvma, res, len_total,
- _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) {
- printk("%s: cannot occupy 0x%lx", __func__, len_total);
- goto err_nova;
- }
- srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total);
+ srmmu_mapiorange(0, virt_to_phys(va), addr, size);
*dma_handle = virt_to_phys(va);
- return (void *) res->start;
+ return (void *)addr;
-err_nova:
- kfree(res);
err_nomem:
- free_pages((unsigned long)va, order);
-err_nopages:
+ free_pages((unsigned long)va, get_order(size));
return NULL;
}
@@ -491,31 +356,11 @@ err_nopages:
void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
- struct resource *res;
-
- if ((res = lookup_resource(&_sparc_dvma,
- (unsigned long)cpu_addr)) == NULL) {
- printk("%s: cannot free %p\n", __func__, cpu_addr);
+ if (!sparc_dma_free_resource(cpu_addr, PAGE_ALIGN(size)))
return;
- }
-
- if (((unsigned long)cpu_addr & (PAGE_SIZE-1)) != 0) {
- printk("%s: unaligned va %p\n", __func__, cpu_addr);
- return;
- }
-
- size = PAGE_ALIGN(size);
- if (resource_size(res) != size) {
- printk("%s: region 0x%lx asked 0x%zx\n", __func__,
- (long)resource_size(res), size);
- return;
- }
dma_make_coherent(dma_addr, size);
srmmu_unmapiorange((unsigned long)cpu_addr, size);
-
- release_resource(res);
- kfree(res);
free_pages((unsigned long)phys_to_virt(dma_addr), get_order(size));
}
@@ -528,7 +373,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
dma_make_coherent(paddr, PAGE_ALIGN(size));
}
-const struct dma_map_ops *dma_ops = &sbus_dma_ops;
+const struct dma_map_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
#ifdef CONFIG_PROC_FS
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 565d9ac883d0..fa0e42b4cbfb 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -414,12 +414,12 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
bad:
if (printk_ratelimit())
WARN_ON(1);
- return SPARC_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
iommu_map_fail:
local_irq_restore(flags);
iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
- return SPARC_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
@@ -592,7 +592,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
if (outcount < incount) {
outs = sg_next(outs);
- outs->dma_address = SPARC_MAPPING_ERROR;
+ outs->dma_address = DMA_MAPPING_ERROR;
outs->dma_length = 0;
}
@@ -609,7 +609,7 @@ iommu_map_failed:
iommu_tbl_range_free(tbl, vaddr, npages,
IOMMU_ERROR_CODE);
/* XXX demap? XXX */
- s->dma_address = SPARC_MAPPING_ERROR;
+ s->dma_address = DMA_MAPPING_ERROR;
s->dma_length = 0;
}
if (s == outs)
@@ -688,11 +688,6 @@ static int dma_4v_supported(struct device *dev, u64 device_mask)
return pci64_dma_supported(to_pci_dev(dev), device_mask);
}
-static int dma_4v_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == SPARC_MAPPING_ERROR;
-}
-
static const struct dma_map_ops sun4v_dma_ops = {
.alloc = dma_4v_alloc_coherent,
.free = dma_4v_free_coherent,
@@ -701,7 +696,6 @@ static const struct dma_map_ops sun4v_dma_ops = {
.map_sg = dma_4v_map_sg,
.unmap_sg = dma_4v_unmap_sg,
.dma_supported = dma_4v_supported,
- .mapping_error = dma_4v_mapping_error,
};
static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
index c8cb27d3ea75..f770ee7229d8 100644
--- a/arch/sparc/mm/io-unit.c
+++ b/arch/sparc/mm/io-unit.c
@@ -12,7 +12,7 @@
#include <linux/mm.h>
#include <linux/highmem.h> /* pte_offset_map => kmap_atomic */
#include <linux/bitops.h>
-#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -140,34 +140,44 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan);
return vaddr;
}
-static __u32 iounit_get_scsi_one(struct device *dev, char *vaddr, unsigned long len)
+static dma_addr_t iounit_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t len, enum dma_data_direction dir,
+ unsigned long attrs)
{
+ void *vaddr = page_address(page) + offset;
struct iounit_struct *iounit = dev->archdata.iommu;
unsigned long ret, flags;
+ /* XXX So what is maxphys for us and how do drivers know it? */
+ if (!len || len > 256 * 1024)
+ return DMA_MAPPING_ERROR;
+
spin_lock_irqsave(&iounit->lock, flags);
ret = iounit_get_area(iounit, (unsigned long)vaddr, len);
spin_unlock_irqrestore(&iounit->lock, flags);
return ret;
}
-static void iounit_get_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz)
+static int iounit_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+ enum dma_data_direction dir, unsigned long attrs)
{
struct iounit_struct *iounit = dev->archdata.iommu;
+ struct scatterlist *sg;
unsigned long flags;
+ int i;
/* FIXME: Cache some resolved pages - often several sg entries are to the same page */
spin_lock_irqsave(&iounit->lock, flags);
- while (sz != 0) {
- --sz;
+ for_each_sg(sgl, sg, nents, i) {
sg->dma_address = iounit_get_area(iounit, (unsigned long) sg_virt(sg), sg->length);
sg->dma_length = sg->length;
- sg = sg_next(sg);
}
spin_unlock_irqrestore(&iounit->lock, flags);
+ return nents;
}
-static void iounit_release_scsi_one(struct device *dev, __u32 vaddr, unsigned long len)
+static void iounit_unmap_page(struct device *dev, dma_addr_t vaddr, size_t len,
+ enum dma_data_direction dir, unsigned long attrs)
{
struct iounit_struct *iounit = dev->archdata.iommu;
unsigned long flags;
@@ -181,34 +191,47 @@ static void iounit_release_scsi_one(struct device *dev, __u32 vaddr, unsigned lo
spin_unlock_irqrestore(&iounit->lock, flags);
}
-static void iounit_release_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz)
+static void iounit_unmap_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
{
struct iounit_struct *iounit = dev->archdata.iommu;
- unsigned long flags;
- unsigned long vaddr, len;
+ unsigned long flags, vaddr, len;
+ struct scatterlist *sg;
+ int i;
spin_lock_irqsave(&iounit->lock, flags);
- while (sz != 0) {
- --sz;
+ for_each_sg(sgl, sg, nents, i) {
len = ((sg->dma_address & ~PAGE_MASK) + sg->length + (PAGE_SIZE-1)) >> PAGE_SHIFT;
vaddr = (sg->dma_address - IOUNIT_DMA_BASE) >> PAGE_SHIFT;
IOD(("iounit_release %08lx-%08lx\n", (long)vaddr, (long)len+vaddr));
for (len += vaddr; vaddr < len; vaddr++)
clear_bit(vaddr, iounit->bmap);
- sg = sg_next(sg);
}
spin_unlock_irqrestore(&iounit->lock, flags);
}
#ifdef CONFIG_SBUS
-static int iounit_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long va, unsigned long addr, int len)
+static void *iounit_alloc(struct device *dev, size_t len,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
struct iounit_struct *iounit = dev->archdata.iommu;
- unsigned long page, end;
+ unsigned long va, addr, page, end, ret;
pgprot_t dvma_prot;
iopte_t __iomem *iopte;
- *pba = addr;
+ /* XXX So what is maxphys for us and how do drivers know it? */
+ if (!len || len > 256 * 1024)
+ return NULL;
+
+ len = PAGE_ALIGN(len);
+ va = __get_free_pages(gfp | __GFP_ZERO, get_order(len));
+ if (!va)
+ return NULL;
+
+ addr = ret = sparc_dma_alloc_resource(dev, len);
+ if (!addr)
+ goto out_free_pages;
+ *dma_handle = addr;
dvma_prot = __pgprot(SRMMU_CACHE | SRMMU_ET_PTE | SRMMU_PRIV);
end = PAGE_ALIGN((addr + len));
@@ -237,27 +260,32 @@ static int iounit_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned lon
flush_cache_all();
flush_tlb_all();
- return 0;
+ return (void *)ret;
+
+out_free_pages:
+ free_pages(va, get_order(len));
+ return NULL;
}
-static void iounit_unmap_dma_area(struct device *dev, unsigned long addr, int len)
+static void iounit_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_addr, unsigned long attrs)
{
/* XXX Somebody please fill this in */
}
#endif
-static const struct sparc32_dma_ops iounit_dma_ops = {
- .get_scsi_one = iounit_get_scsi_one,
- .get_scsi_sgl = iounit_get_scsi_sgl,
- .release_scsi_one = iounit_release_scsi_one,
- .release_scsi_sgl = iounit_release_scsi_sgl,
+static const struct dma_map_ops iounit_dma_ops = {
#ifdef CONFIG_SBUS
- .map_dma_area = iounit_map_dma_area,
- .unmap_dma_area = iounit_unmap_dma_area,
+ .alloc = iounit_alloc,
+ .free = iounit_free,
#endif
+ .map_page = iounit_map_page,
+ .unmap_page = iounit_unmap_page,
+ .map_sg = iounit_map_sg,
+ .unmap_sg = iounit_unmap_sg,
};
void __init ld_mmu_iounit(void)
{
- sparc32_dma_ops = &iounit_dma_ops;
+ dma_ops = &iounit_dma_ops;
}
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 2c5f8a648f8c..e8d5d73ca40d 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -13,7 +13,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/highmem.h> /* pte_offset_map => kmap_atomic */
-#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -205,59 +205,67 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
return busa0;
}
-static u32 iommu_get_scsi_one(struct device *dev, char *vaddr, unsigned int len)
+static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t len)
{
- unsigned long off;
- int npages;
- struct page *page;
- u32 busa;
-
- off = (unsigned long)vaddr & ~PAGE_MASK;
- npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
- page = virt_to_page((unsigned long)vaddr & PAGE_MASK);
- busa = iommu_get_one(dev, page, npages);
- return busa + off;
+ void *vaddr = page_address(page) + offset;
+ unsigned long off = (unsigned long)vaddr & ~PAGE_MASK;
+ unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ /* XXX So what is maxphys for us and how do drivers know it? */
+ if (!len || len > 256 * 1024)
+ return DMA_MAPPING_ERROR;
+ return iommu_get_one(dev, virt_to_page(vaddr), npages) + off;
}
-static __u32 iommu_get_scsi_one_gflush(struct device *dev, char *vaddr, unsigned long len)
+static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
+ struct page *page, unsigned long offset, size_t len,
+ enum dma_data_direction dir, unsigned long attrs)
{
flush_page_for_dma(0);
- return iommu_get_scsi_one(dev, vaddr, len);
+ return __sbus_iommu_map_page(dev, page, offset, len);
}
-static __u32 iommu_get_scsi_one_pflush(struct device *dev, char *vaddr, unsigned long len)
+static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev,
+ struct page *page, unsigned long offset, size_t len,
+ enum dma_data_direction dir, unsigned long attrs)
{
- unsigned long page = ((unsigned long) vaddr) & PAGE_MASK;
+ void *vaddr = page_address(page) + offset;
+ unsigned long p = ((unsigned long)vaddr) & PAGE_MASK;
- while(page < ((unsigned long)(vaddr + len))) {
- flush_page_for_dma(page);
- page += PAGE_SIZE;
+ while (p < (unsigned long)vaddr + len) {
+ flush_page_for_dma(p);
+ p += PAGE_SIZE;
}
- return iommu_get_scsi_one(dev, vaddr, len);
+
+ return __sbus_iommu_map_page(dev, page, offset, len);
}
-static void iommu_get_scsi_sgl_gflush(struct device *dev, struct scatterlist *sg, int sz)
+static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
{
- int n;
+ struct scatterlist *sg;
+ int i, n;
flush_page_for_dma(0);
- while (sz != 0) {
- --sz;
+
+ for_each_sg(sgl, sg, nents, i) {
n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
sg->dma_length = sg->length;
- sg = sg_next(sg);
}
+
+ return nents;
}
-static void iommu_get_scsi_sgl_pflush(struct device *dev, struct scatterlist *sg, int sz)
+static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
{
unsigned long page, oldpage = 0;
- int n, i;
-
- while(sz != 0) {
- --sz;
+ struct scatterlist *sg;
+ int i, j, n;
+ for_each_sg(sgl, sg, nents, j) {
n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
/*
@@ -277,8 +285,9 @@ static void iommu_get_scsi_sgl_pflush(struct device *dev, struct scatterlist *sg
sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
sg->dma_length = sg->length;
- sg = sg_next(sg);
}
+
+ return nents;
}
static void iommu_release_one(struct device *dev, u32 busa, int npages)
@@ -297,40 +306,52 @@ static void iommu_release_one(struct device *dev, u32 busa, int npages)
bit_map_clear(&iommu->usemap, ioptex, npages);
}
-static void iommu_release_scsi_one(struct device *dev, __u32 vaddr, unsigned long len)
+static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
+ size_t len, enum dma_data_direction dir, unsigned long attrs)
{
- unsigned long off;
+ unsigned long off = dma_addr & ~PAGE_MASK;
int npages;
- off = vaddr & ~PAGE_MASK;
npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
- iommu_release_one(dev, vaddr & PAGE_MASK, npages);
+ iommu_release_one(dev, dma_addr & PAGE_MASK, npages);
}
-static void iommu_release_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz)
+static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
{
- int n;
-
- while(sz != 0) {
- --sz;
+ struct scatterlist *sg;
+ int i, n;
+ for_each_sg(sgl, sg, nents, i) {
n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
iommu_release_one(dev, sg->dma_address & PAGE_MASK, n);
sg->dma_address = 0x21212121;
- sg = sg_next(sg);
}
}
#ifdef CONFIG_SBUS
-static int iommu_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long va,
- unsigned long addr, int len)
+static void *sbus_iommu_alloc(struct device *dev, size_t len,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
struct iommu_struct *iommu = dev->archdata.iommu;
- unsigned long page, end;
+ unsigned long va, addr, page, end, ret;
iopte_t *iopte = iommu->page_table;
iopte_t *first;
int ioptex;
+ /* XXX So what is maxphys for us and how do drivers know it? */
+ if (!len || len > 256 * 1024)
+ return NULL;
+
+ len = PAGE_ALIGN(len);
+ va = __get_free_pages(gfp | __GFP_ZERO, get_order(len));
+ if (va == 0)
+ return NULL;
+
+ addr = ret = sparc_dma_alloc_resource(dev, len);
+ if (!addr)
+ goto out_free_pages;
+
BUG_ON((va & ~PAGE_MASK) != 0);
BUG_ON((addr & ~PAGE_MASK) != 0);
BUG_ON((len & ~PAGE_MASK) != 0);
@@ -385,16 +406,25 @@ static int iommu_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long
flush_tlb_all();
iommu_invalidate(iommu->regs);
- *pba = iommu->start + (ioptex << PAGE_SHIFT);
- return 0;
+ *dma_handle = iommu->start + (ioptex << PAGE_SHIFT);
+ return (void *)ret;
+
+out_free_pages:
+ free_pages(va, get_order(len));
+ return NULL;
}
-static void iommu_unmap_dma_area(struct device *dev, unsigned long busa, int len)
+static void sbus_iommu_free(struct device *dev, size_t len, void *cpu_addr,
+ dma_addr_t busa, unsigned long attrs)
{
struct iommu_struct *iommu = dev->archdata.iommu;
iopte_t *iopte = iommu->page_table;
- unsigned long end;
+ struct page *page = virt_to_page(cpu_addr);
int ioptex = (busa - iommu->start) >> PAGE_SHIFT;
+ unsigned long end;
+
+ if (!sparc_dma_free_resource(cpu_addr, len))
+ return;
BUG_ON((busa & ~PAGE_MASK) != 0);
BUG_ON((len & ~PAGE_MASK) != 0);
@@ -408,38 +438,40 @@ static void iommu_unmap_dma_area(struct device *dev, unsigned long busa, int len
flush_tlb_all();
iommu_invalidate(iommu->regs);
bit_map_clear(&iommu->usemap, ioptex, len >> PAGE_SHIFT);
+
+ __free_pages(page, get_order(len));
}
#endif
-static const struct sparc32_dma_ops iommu_dma_gflush_ops = {
- .get_scsi_one = iommu_get_scsi_one_gflush,
- .get_scsi_sgl = iommu_get_scsi_sgl_gflush,
- .release_scsi_one = iommu_release_scsi_one,
- .release_scsi_sgl = iommu_release_scsi_sgl,
+static const struct dma_map_ops sbus_iommu_dma_gflush_ops = {
#ifdef CONFIG_SBUS
- .map_dma_area = iommu_map_dma_area,
- .unmap_dma_area = iommu_unmap_dma_area,
+ .alloc = sbus_iommu_alloc,
+ .free = sbus_iommu_free,
#endif
+ .map_page = sbus_iommu_map_page_gflush,
+ .unmap_page = sbus_iommu_unmap_page,
+ .map_sg = sbus_iommu_map_sg_gflush,
+ .unmap_sg = sbus_iommu_unmap_sg,
};
-static const struct sparc32_dma_ops iommu_dma_pflush_ops = {
- .get_scsi_one = iommu_get_scsi_one_pflush,
- .get_scsi_sgl = iommu_get_scsi_sgl_pflush,
- .release_scsi_one = iommu_release_scsi_one,
- .release_scsi_sgl = iommu_release_scsi_sgl,
+static const struct dma_map_ops sbus_iommu_dma_pflush_ops = {
#ifdef CONFIG_SBUS
- .map_dma_area = iommu_map_dma_area,
- .unmap_dma_area = iommu_unmap_dma_area,
+ .alloc = sbus_iommu_alloc,
+ .free = sbus_iommu_free,
#endif
+ .map_page = sbus_iommu_map_page_pflush,
+ .unmap_page = sbus_iommu_unmap_page,
+ .map_sg = sbus_iommu_map_sg_pflush,
+ .unmap_sg = sbus_iommu_unmap_sg,
};
void __init ld_mmu_iommu(void)
{
if (flush_page_for_dma_global) {
/* flush_page_for_dma flushes everything, no matter of what page is it */
- sparc32_dma_ops = &iommu_dma_gflush_ops;
+ dma_ops = &sbus_iommu_dma_gflush_ops;
} else {
- sparc32_dma_ops = &iommu_dma_pflush_ops;
+ dma_ops = &sbus_iommu_dma_pflush_ops;
}
if (viking_mxcc_present || srmmu_modtype == HyperSparc) {
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index a4c05159dca5..2681027d7bff 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -4,7 +4,6 @@ config UNICORE32
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
- select DMA_DIRECT_OPS
select HAVE_GENERIC_DMA_COHERENT
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c7094f813183..57552f2b37eb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -66,7 +66,6 @@ config X86
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_SET_MEMORY
- select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
@@ -90,7 +89,6 @@ config X86
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CLOCKSOURCE_WATCHDOG
select DCACHE_WORD_ACCESS
- select DMA_DIRECT_OPS
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 3f9d1b4019bb..e0ff3ac8c127 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -50,8 +50,6 @@ static unsigned long iommu_pages; /* .. and in pages */
static u32 *iommu_gatt_base; /* Remapping table */
-static dma_addr_t bad_dma_addr;
-
/*
* If this is disabled the IOMMU will use an optimized flushing strategy
* of only flushing when an mapping is reused. With it true the GART is
@@ -74,8 +72,6 @@ static u32 gart_unmapped_entry;
(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
-#define EMERGENCY_PAGES 32 /* = 128KB */
-
#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
@@ -155,9 +151,6 @@ static void flush_gart(void)
#ifdef CONFIG_IOMMU_LEAK
/* Debugging aid for drivers that don't free their IOMMU tables */
-static int leak_trace;
-static int iommu_leak_pages = 20;
-
static void dump_leak(void)
{
static int dump;
@@ -184,14 +177,6 @@ static void iommu_full(struct device *dev, size_t size, int dir)
*/
dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);
-
- if (size > PAGE_SIZE*EMERGENCY_PAGES) {
- if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
- panic("PCI-DMA: Memory would be corrupted\n");
- if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
- panic(KERN_ERR
- "PCI-DMA: Random memory would be DMAed\n");
- }
#ifdef CONFIG_IOMMU_LEAK
dump_leak();
#endif
@@ -220,7 +205,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
int i;
if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
- return bad_dma_addr;
+ return DMA_MAPPING_ERROR;
iommu_page = alloc_iommu(dev, npages, align_mask);
if (iommu_page == -1) {
@@ -229,7 +214,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
if (panic_on_overflow)
panic("dma_map_area overflow %lu bytes\n", size);
iommu_full(dev, size, dir);
- return bad_dma_addr;
+ return DMA_MAPPING_ERROR;
}
for (i = 0; i < npages; i++) {
@@ -271,7 +256,7 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
int npages;
int i;
- if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
+ if (dma_addr == DMA_MAPPING_ERROR ||
dma_addr >= iommu_bus_base + iommu_size)
return;
@@ -315,7 +300,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
if (nonforced_iommu(dev, addr, s->length)) {
addr = dma_map_area(dev, addr, s->length, dir, 0);
- if (addr == bad_dma_addr) {
+ if (addr == DMA_MAPPING_ERROR) {
if (i > 0)
gart_unmap_sg(dev, sg, i, dir, 0);
nents = 0;
@@ -471,7 +456,7 @@ error:
iommu_full(dev, pages << PAGE_SHIFT, dir);
for_each_sg(sg, s, nents, i)
- s->dma_address = bad_dma_addr;
+ s->dma_address = DMA_MAPPING_ERROR;
return 0;
}
@@ -490,7 +475,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
*dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size,
DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1);
flush_gart();
- if (unlikely(*dma_addr == bad_dma_addr))
+ if (unlikely(*dma_addr == DMA_MAPPING_ERROR))
goto out_free;
return vaddr;
out_free:
@@ -507,11 +492,6 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs);
}
-static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return (dma_addr == bad_dma_addr);
-}
-
static int no_agp;
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -695,7 +675,6 @@ static const struct dma_map_ops gart_dma_ops = {
.unmap_page = gart_unmap_page,
.alloc = gart_alloc_coherent,
.free = gart_free_coherent,
- .mapping_error = gart_mapping_error,
.dma_supported = dma_direct_supported,
};
@@ -730,7 +709,6 @@ int __init gart_iommu_init(void)
unsigned long aper_base, aper_size;
unsigned long start_pfn, end_pfn;
unsigned long scratch;
- long i;
if (!amd_nb_has_feature(AMD_NB_GART))
return 0;
@@ -774,29 +752,12 @@ int __init gart_iommu_init(void)
if (!iommu_gart_bitmap)
panic("Cannot allocate iommu bitmap\n");
-#ifdef CONFIG_IOMMU_LEAK
- if (leak_trace) {
- int ret;
-
- ret = dma_debug_resize_entries(iommu_pages);
- if (ret)
- pr_debug("PCI-DMA: Cannot trace all the entries\n");
- }
-#endif
-
- /*
- * Out of IOMMU space handling.
- * Reserve some invalid pages at the beginning of the GART.
- */
- bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
-
pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
iommu_size >> 20);
agp_memory_reserved = iommu_size;
iommu_start = aper_size - iommu_size;
iommu_bus_base = info.aper_base + iommu_start;
- bad_dma_addr = iommu_bus_base;
iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
/*
@@ -838,8 +799,6 @@ int __init gart_iommu_init(void)
if (!scratch)
panic("Cannot allocate iommu scratch page");
gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
- for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
- iommu_gatt_base[i] = gart_unmapped_entry;
flush_gart();
dma_ops = &gart_dma_ops;
@@ -853,16 +812,6 @@ void __init gart_parse_options(char *p)
{
int arg;
-#ifdef CONFIG_IOMMU_LEAK
- if (!strncmp(p, "leak", 4)) {
- leak_trace = 1;
- p += 4;
- if (*p == '=')
- ++p;
- if (isdigit(*p) && get_option(&p, &arg))
- iommu_leak_pages = arg;
- }
-#endif
if (isdigit(*p) && get_option(&p, &arg))
iommu_size = arg;
if (!strncmp(p, "fullflush", 9))
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index bbfc8b1e9104..c70720f61a34 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -51,8 +51,6 @@
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
-#define CALGARY_MAPPING_ERROR 0
-
#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
int use_calgary __read_mostly = 1;
#else
@@ -157,8 +155,6 @@ static const unsigned long phb_debug_offsets[] = {
#define PHB_DEBUG_STUFF_OFFSET 0x0020
-#define EMERGENCY_PAGES 32 /* = 128KB */
-
unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
static int translate_empty_slots __read_mostly = 0;
static int calgary_detected __read_mostly = 0;
@@ -255,7 +251,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (panic_on_overflow)
panic("Calgary: fix the allocator.\n");
else
- return CALGARY_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
}
@@ -274,11 +270,10 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
dma_addr_t ret;
entry = iommu_range_alloc(dev, tbl, npages);
-
- if (unlikely(entry == CALGARY_MAPPING_ERROR)) {
+ if (unlikely(entry == DMA_MAPPING_ERROR)) {
pr_warn("failed to allocate %u pages in iommu %p\n",
npages, tbl);
- return CALGARY_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
/* set the return dma address */
@@ -294,12 +289,10 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages)
{
unsigned long entry;
- unsigned long badend;
unsigned long flags;
/* were we called with bad_dma_address? */
- badend = CALGARY_MAPPING_ERROR + (EMERGENCY_PAGES * PAGE_SIZE);
- if (unlikely(dma_addr < badend)) {
+ if (unlikely(dma_addr == DMA_MAPPING_ERROR)) {
WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
"address 0x%Lx\n", dma_addr);
return;
@@ -383,7 +376,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE);
entry = iommu_range_alloc(dev, tbl, npages);
- if (entry == CALGARY_MAPPING_ERROR) {
+ if (entry == DMA_MAPPING_ERROR) {
/* makes sure unmap knows to stop */
s->dma_length = 0;
goto error;
@@ -401,7 +394,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
error:
calgary_unmap_sg(dev, sg, nelems, dir, 0);
for_each_sg(sg, s, nelems, i) {
- sg->dma_address = CALGARY_MAPPING_ERROR;
+ sg->dma_address = DMA_MAPPING_ERROR;
sg->dma_length = 0;
}
return 0;
@@ -454,7 +447,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
/* set up tces to cover the allocated range */
mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
- if (mapping == CALGARY_MAPPING_ERROR)
+ if (mapping == DMA_MAPPING_ERROR)
goto free;
*dma_handle = mapping;
return ret;
@@ -479,11 +472,6 @@ static void calgary_free_coherent(struct device *dev, size_t size,
free_pages((unsigned long)vaddr, get_order(size));
}
-static int calgary_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == CALGARY_MAPPING_ERROR;
-}
-
static const struct dma_map_ops calgary_dma_ops = {
.alloc = calgary_alloc_coherent,
.free = calgary_free_coherent,
@@ -491,7 +479,6 @@ static const struct dma_map_ops calgary_dma_ops = {
.unmap_sg = calgary_unmap_sg,
.map_page = calgary_map_page,
.unmap_page = calgary_unmap_page,
- .mapping_error = calgary_mapping_error,
.dma_supported = dma_direct_supported,
};
@@ -739,9 +726,6 @@ static void __init calgary_reserve_regions(struct pci_dev *dev)
u64 start;
struct iommu_table *tbl = pci_iommu(dev->bus);
- /* reserve EMERGENCY_PAGES from bad_dma_address and up */
- iommu_range_reserve(tbl, CALGARY_MAPPING_ERROR, EMERGENCY_PAGES);
-
/* avoid the BIOS/VGA first 640KB-1MB region */
/* for CalIOC2 - avoid the entire first MB */
if (is_calgary(dev->device)) {
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f4562fcec681..d460998ae828 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -17,7 +17,7 @@
static bool disable_dac_quirk __read_mostly;
-const struct dma_map_ops *dma_ops = &dma_direct_ops;
+const struct dma_map_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
#ifdef CONFIG_IOMMU_DEBUG
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index bd08b9e1c9e2..5f5302028a9a 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -62,10 +62,8 @@ IOMMU_INIT(pci_swiotlb_detect_4gb,
void __init pci_swiotlb_init(void)
{
- if (swiotlb) {
+ if (swiotlb)
swiotlb_init(0);
- dma_ops = &swiotlb_dma_ops;
- }
}
void __init pci_swiotlb_late_init(void)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 006f373f54ab..385afa2b9e17 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -381,13 +381,6 @@ void __init mem_encrypt_init(void)
swiotlb_update_mem_attributes();
/*
- * With SEV, DMA operations cannot use encryption, we need to use
- * SWIOTLB to bounce buffer DMA operation.
- */
- if (sev_active())
- dma_ops = &swiotlb_dma_ops;
-
- /*
* With SEV, we need to unroll the rep string I/O instructions.
*/
if (sev_active())
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 7a5bafb76d77..3cdafea55ab6 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -168,7 +168,6 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
return;
pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
- pdev->dev.dma_ops = &swiotlb_dma_ops;
pdev->dev.archdata.is_sta2x11 = true;
/* We must enable all devices as master, for audio DMA to work */
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index d29b7365da8d..36338e7564a3 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
config XTENSA
def_bool y
- select ARCH_HAS_SG_CHAIN
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
@@ -10,7 +9,7 @@ config XTENSA
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
- select DMA_DIRECT_OPS
+ select DMA_REMAP if MMU
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_IRQ_SHOW
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 1fc138b6bc0a..9171bff76fc4 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -160,7 +160,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
flag & __GFP_NOWARN);
if (!page)
- page = alloc_pages(flag, get_order(size));
+ page = alloc_pages(flag | __GFP_ZERO, get_order(size));
if (!page)
return NULL;
diff --git a/block/Kconfig b/block/Kconfig
index f7045aa47edb..8044452a4fd3 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -155,12 +155,6 @@ config BLK_CGROUP_IOLATENCY
Note, this is an experimental interface and could be changed someday.
-config BLK_WBT_SQ
- bool "Single queue writeback throttling"
- depends on BLK_WBT
- ---help---
- Enable writeback throttling by default on legacy single queue devices
-
config BLK_WBT_MQ
bool "Multiqueue writeback throttling"
default y
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index f95a48b0d7b2..4626b88b2d5a 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -3,67 +3,6 @@ if BLOCK
menu "IO Schedulers"
-config IOSCHED_NOOP
- bool
- default y
- ---help---
- The no-op I/O scheduler is a minimal scheduler that does basic merging
- and sorting. Its main uses include non-disk based block devices like
- memory devices, and specialised software or hardware environments
- that do their own scheduling and require only minimal assistance from
- the kernel.
-
-config IOSCHED_DEADLINE
- tristate "Deadline I/O scheduler"
- default y
- ---help---
- The deadline I/O scheduler is simple and compact. It will provide
- CSCAN service with FIFO expiration of requests, switching to
- a new point in the service tree and doing a batch of IO from there
- in case of expiry.
-
-config IOSCHED_CFQ
- tristate "CFQ I/O scheduler"
- default y
- ---help---
- The CFQ I/O scheduler tries to distribute bandwidth equally
- among all processes in the system. It should provide a fair
- and low latency working environment, suitable for both desktop
- and server systems.
-
- This is the default I/O scheduler.
-
-config CFQ_GROUP_IOSCHED
- bool "CFQ Group Scheduling support"
- depends on IOSCHED_CFQ && BLK_CGROUP
- ---help---
- Enable group IO scheduling in CFQ.
-
-choice
-
- prompt "Default I/O scheduler"
- default DEFAULT_CFQ
- help
- Select the I/O scheduler which will be used by default for all
- block devices.
-
- config DEFAULT_DEADLINE
- bool "Deadline" if IOSCHED_DEADLINE=y
-
- config DEFAULT_CFQ
- bool "CFQ" if IOSCHED_CFQ=y
-
- config DEFAULT_NOOP
- bool "No-op"
-
-endchoice
-
-config DEFAULT_IOSCHED
- string
- default "deadline" if DEFAULT_DEADLINE
- default "cfq" if DEFAULT_CFQ
- default "noop" if DEFAULT_NOOP
-
config MQ_IOSCHED_DEADLINE
tristate "MQ deadline I/O scheduler"
default y
diff --git a/block/Makefile b/block/Makefile
index 27eac600474f..eee1b4ceecf9 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -3,7 +3,7 @@
# Makefile for the kernel block layer
#
-obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
+obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
@@ -18,9 +18,6 @@ obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
-obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
-obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
-obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 9fe5952d117d..c6113af31960 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -334,7 +334,7 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
parent = bfqg_parent(bfqg);
- lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock);
+ lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock);
if (unlikely(!parent))
return;
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
uint64_t serial_nr;
rcu_read_lock();
- serial_nr = bio_blkcg(bio)->css.serial_nr;
+ serial_nr = __bio_blkcg(bio)->css.serial_nr;
/*
* Check whether blkcg has changed. The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
goto out;
- bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
/*
* Update blkg_path for bfq_log_* functions. We cache this
* path, and update it here, for the following
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 97337214bec4..cd307767a134 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -399,9 +399,9 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
unsigned long flags;
struct bfq_io_cq *icq;
- spin_lock_irqsave(q->queue_lock, flags);
+ spin_lock_irqsave(&q->queue_lock, flags);
icq = icq_to_bic(ioc_lookup_icq(ioc, q));
- spin_unlock_irqrestore(q->queue_lock, flags);
+ spin_unlock_irqrestore(&q->queue_lock, flags);
return icq;
}
@@ -4066,7 +4066,7 @@ static void bfq_update_dispatch_stats(struct request_queue *q,
* In addition, the following queue lock guarantees that
* bfqq_group(bfqq) exists as well.
*/
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
if (idle_timer_disabled)
/*
* Since the idle timer has been disabled,
@@ -4085,7 +4085,7 @@ static void bfq_update_dispatch_stats(struct request_queue *q,
bfqg_stats_set_start_empty_time(bfqg);
bfqg_stats_update_io_remove(bfqg, rq->cmd_flags);
}
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
}
#else
static inline void bfq_update_dispatch_stats(struct request_queue *q,
@@ -4416,7 +4416,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
rcu_read_lock();
- bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
+ bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
if (!bfqg) {
bfqq = &bfqd->oom_bfqq;
goto out;
@@ -4669,11 +4669,11 @@ static void bfq_update_insert_stats(struct request_queue *q,
* In addition, the following queue lock guarantees that
* bfqq_group(bfqq) exists as well.
*/
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags);
if (idle_timer_disabled)
bfqg_stats_update_idle_time(bfqq_group(bfqq));
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
}
#else
static inline void bfq_update_insert_stats(struct request_queue *q,
@@ -5414,9 +5414,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
}
eq->elevator_data = bfqd;
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
q->elevator = eq;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
/*
* Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
@@ -5756,7 +5756,7 @@ static struct elv_fs_entry bfq_attrs[] = {
};
static struct elevator_type iosched_bfq_mq = {
- .ops.mq = {
+ .ops = {
.limit_depth = bfq_limit_depth,
.prepare_request = bfq_prepare_request,
.requeue_request = bfq_finish_requeue_request,
@@ -5777,7 +5777,6 @@ static struct elevator_type iosched_bfq_mq = {
.exit_sched = bfq_exit_queue,
},
- .uses_mq = true,
.icq_size = sizeof(struct bfq_io_cq),
.icq_align = __alignof__(struct bfq_io_cq),
.elevator_attrs = bfq_attrs,
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 290af497997b..1b633a3526d4 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -390,7 +390,6 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
bip->bip_iter.bi_sector += bytes_done >> 9;
bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
}
-EXPORT_SYMBOL(bio_integrity_advance);
/**
* bio_integrity_trim - Trim integrity vector
@@ -460,7 +459,6 @@ void bioset_integrity_free(struct bio_set *bs)
mempool_exit(&bs->bio_integrity_pool);
mempool_exit(&bs->bvec_integrity_pool);
}
-EXPORT_SYMBOL(bioset_integrity_free);
void __init bio_integrity_init(void)
{
diff --git a/block/bio.c b/block/bio.c
index 4d86e90654b2..8281bfcbc265 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -244,7 +244,7 @@ fallback:
void bio_uninit(struct bio *bio)
{
- bio_disassociate_task(bio);
+ bio_disassociate_blkg(bio);
}
EXPORT_SYMBOL(bio_uninit);
@@ -571,14 +571,13 @@ void bio_put(struct bio *bio)
}
EXPORT_SYMBOL(bio_put);
-inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
+int bio_phys_segments(struct request_queue *q, struct bio *bio)
{
if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
blk_recount_segments(q, bio);
return bio->bi_phys_segments;
}
-EXPORT_SYMBOL(bio_phys_segments);
/**
* __bio_clone_fast - clone a bio that shares the original bio's biovec
@@ -610,7 +609,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
bio->bi_iter = bio_src->bi_iter;
bio->bi_io_vec = bio_src->bi_io_vec;
- bio_clone_blkcg_association(bio, bio_src);
+ bio_clone_blkg_association(bio, bio_src);
+ blkcg_bio_issue_init(bio);
}
EXPORT_SYMBOL(__bio_clone_fast);
@@ -901,7 +901,6 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return 0;
}
-EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
static void submit_bio_wait_endio(struct bio *bio)
{
@@ -1592,7 +1591,6 @@ void bio_set_pages_dirty(struct bio *bio)
set_page_dirty_lock(bvec->bv_page);
}
}
-EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
static void bio_release_pages(struct bio *bio)
{
@@ -1662,17 +1660,33 @@ defer:
spin_unlock_irqrestore(&bio_dirty_lock, flags);
schedule_work(&bio_dirty_work);
}
-EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
+
+void update_io_ticks(struct hd_struct *part, unsigned long now)
+{
+ unsigned long stamp;
+again:
+ stamp = READ_ONCE(part->stamp);
+ if (unlikely(stamp != now)) {
+ if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
+ __part_stat_add(part, io_ticks, 1);
+ }
+ }
+ if (part->partno) {
+ part = &part_to_disk(part)->part0;
+ goto again;
+ }
+}
void generic_start_io_acct(struct request_queue *q, int op,
unsigned long sectors, struct hd_struct *part)
{
const int sgrp = op_stat_group(op);
- int cpu = part_stat_lock();
- part_round_stats(q, cpu, part);
- part_stat_inc(cpu, part, ios[sgrp]);
- part_stat_add(cpu, part, sectors[sgrp], sectors);
+ part_stat_lock();
+
+ update_io_ticks(part, jiffies);
+ part_stat_inc(part, ios[sgrp]);
+ part_stat_add(part, sectors[sgrp], sectors);
part_inc_in_flight(q, part, op_is_write(op));
part_stat_unlock();
@@ -1682,12 +1696,15 @@ EXPORT_SYMBOL(generic_start_io_acct);
void generic_end_io_acct(struct request_queue *q, int req_op,
struct hd_struct *part, unsigned long start_time)
{
- unsigned long duration = jiffies - start_time;
+ unsigned long now = jiffies;
+ unsigned long duration = now - start_time;
const int sgrp = op_stat_group(req_op);
- int cpu = part_stat_lock();
- part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_round_stats(q, cpu, part);
+ part_stat_lock();
+
+ update_io_ticks(part, now);
+ part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
+ part_stat_add(part, time_in_queue, duration);
part_dec_in_flight(q, part, op_is_write(req_op));
part_stat_unlock();
@@ -1957,102 +1974,133 @@ EXPORT_SYMBOL(bioset_init_from_src);
#ifdef CONFIG_BLK_CGROUP
-#ifdef CONFIG_MEMCG
/**
- * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
+ * bio_disassociate_blkg - puts back the blkg reference if associated
* @bio: target bio
- * @page: the page to lookup the blkcg from
*
- * Associate @bio with the blkcg from @page's owning memcg. This works like
- * every other associate function wrt references.
+ * Helper to disassociate the blkg from @bio if a blkg is associated.
*/
-int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
+void bio_disassociate_blkg(struct bio *bio)
{
- struct cgroup_subsys_state *blkcg_css;
-
- if (unlikely(bio->bi_css))
- return -EBUSY;
- if (!page->mem_cgroup)
- return 0;
- blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
- &io_cgrp_subsys);
- bio->bi_css = blkcg_css;
- return 0;
+ if (bio->bi_blkg) {
+ blkg_put(bio->bi_blkg);
+ bio->bi_blkg = NULL;
+ }
}
-#endif /* CONFIG_MEMCG */
+EXPORT_SYMBOL_GPL(bio_disassociate_blkg);
/**
- * bio_associate_blkcg - associate a bio with the specified blkcg
+ * __bio_associate_blkg - associate a bio with the a blkg
* @bio: target bio
- * @blkcg_css: css of the blkcg to associate
+ * @blkg: the blkg to associate
*
- * Associate @bio with the blkcg specified by @blkcg_css. Block layer will
- * treat @bio as if it were issued by a task which belongs to the blkcg.
+ * This tries to associate @bio with the specified @blkg. Association failure
+ * is handled by walking up the blkg tree. Therefore, the blkg associated can
+ * be anything between @blkg and the root_blkg. This situation only happens
+ * when a cgroup is dying and then the remaining bios will spill to the closest
+ * alive blkg.
*
- * This function takes an extra reference of @blkcg_css which will be put
- * when @bio is released. The caller must own @bio and is responsible for
- * synchronizing calls to this function.
+ * A reference will be taken on the @blkg and will be released when @bio is
+ * freed.
*/
-int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
+static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
{
- if (unlikely(bio->bi_css))
- return -EBUSY;
- css_get(blkcg_css);
- bio->bi_css = blkcg_css;
- return 0;
+ bio_disassociate_blkg(bio);
+
+ bio->bi_blkg = blkg_tryget_closest(blkg);
}
-EXPORT_SYMBOL_GPL(bio_associate_blkcg);
/**
- * bio_associate_blkg - associate a bio with the specified blkg
+ * bio_associate_blkg_from_css - associate a bio with a specified css
* @bio: target bio
- * @blkg: the blkg to associate
+ * @css: target css
*
- * Associate @bio with the blkg specified by @blkg. This is the queue specific
- * blkcg information associated with the @bio, a reference will be taken on the
- * @blkg and will be freed when the bio is freed.
+ * Associate @bio with the blkg found by combining the css's blkg and the
+ * request_queue of the @bio. This falls back to the queue's root_blkg if
+ * the association fails with the css.
*/
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
+void bio_associate_blkg_from_css(struct bio *bio,
+ struct cgroup_subsys_state *css)
{
- if (unlikely(bio->bi_blkg))
- return -EBUSY;
- if (!blkg_try_get(blkg))
- return -ENODEV;
- bio->bi_blkg = blkg;
- return 0;
+ struct request_queue *q = bio->bi_disk->queue;
+ struct blkcg_gq *blkg;
+
+ rcu_read_lock();
+
+ if (!css || !css->parent)
+ blkg = q->root_blkg;
+ else
+ blkg = blkg_lookup_create(css_to_blkcg(css), q);
+
+ __bio_associate_blkg(bio, blkg);
+
+ rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
+#ifdef CONFIG_MEMCG
/**
- * bio_disassociate_task - undo bio_associate_current()
+ * bio_associate_blkg_from_page - associate a bio with the page's blkg
* @bio: target bio
+ * @page: the page to lookup the blkcg from
+ *
+ * Associate @bio with the blkg from @page's owning memcg and the respective
+ * request_queue. If cgroup_e_css returns %NULL, fall back to the queue's
+ * root_blkg.
*/
-void bio_disassociate_task(struct bio *bio)
+void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
{
- if (bio->bi_ioc) {
- put_io_context(bio->bi_ioc);
- bio->bi_ioc = NULL;
- }
- if (bio->bi_css) {
- css_put(bio->bi_css);
- bio->bi_css = NULL;
- }
- if (bio->bi_blkg) {
- blkg_put(bio->bi_blkg);
- bio->bi_blkg = NULL;
- }
+ struct cgroup_subsys_state *css;
+
+ if (!page->mem_cgroup)
+ return;
+
+ rcu_read_lock();
+
+ css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
+ bio_associate_blkg_from_css(bio, css);
+
+ rcu_read_unlock();
+}
+#endif /* CONFIG_MEMCG */
+
+/**
+ * bio_associate_blkg - associate a bio with a blkg
+ * @bio: target bio
+ *
+ * Associate @bio with the blkg found from the bio's css and request_queue.
+ * If one is not found, bio_lookup_blkg() creates the blkg. If a blkg is
+ * already associated, the css is reused and association redone as the
+ * request_queue may have changed.
+ */
+void bio_associate_blkg(struct bio *bio)
+{
+ struct cgroup_subsys_state *css;
+
+ rcu_read_lock();
+
+ if (bio->bi_blkg)
+ css = &bio_blkcg(bio)->css;
+ else
+ css = blkcg_css();
+
+ bio_associate_blkg_from_css(bio, css);
+
+ rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(bio_associate_blkg);
/**
- * bio_clone_blkcg_association - clone blkcg association from src to dst bio
+ * bio_clone_blkg_association - clone blkg association from src to dst bio
* @dst: destination bio
* @src: source bio
*/
-void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
+void bio_clone_blkg_association(struct bio *dst, struct bio *src)
{
- if (src->bi_css)
- WARN_ON(bio_associate_blkcg(dst, src->bi_css));
+ if (src->bi_blkg)
+ __bio_associate_blkg(dst, src->bi_blkg);
}
-EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
+EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
#endif /* CONFIG_BLK_CGROUP */
static void __init biovec_init_slabs(void)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c630e02836a8..c8cc1cbb6370 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -76,14 +76,42 @@ static void blkg_free(struct blkcg_gq *blkg)
if (blkg->pd[i])
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
- if (blkg->blkcg != &blkcg_root)
- blk_exit_rl(blkg->q, &blkg->rl);
-
blkg_rwstat_exit(&blkg->stat_ios);
blkg_rwstat_exit(&blkg->stat_bytes);
kfree(blkg);
}
+static void __blkg_release(struct rcu_head *rcu)
+{
+ struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+
+ percpu_ref_exit(&blkg->refcnt);
+
+ /* release the blkcg and parent blkg refs this blkg has been holding */
+ css_put(&blkg->blkcg->css);
+ if (blkg->parent)
+ blkg_put(blkg->parent);
+
+ wb_congested_put(blkg->wb_congested);
+
+ blkg_free(blkg);
+}
+
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid. For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+static void blkg_release(struct percpu_ref *ref)
+{
+ struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
+
+ call_rcu(&blkg->rcu_head, __blkg_release);
+}
+
/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
@@ -110,14 +138,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
blkg->q = q;
INIT_LIST_HEAD(&blkg->q_node);
blkg->blkcg = blkcg;
- atomic_set(&blkg->refcnt, 1);
-
- /* root blkg uses @q->root_rl, init rl only for !root blkgs */
- if (blkcg != &blkcg_root) {
- if (blk_init_rl(&blkg->rl, q, gfp_mask))
- goto err_free;
- blkg->rl.blkg = blkg;
- }
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -157,7 +177,7 @@ struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
if (blkg && blkg->q == q) {
if (update_hint) {
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
rcu_assign_pointer(blkcg->blkg_hint, blkg);
}
return blkg;
@@ -180,7 +200,13 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
int i, ret;
WARN_ON_ONCE(!rcu_read_lock_held());
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
+
+ /* request_queue is dying, do not create/recreate a blkg */
+ if (blk_queue_dying(q)) {
+ ret = -ENODEV;
+ goto err_free_blkg;
+ }
/* blkg holds a reference to blkcg */
if (!css_tryget_online(&blkcg->css)) {
@@ -217,6 +243,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_get(blkg->parent);
}
+ ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
+ GFP_NOWAIT | __GFP_NOWARN);
+ if (ret)
+ goto err_cancel_ref;
+
/* invoke per-policy init */
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -249,6 +280,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_put(blkg);
return ERR_PTR(ret);
+err_cancel_ref:
+ percpu_ref_exit(&blkg->refcnt);
err_put_congested:
wb_congested_put(wb_congested);
err_put_css:
@@ -259,7 +292,7 @@ err_free_blkg:
}
/**
- * blkg_lookup_create - lookup blkg, try to create one if not there
+ * __blkg_lookup_create - lookup blkg, try to create one if not there
* @blkcg: blkcg of interest
* @q: request_queue of interest
*
@@ -268,24 +301,16 @@ err_free_blkg:
* that all non-root blkg's have access to the parent blkg. This function
* should be called under RCU read lock and @q->queue_lock.
*
- * Returns pointer to the looked up or created blkg on success, ERR_PTR()
- * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not
- * dead and bypassing, returns ERR_PTR(-EBUSY).
+ * Returns the blkg or the closest blkg if blkg_create() fails as it walks
+ * down from root.
*/
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
- struct request_queue *q)
+struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
+ struct request_queue *q)
{
struct blkcg_gq *blkg;
WARN_ON_ONCE(!rcu_read_lock_held());
- lockdep_assert_held(q->queue_lock);
-
- /*
- * This could be the first entry point of blkcg implementation and
- * we shouldn't allow anything to go through for a bypassing queue.
- */
- if (unlikely(blk_queue_bypass(q)))
- return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
+ lockdep_assert_held(&q->queue_lock);
blkg = __blkg_lookup(blkcg, q, true);
if (blkg)
@@ -293,30 +318,64 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
/*
* Create blkgs walking down from blkcg_root to @blkcg, so that all
- * non-root blkgs have access to their parents.
+ * non-root blkgs have access to their parents. Returns the closest
+ * blkg to the intended blkg should blkg_create() fail.
*/
while (true) {
struct blkcg *pos = blkcg;
struct blkcg *parent = blkcg_parent(blkcg);
-
- while (parent && !__blkg_lookup(parent, q, false)) {
+ struct blkcg_gq *ret_blkg = q->root_blkg;
+
+ while (parent) {
+ blkg = __blkg_lookup(parent, q, false);
+ if (blkg) {
+ /* remember closest blkg */
+ ret_blkg = blkg;
+ break;
+ }
pos = parent;
parent = blkcg_parent(parent);
}
blkg = blkg_create(pos, q, NULL);
- if (pos == blkcg || IS_ERR(blkg))
+ if (IS_ERR(blkg))
+ return ret_blkg;
+ if (pos == blkcg)
return blkg;
}
}
+/**
+ * blkg_lookup_create - find or create a blkg
+ * @blkcg: target block cgroup
+ * @q: target request_queue
+ *
+ * This looks up or creates the blkg representing the unique pair
+ * of the blkcg and the request_queue.
+ */
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+ struct request_queue *q)
+{
+ struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
+
+ if (unlikely(!blkg)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->queue_lock, flags);
+ blkg = __blkg_lookup_create(blkcg, q);
+ spin_unlock_irqrestore(&q->queue_lock, flags);
+ }
+
+ return blkg;
+}
+
static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;
struct blkcg_gq *parent = blkg->parent;
int i;
- lockdep_assert_held(blkg->q->queue_lock);
+ lockdep_assert_held(&blkg->q->queue_lock);
lockdep_assert_held(&blkcg->lock);
/* Something wrong if we are trying to remove same group twice */
@@ -353,7 +412,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
*/
- blkg_put(blkg);
+ percpu_ref_kill(&blkg->refcnt);
}
/**
@@ -366,8 +425,7 @@ static void blkg_destroy_all(struct request_queue *q)
{
struct blkcg_gq *blkg, *n;
- lockdep_assert_held(q->queue_lock);
-
+ spin_lock_irq(&q->queue_lock);
list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
struct blkcg *blkcg = blkg->blkcg;
@@ -377,7 +435,7 @@ static void blkg_destroy_all(struct request_queue *q)
}
q->root_blkg = NULL;
- q->root_rl.blkg = NULL;
+ spin_unlock_irq(&q->queue_lock);
}
/*
@@ -403,41 +461,6 @@ void __blkg_release_rcu(struct rcu_head *rcu_head)
}
EXPORT_SYMBOL_GPL(__blkg_release_rcu);
-/*
- * The next function used by blk_queue_for_each_rl(). It's a bit tricky
- * because the root blkg uses @q->root_rl instead of its own rl.
- */
-struct request_list *__blk_queue_next_rl(struct request_list *rl,
- struct request_queue *q)
-{
- struct list_head *ent;
- struct blkcg_gq *blkg;
-
- /*
- * Determine the current blkg list_head. The first entry is
- * root_rl which is off @q->blkg_list and mapped to the head.
- */
- if (rl == &q->root_rl) {
- ent = &q->blkg_list;
- /* There are no more block groups, hence no request lists */
- if (list_empty(ent))
- return NULL;
- } else {
- blkg = container_of(rl, struct blkcg_gq, rl);
- ent = &blkg->q_node;
- }
-
- /* walk to the next list_head, skip root blkcg */
- ent = ent->next;
- if (ent == &q->root_blkg->q_node)
- ent = ent->next;
- if (ent == &q->blkg_list)
- return NULL;
-
- blkg = container_of(ent, struct blkcg_gq, q_node);
- return &blkg->rl;
-}
-
static int blkcg_reset_stats(struct cgroup_subsys_state *css,
struct cftype *cftype, u64 val)
{
@@ -477,7 +500,6 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
return dev_name(blkg->q->backing_dev_info->dev);
return NULL;
}
-EXPORT_SYMBOL_GPL(blkg_dev_name);
/**
* blkcg_print_blkgs - helper for printing per-blkg data
@@ -508,10 +530,10 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
rcu_read_lock();
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
- spin_lock_irq(blkg->q->queue_lock);
+ spin_lock_irq(&blkg->q->queue_lock);
if (blkcg_policy_enabled(blkg->q, pol))
total += prfill(sf, blkg->pd[pol->plid], data);
- spin_unlock_irq(blkg->q->queue_lock);
+ spin_unlock_irq(&blkg->q->queue_lock);
}
rcu_read_unlock();
@@ -709,7 +731,7 @@ u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
struct cgroup_subsys_state *pos_css;
u64 sum = 0;
- lockdep_assert_held(blkg->q->queue_lock);
+ lockdep_assert_held(&blkg->q->queue_lock);
rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
@@ -752,7 +774,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
struct blkg_rwstat sum = { };
int i;
- lockdep_assert_held(blkg->q->queue_lock);
+ lockdep_assert_held(&blkg->q->queue_lock);
rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
@@ -783,18 +805,10 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
struct request_queue *q)
{
WARN_ON_ONCE(!rcu_read_lock_held());
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
if (!blkcg_policy_enabled(q, pol))
return ERR_PTR(-EOPNOTSUPP);
-
- /*
- * This could be the first entry point of blkcg implementation and
- * we shouldn't allow anything to go through for a bypassing queue.
- */
- if (unlikely(blk_queue_bypass(q)))
- return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
-
return __blkg_lookup(blkcg, q, true /* update_hint */);
}
@@ -812,7 +826,7 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
*/
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx)
- __acquires(rcu) __acquires(disk->queue->queue_lock)
+ __acquires(rcu) __acquires(&disk->queue->queue_lock)
{
struct gendisk *disk;
struct request_queue *q;
@@ -840,7 +854,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
q = disk->queue;
rcu_read_lock();
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
blkg = blkg_lookup_check(blkcg, pol, q);
if (IS_ERR(blkg)) {
@@ -867,7 +881,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
}
/* Drop locks to do new blkg allocation with GFP_KERNEL. */
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
@@ -877,7 +891,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
}
rcu_read_lock();
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
blkg = blkg_lookup_check(pos, pol, q);
if (IS_ERR(blkg)) {
@@ -905,7 +919,7 @@ success:
return 0;
fail_unlock:
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
fail:
put_disk_and_module(disk);
@@ -921,7 +935,6 @@ fail:
}
return ret;
}
-EXPORT_SYMBOL_GPL(blkg_conf_prep);
/**
* blkg_conf_finish - finish up per-blkg config update
@@ -931,13 +944,12 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
* with blkg_conf_prep().
*/
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
- __releases(ctx->disk->queue->queue_lock) __releases(rcu)
+ __releases(&ctx->disk->queue->queue_lock) __releases(rcu)
{
- spin_unlock_irq(ctx->disk->queue->queue_lock);
+ spin_unlock_irq(&ctx->disk->queue->queue_lock);
rcu_read_unlock();
put_disk_and_module(ctx->disk);
}
-EXPORT_SYMBOL_GPL(blkg_conf_finish);
static int blkcg_print_stat(struct seq_file *sf, void *v)
{
@@ -967,7 +979,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
*/
off += scnprintf(buf+off, size-off, "%s ", dname);
- spin_lock_irq(blkg->q->queue_lock);
+ spin_lock_irq(&blkg->q->queue_lock);
rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
offsetof(struct blkcg_gq, stat_bytes));
@@ -981,7 +993,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]);
- spin_unlock_irq(blkg->q->queue_lock);
+ spin_unlock_irq(&blkg->q->queue_lock);
if (rbytes || wbytes || rios || wios) {
has_stats = true;
@@ -1102,9 +1114,9 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
struct blkcg_gq, blkcg_node);
struct request_queue *q = blkg->q;
- if (spin_trylock(q->queue_lock)) {
+ if (spin_trylock(&q->queue_lock)) {
blkg_destroy(blkg);
- spin_unlock(q->queue_lock);
+ spin_unlock(&q->queue_lock);
} else {
spin_unlock_irq(&blkcg->lock);
cpu_relax();
@@ -1225,36 +1237,31 @@ int blkcg_init_queue(struct request_queue *q)
/* Make sure the root blkg exists. */
rcu_read_lock();
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
blkg = blkg_create(&blkcg_root, q, new_blkg);
if (IS_ERR(blkg))
goto err_unlock;
q->root_blkg = blkg;
- q->root_rl.blkg = blkg;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
ret = blk_iolatency_init(q);
- if (ret) {
- spin_lock_irq(q->queue_lock);
- blkg_destroy_all(q);
- spin_unlock_irq(q->queue_lock);
- return ret;
- }
+ if (ret)
+ goto err_destroy_all;
ret = blk_throtl_init(q);
- if (ret) {
- spin_lock_irq(q->queue_lock);
- blkg_destroy_all(q);
- spin_unlock_irq(q->queue_lock);
- }
- return ret;
+ if (ret)
+ goto err_destroy_all;
+ return 0;
+err_destroy_all:
+ blkg_destroy_all(q);
+ return ret;
err_unlock:
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
@@ -1269,7 +1276,7 @@ err_unlock:
*/
void blkcg_drain_queue(struct request_queue *q)
{
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
/*
* @q could be exiting and already have destroyed all blkgs as
@@ -1289,10 +1296,7 @@ void blkcg_drain_queue(struct request_queue *q)
*/
void blkcg_exit_queue(struct request_queue *q)
{
- spin_lock_irq(q->queue_lock);
blkg_destroy_all(q);
- spin_unlock_irq(q->queue_lock);
-
blk_throtl_exit(q);
}
@@ -1396,10 +1400,8 @@ int blkcg_activate_policy(struct request_queue *q,
if (blkcg_policy_enabled(q, pol))
return 0;
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_freeze_queue(q);
- else
- blk_queue_bypass_start(q);
pd_prealloc:
if (!pd_prealloc) {
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
@@ -1409,7 +1411,7 @@ pd_prealloc:
}
}
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
list_for_each_entry(blkg, &q->blkg_list, q_node) {
struct blkg_policy_data *pd;
@@ -1421,7 +1423,7 @@ pd_prealloc:
if (!pd)
swap(pd, pd_prealloc);
if (!pd) {
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
goto pd_prealloc;
}
@@ -1435,12 +1437,10 @@ pd_prealloc:
__set_bit(pol->plid, q->blkcg_pols);
ret = 0;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
out_bypass_end:
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_unfreeze_queue(q);
- else
- blk_queue_bypass_end(q);
if (pd_prealloc)
pol->pd_free_fn(pd_prealloc);
return ret;
@@ -1463,12 +1463,10 @@ void blkcg_deactivate_policy(struct request_queue *q,
if (!blkcg_policy_enabled(q, pol))
return;
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_freeze_queue(q);
- else
- blk_queue_bypass_start(q);
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
__clear_bit(pol->plid, q->blkcg_pols);
@@ -1481,12 +1479,10 @@ void blkcg_deactivate_policy(struct request_queue *q,
}
}
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_unfreeze_queue(q);
- else
- blk_queue_bypass_end(q);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
@@ -1748,8 +1744,7 @@ void blkcg_maybe_throttle_current(void)
blkg = blkg_lookup(blkcg, q);
if (!blkg)
goto out;
- blkg = blkg_try_get(blkg);
- if (!blkg)
+ if (!blkg_tryget(blkg))
goto out;
rcu_read_unlock();
@@ -1761,7 +1756,6 @@ out:
rcu_read_unlock();
blk_put_queue(q);
}
-EXPORT_SYMBOL_GPL(blkcg_maybe_throttle_current);
/**
* blkcg_schedule_throttle - this task needs to check for throttling
@@ -1795,7 +1789,6 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
current->use_memdelay = use_memdelay;
set_notify_resume(current);
}
-EXPORT_SYMBOL_GPL(blkcg_schedule_throttle);
/**
* blkcg_add_delay - add delay to this blkg
@@ -1810,7 +1803,6 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
blkcg_scale_delay(blkg, now);
atomic64_add(delta, &blkg->delay_nsec);
}
-EXPORT_SYMBOL_GPL(blkcg_add_delay);
module_param(blkcg_debug_stats, bool, 0644);
MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
diff --git a/block/blk-core.c b/block/blk-core.c
index deb56932f8c4..c78042975737 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -58,11 +58,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
DEFINE_IDA(blk_queue_ida);
/*
- * For the allocated request tables
- */
-struct kmem_cache *request_cachep;
-
-/*
* For queue allocation
*/
struct kmem_cache *blk_requestq_cachep;
@@ -79,11 +74,7 @@ static struct workqueue_struct *kblockd_workqueue;
*/
void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- queue_flag_set(flag, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ set_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL(blk_queue_flag_set);
@@ -94,11 +85,7 @@ EXPORT_SYMBOL(blk_queue_flag_set);
*/
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- queue_flag_clear(flag, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ clear_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL(blk_queue_flag_clear);
@@ -112,85 +99,15 @@ EXPORT_SYMBOL(blk_queue_flag_clear);
*/
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
{
- unsigned long flags;
- bool res;
-
- spin_lock_irqsave(q->queue_lock, flags);
- res = queue_flag_test_and_set(flag, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- return res;
+ return test_and_set_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
-/**
- * blk_queue_flag_test_and_clear - atomically test and clear a queue flag
- * @flag: flag to be cleared
- * @q: request queue
- *
- * Returns the previous value of @flag - 0 if the flag was not set and 1 if
- * the flag was set.
- */
-bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q)
-{
- unsigned long flags;
- bool res;
-
- spin_lock_irqsave(q->queue_lock, flags);
- res = queue_flag_test_and_clear(flag, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- return res;
-}
-EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear);
-
-static void blk_clear_congested(struct request_list *rl, int sync)
-{
-#ifdef CONFIG_CGROUP_WRITEBACK
- clear_wb_congested(rl->blkg->wb_congested, sync);
-#else
- /*
- * If !CGROUP_WRITEBACK, all blkg's map to bdi->wb and we shouldn't
- * flip its congestion state for events on other blkcgs.
- */
- if (rl == &rl->q->root_rl)
- clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync);
-#endif
-}
-
-static void blk_set_congested(struct request_list *rl, int sync)
-{
-#ifdef CONFIG_CGROUP_WRITEBACK
- set_wb_congested(rl->blkg->wb_congested, sync);
-#else
- /* see blk_clear_congested() */
- if (rl == &rl->q->root_rl)
- set_wb_congested(rl->q->backing_dev_info->wb.congested, sync);
-#endif
-}
-
-void blk_queue_congestion_threshold(struct request_queue *q)
-{
- int nr;
-
- nr = q->nr_requests - (q->nr_requests / 8) + 1;
- if (nr > q->nr_requests)
- nr = q->nr_requests;
- q->nr_congestion_on = nr;
-
- nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
- if (nr < 1)
- nr = 1;
- q->nr_congestion_off = nr;
-}
-
void blk_rq_init(struct request_queue *q, struct request *rq)
{
memset(rq, 0, sizeof(*rq));
INIT_LIST_HEAD(&rq->queuelist);
- INIT_LIST_HEAD(&rq->timeout_list);
- rq->cpu = -1;
rq->q = q;
rq->__sector = (sector_t) -1;
INIT_HLIST_NODE(&rq->hash);
@@ -256,10 +173,11 @@ static void print_req_error(struct request *req, blk_status_t status)
if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
return;
- printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
- __func__, blk_errors[idx].name, req->rq_disk ?
- req->rq_disk->disk_name : "?",
- (unsigned long long)blk_rq_pos(req));
+ printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu flags %x\n",
+ __func__, blk_errors[idx].name,
+ req->rq_disk ? req->rq_disk->disk_name : "?",
+ (unsigned long long)blk_rq_pos(req),
+ req->cmd_flags);
}
static void req_bio_endio(struct request *rq, struct bio *bio,
@@ -292,99 +210,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
}
EXPORT_SYMBOL(blk_dump_rq_flags);
-static void blk_delay_work(struct work_struct *work)
-{
- struct request_queue *q;
-
- q = container_of(work, struct request_queue, delay_work.work);
- spin_lock_irq(q->queue_lock);
- __blk_run_queue(q);
- spin_unlock_irq(q->queue_lock);
-}
-
-/**
- * blk_delay_queue - restart queueing after defined interval
- * @q: The &struct request_queue in question
- * @msecs: Delay in msecs
- *
- * Description:
- * Sometimes queueing needs to be postponed for a little while, to allow
- * resources to come back. This function will make sure that queueing is
- * restarted around the specified time.
- */
-void blk_delay_queue(struct request_queue *q, unsigned long msecs)
-{
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- if (likely(!blk_queue_dead(q)))
- queue_delayed_work(kblockd_workqueue, &q->delay_work,
- msecs_to_jiffies(msecs));
-}
-EXPORT_SYMBOL(blk_delay_queue);
-
-/**
- * blk_start_queue_async - asynchronously restart a previously stopped queue
- * @q: The &struct request_queue in question
- *
- * Description:
- * blk_start_queue_async() will clear the stop flag on the queue, and
- * ensure that the request_fn for the queue is run from an async
- * context.
- **/
-void blk_start_queue_async(struct request_queue *q)
-{
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- queue_flag_clear(QUEUE_FLAG_STOPPED, q);
- blk_run_queue_async(q);
-}
-EXPORT_SYMBOL(blk_start_queue_async);
-
-/**
- * blk_start_queue - restart a previously stopped queue
- * @q: The &struct request_queue in question
- *
- * Description:
- * blk_start_queue() will clear the stop flag on the queue, and call
- * the request_fn for the queue if it was in a stopped state when
- * entered. Also see blk_stop_queue().
- **/
-void blk_start_queue(struct request_queue *q)
-{
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- queue_flag_clear(QUEUE_FLAG_STOPPED, q);
- __blk_run_queue(q);
-}
-EXPORT_SYMBOL(blk_start_queue);
-
-/**
- * blk_stop_queue - stop a queue
- * @q: The &struct request_queue in question
- *
- * Description:
- * The Linux block layer assumes that a block driver will consume all
- * entries on the request queue when the request_fn strategy is called.
- * Often this will not happen, because of hardware limitations (queue
- * depth settings). If a device driver gets a 'queue full' response,
- * or if it simply chooses not to queue more I/O at one point, it can
- * call this function to prevent the request_fn from being called until
- * the driver has signalled it's ready to go again. This happens by calling
- * blk_start_queue() to restart queue operations.
- **/
-void blk_stop_queue(struct request_queue *q)
-{
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- cancel_delayed_work(&q->delay_work);
- queue_flag_set(QUEUE_FLAG_STOPPED, q);
-}
-EXPORT_SYMBOL(blk_stop_queue);
-
/**
* blk_sync_queue - cancel any pending callbacks on a queue
* @q: the queue
@@ -408,15 +233,13 @@ void blk_sync_queue(struct request_queue *q)
del_timer_sync(&q->timeout);
cancel_work_sync(&q->timeout_work);
- if (q->mq_ops) {
+ if (queue_is_mq(q)) {
struct blk_mq_hw_ctx *hctx;
int i;
cancel_delayed_work_sync(&q->requeue_work);
queue_for_each_hw_ctx(q, hctx, i)
cancel_delayed_work_sync(&hctx->run_work);
- } else {
- cancel_delayed_work_sync(&q->delay_work);
}
}
EXPORT_SYMBOL(blk_sync_queue);
@@ -442,250 +265,12 @@ void blk_clear_pm_only(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_clear_pm_only);
-/**
- * __blk_run_queue_uncond - run a queue whether or not it has been stopped
- * @q: The queue to run
- *
- * Description:
- * Invoke request handling on a queue if there are any pending requests.
- * May be used to restart request handling after a request has completed.
- * This variant runs the queue whether or not the queue has been
- * stopped. Must be called with the queue lock held and interrupts
- * disabled. See also @blk_run_queue.
- */
-inline void __blk_run_queue_uncond(struct request_queue *q)
-{
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- if (unlikely(blk_queue_dead(q)))
- return;
-
- /*
- * Some request_fn implementations, e.g. scsi_request_fn(), unlock
- * the queue lock internally. As a result multiple threads may be
- * running such a request function concurrently. Keep track of the
- * number of active request_fn invocations such that blk_drain_queue()
- * can wait until all these request_fn calls have finished.
- */
- q->request_fn_active++;
- q->request_fn(q);
- q->request_fn_active--;
-}
-EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
-
-/**
- * __blk_run_queue - run a single device queue
- * @q: The queue to run
- *
- * Description:
- * See @blk_run_queue.
- */
-void __blk_run_queue(struct request_queue *q)
-{
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- if (unlikely(blk_queue_stopped(q)))
- return;
-
- __blk_run_queue_uncond(q);
-}
-EXPORT_SYMBOL(__blk_run_queue);
-
-/**
- * blk_run_queue_async - run a single device queue in workqueue context
- * @q: The queue to run
- *
- * Description:
- * Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- * of us.
- *
- * Note:
- * Since it is not allowed to run q->delay_work after blk_cleanup_queue()
- * has canceled q->delay_work, callers must hold the queue lock to avoid
- * race conditions between blk_cleanup_queue() and blk_run_queue_async().
- */
-void blk_run_queue_async(struct request_queue *q)
-{
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
- mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
-}
-EXPORT_SYMBOL(blk_run_queue_async);
-
-/**
- * blk_run_queue - run a single device queue
- * @q: The queue to run
- *
- * Description:
- * Invoke request handling on this queue, if it has pending work to do.
- * May be used to restart queueing when a request has completed.
- */
-void blk_run_queue(struct request_queue *q)
-{
- unsigned long flags;
-
- WARN_ON_ONCE(q->mq_ops);
-
- spin_lock_irqsave(q->queue_lock, flags);
- __blk_run_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_run_queue);
-
void blk_put_queue(struct request_queue *q)
{
kobject_put(&q->kobj);
}
EXPORT_SYMBOL(blk_put_queue);
-/**
- * __blk_drain_queue - drain requests from request_queue
- * @q: queue to drain
- * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
- *
- * Drain requests from @q. If @drain_all is set, all requests are drained.
- * If not, only ELVPRIV requests are drained. The caller is responsible
- * for ensuring that no new requests which need to be drained are queued.
- */
-static void __blk_drain_queue(struct request_queue *q, bool drain_all)
- __releases(q->queue_lock)
- __acquires(q->queue_lock)
-{
- int i;
-
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- while (true) {
- bool drain = false;
-
- /*
- * The caller might be trying to drain @q before its
- * elevator is initialized.
- */
- if (q->elevator)
- elv_drain_elevator(q);
-
- blkcg_drain_queue(q);
-
- /*
- * This function might be called on a queue which failed
- * driver init after queue creation or is not yet fully
- * active yet. Some drivers (e.g. fd and loop) get unhappy
- * in such cases. Kick queue iff dispatch queue has
- * something on it and @q has request_fn set.
- */
- if (!list_empty(&q->queue_head) && q->request_fn)
- __blk_run_queue(q);
-
- drain |= q->nr_rqs_elvpriv;
- drain |= q->request_fn_active;
-
- /*
- * Unfortunately, requests are queued at and tracked from
- * multiple places and there's no single counter which can
- * be drained. Check all the queues and counters.
- */
- if (drain_all) {
- struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
- drain |= !list_empty(&q->queue_head);
- for (i = 0; i < 2; i++) {
- drain |= q->nr_rqs[i];
- drain |= q->in_flight[i];
- if (fq)
- drain |= !list_empty(&fq->flush_queue[i]);
- }
- }
-
- if (!drain)
- break;
-
- spin_unlock_irq(q->queue_lock);
-
- msleep(10);
-
- spin_lock_irq(q->queue_lock);
- }
-
- /*
- * With queue marked dead, any woken up waiter will fail the
- * allocation path, so the wakeup chaining is lost and we're
- * left with hung waiters. We need to wake up those waiters.
- */
- if (q->request_fn) {
- struct request_list *rl;
-
- blk_queue_for_each_rl(rl, q)
- for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
- wake_up_all(&rl->wait[i]);
- }
-}
-
-void blk_drain_queue(struct request_queue *q)
-{
- spin_lock_irq(q->queue_lock);
- __blk_drain_queue(q, true);
- spin_unlock_irq(q->queue_lock);
-}
-
-/**
- * blk_queue_bypass_start - enter queue bypass mode
- * @q: queue of interest
- *
- * In bypass mode, only the dispatch FIFO queue of @q is used. This
- * function makes @q enter bypass mode and drains all requests which were
- * throttled or issued before. On return, it's guaranteed that no request
- * is being throttled or has ELVPRIV set and blk_queue_bypass() %true
- * inside queue or RCU read lock.
- */
-void blk_queue_bypass_start(struct request_queue *q)
-{
- WARN_ON_ONCE(q->mq_ops);
-
- spin_lock_irq(q->queue_lock);
- q->bypass_depth++;
- queue_flag_set(QUEUE_FLAG_BYPASS, q);
- spin_unlock_irq(q->queue_lock);
-
- /*
- * Queues start drained. Skip actual draining till init is
- * complete. This avoids lenghty delays during queue init which
- * can happen many times during boot.
- */
- if (blk_queue_init_done(q)) {
- spin_lock_irq(q->queue_lock);
- __blk_drain_queue(q, false);
- spin_unlock_irq(q->queue_lock);
-
- /* ensure blk_queue_bypass() is %true inside RCU read lock */
- synchronize_rcu();
- }
-}
-EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
-
-/**
- * blk_queue_bypass_end - leave queue bypass mode
- * @q: queue of interest
- *
- * Leave bypass mode and restore the normal queueing behavior.
- *
- * Note: although blk_queue_bypass_start() is only called for blk-sq queues,
- * this function is called for both blk-sq and blk-mq queues.
- */
-void blk_queue_bypass_end(struct request_queue *q)
-{
- spin_lock_irq(q->queue_lock);
- if (!--q->bypass_depth)
- queue_flag_clear(QUEUE_FLAG_BYPASS, q);
- WARN_ON_ONCE(q->bypass_depth < 0);
- spin_unlock_irq(q->queue_lock);
-}
-EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
-
void blk_set_queue_dying(struct request_queue *q)
{
blk_queue_flag_set(QUEUE_FLAG_DYING, q);
@@ -697,20 +282,8 @@ void blk_set_queue_dying(struct request_queue *q)
*/
blk_freeze_queue_start(q);
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_wake_waiters(q);
- else {
- struct request_list *rl;
-
- spin_lock_irq(q->queue_lock);
- blk_queue_for_each_rl(rl, q) {
- if (rl->rq_pool) {
- wake_up_all(&rl->wait[BLK_RW_SYNC]);
- wake_up_all(&rl->wait[BLK_RW_ASYNC]);
- }
- }
- spin_unlock_irq(q->queue_lock);
- }
/* Make blk_queue_enter() reexamine the DYING flag. */
wake_up_all(&q->mq_freeze_wq);
@@ -755,29 +328,13 @@ void blk_exit_queue(struct request_queue *q)
*/
void blk_cleanup_queue(struct request_queue *q)
{
- spinlock_t *lock = q->queue_lock;
-
/* mark @q DYING, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
blk_set_queue_dying(q);
- spin_lock_irq(lock);
-
- /*
- * A dying queue is permanently in bypass mode till released. Note
- * that, unlike blk_queue_bypass_start(), we aren't performing
- * synchronize_rcu() after entering bypass mode to avoid the delay
- * as some drivers create and destroy a lot of queues while
- * probing. This is still safe because blk_release_queue() will be
- * called only after the queue refcnt drops to zero and nothing,
- * RCU or not, would be traversing the queue by then.
- */
- q->bypass_depth++;
- queue_flag_set(QUEUE_FLAG_BYPASS, q);
- queue_flag_set(QUEUE_FLAG_NOMERGES, q);
- queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
- queue_flag_set(QUEUE_FLAG_DYING, q);
- spin_unlock_irq(lock);
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
+ blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
+ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
mutex_unlock(&q->sysfs_lock);
/*
@@ -788,9 +345,7 @@ void blk_cleanup_queue(struct request_queue *q)
rq_qos_exit(q);
- spin_lock_irq(lock);
- queue_flag_set(QUEUE_FLAG_DEAD, q);
- spin_unlock_irq(lock);
+ blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
/*
* make sure all in-progress dispatch are completed because
@@ -801,7 +356,7 @@ void blk_cleanup_queue(struct request_queue *q)
* We rely on driver to deal with the race in case that queue
* initialization isn't done.
*/
- if (q->mq_ops && blk_queue_init_done(q))
+ if (queue_is_mq(q) && blk_queue_init_done(q))
blk_mq_quiesce_queue(q);
/* for synchronous bio-based driver finish in-flight integrity i/o */
@@ -819,98 +374,19 @@ void blk_cleanup_queue(struct request_queue *q)
blk_exit_queue(q);
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_free_queue(q);
- percpu_ref_exit(&q->q_usage_counter);
- spin_lock_irq(lock);
- if (q->queue_lock != &q->__queue_lock)
- q->queue_lock = &q->__queue_lock;
- spin_unlock_irq(lock);
+ percpu_ref_exit(&q->q_usage_counter);
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);
-/* Allocate memory local to the request queue */
-static void *alloc_request_simple(gfp_t gfp_mask, void *data)
-{
- struct request_queue *q = data;
-
- return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node);
-}
-
-static void free_request_simple(void *element, void *data)
-{
- kmem_cache_free(request_cachep, element);
-}
-
-static void *alloc_request_size(gfp_t gfp_mask, void *data)
-{
- struct request_queue *q = data;
- struct request *rq;
-
- rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask,
- q->node);
- if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) {
- kfree(rq);
- rq = NULL;
- }
- return rq;
-}
-
-static void free_request_size(void *element, void *data)
-{
- struct request_queue *q = data;
-
- if (q->exit_rq_fn)
- q->exit_rq_fn(q, element);
- kfree(element);
-}
-
-int blk_init_rl(struct request_list *rl, struct request_queue *q,
- gfp_t gfp_mask)
-{
- if (unlikely(rl->rq_pool) || q->mq_ops)
- return 0;
-
- rl->q = q;
- rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
- rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
- init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
- init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
-
- if (q->cmd_size) {
- rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
- alloc_request_size, free_request_size,
- q, gfp_mask, q->node);
- } else {
- rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
- alloc_request_simple, free_request_simple,
- q, gfp_mask, q->node);
- }
- if (!rl->rq_pool)
- return -ENOMEM;
-
- if (rl != &q->root_rl)
- WARN_ON_ONCE(!blk_get_queue(q));
-
- return 0;
-}
-
-void blk_exit_rl(struct request_queue *q, struct request_list *rl)
-{
- if (rl->rq_pool) {
- mempool_destroy(rl->rq_pool);
- if (rl != &q->root_rl)
- blk_put_queue(q);
- }
-}
-
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
- return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL);
+ return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
}
EXPORT_SYMBOL(blk_alloc_queue);
@@ -990,17 +466,8 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
* blk_alloc_queue_node - allocate a request queue
* @gfp_mask: memory allocation flags
* @node_id: NUMA node to allocate memory from
- * @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
- * serialize calls to the legacy .request_fn() callback. Ignored for
- * blk-mq request queues.
- *
- * Note: pass the queue lock as the third argument to this function instead of
- * setting the queue lock pointer explicitly to avoid triggering a sporadic
- * crash in the blkcg code. This function namely calls blkcg_init_queue() and
- * the queue lock pointer must be set before blkcg_init_queue() is called.
*/
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
- spinlock_t *lock)
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
struct request_queue *q;
int ret;
@@ -1012,8 +479,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
INIT_LIST_HEAD(&q->queue_head);
q->last_merge = NULL;
- q->end_sector = 0;
- q->boundary_rq = NULL;
q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
if (q->id < 0)
@@ -1041,12 +506,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
laptop_mode_timer_fn, 0);
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, NULL);
- INIT_LIST_HEAD(&q->timeout_list);
INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
INIT_LIST_HEAD(&q->blkg_list);
#endif
- INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
kobject_init(&q->kobj, &blk_queue_ktype);
@@ -1054,18 +517,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
mutex_init(&q->blk_trace_mutex);
#endif
mutex_init(&q->sysfs_lock);
- spin_lock_init(&q->__queue_lock);
-
- q->queue_lock = lock ? : &q->__queue_lock;
-
- /*
- * A queue starts its life with bypass turned on to avoid
- * unnecessary bypass on/off overhead and nasty surprises during
- * init. The initial bypass will be finished when the queue is
- * registered by blk_register_queue().
- */
- q->bypass_depth = 1;
- queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
+ spin_lock_init(&q->queue_lock);
init_waitqueue_head(&q->mq_freeze_wq);
@@ -1099,105 +551,6 @@ fail_q:
}
EXPORT_SYMBOL(blk_alloc_queue_node);
-/**
- * blk_init_queue - prepare a request queue for use with a block device
- * @rfn: The function to be called to process requests that have been
- * placed on the queue.
- * @lock: Request queue spin lock
- *
- * Description:
- * If a block device wishes to use the standard request handling procedures,
- * which sorts requests and coalesces adjacent requests, then it must
- * call blk_init_queue(). The function @rfn will be called when there
- * are requests on the queue that need to be processed. If the device
- * supports plugging, then @rfn may not be called immediately when requests
- * are available on the queue, but may be called at some time later instead.
- * Plugged queues are generally unplugged when a buffer belonging to one
- * of the requests on the queue is needed, or due to memory pressure.
- *
- * @rfn is not required, or even expected, to remove all requests off the
- * queue, but only as many as it can handle at a time. If it does leave
- * requests on the queue, it is responsible for arranging that the requests
- * get dealt with eventually.
- *
- * The queue spin lock must be held while manipulating the requests on the
- * request queue; this lock will be taken also from interrupt context, so irq
- * disabling is needed for it.
- *
- * Function returns a pointer to the initialized request queue, or %NULL if
- * it didn't succeed.
- *
- * Note:
- * blk_init_queue() must be paired with a blk_cleanup_queue() call
- * when the block device is deactivated (such as at module unload).
- **/
-
-struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
-{
- return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
-}
-EXPORT_SYMBOL(blk_init_queue);
-
-struct request_queue *
-blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
-{
- struct request_queue *q;
-
- q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
- if (!q)
- return NULL;
-
- q->request_fn = rfn;
- if (blk_init_allocated_queue(q) < 0) {
- blk_cleanup_queue(q);
- return NULL;
- }
-
- return q;
-}
-EXPORT_SYMBOL(blk_init_queue_node);
-
-static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
-
-
-int blk_init_allocated_queue(struct request_queue *q)
-{
- WARN_ON_ONCE(q->mq_ops);
-
- q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size, GFP_KERNEL);
- if (!q->fq)
- return -ENOMEM;
-
- if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL))
- goto out_free_flush_queue;
-
- if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
- goto out_exit_flush_rq;
-
- INIT_WORK(&q->timeout_work, blk_timeout_work);
- q->queue_flags |= QUEUE_FLAG_DEFAULT;
-
- /*
- * This also sets hw/phys segments, boundary and size
- */
- blk_queue_make_request(q, blk_queue_bio);
-
- q->sg_reserved_size = INT_MAX;
-
- if (elevator_init(q))
- goto out_exit_flush_rq;
- return 0;
-
-out_exit_flush_rq:
- if (q->exit_rq_fn)
- q->exit_rq_fn(q, q->fq->flush_rq);
-out_free_flush_queue:
- blk_free_flush_queue(q->fq);
- q->fq = NULL;
- return -ENOMEM;
-}
-EXPORT_SYMBOL(blk_init_allocated_queue);
-
bool blk_get_queue(struct request_queue *q)
{
if (likely(!blk_queue_dying(q))) {
@@ -1209,406 +562,6 @@ bool blk_get_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_get_queue);
-static inline void blk_free_request(struct request_list *rl, struct request *rq)
-{
- if (rq->rq_flags & RQF_ELVPRIV) {
- elv_put_request(rl->q, rq);
- if (rq->elv.icq)
- put_io_context(rq->elv.icq->ioc);
- }
-
- mempool_free(rq, rl->rq_pool);
-}
-
-/*
- * ioc_batching returns true if the ioc is a valid batching request and
- * should be given priority access to a request.
- */
-static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
-{
- if (!ioc)
- return 0;
-
- /*
- * Make sure the process is able to allocate at least 1 request
- * even if the batch times out, otherwise we could theoretically
- * lose wakeups.
- */
- return ioc->nr_batch_requests == q->nr_batching ||
- (ioc->nr_batch_requests > 0
- && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
-}
-
-/*
- * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
- * will cause the process to be a "batcher" on all queues in the system. This
- * is the behaviour we want though - once it gets a wakeup it should be given
- * a nice run.
- */
-static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
-{
- if (!ioc || ioc_batching(q, ioc))
- return;
-
- ioc->nr_batch_requests = q->nr_batching;
- ioc->last_waited = jiffies;
-}
-
-static void __freed_request(struct request_list *rl, int sync)
-{
- struct request_queue *q = rl->q;
-
- if (rl->count[sync] < queue_congestion_off_threshold(q))
- blk_clear_congested(rl, sync);
-
- if (rl->count[sync] + 1 <= q->nr_requests) {
- if (waitqueue_active(&rl->wait[sync]))
- wake_up(&rl->wait[sync]);
-
- blk_clear_rl_full(rl, sync);
- }
-}
-
-/*
- * A request has just been released. Account for it, update the full and
- * congestion status, wake up any waiters. Called under q->queue_lock.
- */
-static void freed_request(struct request_list *rl, bool sync,
- req_flags_t rq_flags)
-{
- struct request_queue *q = rl->q;
-
- q->nr_rqs[sync]--;
- rl->count[sync]--;
- if (rq_flags & RQF_ELVPRIV)
- q->nr_rqs_elvpriv--;
-
- __freed_request(rl, sync);
-
- if (unlikely(rl->starved[sync ^ 1]))
- __freed_request(rl, sync ^ 1);
-}
-
-int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
-{
- struct request_list *rl;
- int on_thresh, off_thresh;
-
- WARN_ON_ONCE(q->mq_ops);
-
- spin_lock_irq(q->queue_lock);
- q->nr_requests = nr;
- blk_queue_congestion_threshold(q);
- on_thresh = queue_congestion_on_threshold(q);
- off_thresh = queue_congestion_off_threshold(q);
-
- blk_queue_for_each_rl(rl, q) {
- if (rl->count[BLK_RW_SYNC] >= on_thresh)
- blk_set_congested(rl, BLK_RW_SYNC);
- else if (rl->count[BLK_RW_SYNC] < off_thresh)
- blk_clear_congested(rl, BLK_RW_SYNC);
-
- if (rl->count[BLK_RW_ASYNC] >= on_thresh)
- blk_set_congested(rl, BLK_RW_ASYNC);
- else if (rl->count[BLK_RW_ASYNC] < off_thresh)
- blk_clear_congested(rl, BLK_RW_ASYNC);
-
- if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
- blk_set_rl_full(rl, BLK_RW_SYNC);
- } else {
- blk_clear_rl_full(rl, BLK_RW_SYNC);
- wake_up(&rl->wait[BLK_RW_SYNC]);
- }
-
- if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
- blk_set_rl_full(rl, BLK_RW_ASYNC);
- } else {
- blk_clear_rl_full(rl, BLK_RW_ASYNC);
- wake_up(&rl->wait[BLK_RW_ASYNC]);
- }
- }
-
- spin_unlock_irq(q->queue_lock);
- return 0;
-}
-
-/**
- * __get_request - get a free request
- * @rl: request list to allocate from
- * @op: operation and flags
- * @bio: bio to allocate request for (can be %NULL)
- * @flags: BLQ_MQ_REQ_* flags
- * @gfp_mask: allocator flags
- *
- * Get a free request from @q. This function may fail under memory
- * pressure or if @q is dead.
- *
- * Must be called with @q->queue_lock held and,
- * Returns ERR_PTR on failure, with @q->queue_lock held.
- * Returns request pointer on success, with @q->queue_lock *not held*.
- */
-static struct request *__get_request(struct request_list *rl, unsigned int op,
- struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask)
-{
- struct request_queue *q = rl->q;
- struct request *rq;
- struct elevator_type *et = q->elevator->type;
- struct io_context *ioc = rq_ioc(bio);
- struct io_cq *icq = NULL;
- const bool is_sync = op_is_sync(op);
- int may_queue;
- req_flags_t rq_flags = RQF_ALLOCED;
-
- lockdep_assert_held(q->queue_lock);
-
- if (unlikely(blk_queue_dying(q)))
- return ERR_PTR(-ENODEV);
-
- may_queue = elv_may_queue(q, op);
- if (may_queue == ELV_MQUEUE_NO)
- goto rq_starved;
-
- if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
- if (rl->count[is_sync]+1 >= q->nr_requests) {
- /*
- * The queue will fill after this allocation, so set
- * it as full, and mark this process as "batching".
- * This process will be allowed to complete a batch of
- * requests, others will be blocked.
- */
- if (!blk_rl_full(rl, is_sync)) {
- ioc_set_batching(q, ioc);
- blk_set_rl_full(rl, is_sync);
- } else {
- if (may_queue != ELV_MQUEUE_MUST
- && !ioc_batching(q, ioc)) {
- /*
- * The queue is full and the allocating
- * process is not a "batcher", and not
- * exempted by the IO scheduler
- */
- return ERR_PTR(-ENOMEM);
- }
- }
- }
- blk_set_congested(rl, is_sync);
- }
-
- /*
- * Only allow batching queuers to allocate up to 50% over the defined
- * limit of requests, otherwise we could have thousands of requests
- * allocated with any setting of ->nr_requests
- */
- if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
- return ERR_PTR(-ENOMEM);
-
- q->nr_rqs[is_sync]++;
- rl->count[is_sync]++;
- rl->starved[is_sync] = 0;
-
- /*
- * Decide whether the new request will be managed by elevator. If
- * so, mark @rq_flags and increment elvpriv. Non-zero elvpriv will
- * prevent the current elevator from being destroyed until the new
- * request is freed. This guarantees icq's won't be destroyed and
- * makes creating new ones safe.
- *
- * Flush requests do not use the elevator so skip initialization.
- * This allows a request to share the flush and elevator data.
- *
- * Also, lookup icq while holding queue_lock. If it doesn't exist,
- * it will be created after releasing queue_lock.
- */
- if (!op_is_flush(op) && !blk_queue_bypass(q)) {
- rq_flags |= RQF_ELVPRIV;
- q->nr_rqs_elvpriv++;
- if (et->icq_cache && ioc)
- icq = ioc_lookup_icq(ioc, q);
- }
-
- if (blk_queue_io_stat(q))
- rq_flags |= RQF_IO_STAT;
- spin_unlock_irq(q->queue_lock);
-
- /* allocate and init request */
- rq = mempool_alloc(rl->rq_pool, gfp_mask);
- if (!rq)
- goto fail_alloc;
-
- blk_rq_init(q, rq);
- blk_rq_set_rl(rq, rl);
- rq->cmd_flags = op;
- rq->rq_flags = rq_flags;
- if (flags & BLK_MQ_REQ_PREEMPT)
- rq->rq_flags |= RQF_PREEMPT;
-
- /* init elvpriv */
- if (rq_flags & RQF_ELVPRIV) {
- if (unlikely(et->icq_cache && !icq)) {
- if (ioc)
- icq = ioc_create_icq(ioc, q, gfp_mask);
- if (!icq)
- goto fail_elvpriv;
- }
-
- rq->elv.icq = icq;
- if (unlikely(elv_set_request(q, rq, bio, gfp_mask)))
- goto fail_elvpriv;
-
- /* @rq->elv.icq holds io_context until @rq is freed */
- if (icq)
- get_io_context(icq->ioc);
- }
-out:
- /*
- * ioc may be NULL here, and ioc_batching will be false. That's
- * OK, if the queue is under the request limit then requests need
- * not count toward the nr_batch_requests limit. There will always
- * be some limit enforced by BLK_BATCH_TIME.
- */
- if (ioc_batching(q, ioc))
- ioc->nr_batch_requests--;
-
- trace_block_getrq(q, bio, op);
- return rq;
-
-fail_elvpriv:
- /*
- * elvpriv init failed. ioc, icq and elvpriv aren't mempool backed
- * and may fail indefinitely under memory pressure and thus
- * shouldn't stall IO. Treat this request as !elvpriv. This will
- * disturb iosched and blkcg but weird is bettern than dead.
- */
- printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
- __func__, dev_name(q->backing_dev_info->dev));
-
- rq->rq_flags &= ~RQF_ELVPRIV;
- rq->elv.icq = NULL;
-
- spin_lock_irq(q->queue_lock);
- q->nr_rqs_elvpriv--;
- spin_unlock_irq(q->queue_lock);
- goto out;
-
-fail_alloc:
- /*
- * Allocation failed presumably due to memory. Undo anything we
- * might have messed up.
- *
- * Allocating task should really be put onto the front of the wait
- * queue, but this is pretty rare.
- */
- spin_lock_irq(q->queue_lock);
- freed_request(rl, is_sync, rq_flags);
-
- /*
- * in the very unlikely event that allocation failed and no
- * requests for this direction was pending, mark us starved so that
- * freeing of a request in the other direction will notice
- * us. another possible fix would be to split the rq mempool into
- * READ and WRITE
- */
-rq_starved:
- if (unlikely(rl->count[is_sync] == 0))
- rl->starved[is_sync] = 1;
- return ERR_PTR(-ENOMEM);
-}
-
-/**
- * get_request - get a free request
- * @q: request_queue to allocate request from
- * @op: operation and flags
- * @bio: bio to allocate request for (can be %NULL)
- * @flags: BLK_MQ_REQ_* flags.
- * @gfp: allocator flags
- *
- * Get a free request from @q. If %BLK_MQ_REQ_NOWAIT is set in @flags,
- * this function keeps retrying under memory pressure and fails iff @q is dead.
- *
- * Must be called with @q->queue_lock held and,
- * Returns ERR_PTR on failure, with @q->queue_lock held.
- * Returns request pointer on success, with @q->queue_lock *not held*.
- */
-static struct request *get_request(struct request_queue *q, unsigned int op,
- struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp)
-{
- const bool is_sync = op_is_sync(op);
- DEFINE_WAIT(wait);
- struct request_list *rl;
- struct request *rq;
-
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- rl = blk_get_rl(q, bio); /* transferred to @rq on success */
-retry:
- rq = __get_request(rl, op, bio, flags, gfp);
- if (!IS_ERR(rq))
- return rq;
-
- if (op & REQ_NOWAIT) {
- blk_put_rl(rl);
- return ERR_PTR(-EAGAIN);
- }
-
- if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) {
- blk_put_rl(rl);
- return rq;
- }
-
- /* wait on @rl and retry */
- prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
- TASK_UNINTERRUPTIBLE);
-
- trace_block_sleeprq(q, bio, op);
-
- spin_unlock_irq(q->queue_lock);
- io_schedule();
-
- /*
- * After sleeping, we become a "batching" process and will be able
- * to allocate at least one request, and up to a big batch of them
- * for a small period time. See ioc_batching, ioc_set_batching
- */
- ioc_set_batching(q, current->io_context);
-
- spin_lock_irq(q->queue_lock);
- finish_wait(&rl->wait[is_sync], &wait);
-
- goto retry;
-}
-
-/* flags: BLK_MQ_REQ_PREEMPT and/or BLK_MQ_REQ_NOWAIT. */
-static struct request *blk_old_get_request(struct request_queue *q,
- unsigned int op, blk_mq_req_flags_t flags)
-{
- struct request *rq;
- gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO;
- int ret = 0;
-
- WARN_ON_ONCE(q->mq_ops);
-
- /* create ioc upfront */
- create_io_context(gfp_mask, q->node);
-
- ret = blk_queue_enter(q, flags);
- if (ret)
- return ERR_PTR(ret);
- spin_lock_irq(q->queue_lock);
- rq = get_request(q, op, NULL, flags, gfp_mask);
- if (IS_ERR(rq)) {
- spin_unlock_irq(q->queue_lock);
- blk_queue_exit(q);
- return rq;
- }
-
- /* q->queue_lock is unlocked at this point */
- rq->__data_len = 0;
- rq->__sector = (sector_t) -1;
- rq->bio = rq->biotail = NULL;
- return rq;
-}
-
/**
* blk_get_request - allocate a request
* @q: request queue to allocate a request for
@@ -1623,170 +576,17 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op,
WARN_ON_ONCE(op & REQ_NOWAIT);
WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));
- if (q->mq_ops) {
- req = blk_mq_alloc_request(q, op, flags);
- if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
- q->mq_ops->initialize_rq_fn(req);
- } else {
- req = blk_old_get_request(q, op, flags);
- if (!IS_ERR(req) && q->initialize_rq_fn)
- q->initialize_rq_fn(req);
- }
+ req = blk_mq_alloc_request(q, op, flags);
+ if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
+ q->mq_ops->initialize_rq_fn(req);
return req;
}
EXPORT_SYMBOL(blk_get_request);
-/**
- * blk_requeue_request - put a request back on queue
- * @q: request queue where request should be inserted
- * @rq: request to be inserted
- *
- * Description:
- * Drivers often keep queueing requests until the hardware cannot accept
- * more, when that condition happens we need to put the request back
- * on the queue. Must be called with queue lock held.
- */
-void blk_requeue_request(struct request_queue *q, struct request *rq)
-{
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- blk_delete_timer(rq);
- blk_clear_rq_complete(rq);
- trace_block_rq_requeue(q, rq);
- rq_qos_requeue(q, rq);
-
- if (rq->rq_flags & RQF_QUEUED)
- blk_queue_end_tag(q, rq);
-
- BUG_ON(blk_queued_rq(rq));
-
- elv_requeue_request(q, rq);
-}
-EXPORT_SYMBOL(blk_requeue_request);
-
-static void add_acct_request(struct request_queue *q, struct request *rq,
- int where)
-{
- blk_account_io_start(rq, true);
- __elv_add_request(q, rq, where);
-}
-
-static void part_round_stats_single(struct request_queue *q, int cpu,
- struct hd_struct *part, unsigned long now,
- unsigned int inflight)
-{
- if (inflight) {
- __part_stat_add(cpu, part, time_in_queue,
- inflight * (now - part->stamp));
- __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
- }
- part->stamp = now;
-}
-
-/**
- * part_round_stats() - Round off the performance stats on a struct disk_stats.
- * @q: target block queue
- * @cpu: cpu number for stats access
- * @part: target partition
- *
- * The average IO queue length and utilisation statistics are maintained
- * by observing the current state of the queue length and the amount of
- * time it has been in this state for.
- *
- * Normally, that accounting is done on IO completion, but that can result
- * in more than a second's worth of IO being accounted for within any one
- * second, leading to >100% utilisation. To deal with that, we call this
- * function to do a round-off before returning the results when reading
- * /proc/diskstats. This accounts immediately for all queue usage up to
- * the current jiffies and restarts the counters again.
- */
-void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part)
-{
- struct hd_struct *part2 = NULL;
- unsigned long now = jiffies;
- unsigned int inflight[2];
- int stats = 0;
-
- if (part->stamp != now)
- stats |= 1;
-
- if (part->partno) {
- part2 = &part_to_disk(part)->part0;
- if (part2->stamp != now)
- stats |= 2;
- }
-
- if (!stats)
- return;
-
- part_in_flight(q, part, inflight);
-
- if (stats & 2)
- part_round_stats_single(q, cpu, part2, now, inflight[1]);
- if (stats & 1)
- part_round_stats_single(q, cpu, part, now, inflight[0]);
-}
-EXPORT_SYMBOL_GPL(part_round_stats);
-
-void __blk_put_request(struct request_queue *q, struct request *req)
-{
- req_flags_t rq_flags = req->rq_flags;
-
- if (unlikely(!q))
- return;
-
- if (q->mq_ops) {
- blk_mq_free_request(req);
- return;
- }
-
- lockdep_assert_held(q->queue_lock);
-
- blk_req_zone_write_unlock(req);
- blk_pm_put_request(req);
- blk_pm_mark_last_busy(req);
-
- elv_completed_request(q, req);
-
- /* this is a bio leak */
- WARN_ON(req->bio != NULL);
-
- rq_qos_done(q, req);
-
- /*
- * Request may not have originated from ll_rw_blk. if not,
- * it didn't come out of our reserved rq pools
- */
- if (rq_flags & RQF_ALLOCED) {
- struct request_list *rl = blk_rq_rl(req);
- bool sync = op_is_sync(req->cmd_flags);
-
- BUG_ON(!list_empty(&req->queuelist));
- BUG_ON(ELV_ON_HASH(req));
-
- blk_free_request(rl, req);
- freed_request(rl, sync, rq_flags);
- blk_put_rl(rl);
- blk_queue_exit(q);
- }
-}
-EXPORT_SYMBOL_GPL(__blk_put_request);
-
void blk_put_request(struct request *req)
{
- struct request_queue *q = req->q;
-
- if (q->mq_ops)
- blk_mq_free_request(req);
- else {
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- __blk_put_request(q, req);
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
+ blk_mq_free_request(req);
}
EXPORT_SYMBOL(blk_put_request);
@@ -1806,7 +606,6 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
req->biotail->bi_next = bio;
req->biotail = bio;
req->__data_len += bio->bi_iter.bi_size;
- req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
blk_account_io_start(req, false);
return true;
@@ -1830,7 +629,6 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
req->__sector = bio->bi_iter.bi_sector;
req->__data_len += bio->bi_iter.bi_size;
- req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
blk_account_io_start(req, false);
return true;
@@ -1850,7 +648,6 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
req->biotail->bi_next = bio;
req->biotail = bio;
req->__data_len += bio->bi_iter.bi_size;
- req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
req->nr_phys_segments = segments + 1;
blk_account_io_start(req, false);
@@ -1883,7 +680,6 @@ no_merge:
* Caller must ensure !blk_queue_nomerges(q) beforehand.
*/
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
- unsigned int *request_count,
struct request **same_queue_rq)
{
struct blk_plug *plug;
@@ -1893,25 +689,19 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
plug = current->plug;
if (!plug)
return false;
- *request_count = 0;
- if (q->mq_ops)
- plug_list = &plug->mq_list;
- else
- plug_list = &plug->list;
+ plug_list = &plug->mq_list;
list_for_each_entry_reverse(rq, plug_list, queuelist) {
bool merged = false;
- if (rq->q == q) {
- (*request_count)++;
+ if (rq->q == q && same_queue_rq) {
/*
* Only blk-mq multiple hardware queues case checks the
* rq in the same queue, there should be only one such
* rq in a queue
**/
- if (same_queue_rq)
- *same_queue_rq = rq;
+ *same_queue_rq = rq;
}
if (rq->q != q || !blk_rq_merge_ok(rq, bio))
@@ -1938,176 +728,18 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
return false;
}
-unsigned int blk_plug_queued_count(struct request_queue *q)
-{
- struct blk_plug *plug;
- struct request *rq;
- struct list_head *plug_list;
- unsigned int ret = 0;
-
- plug = current->plug;
- if (!plug)
- goto out;
-
- if (q->mq_ops)
- plug_list = &plug->mq_list;
- else
- plug_list = &plug->list;
-
- list_for_each_entry(rq, plug_list, queuelist) {
- if (rq->q == q)
- ret++;
- }
-out:
- return ret;
-}
-
void blk_init_request_from_bio(struct request *req, struct bio *bio)
{
- struct io_context *ioc = rq_ioc(bio);
-
if (bio->bi_opf & REQ_RAHEAD)
req->cmd_flags |= REQ_FAILFAST_MASK;
req->__sector = bio->bi_iter.bi_sector;
- if (ioprio_valid(bio_prio(bio)))
- req->ioprio = bio_prio(bio);
- else if (ioc)
- req->ioprio = ioc->ioprio;
- else
- req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+ req->ioprio = bio_prio(bio);
req->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(req->q, req, bio);
}
EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
-static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
-{
- struct blk_plug *plug;
- int where = ELEVATOR_INSERT_SORT;
- struct request *req, *free;
- unsigned int request_count = 0;
-
- /*
- * low level driver can indicate that it wants pages above a
- * certain limit bounced to low memory (ie for highmem, or even
- * ISA dma in theory)
- */
- blk_queue_bounce(q, &bio);
-
- blk_queue_split(q, &bio);
-
- if (!bio_integrity_prep(bio))
- return BLK_QC_T_NONE;
-
- if (op_is_flush(bio->bi_opf)) {
- spin_lock_irq(q->queue_lock);
- where = ELEVATOR_INSERT_FLUSH;
- goto get_rq;
- }
-
- /*
- * Check if we can merge with the plugged list before grabbing
- * any locks.
- */
- if (!blk_queue_nomerges(q)) {
- if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
- return BLK_QC_T_NONE;
- } else
- request_count = blk_plug_queued_count(q);
-
- spin_lock_irq(q->queue_lock);
-
- switch (elv_merge(q, &req, bio)) {
- case ELEVATOR_BACK_MERGE:
- if (!bio_attempt_back_merge(q, req, bio))
- break;
- elv_bio_merged(q, req, bio);
- free = attempt_back_merge(q, req);
- if (free)
- __blk_put_request(q, free);
- else
- elv_merged_request(q, req, ELEVATOR_BACK_MERGE);
- goto out_unlock;
- case ELEVATOR_FRONT_MERGE:
- if (!bio_attempt_front_merge(q, req, bio))
- break;
- elv_bio_merged(q, req, bio);
- free = attempt_front_merge(q, req);
- if (free)
- __blk_put_request(q, free);
- else
- elv_merged_request(q, req, ELEVATOR_FRONT_MERGE);
- goto out_unlock;
- default:
- break;
- }
-
-get_rq:
- rq_qos_throttle(q, bio, q->queue_lock);
-
- /*
- * Grab a free request. This is might sleep but can not fail.
- * Returns with the queue unlocked.
- */
- blk_queue_enter_live(q);
- req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
- if (IS_ERR(req)) {
- blk_queue_exit(q);
- rq_qos_cleanup(q, bio);
- if (PTR_ERR(req) == -ENOMEM)
- bio->bi_status = BLK_STS_RESOURCE;
- else
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- goto out_unlock;
- }
-
- rq_qos_track(q, req, bio);
-
- /*
- * After dropping the lock and possibly sleeping here, our request
- * may now be mergeable after it had proven unmergeable (above).
- * We don't worry about that case for efficiency. It won't happen
- * often, and the elevators are able to handle it.
- */
- blk_init_request_from_bio(req, bio);
-
- if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
- req->cpu = raw_smp_processor_id();
-
- plug = current->plug;
- if (plug) {
- /*
- * If this is the first request added after a plug, fire
- * of a plug trace.
- *
- * @request_count may become stale because of schedule
- * out, so check plug list again.
- */
- if (!request_count || list_empty(&plug->list))
- trace_block_plug(q);
- else {
- struct request *last = list_entry_rq(plug->list.prev);
- if (request_count >= BLK_MAX_REQUEST_COUNT ||
- blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE) {
- blk_flush_plug_list(plug, false);
- trace_block_plug(q);
- }
- }
- list_add_tail(&req->queuelist, &plug->list);
- blk_account_io_start(req, true);
- } else {
- spin_lock_irq(q->queue_lock);
- add_acct_request(q, req, where);
- __blk_run_queue(q);
-out_unlock:
- spin_unlock_irq(q->queue_lock);
- }
-
- return BLK_QC_T_NONE;
-}
-
static void handle_bad_sector(struct bio *bio, sector_t maxsector)
{
char b[BDEVNAME_SIZE];
@@ -2259,7 +891,7 @@ generic_make_request_checks(struct bio *bio)
* For a REQ_NOWAIT based request, return -EOPNOTSUPP
* if queue is not a request based queue.
*/
- if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
+ if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
goto not_supported;
if (should_fail_bio(bio))
@@ -2289,6 +921,9 @@ generic_make_request_checks(struct bio *bio)
}
}
+ if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ bio->bi_opf &= ~REQ_HIPRI;
+
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
if (!blk_queue_discard(q))
@@ -2561,17 +1196,6 @@ blk_qc_t submit_bio(struct bio *bio)
}
EXPORT_SYMBOL(submit_bio);
-bool blk_poll(struct request_queue *q, blk_qc_t cookie)
-{
- if (!q->poll_fn || !blk_qc_t_valid(cookie))
- return false;
-
- if (current->plug)
- blk_flush_plug_list(current->plug, false);
- return q->poll_fn(q, cookie);
-}
-EXPORT_SYMBOL_GPL(blk_poll);
-
/**
* blk_cloned_rq_check_limits - Helper function to check a cloned request
* for new the queue limits
@@ -2619,8 +1243,7 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
*/
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
- unsigned long flags;
- int where = ELEVATOR_INSERT_BACK;
+ blk_qc_t unused;
if (blk_cloned_rq_check_limits(q, rq))
return BLK_STS_IOERR;
@@ -2629,38 +1252,15 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
return BLK_STS_IOERR;
- if (q->mq_ops) {
- if (blk_queue_io_stat(q))
- blk_account_io_start(rq, true);
- /*
- * Since we have a scheduler attached on the top device,
- * bypass a potential scheduler on the bottom device for
- * insert.
- */
- return blk_mq_request_issue_directly(rq);
- }
-
- spin_lock_irqsave(q->queue_lock, flags);
- if (unlikely(blk_queue_dying(q))) {
- spin_unlock_irqrestore(q->queue_lock, flags);
- return BLK_STS_IOERR;
- }
+ if (blk_queue_io_stat(q))
+ blk_account_io_start(rq, true);
/*
- * Submitting request must be dequeued before calling this function
- * because it will be linked to another request_queue
+ * Since we have a scheduler attached on the top device,
+ * bypass a potential scheduler on the bottom device for
+ * insert.
*/
- BUG_ON(blk_queued_rq(rq));
-
- if (op_is_flush(rq->cmd_flags))
- where = ELEVATOR_INSERT_FLUSH;
-
- add_acct_request(q, rq, where);
- if (where == ELEVATOR_INSERT_FLUSH)
- __blk_run_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- return BLK_STS_OK;
+ return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused, true, true);
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
@@ -2710,11 +1310,10 @@ void blk_account_io_completion(struct request *req, unsigned int bytes)
if (blk_do_io_stat(req)) {
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
- int cpu;
- cpu = part_stat_lock();
+ part_stat_lock();
part = req->part;
- part_stat_add(cpu, part, sectors[sgrp], bytes >> 9);
+ part_stat_add(part, sectors[sgrp], bytes >> 9);
part_stat_unlock();
}
}
@@ -2729,14 +1328,14 @@ void blk_account_io_done(struct request *req, u64 now)
if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
- int cpu;
- cpu = part_stat_lock();
+ part_stat_lock();
part = req->part;
- part_stat_inc(cpu, part, ios[sgrp]);
- part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns);
- part_round_stats(req->q, cpu, part);
+ update_io_ticks(part, jiffies);
+ part_stat_inc(part, ios[sgrp]);
+ part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
+ part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
@@ -2748,16 +1347,15 @@ void blk_account_io_start(struct request *rq, bool new_io)
{
struct hd_struct *part;
int rw = rq_data_dir(rq);
- int cpu;
if (!blk_do_io_stat(rq))
return;
- cpu = part_stat_lock();
+ part_stat_lock();
if (!new_io) {
part = rq->part;
- part_stat_inc(cpu, part, merges[rw]);
+ part_stat_inc(part, merges[rw]);
} else {
part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
if (!hd_struct_try_get(part)) {
@@ -2772,232 +1370,14 @@ void blk_account_io_start(struct request *rq, bool new_io)
part = &rq->rq_disk->part0;
hd_struct_get(part);
}
- part_round_stats(rq->q, cpu, part);
part_inc_in_flight(rq->q, part, rw);
rq->part = part;
}
- part_stat_unlock();
-}
-
-static struct request *elv_next_request(struct request_queue *q)
-{
- struct request *rq;
- struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
-
- WARN_ON_ONCE(q->mq_ops);
-
- while (1) {
- list_for_each_entry(rq, &q->queue_head, queuelist) {
-#ifdef CONFIG_PM
- /*
- * If a request gets queued in state RPM_SUSPENDED
- * then that's a kernel bug.
- */
- WARN_ON_ONCE(q->rpm_status == RPM_SUSPENDED);
-#endif
- return rq;
- }
-
- /*
- * Flush request is running and flush request isn't queueable
- * in the drive, we can hold the queue till flush request is
- * finished. Even we don't do this, driver can't dispatch next
- * requests and will requeue them. And this can improve
- * throughput too. For example, we have request flush1, write1,
- * flush 2. flush1 is dispatched, then queue is hold, write1
- * isn't inserted to queue. After flush1 is finished, flush2
- * will be dispatched. Since disk cache is already clean,
- * flush2 will be finished very soon, so looks like flush2 is
- * folded to flush1.
- * Since the queue is hold, a flag is set to indicate the queue
- * should be restarted later. Please see flush_end_io() for
- * details.
- */
- if (fq->flush_pending_idx != fq->flush_running_idx &&
- !queue_flush_queueable(q)) {
- fq->flush_queue_delayed = 1;
- return NULL;
- }
- if (unlikely(blk_queue_bypass(q)) ||
- !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
- return NULL;
- }
-}
-
-/**
- * blk_peek_request - peek at the top of a request queue
- * @q: request queue to peek at
- *
- * Description:
- * Return the request at the top of @q. The returned request
- * should be started using blk_start_request() before LLD starts
- * processing it.
- *
- * Return:
- * Pointer to the request at the top of @q if available. Null
- * otherwise.
- */
-struct request *blk_peek_request(struct request_queue *q)
-{
- struct request *rq;
- int ret;
-
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- while ((rq = elv_next_request(q)) != NULL) {
- if (!(rq->rq_flags & RQF_STARTED)) {
- /*
- * This is the first time the device driver
- * sees this request (possibly after
- * requeueing). Notify IO scheduler.
- */
- if (rq->rq_flags & RQF_SORTED)
- elv_activate_rq(q, rq);
-
- /*
- * just mark as started even if we don't start
- * it, a request that has been delayed should
- * not be passed by new incoming requests
- */
- rq->rq_flags |= RQF_STARTED;
- trace_block_rq_issue(q, rq);
- }
-
- if (!q->boundary_rq || q->boundary_rq == rq) {
- q->end_sector = rq_end_sector(rq);
- q->boundary_rq = NULL;
- }
-
- if (rq->rq_flags & RQF_DONTPREP)
- break;
-
- if (q->dma_drain_size && blk_rq_bytes(rq)) {
- /*
- * make sure space for the drain appears we
- * know we can do this because max_hw_segments
- * has been adjusted to be one fewer than the
- * device can handle
- */
- rq->nr_phys_segments++;
- }
-
- if (!q->prep_rq_fn)
- break;
-
- ret = q->prep_rq_fn(q, rq);
- if (ret == BLKPREP_OK) {
- break;
- } else if (ret == BLKPREP_DEFER) {
- /*
- * the request may have been (partially) prepped.
- * we need to keep this request in the front to
- * avoid resource deadlock. RQF_STARTED will
- * prevent other fs requests from passing this one.
- */
- if (q->dma_drain_size && blk_rq_bytes(rq) &&
- !(rq->rq_flags & RQF_DONTPREP)) {
- /*
- * remove the space for the drain we added
- * so that we don't add it again
- */
- --rq->nr_phys_segments;
- }
-
- rq = NULL;
- break;
- } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
- rq->rq_flags |= RQF_QUIET;
- /*
- * Mark this request as started so we don't trigger
- * any debug logic in the end I/O path.
- */
- blk_start_request(rq);
- __blk_end_request_all(rq, ret == BLKPREP_INVALID ?
- BLK_STS_TARGET : BLK_STS_IOERR);
- } else {
- printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
- break;
- }
- }
-
- return rq;
-}
-EXPORT_SYMBOL(blk_peek_request);
-
-static void blk_dequeue_request(struct request *rq)
-{
- struct request_queue *q = rq->q;
+ update_io_ticks(part, jiffies);
- BUG_ON(list_empty(&rq->queuelist));
- BUG_ON(ELV_ON_HASH(rq));
-
- list_del_init(&rq->queuelist);
-
- /*
- * the time frame between a request being removed from the lists
- * and to it is freed is accounted as io that is in progress at
- * the driver side.
- */
- if (blk_account_rq(rq))
- q->in_flight[rq_is_sync(rq)]++;
-}
-
-/**
- * blk_start_request - start request processing on the driver
- * @req: request to dequeue
- *
- * Description:
- * Dequeue @req and start timeout timer on it. This hands off the
- * request to the driver.
- */
-void blk_start_request(struct request *req)
-{
- lockdep_assert_held(req->q->queue_lock);
- WARN_ON_ONCE(req->q->mq_ops);
-
- blk_dequeue_request(req);
-
- if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
- req->io_start_time_ns = ktime_get_ns();
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
- req->throtl_size = blk_rq_sectors(req);
-#endif
- req->rq_flags |= RQF_STATS;
- rq_qos_issue(req->q, req);
- }
-
- BUG_ON(blk_rq_is_complete(req));
- blk_add_timer(req);
-}
-EXPORT_SYMBOL(blk_start_request);
-
-/**
- * blk_fetch_request - fetch a request from a request queue
- * @q: request queue to fetch a request from
- *
- * Description:
- * Return the request at the top of @q. The request is started on
- * return and LLD can start processing it immediately.
- *
- * Return:
- * Pointer to the request at the top of @q if available. Null
- * otherwise.
- */
-struct request *blk_fetch_request(struct request_queue *q)
-{
- struct request *rq;
-
- lockdep_assert_held(q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- rq = blk_peek_request(q);
- if (rq)
- blk_start_request(rq);
- return rq;
+ part_stat_unlock();
}
-EXPORT_SYMBOL(blk_fetch_request);
/*
* Steal bios from a request and add them to a bio list.
@@ -3124,255 +1504,6 @@ bool blk_update_request(struct request *req, blk_status_t error,
}
EXPORT_SYMBOL_GPL(blk_update_request);
-static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
- unsigned int nr_bytes,
- unsigned int bidi_bytes)
-{
- if (blk_update_request(rq, error, nr_bytes))
- return true;
-
- /* Bidi request must be completed as a whole */
- if (unlikely(blk_bidi_rq(rq)) &&
- blk_update_request(rq->next_rq, error, bidi_bytes))
- return true;
-
- if (blk_queue_add_random(rq->q))
- add_disk_randomness(rq->rq_disk);
-
- return false;
-}
-
-/**
- * blk_unprep_request - unprepare a request
- * @req: the request
- *
- * This function makes a request ready for complete resubmission (or
- * completion). It happens only after all error handling is complete,
- * so represents the appropriate moment to deallocate any resources
- * that were allocated to the request in the prep_rq_fn. The queue
- * lock is held when calling this.
- */
-void blk_unprep_request(struct request *req)
-{
- struct request_queue *q = req->q;
-
- req->rq_flags &= ~RQF_DONTPREP;
- if (q->unprep_rq_fn)
- q->unprep_rq_fn(q, req);
-}
-EXPORT_SYMBOL_GPL(blk_unprep_request);
-
-void blk_finish_request(struct request *req, blk_status_t error)
-{
- struct request_queue *q = req->q;
- u64 now = ktime_get_ns();
-
- lockdep_assert_held(req->q->queue_lock);
- WARN_ON_ONCE(q->mq_ops);
-
- if (req->rq_flags & RQF_STATS)
- blk_stat_add(req, now);
-
- if (req->rq_flags & RQF_QUEUED)
- blk_queue_end_tag(q, req);
-
- BUG_ON(blk_queued_rq(req));
-
- if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req))
- laptop_io_completion(req->q->backing_dev_info);
-
- blk_delete_timer(req);
-
- if (req->rq_flags & RQF_DONTPREP)
- blk_unprep_request(req);
-
- blk_account_io_done(req, now);
-
- if (req->end_io) {
- rq_qos_done(q, req);
- req->end_io(req, error);
- } else {
- if (blk_bidi_rq(req))
- __blk_put_request(req->next_rq->q, req->next_rq);
-
- __blk_put_request(q, req);
- }
-}
-EXPORT_SYMBOL(blk_finish_request);
-
-/**
- * blk_end_bidi_request - Complete a bidi request
- * @rq: the request to complete
- * @error: block status code
- * @nr_bytes: number of bytes to complete @rq
- * @bidi_bytes: number of bytes to complete @rq->next_rq
- *
- * Description:
- * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
- * Drivers that supports bidi can safely call this member for any
- * type of request, bidi or uni. In the later case @bidi_bytes is
- * just ignored.
- *
- * Return:
- * %false - we are done with this request
- * %true - still buffers pending for this request
- **/
-static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
- unsigned int nr_bytes, unsigned int bidi_bytes)
-{
- struct request_queue *q = rq->q;
- unsigned long flags;
-
- WARN_ON_ONCE(q->mq_ops);
-
- if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
- return true;
-
- spin_lock_irqsave(q->queue_lock, flags);
- blk_finish_request(rq, error);
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- return false;
-}
-
-/**
- * __blk_end_bidi_request - Complete a bidi request with queue lock held
- * @rq: the request to complete
- * @error: block status code
- * @nr_bytes: number of bytes to complete @rq
- * @bidi_bytes: number of bytes to complete @rq->next_rq
- *
- * Description:
- * Identical to blk_end_bidi_request() except that queue lock is
- * assumed to be locked on entry and remains so on return.
- *
- * Return:
- * %false - we are done with this request
- * %true - still buffers pending for this request
- **/
-static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
- unsigned int nr_bytes, unsigned int bidi_bytes)
-{
- lockdep_assert_held(rq->q->queue_lock);
- WARN_ON_ONCE(rq->q->mq_ops);
-
- if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
- return true;
-
- blk_finish_request(rq, error);
-
- return false;
-}
-
-/**
- * blk_end_request - Helper function for drivers to complete the request.
- * @rq: the request being processed
- * @error: block status code
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- * Ends I/O on a number of bytes attached to @rq.
- * If @rq has leftover, sets it up for the next range of segments.
- *
- * Return:
- * %false - we are done with this request
- * %true - still buffers pending for this request
- **/
-bool blk_end_request(struct request *rq, blk_status_t error,
- unsigned int nr_bytes)
-{
- WARN_ON_ONCE(rq->q->mq_ops);
- return blk_end_bidi_request(rq, error, nr_bytes, 0);
-}
-EXPORT_SYMBOL(blk_end_request);
-
-/**
- * blk_end_request_all - Helper function for drives to finish the request.
- * @rq: the request to finish
- * @error: block status code
- *
- * Description:
- * Completely finish @rq.
- */
-void blk_end_request_all(struct request *rq, blk_status_t error)
-{
- bool pending;
- unsigned int bidi_bytes = 0;
-
- if (unlikely(blk_bidi_rq(rq)))
- bidi_bytes = blk_rq_bytes(rq->next_rq);
-
- pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
- BUG_ON(pending);
-}
-EXPORT_SYMBOL(blk_end_request_all);
-
-/**
- * __blk_end_request - Helper function for drivers to complete the request.
- * @rq: the request being processed
- * @error: block status code
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- * Must be called with queue lock held unlike blk_end_request().
- *
- * Return:
- * %false - we are done with this request
- * %true - still buffers pending for this request
- **/
-bool __blk_end_request(struct request *rq, blk_status_t error,
- unsigned int nr_bytes)
-{
- lockdep_assert_held(rq->q->queue_lock);
- WARN_ON_ONCE(rq->q->mq_ops);
-
- return __blk_end_bidi_request(rq, error, nr_bytes, 0);
-}
-EXPORT_SYMBOL(__blk_end_request);
-
-/**
- * __blk_end_request_all - Helper function for drives to finish the request.
- * @rq: the request to finish
- * @error: block status code
- *
- * Description:
- * Completely finish @rq. Must be called with queue lock held.
- */
-void __blk_end_request_all(struct request *rq, blk_status_t error)
-{
- bool pending;
- unsigned int bidi_bytes = 0;
-
- lockdep_assert_held(rq->q->queue_lock);
- WARN_ON_ONCE(rq->q->mq_ops);
-
- if (unlikely(blk_bidi_rq(rq)))
- bidi_bytes = blk_rq_bytes(rq->next_rq);
-
- pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
- BUG_ON(pending);
-}
-EXPORT_SYMBOL(__blk_end_request_all);
-
-/**
- * __blk_end_request_cur - Helper function to finish the current request chunk.
- * @rq: the request to finish the current chunk for
- * @error: block status code
- *
- * Description:
- * Complete the current consecutively mapped chunk from @rq. Must
- * be called with queue lock held.
- *
- * Return:
- * %false - we are done with this request
- * %true - still buffers pending for this request
- */
-bool __blk_end_request_cur(struct request *rq, blk_status_t error)
-{
- return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
-}
-EXPORT_SYMBOL(__blk_end_request_cur);
-
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio)
{
@@ -3428,8 +1559,8 @@ EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
*/
int blk_lld_busy(struct request_queue *q)
{
- if (q->lld_busy_fn)
- return q->lld_busy_fn(q);
+ if (queue_is_mq(q) && q->mq_ops->busy)
+ return q->mq_ops->busy(q);
return 0;
}
@@ -3460,7 +1591,6 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
*/
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
{
- dst->cpu = src->cpu;
dst->__sector = blk_rq_pos(src);
dst->__data_len = blk_rq_bytes(src);
if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
@@ -3572,9 +1702,11 @@ void blk_start_plug(struct blk_plug *plug)
if (tsk->plug)
return;
- INIT_LIST_HEAD(&plug->list);
INIT_LIST_HEAD(&plug->mq_list);
INIT_LIST_HEAD(&plug->cb_list);
+ plug->rq_count = 0;
+ plug->multiple_queues = false;
+
/*
* Store ordering should not be needed here, since a potential
* preempt will imply a full memory barrier
@@ -3583,36 +1715,6 @@ void blk_start_plug(struct blk_plug *plug)
}
EXPORT_SYMBOL(blk_start_plug);
-static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
-{
- struct request *rqa = container_of(a, struct request, queuelist);
- struct request *rqb = container_of(b, struct request, queuelist);
-
- return !(rqa->q < rqb->q ||
- (rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
-}
-
-/*
- * If 'from_schedule' is true, then postpone the dispatch of requests
- * until a safe kblockd context. We due this to avoid accidental big
- * additional stack usage in driver dispatch, in places where the originally
- * plugger did not intend it.
- */
-static void queue_unplugged(struct request_queue *q, unsigned int depth,
- bool from_schedule)
- __releases(q->queue_lock)
-{
- lockdep_assert_held(q->queue_lock);
-
- trace_block_unplug(q, depth, !from_schedule);
-
- if (from_schedule)
- blk_run_queue_async(q);
- else
- __blk_run_queue(q);
- spin_unlock_irq(q->queue_lock);
-}
-
static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
{
LIST_HEAD(callbacks);
@@ -3657,65 +1759,10 @@ EXPORT_SYMBOL(blk_check_plugged);
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
- struct request_queue *q;
- struct request *rq;
- LIST_HEAD(list);
- unsigned int depth;
-
flush_plug_callbacks(plug, from_schedule);
if (!list_empty(&plug->mq_list))
blk_mq_flush_plug_list(plug, from_schedule);
-
- if (list_empty(&plug->list))
- return;
-
- list_splice_init(&plug->list, &list);
-
- list_sort(NULL, &list, plug_rq_cmp);
-
- q = NULL;
- depth = 0;
-
- while (!list_empty(&list)) {
- rq = list_entry_rq(list.next);
- list_del_init(&rq->queuelist);
- BUG_ON(!rq->q);
- if (rq->q != q) {
- /*
- * This drops the queue lock
- */
- if (q)
- queue_unplugged(q, depth, from_schedule);
- q = rq->q;
- depth = 0;
- spin_lock_irq(q->queue_lock);
- }
-
- /*
- * Short-circuit if @q is dead
- */
- if (unlikely(blk_queue_dying(q))) {
- __blk_end_request_all(rq, BLK_STS_IOERR);
- continue;
- }
-
- /*
- * rq is already accounted, so use raw insert
- */
- if (op_is_flush(rq->cmd_flags))
- __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
- else
- __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
-
- depth++;
- }
-
- /*
- * This drops the queue lock
- */
- if (q)
- queue_unplugged(q, depth, from_schedule);
}
void blk_finish_plug(struct blk_plug *plug)
@@ -3742,9 +1789,6 @@ int __init blk_dev_init(void)
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
- request_cachep = kmem_cache_create("blkdev_requests",
- sizeof(struct request), 0, SLAB_PANIC, NULL);
-
blk_requestq_cachep = kmem_cache_create("request_queue",
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index f7b292f12449..a34b7d918742 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -48,8 +48,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
struct request *rq, int at_head,
rq_end_io_fn *done)
{
- int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
-
WARN_ON(irqs_disabled());
WARN_ON(!blk_rq_is_passthrough(rq));
@@ -60,23 +58,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
* don't check dying flag for MQ because the request won't
* be reused after dying flag is set
*/
- if (q->mq_ops) {
- blk_mq_sched_insert_request(rq, at_head, true, false);
- return;
- }
-
- spin_lock_irq(q->queue_lock);
-
- if (unlikely(blk_queue_dying(q))) {
- rq->rq_flags |= RQF_QUIET;
- __blk_end_request_all(rq, BLK_STS_IOERR);
- spin_unlock_irq(q->queue_lock);
- return;
- }
-
- __elv_add_request(q, rq, where);
- __blk_run_queue(q);
- spin_unlock_irq(q->queue_lock);
+ blk_mq_sched_insert_request(rq, at_head, true, false);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 8b44b86779da..a3fc7191c694 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -93,7 +93,7 @@ enum {
FLUSH_PENDING_TIMEOUT = 5 * HZ,
};
-static bool blk_kick_flush(struct request_queue *q,
+static void blk_kick_flush(struct request_queue *q,
struct blk_flush_queue *fq, unsigned int flags);
static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
@@ -132,18 +132,9 @@ static void blk_flush_restore_request(struct request *rq)
rq->end_io = rq->flush.saved_end_io;
}
-static bool blk_flush_queue_rq(struct request *rq, bool add_front)
+static void blk_flush_queue_rq(struct request *rq, bool add_front)
{
- if (rq->q->mq_ops) {
- blk_mq_add_to_requeue_list(rq, add_front, true);
- return false;
- } else {
- if (add_front)
- list_add(&rq->queuelist, &rq->q->queue_head);
- else
- list_add_tail(&rq->queuelist, &rq->q->queue_head);
- return true;
- }
+ blk_mq_add_to_requeue_list(rq, add_front, true);
}
/**
@@ -157,18 +148,17 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
* completion and trigger the next step.
*
* CONTEXT:
- * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
+ * spin_lock_irq(fq->mq_flush_lock)
*
* RETURNS:
* %true if requests were added to the dispatch queue, %false otherwise.
*/
-static bool blk_flush_complete_seq(struct request *rq,
+static void blk_flush_complete_seq(struct request *rq,
struct blk_flush_queue *fq,
unsigned int seq, blk_status_t error)
{
struct request_queue *q = rq->q;
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
- bool queued = false, kicked;
unsigned int cmd_flags;
BUG_ON(rq->flush.seq & seq);
@@ -191,7 +181,7 @@ static bool blk_flush_complete_seq(struct request *rq,
case REQ_FSEQ_DATA:
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
- queued = blk_flush_queue_rq(rq, true);
+ blk_flush_queue_rq(rq, true);
break;
case REQ_FSEQ_DONE:
@@ -204,42 +194,34 @@ static bool blk_flush_complete_seq(struct request *rq,
BUG_ON(!list_empty(&rq->queuelist));
list_del_init(&rq->flush.list);
blk_flush_restore_request(rq);
- if (q->mq_ops)
- blk_mq_end_request(rq, error);
- else
- __blk_end_request_all(rq, error);
+ blk_mq_end_request(rq, error);
break;
default:
BUG();
}
- kicked = blk_kick_flush(q, fq, cmd_flags);
- return kicked | queued;
+ blk_kick_flush(q, fq, cmd_flags);
}
static void flush_end_io(struct request *flush_rq, blk_status_t error)
{
struct request_queue *q = flush_rq->q;
struct list_head *running;
- bool queued = false;
struct request *rq, *n;
unsigned long flags = 0;
struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
+ struct blk_mq_hw_ctx *hctx;
- if (q->mq_ops) {
- struct blk_mq_hw_ctx *hctx;
-
- /* release the tag's ownership to the req cloned from */
- spin_lock_irqsave(&fq->mq_flush_lock, flags);
- hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
- if (!q->elevator) {
- blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
- flush_rq->tag = -1;
- } else {
- blk_mq_put_driver_tag_hctx(hctx, flush_rq);
- flush_rq->internal_tag = -1;
- }
+ /* release the tag's ownership to the req cloned from */
+ spin_lock_irqsave(&fq->mq_flush_lock, flags);
+ hctx = flush_rq->mq_hctx;
+ if (!q->elevator) {
+ blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
+ flush_rq->tag = -1;
+ } else {
+ blk_mq_put_driver_tag_hctx(hctx, flush_rq);
+ flush_rq->internal_tag = -1;
}
running = &fq->flush_queue[fq->flush_running_idx];
@@ -248,35 +230,16 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
/* account completion of the flush request */
fq->flush_running_idx ^= 1;
- if (!q->mq_ops)
- elv_completed_request(q, flush_rq);
-
/* and push the waiting requests to the next stage */
list_for_each_entry_safe(rq, n, running, flush.list) {
unsigned int seq = blk_flush_cur_seq(rq);
BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
- queued |= blk_flush_complete_seq(rq, fq, seq, error);
+ blk_flush_complete_seq(rq, fq, seq, error);
}
- /*
- * Kick the queue to avoid stall for two cases:
- * 1. Moving a request silently to empty queue_head may stall the
- * queue.
- * 2. When flush request is running in non-queueable queue, the
- * queue is hold. Restart the queue after flush request is finished
- * to avoid stall.
- * This function is called from request completion path and calling
- * directly into request_fn may confuse the driver. Always use
- * kblockd.
- */
- if (queued || fq->flush_queue_delayed) {
- WARN_ON(q->mq_ops);
- blk_run_queue_async(q);
- }
fq->flush_queue_delayed = 0;
- if (q->mq_ops)
- spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
+ spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
}
/**
@@ -289,12 +252,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
* Please read the comment at the top of this file for more info.
*
* CONTEXT:
- * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
+ * spin_lock_irq(fq->mq_flush_lock)
*
- * RETURNS:
- * %true if flush was issued, %false otherwise.
*/
-static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
+static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
unsigned int flags)
{
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
@@ -304,7 +265,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
/* C1 described at the top of this file */
if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
- return false;
+ return;
/* C2 and C3
*
@@ -312,11 +273,10 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
* assigned to empty flushes, and we deadlock if we are expecting
* other requests to make progress. Don't defer for that case.
*/
- if (!list_empty(&fq->flush_data_in_flight) &&
- !(q->mq_ops && q->elevator) &&
+ if (!list_empty(&fq->flush_data_in_flight) && q->elevator &&
time_before(jiffies,
fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
- return false;
+ return;
/*
* Issue flush and toggle pending_idx. This makes pending_idx
@@ -334,19 +294,15 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
* In case of IO scheduler, flush rq need to borrow scheduler tag
* just for cheating put/get driver tag.
*/
- if (q->mq_ops) {
- struct blk_mq_hw_ctx *hctx;
-
- flush_rq->mq_ctx = first_rq->mq_ctx;
-
- if (!q->elevator) {
- fq->orig_rq = first_rq;
- flush_rq->tag = first_rq->tag;
- hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
- blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
- } else {
- flush_rq->internal_tag = first_rq->internal_tag;
- }
+ flush_rq->mq_ctx = first_rq->mq_ctx;
+ flush_rq->mq_hctx = first_rq->mq_hctx;
+
+ if (!q->elevator) {
+ fq->orig_rq = first_rq;
+ flush_rq->tag = first_rq->tag;
+ blk_mq_tag_set_rq(flush_rq->mq_hctx, first_rq->tag, flush_rq);
+ } else {
+ flush_rq->internal_tag = first_rq->internal_tag;
}
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
@@ -355,62 +311,17 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
flush_rq->rq_disk = first_rq->rq_disk;
flush_rq->end_io = flush_end_io;
- return blk_flush_queue_rq(flush_rq, false);
-}
-
-static void flush_data_end_io(struct request *rq, blk_status_t error)
-{
- struct request_queue *q = rq->q;
- struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
-
- lockdep_assert_held(q->queue_lock);
-
- /*
- * Updating q->in_flight[] here for making this tag usable
- * early. Because in blk_queue_start_tag(),
- * q->in_flight[BLK_RW_ASYNC] is used to limit async I/O and
- * reserve tags for sync I/O.
- *
- * More importantly this way can avoid the following I/O
- * deadlock:
- *
- * - suppose there are 40 fua requests comming to flush queue
- * and queue depth is 31
- * - 30 rqs are scheduled then blk_queue_start_tag() can't alloc
- * tag for async I/O any more
- * - all the 30 rqs are completed before FLUSH_PENDING_TIMEOUT
- * and flush_data_end_io() is called
- * - the other rqs still can't go ahead if not updating
- * q->in_flight[BLK_RW_ASYNC] here, meantime these rqs
- * are held in flush data queue and make no progress of
- * handling post flush rq
- * - only after the post flush rq is handled, all these rqs
- * can be completed
- */
-
- elv_completed_request(q, rq);
-
- /* for avoiding double accounting */
- rq->rq_flags &= ~RQF_STARTED;
-
- /*
- * After populating an empty queue, kick it to avoid stall. Read
- * the comment in flush_end_io().
- */
- if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
- blk_run_queue_async(q);
+ blk_flush_queue_rq(flush_rq, false);
}
static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
{
struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx;
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
struct blk_mq_ctx *ctx = rq->mq_ctx;
unsigned long flags;
struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);
- hctx = blk_mq_map_queue(q, ctx->cpu);
-
if (q->elevator) {
WARN_ON(rq->tag < 0);
blk_mq_put_driver_tag_hctx(hctx, rq);
@@ -443,9 +354,6 @@ void blk_insert_flush(struct request *rq)
unsigned int policy = blk_flush_policy(fflags, rq);
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
- if (!q->mq_ops)
- lockdep_assert_held(q->queue_lock);
-
/*
* @policy now records what operations need to be done. Adjust
* REQ_PREFLUSH and FUA for the driver.
@@ -468,10 +376,7 @@ void blk_insert_flush(struct request *rq)
* complete the request.
*/
if (!policy) {
- if (q->mq_ops)
- blk_mq_end_request(rq, 0);
- else
- __blk_end_request(rq, 0, 0);
+ blk_mq_end_request(rq, 0);
return;
}
@@ -484,10 +389,7 @@ void blk_insert_flush(struct request *rq)
*/
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
- if (q->mq_ops)
- blk_mq_request_bypass_insert(rq, false);
- else
- list_add_tail(&rq->queuelist, &q->queue_head);
+ blk_mq_request_bypass_insert(rq, false);
return;
}
@@ -499,17 +401,12 @@ void blk_insert_flush(struct request *rq)
INIT_LIST_HEAD(&rq->flush.list);
rq->rq_flags |= RQF_FLUSH_SEQ;
rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
- if (q->mq_ops) {
- rq->end_io = mq_flush_data_end_io;
- spin_lock_irq(&fq->mq_flush_lock);
- blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
- spin_unlock_irq(&fq->mq_flush_lock);
- return;
- }
- rq->end_io = flush_data_end_io;
+ rq->end_io = mq_flush_data_end_io;
+ spin_lock_irq(&fq->mq_flush_lock);
blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
+ spin_unlock_irq(&fq->mq_flush_lock);
}
/**
@@ -575,8 +472,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
if (!fq)
goto fail;
- if (q->mq_ops)
- spin_lock_init(&fq->mq_flush_lock);
+ spin_lock_init(&fq->mq_flush_lock);
rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
fq->flush_rq = kzalloc_node(rq_sz, flags, node);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 01580f88fcb3..5ed59ac6ae58 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -28,7 +28,6 @@ void get_io_context(struct io_context *ioc)
BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
atomic_long_inc(&ioc->refcount);
}
-EXPORT_SYMBOL(get_io_context);
static void icq_free_icq_rcu(struct rcu_head *head)
{
@@ -48,10 +47,8 @@ static void ioc_exit_icq(struct io_cq *icq)
if (icq->flags & ICQ_EXITED)
return;
- if (et->uses_mq && et->ops.mq.exit_icq)
- et->ops.mq.exit_icq(icq);
- else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
- et->ops.sq.elevator_exit_icq_fn(icq);
+ if (et->ops.exit_icq)
+ et->ops.exit_icq(icq);
icq->flags |= ICQ_EXITED;
}
@@ -113,9 +110,9 @@ static void ioc_release_fn(struct work_struct *work)
struct io_cq, ioc_node);
struct request_queue *q = icq->q;
- if (spin_trylock(q->queue_lock)) {
+ if (spin_trylock(&q->queue_lock)) {
ioc_destroy_icq(icq);
- spin_unlock(q->queue_lock);
+ spin_unlock(&q->queue_lock);
} else {
spin_unlock_irqrestore(&ioc->lock, flags);
cpu_relax();
@@ -162,7 +159,6 @@ void put_io_context(struct io_context *ioc)
if (free_ioc)
kmem_cache_free(iocontext_cachep, ioc);
}
-EXPORT_SYMBOL(put_io_context);
/**
* put_io_context_active - put active reference on ioc
@@ -173,7 +169,6 @@ EXPORT_SYMBOL(put_io_context);
*/
void put_io_context_active(struct io_context *ioc)
{
- struct elevator_type *et;
unsigned long flags;
struct io_cq *icq;
@@ -187,25 +182,12 @@ void put_io_context_active(struct io_context *ioc)
* reverse double locking. Read comment in ioc_release_fn() for
* explanation on the nested locking annotation.
*/
-retry:
spin_lock_irqsave_nested(&ioc->lock, flags, 1);
hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
if (icq->flags & ICQ_EXITED)
continue;
- et = icq->q->elevator->type;
- if (et->uses_mq) {
- ioc_exit_icq(icq);
- } else {
- if (spin_trylock(icq->q->queue_lock)) {
- ioc_exit_icq(icq);
- spin_unlock(icq->q->queue_lock);
- } else {
- spin_unlock_irqrestore(&ioc->lock, flags);
- cpu_relax();
- goto retry;
- }
- }
+ ioc_exit_icq(icq);
}
spin_unlock_irqrestore(&ioc->lock, flags);
@@ -232,7 +214,7 @@ static void __ioc_clear_queue(struct list_head *icq_list)
while (!list_empty(icq_list)) {
struct io_cq *icq = list_entry(icq_list->next,
- struct io_cq, q_node);
+ struct io_cq, q_node);
struct io_context *ioc = icq->ioc;
spin_lock_irqsave(&ioc->lock, flags);
@@ -251,16 +233,11 @@ void ioc_clear_queue(struct request_queue *q)
{
LIST_HEAD(icq_list);
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
list_splice_init(&q->icq_list, &icq_list);
+ spin_unlock_irq(&q->queue_lock);
- if (q->mq_ops) {
- spin_unlock_irq(q->queue_lock);
- __ioc_clear_queue(&icq_list);
- } else {
- __ioc_clear_queue(&icq_list);
- spin_unlock_irq(q->queue_lock);
- }
+ __ioc_clear_queue(&icq_list);
}
int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
@@ -336,7 +313,6 @@ struct io_context *get_task_io_context(struct task_struct *task,
return NULL;
}
-EXPORT_SYMBOL(get_task_io_context);
/**
* ioc_lookup_icq - lookup io_cq from ioc
@@ -350,7 +326,7 @@ struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
{
struct io_cq *icq;
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
/*
* icq's are indexed from @ioc using radix tree and hint pointer,
@@ -409,16 +385,14 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
INIT_HLIST_NODE(&icq->ioc_node);
/* lock both q and ioc and try to link @icq */
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
spin_lock(&ioc->lock);
if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
hlist_add_head(&icq->ioc_node, &ioc->icq_list);
list_add(&icq->q_node, &q->icq_list);
- if (et->uses_mq && et->ops.mq.init_icq)
- et->ops.mq.init_icq(icq);
- else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
- et->ops.sq.elevator_init_icq_fn(icq);
+ if (et->ops.init_icq)
+ et->ops.init_icq(icq);
} else {
kmem_cache_free(et->icq_cache, icq);
icq = ioc_lookup_icq(ioc, q);
@@ -427,7 +401,7 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
}
spin_unlock(&ioc->lock);
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
radix_tree_preload_end();
return icq;
}
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 38c35c32aff2..fc714ef402a6 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -262,29 +262,25 @@ static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
stat->rqs.mean);
}
-static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
- wait_queue_entry_t *wait,
- bool first_block)
+static void iolat_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
- struct rq_wait *rqw = &iolat->rq_wait;
+ atomic_dec(&rqw->inflight);
+ wake_up(&rqw->wait);
+}
- if (first_block && waitqueue_active(&rqw->wait) &&
- rqw->wait.head.next != &wait->entry)
- return false;
+static bool iolat_acquire_inflight(struct rq_wait *rqw, void *private_data)
+{
+ struct iolatency_grp *iolat = private_data;
return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
}
static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
struct iolatency_grp *iolat,
- spinlock_t *lock, bool issue_as_root,
+ bool issue_as_root,
bool use_memdelay)
- __releases(lock)
- __acquires(lock)
{
struct rq_wait *rqw = &iolat->rq_wait;
unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
- DEFINE_WAIT(wait);
- bool first_block = true;
if (use_delay)
blkcg_schedule_throttle(rqos->q, use_memdelay);
@@ -301,27 +297,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
return;
}
- if (iolatency_may_queue(iolat, &wait, first_block))
- return;
-
- do {
- prepare_to_wait_exclusive(&rqw->wait, &wait,
- TASK_UNINTERRUPTIBLE);
-
- if (iolatency_may_queue(iolat, &wait, first_block))
- break;
- first_block = false;
-
- if (lock) {
- spin_unlock_irq(lock);
- io_schedule();
- spin_lock_irq(lock);
- } else {
- io_schedule();
- }
- } while (1);
-
- finish_wait(&rqw->wait, &wait);
+ rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb);
}
#define SCALE_DOWN_FACTOR 2
@@ -478,38 +454,15 @@ static void check_scale_change(struct iolatency_grp *iolat)
scale_change(iolat, direction > 0);
}
-static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
- spinlock_t *lock)
+static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
{
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
- struct blkcg *blkcg;
- struct blkcg_gq *blkg;
- struct request_queue *q = rqos->q;
+ struct blkcg_gq *blkg = bio->bi_blkg;
bool issue_as_root = bio_issue_as_root_blkg(bio);
if (!blk_iolatency_enabled(blkiolat))
return;
- rcu_read_lock();
- blkcg = bio_blkcg(bio);
- bio_associate_blkcg(bio, &blkcg->css);
- blkg = blkg_lookup(blkcg, q);
- if (unlikely(!blkg)) {
- if (!lock)
- spin_lock_irq(q->queue_lock);
- blkg = blkg_lookup_create(blkcg, q);
- if (IS_ERR(blkg))
- blkg = NULL;
- if (!lock)
- spin_unlock_irq(q->queue_lock);
- }
- if (!blkg)
- goto out;
-
- bio_issue_init(&bio->bi_issue, bio_sectors(bio));
- bio_associate_blkg(bio, blkg);
-out:
- rcu_read_unlock();
while (blkg && blkg->parent) {
struct iolatency_grp *iolat = blkg_to_lat(blkg);
if (!iolat) {
@@ -518,7 +471,7 @@ out:
}
check_scale_change(iolat);
- __blkcg_iolatency_throttle(rqos, iolat, lock, issue_as_root,
+ __blkcg_iolatency_throttle(rqos, iolat, issue_as_root,
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
blkg = blkg->parent;
}
@@ -640,7 +593,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
bool enabled = false;
blkg = bio->bi_blkg;
- if (!blkg)
+ if (!blkg || !bio_flagged(bio, BIO_TRACKED))
return;
iolat = blkg_to_lat(bio->bi_blkg);
@@ -730,7 +683,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
* We could be exiting, don't access the pd unless we have a
* ref on the blkg.
*/
- if (!blkg_try_get(blkg))
+ if (!blkg_tryget(blkg))
continue;
iolat = blkg_to_lat(blkg);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 7695034f4b87..71e9ac03f621 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -195,7 +195,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
goto split;
}
- if (bvprvp && blk_queue_cluster(q)) {
+ if (bvprvp) {
if (seg_size + bv.bv_len > queue_max_segment_size(q))
goto new_segment;
if (!biovec_phys_mergeable(q, bvprvp, &bv))
@@ -295,7 +295,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
bool no_sg_merge)
{
struct bio_vec bv, bvprv = { NULL };
- int cluster, prev = 0;
+ int prev = 0;
unsigned int seg_size, nr_phys_segs;
struct bio *fbio, *bbio;
struct bvec_iter iter;
@@ -313,7 +313,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
}
fbio = bio;
- cluster = blk_queue_cluster(q);
seg_size = 0;
nr_phys_segs = 0;
for_each_bio(bio) {
@@ -325,7 +324,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
if (no_sg_merge)
goto new_segment;
- if (prev && cluster) {
+ if (prev) {
if (seg_size + bv.bv_len
> queue_max_segment_size(q))
goto new_segment;
@@ -389,16 +388,12 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio)
bio_set_flag(bio, BIO_SEG_VALID);
}
-EXPORT_SYMBOL(blk_recount_segments);
static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
struct bio *nxt)
{
struct bio_vec end_bv = { NULL }, nxt_bv;
- if (!blk_queue_cluster(q))
- return 0;
-
if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
queue_max_segment_size(q))
return 0;
@@ -415,12 +410,12 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
static inline void
__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
struct scatterlist *sglist, struct bio_vec *bvprv,
- struct scatterlist **sg, int *nsegs, int *cluster)
+ struct scatterlist **sg, int *nsegs)
{
int nbytes = bvec->bv_len;
- if (*sg && *cluster) {
+ if (*sg) {
if ((*sg)->length + nbytes > queue_max_segment_size(q))
goto new_segment;
if (!biovec_phys_mergeable(q, bvprv, bvec))
@@ -466,12 +461,12 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
{
struct bio_vec bvec, bvprv = { NULL };
struct bvec_iter iter;
- int cluster = blk_queue_cluster(q), nsegs = 0;
+ int nsegs = 0;
for_each_bio(bio)
bio_for_each_segment(bvec, bio, iter)
__blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
- &nsegs, &cluster);
+ &nsegs);
return nsegs;
}
@@ -596,17 +591,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
return ll_new_hw_segment(q, req, bio);
}
-/*
- * blk-mq uses req->special to carry normal driver per-request payload, it
- * does not indicate a prepared command that we cannot merge with.
- */
-static bool req_no_special_merge(struct request *req)
-{
- struct request_queue *q = req->q;
-
- return !q->mq_ops && req->special;
-}
-
static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
struct request *next)
{
@@ -632,13 +616,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
unsigned int seg_size =
req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;
- /*
- * First check if the either of the requests are re-queued
- * requests. Can't merge them if they are.
- */
- if (req_no_special_merge(req) || req_no_special_merge(next))
- return 0;
-
if (req_gap_back_merge(req, next->bio))
return 0;
@@ -703,12 +680,10 @@ static void blk_account_io_merge(struct request *req)
{
if (blk_do_io_stat(req)) {
struct hd_struct *part;
- int cpu;
- cpu = part_stat_lock();
+ part_stat_lock();
part = req->part;
- part_round_stats(req->q, cpu, part);
part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
@@ -731,7 +706,8 @@ static inline bool blk_discard_mergable(struct request *req)
return false;
}
-enum elv_merge blk_try_req_merge(struct request *req, struct request *next)
+static enum elv_merge blk_try_req_merge(struct request *req,
+ struct request *next)
{
if (blk_discard_mergable(req))
return ELEVATOR_DISCARD_MERGE;
@@ -748,9 +724,6 @@ enum elv_merge blk_try_req_merge(struct request *req, struct request *next)
static struct request *attempt_merge(struct request_queue *q,
struct request *req, struct request *next)
{
- if (!q->mq_ops)
- lockdep_assert_held(q->queue_lock);
-
if (!rq_mergeable(req) || !rq_mergeable(next))
return NULL;
@@ -758,8 +731,7 @@ static struct request *attempt_merge(struct request_queue *q,
return NULL;
if (rq_data_dir(req) != rq_data_dir(next)
- || req->rq_disk != next->rq_disk
- || req_no_special_merge(next))
+ || req->rq_disk != next->rq_disk)
return NULL;
if (req_op(req) == REQ_OP_WRITE_SAME &&
@@ -773,6 +745,9 @@ static struct request *attempt_merge(struct request_queue *q,
if (req->write_hint != next->write_hint)
return NULL;
+ if (req->ioprio != next->ioprio)
+ return NULL;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
@@ -828,10 +803,6 @@ static struct request *attempt_merge(struct request_queue *q,
*/
blk_account_io_merge(next);
- req->ioprio = ioprio_best(req->ioprio, next->ioprio);
- if (blk_rq_cpu_valid(next))
- req->cpu = next->cpu;
-
/*
* ownership of bio passed from next to req, return 'next' for
* the caller to free
@@ -863,16 +834,11 @@ struct request *attempt_front_merge(struct request_queue *q, struct request *rq)
int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next)
{
- struct elevator_queue *e = q->elevator;
struct request *free;
- if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
- if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
- return 0;
-
free = attempt_merge(q, rq, next);
if (free) {
- __blk_put_request(q, free);
+ blk_put_request(free);
return 1;
}
@@ -891,8 +857,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_data_dir(bio) != rq_data_dir(rq))
return false;
- /* must be same device and not a special request */
- if (rq->rq_disk != bio->bi_disk || req_no_special_merge(rq))
+ /* must be same device */
+ if (rq->rq_disk != bio->bi_disk)
return false;
/* only merge integrity protected bio into ditto rq */
@@ -911,6 +877,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (rq->write_hint != bio->bi_write_hint)
return false;
+ if (rq->ioprio != bio_prio(bio))
+ return false;
+
return true;
}
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 3eb169f15842..03a534820271 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -14,9 +14,10 @@
#include "blk.h"
#include "blk-mq.h"
-static int cpu_to_queue_index(unsigned int nr_queues, const int cpu)
+static int cpu_to_queue_index(struct blk_mq_queue_map *qmap,
+ unsigned int nr_queues, const int cpu)
{
- return cpu % nr_queues;
+ return qmap->queue_offset + (cpu % nr_queues);
}
static int get_first_sibling(unsigned int cpu)
@@ -30,10 +31,10 @@ static int get_first_sibling(unsigned int cpu)
return cpu;
}
-int blk_mq_map_queues(struct blk_mq_tag_set *set)
+int blk_mq_map_queues(struct blk_mq_queue_map *qmap)
{
- unsigned int *map = set->mq_map;
- unsigned int nr_queues = set->nr_hw_queues;
+ unsigned int *map = qmap->mq_map;
+ unsigned int nr_queues = qmap->nr_queues;
unsigned int cpu, first_sibling;
for_each_possible_cpu(cpu) {
@@ -44,11 +45,11 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
* performace optimizations.
*/
if (cpu < nr_queues) {
- map[cpu] = cpu_to_queue_index(nr_queues, cpu);
+ map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
} else {
first_sibling = get_first_sibling(cpu);
if (first_sibling == cpu)
- map[cpu] = cpu_to_queue_index(nr_queues, cpu);
+ map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
else
map[cpu] = map[first_sibling];
}
@@ -62,12 +63,12 @@ EXPORT_SYMBOL_GPL(blk_mq_map_queues);
* We have no quick way of doing reverse lookups. This is only used at
* queue init time, so runtime isn't important.
*/
-int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index)
+int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
{
int i;
for_each_possible_cpu(i) {
- if (index == mq_map[i])
+ if (index == qmap->mq_map[i])
return local_memory_node(cpu_to_node(i));
}
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 10b284a1f18d..90d68760af08 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -23,6 +23,7 @@
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
+#include "blk-rq-qos.h"
static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
{
@@ -112,10 +113,8 @@ static int queue_pm_only_show(void *data, struct seq_file *m)
#define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name
static const char *const blk_queue_flag_name[] = {
- QUEUE_FLAG_NAME(QUEUED),
QUEUE_FLAG_NAME(STOPPED),
QUEUE_FLAG_NAME(DYING),
- QUEUE_FLAG_NAME(BYPASS),
QUEUE_FLAG_NAME(BIDI),
QUEUE_FLAG_NAME(NOMERGES),
QUEUE_FLAG_NAME(SAME_COMP),
@@ -318,7 +317,6 @@ static const char *const cmd_flag_name[] = {
static const char *const rqf_name[] = {
RQF_NAME(SORTED),
RQF_NAME(STARTED),
- RQF_NAME(QUEUED),
RQF_NAME(SOFTBARRIER),
RQF_NAME(FLUSH_SEQ),
RQF_NAME(MIXED_MERGE),
@@ -424,15 +422,18 @@ struct show_busy_params {
/*
* Note: the state of a request may change while this function is in progress,
- * e.g. due to a concurrent blk_mq_finish_request() call.
+ * e.g. due to a concurrent blk_mq_finish_request() call. Returns true to
+ * keep iterating requests.
*/
-static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
+static bool hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
{
const struct show_busy_params *params = data;
- if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx)
+ if (rq->mq_hctx == params->hctx)
__blk_mq_debugfs_rq_show(params->m,
list_entry_rq(&rq->queuelist));
+
+ return true;
}
static int hctx_busy_show(void *data, struct seq_file *m)
@@ -446,6 +447,21 @@ static int hctx_busy_show(void *data, struct seq_file *m)
return 0;
}
+static const char *const hctx_types[] = {
+ [HCTX_TYPE_DEFAULT] = "default",
+ [HCTX_TYPE_READ] = "read",
+ [HCTX_TYPE_POLL] = "poll",
+};
+
+static int hctx_type_show(void *data, struct seq_file *m)
+{
+ struct blk_mq_hw_ctx *hctx = data;
+
+ BUILD_BUG_ON(ARRAY_SIZE(hctx_types) != HCTX_MAX_TYPES);
+ seq_printf(m, "%s\n", hctx_types[hctx->type]);
+ return 0;
+}
+
static int hctx_ctx_map_show(void *data, struct seq_file *m)
{
struct blk_mq_hw_ctx *hctx = data;
@@ -636,36 +652,43 @@ static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
return 0;
}
-static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
- __acquires(&ctx->lock)
-{
- struct blk_mq_ctx *ctx = m->private;
-
- spin_lock(&ctx->lock);
- return seq_list_start(&ctx->rq_list, *pos);
-}
-
-static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
-{
- struct blk_mq_ctx *ctx = m->private;
-
- return seq_list_next(v, &ctx->rq_list, pos);
+#define CTX_RQ_SEQ_OPS(name, type) \
+static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \
+ __acquires(&ctx->lock) \
+{ \
+ struct blk_mq_ctx *ctx = m->private; \
+ \
+ spin_lock(&ctx->lock); \
+ return seq_list_start(&ctx->rq_lists[type], *pos); \
+} \
+ \
+static void *ctx_##name##_rq_list_next(struct seq_file *m, void *v, \
+ loff_t *pos) \
+{ \
+ struct blk_mq_ctx *ctx = m->private; \
+ \
+ return seq_list_next(v, &ctx->rq_lists[type], pos); \
+} \
+ \
+static void ctx_##name##_rq_list_stop(struct seq_file *m, void *v) \
+ __releases(&ctx->lock) \
+{ \
+ struct blk_mq_ctx *ctx = m->private; \
+ \
+ spin_unlock(&ctx->lock); \
+} \
+ \
+static const struct seq_operations ctx_##name##_rq_list_seq_ops = { \
+ .start = ctx_##name##_rq_list_start, \
+ .next = ctx_##name##_rq_list_next, \
+ .stop = ctx_##name##_rq_list_stop, \
+ .show = blk_mq_debugfs_rq_show, \
}
-static void ctx_rq_list_stop(struct seq_file *m, void *v)
- __releases(&ctx->lock)
-{
- struct blk_mq_ctx *ctx = m->private;
-
- spin_unlock(&ctx->lock);
-}
+CTX_RQ_SEQ_OPS(default, HCTX_TYPE_DEFAULT);
+CTX_RQ_SEQ_OPS(read, HCTX_TYPE_READ);
+CTX_RQ_SEQ_OPS(poll, HCTX_TYPE_POLL);
-static const struct seq_operations ctx_rq_list_seq_ops = {
- .start = ctx_rq_list_start,
- .next = ctx_rq_list_next,
- .stop = ctx_rq_list_stop,
- .show = blk_mq_debugfs_rq_show,
-};
static int ctx_dispatched_show(void *data, struct seq_file *m)
{
struct blk_mq_ctx *ctx = data;
@@ -798,11 +821,14 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
{"run", 0600, hctx_run_show, hctx_run_write},
{"active", 0400, hctx_active_show},
{"dispatch_busy", 0400, hctx_dispatch_busy_show},
+ {"type", 0400, hctx_type_show},
{},
};
static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
- {"rq_list", 0400, .seq_ops = &ctx_rq_list_seq_ops},
+ {"default_rq_list", 0400, .seq_ops = &ctx_default_rq_list_seq_ops},
+ {"read_rq_list", 0400, .seq_ops = &ctx_read_rq_list_seq_ops},
+ {"poll_rq_list", 0400, .seq_ops = &ctx_poll_rq_list_seq_ops},
{"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write},
{"merged", 0600, ctx_merged_show, ctx_merged_write},
{"completed", 0600, ctx_completed_show, ctx_completed_write},
@@ -856,6 +882,15 @@ int blk_mq_debugfs_register(struct request_queue *q)
goto err;
}
+ if (q->rq_qos) {
+ struct rq_qos *rqos = q->rq_qos;
+
+ while (rqos) {
+ blk_mq_debugfs_register_rqos(rqos);
+ rqos = rqos->next;
+ }
+ }
+
return 0;
err:
@@ -978,6 +1013,50 @@ void blk_mq_debugfs_unregister_sched(struct request_queue *q)
q->sched_debugfs_dir = NULL;
}
+void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
+{
+ debugfs_remove_recursive(rqos->debugfs_dir);
+ rqos->debugfs_dir = NULL;
+}
+
+int blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+{
+ struct request_queue *q = rqos->q;
+ const char *dir_name = rq_qos_id_to_name(rqos->id);
+
+ if (!q->debugfs_dir)
+ return -ENOENT;
+
+ if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs)
+ return 0;
+
+ if (!q->rqos_debugfs_dir) {
+ q->rqos_debugfs_dir = debugfs_create_dir("rqos",
+ q->debugfs_dir);
+ if (!q->rqos_debugfs_dir)
+ return -ENOMEM;
+ }
+
+ rqos->debugfs_dir = debugfs_create_dir(dir_name,
+ rqos->q->rqos_debugfs_dir);
+ if (!rqos->debugfs_dir)
+ return -ENOMEM;
+
+ if (!debugfs_create_files(rqos->debugfs_dir, rqos,
+ rqos->ops->debugfs_attrs))
+ goto err;
+ return 0;
+ err:
+ blk_mq_debugfs_unregister_rqos(rqos);
+ return -ENOMEM;
+}
+
+void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
+{
+ debugfs_remove_recursive(q->rqos_debugfs_dir);
+ q->rqos_debugfs_dir = NULL;
+}
+
int blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx)
{
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index a9160be12be0..8c9012a578c1 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -31,6 +31,10 @@ void blk_mq_debugfs_unregister_sched(struct request_queue *q);
int blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx);
void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx);
+
+int blk_mq_debugfs_register_rqos(struct rq_qos *rqos);
+void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos);
+void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q);
#else
static inline int blk_mq_debugfs_register(struct request_queue *q)
{
@@ -78,6 +82,19 @@ static inline int blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
{
}
+
+static inline int blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+{
+ return 0;
+}
+
+static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
+{
+}
+
+static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
+{
+}
#endif
#ifdef CONFIG_BLK_DEBUG_FS_ZONED
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index db644ec624f5..1dce18553984 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -31,26 +31,26 @@
* that maps a queue to the CPUs that have irq affinity for the corresponding
* vector.
*/
-int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev,
+int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev,
int offset)
{
const struct cpumask *mask;
unsigned int queue, cpu;
- for (queue = 0; queue < set->nr_hw_queues; queue++) {
+ for (queue = 0; queue < qmap->nr_queues; queue++) {
mask = pci_irq_get_affinity(pdev, queue + offset);
if (!mask)
goto fallback;
for_each_cpu(cpu, mask)
- set->mq_map[cpu] = queue;
+ qmap->mq_map[cpu] = qmap->queue_offset + queue;
}
return 0;
fallback:
- WARN_ON_ONCE(set->nr_hw_queues > 1);
- blk_mq_clear_mq_map(set);
+ WARN_ON_ONCE(qmap->nr_queues > 1);
+ blk_mq_clear_mq_map(qmap);
return 0;
}
EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues);
diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c
index 996167f1de18..45030a81a1ed 100644
--- a/block/blk-mq-rdma.c
+++ b/block/blk-mq-rdma.c
@@ -29,24 +29,24 @@
* @set->nr_hw_queues, or @dev does not provide an affinity mask for a
* vector, we fallback to the naive mapping.
*/
-int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set,
+int blk_mq_rdma_map_queues(struct blk_mq_queue_map *map,
struct ib_device *dev, int first_vec)
{
const struct cpumask *mask;
unsigned int queue, cpu;
- for (queue = 0; queue < set->nr_hw_queues; queue++) {
+ for (queue = 0; queue < map->nr_queues; queue++) {
mask = ib_get_vector_affinity(dev, first_vec + queue);
if (!mask)
goto fallback;
for_each_cpu(cpu, mask)
- set->mq_map[cpu] = queue;
+ map->mq_map[cpu] = map->queue_offset + queue;
}
return 0;
fallback:
- return blk_mq_map_queues(set);
+ return blk_mq_map_queues(map);
}
EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 29bfe8017a2d..140933e4a7d1 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -31,15 +31,22 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
-void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
+void blk_mq_sched_assign_ioc(struct request *rq)
{
struct request_queue *q = rq->q;
- struct io_context *ioc = rq_ioc(bio);
+ struct io_context *ioc;
struct io_cq *icq;
- spin_lock_irq(q->queue_lock);
+ /*
+ * May not have an IO context if it's a passthrough request
+ */
+ ioc = current->io_context;
+ if (!ioc)
+ return;
+
+ spin_lock_irq(&q->queue_lock);
icq = ioc_lookup_icq(ioc, q);
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
if (!icq) {
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
@@ -54,13 +61,14 @@ void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
* Mark a hardware queue as needing a restart. For shared queues, maintain
* a count of how many hardware queues are marked for restart.
*/
-static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
+EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);
void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
@@ -85,14 +93,13 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
do {
struct request *rq;
- if (e->type->ops.mq.has_work &&
- !e->type->ops.mq.has_work(hctx))
+ if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
break;
if (!blk_mq_get_dispatch_budget(hctx))
break;
- rq = e->type->ops.mq.dispatch_request(hctx);
+ rq = e->type->ops.dispatch_request(hctx);
if (!rq) {
blk_mq_put_dispatch_budget(hctx);
break;
@@ -110,7 +117,7 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx)
{
- unsigned idx = ctx->index_hw;
+ unsigned short idx = ctx->index_hw[hctx->type];
if (++idx == hctx->nr_ctx)
idx = 0;
@@ -163,7 +170,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
- const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
+ const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
LIST_HEAD(rq_list);
/* RCU or SRCU read lock is needed before checking quiesced flag */
@@ -295,11 +302,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
* too much time checking for merges.
*/
static bool blk_mq_attempt_merge(struct request_queue *q,
+ struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx, struct bio *bio)
{
+ enum hctx_type type = hctx->type;
+
lockdep_assert_held(&ctx->lock);
- if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+ if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) {
ctx->rq_merged++;
return true;
}
@@ -311,19 +321,21 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx->cpu);
bool ret = false;
+ enum hctx_type type;
- if (e && e->type->ops.mq.bio_merge) {
+ if (e && e->type->ops.bio_merge) {
blk_mq_put_ctx(ctx);
- return e->type->ops.mq.bio_merge(hctx, bio);
+ return e->type->ops.bio_merge(hctx, bio);
}
+ type = hctx->type;
if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
- !list_empty_careful(&ctx->rq_list)) {
+ !list_empty_careful(&ctx->rq_lists[type])) {
/* default per sw-queue merge */
spin_lock(&ctx->lock);
- ret = blk_mq_attempt_merge(q, ctx, bio);
+ ret = blk_mq_attempt_merge(q, hctx, ctx, bio);
spin_unlock(&ctx->lock);
}
@@ -367,7 +379,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = rq->mq_ctx;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
/* flush rq in flush machinery need to be dispatched directly */
if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
@@ -380,11 +392,11 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
goto run;
- if (e && e->type->ops.mq.insert_requests) {
+ if (e && e->type->ops.insert_requests) {
LIST_HEAD(list);
list_add(&rq->queuelist, &list);
- e->type->ops.mq.insert_requests(hctx, &list, at_head);
+ e->type->ops.insert_requests(hctx, &list, at_head);
} else {
spin_lock(&ctx->lock);
__blk_mq_insert_request(hctx, rq, at_head);
@@ -396,27 +408,25 @@ run:
blk_mq_run_hw_queue(hctx, async);
}
-void blk_mq_sched_insert_requests(struct request_queue *q,
+void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx,
struct list_head *list, bool run_queue_async)
{
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
- struct elevator_queue *e = hctx->queue->elevator;
+ struct elevator_queue *e;
- if (e && e->type->ops.mq.insert_requests)
- e->type->ops.mq.insert_requests(hctx, list, false);
+ e = hctx->queue->elevator;
+ if (e && e->type->ops.insert_requests)
+ e->type->ops.insert_requests(hctx, list, false);
else {
/*
* try to issue requests directly if the hw queue isn't
* busy in case of 'none' scheduler, and this way may save
* us one extra enqueue & dequeue to sw queue.
*/
- if (!hctx->dispatch_busy && !e && !run_queue_async) {
+ if (!hctx->dispatch_busy && !e && !run_queue_async)
blk_mq_try_issue_list_directly(hctx, list);
- if (list_empty(list))
- return;
- }
- blk_mq_insert_requests(hctx, ctx, list);
+ else
+ blk_mq_insert_requests(hctx, ctx, list);
}
blk_mq_run_hw_queue(hctx, run_queue_async);
@@ -489,15 +499,15 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
goto err;
}
- ret = e->ops.mq.init_sched(q, e);
+ ret = e->ops.init_sched(q, e);
if (ret)
goto err;
blk_mq_debugfs_register_sched(q);
queue_for_each_hw_ctx(q, hctx, i) {
- if (e->ops.mq.init_hctx) {
- ret = e->ops.mq.init_hctx(hctx, i);
+ if (e->ops.init_hctx) {
+ ret = e->ops.init_hctx(hctx, i);
if (ret) {
eq = q->elevator;
blk_mq_exit_sched(q, eq);
@@ -523,14 +533,14 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
queue_for_each_hw_ctx(q, hctx, i) {
blk_mq_debugfs_unregister_sched_hctx(hctx);
- if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
- e->type->ops.mq.exit_hctx(hctx, i);
+ if (e->type->ops.exit_hctx && hctx->sched_data) {
+ e->type->ops.exit_hctx(hctx, i);
hctx->sched_data = NULL;
}
}
blk_mq_debugfs_unregister_sched(q);
- if (e->type->ops.mq.exit_sched)
- e->type->ops.mq.exit_sched(e);
+ if (e->type->ops.exit_sched)
+ e->type->ops.exit_sched(e);
blk_mq_sched_tags_teardown(q);
q->elevator = NULL;
}
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 8a9544203173..c7bdb52367ac 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -8,18 +8,19 @@
void blk_mq_sched_free_hctx_data(struct request_queue *q,
void (*exit)(struct blk_mq_hw_ctx *));
-void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio);
+void blk_mq_sched_assign_ioc(struct request *rq);
void blk_mq_sched_request_inserted(struct request *rq);
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
struct request **merged_request);
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
+void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx);
void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
bool run_queue, bool async);
-void blk_mq_sched_insert_requests(struct request_queue *q,
+void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx,
struct list_head *list, bool run_queue_async);
@@ -43,8 +44,8 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
{
struct elevator_queue *e = q->elevator;
- if (e && e->type->ops.mq.allow_merge)
- return e->type->ops.mq.allow_merge(q, rq, bio);
+ if (e && e->type->ops.allow_merge)
+ return e->type->ops.allow_merge(q, rq, bio);
return true;
}
@@ -53,8 +54,8 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
{
struct elevator_queue *e = rq->q->elevator;
- if (e && e->type->ops.mq.completed_request)
- e->type->ops.mq.completed_request(rq, now);
+ if (e && e->type->ops.completed_request)
+ e->type->ops.completed_request(rq, now);
}
static inline void blk_mq_sched_started_request(struct request *rq)
@@ -62,8 +63,8 @@ static inline void blk_mq_sched_started_request(struct request *rq)
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
- if (e && e->type->ops.mq.started_request)
- e->type->ops.mq.started_request(rq);
+ if (e && e->type->ops.started_request)
+ e->type->ops.started_request(rq);
}
static inline void blk_mq_sched_requeue_request(struct request *rq)
@@ -71,16 +72,16 @@ static inline void blk_mq_sched_requeue_request(struct request *rq)
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
- if (e && e->type->ops.mq.requeue_request)
- e->type->ops.mq.requeue_request(rq);
+ if (e && e->type->ops.requeue_request)
+ e->type->ops.requeue_request(rq);
}
static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
{
struct elevator_queue *e = hctx->queue->elevator;
- if (e && e->type->ops.mq.has_work)
- return e->type->ops.mq.has_work(hctx);
+ if (e && e->type->ops.has_work)
+ return e->type->ops.has_work(hctx);
return false;
}
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index aafb44224c89..3f9c3f4ac44c 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -15,6 +15,18 @@
static void blk_mq_sysfs_release(struct kobject *kobj)
{
+ struct blk_mq_ctxs *ctxs = container_of(kobj, struct blk_mq_ctxs, kobj);
+
+ free_percpu(ctxs->queue_ctx);
+ kfree(ctxs);
+}
+
+static void blk_mq_ctx_sysfs_release(struct kobject *kobj)
+{
+ struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj);
+
+ /* ctx->ctxs won't be released until all ctx are freed */
+ kobject_put(&ctx->ctxs->kobj);
}
static void blk_mq_hw_sysfs_release(struct kobject *kobj)
@@ -203,7 +215,7 @@ static struct kobj_type blk_mq_ktype = {
static struct kobj_type blk_mq_ctx_ktype = {
.sysfs_ops = &blk_mq_sysfs_ops,
.default_attrs = default_ctx_attrs,
- .release = blk_mq_sysfs_release,
+ .release = blk_mq_ctx_sysfs_release,
};
static struct kobj_type blk_mq_hw_ktype = {
@@ -235,7 +247,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
if (!hctx->nr_ctx)
return 0;
- ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num);
+ ret = kobject_add(&hctx->kobj, q->mq_kobj, "%u", hctx->queue_num);
if (ret)
return ret;
@@ -258,8 +270,8 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_unregister_hctx(hctx);
- kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
- kobject_del(&q->mq_kobj);
+ kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
+ kobject_del(q->mq_kobj);
kobject_put(&dev->kobj);
q->mq_sysfs_init_done = false;
@@ -279,7 +291,7 @@ void blk_mq_sysfs_deinit(struct request_queue *q)
ctx = per_cpu_ptr(q->queue_ctx, cpu);
kobject_put(&ctx->kobj);
}
- kobject_put(&q->mq_kobj);
+ kobject_put(q->mq_kobj);
}
void blk_mq_sysfs_init(struct request_queue *q)
@@ -287,10 +299,12 @@ void blk_mq_sysfs_init(struct request_queue *q)
struct blk_mq_ctx *ctx;
int cpu;
- kobject_init(&q->mq_kobj, &blk_mq_ktype);
+ kobject_init(q->mq_kobj, &blk_mq_ktype);
for_each_possible_cpu(cpu) {
ctx = per_cpu_ptr(q->queue_ctx, cpu);
+
+ kobject_get(q->mq_kobj);
kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
}
}
@@ -303,11 +317,11 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
WARN_ON_ONCE(!q->kobj.parent);
lockdep_assert_held(&q->sysfs_lock);
- ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
+ ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
if (ret < 0)
goto out;
- kobject_uevent(&q->mq_kobj, KOBJ_ADD);
+ kobject_uevent(q->mq_kobj, KOBJ_ADD);
queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_register_hctx(hctx);
@@ -324,8 +338,8 @@ unreg:
while (--i >= 0)
blk_mq_unregister_hctx(q->queue_hw_ctx[i]);
- kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
- kobject_del(&q->mq_kobj);
+ kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
+ kobject_del(q->mq_kobj);
kobject_put(&dev->kobj);
return ret;
}
@@ -340,7 +354,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
return ret;
}
-EXPORT_SYMBOL_GPL(blk_mq_register_dev);
void blk_mq_sysfs_unregister(struct request_queue *q)
{
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index cfda95b85d34..2089c6c62f44 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -110,7 +110,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
struct sbitmap_queue *bt;
struct sbq_wait_state *ws;
- DEFINE_WAIT(wait);
+ DEFINE_SBQ_WAIT(wait);
unsigned int tag_offset;
bool drop_ctx;
int tag;
@@ -154,8 +154,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (tag != -1)
break;
- prepare_to_wait_exclusive(&ws->wait, &wait,
- TASK_UNINTERRUPTIBLE);
+ sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
tag = __blk_mq_get_tag(data, bt);
if (tag != -1)
@@ -167,16 +166,17 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
bt_prev = bt;
io_schedule();
+ sbitmap_finish_wait(bt, ws, &wait);
+
data->ctx = blk_mq_get_ctx(data->q);
- data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
+ data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
+ data->ctx->cpu);
tags = blk_mq_tags_from_data(data);
if (data->flags & BLK_MQ_REQ_RESERVED)
bt = &tags->breserved_tags;
else
bt = &tags->bitmap_tags;
- finish_wait(&ws->wait, &wait);
-
/*
* If destination hw queue is changed, fake wake up on
* previous queue for compensating the wake up miss, so
@@ -191,7 +191,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (drop_ctx && data->ctx)
blk_mq_put_ctx(data->ctx);
- finish_wait(&ws->wait, &wait);
+ sbitmap_finish_wait(bt, ws, &wait);
found_tag:
return tag + tag_offset;
@@ -235,7 +235,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* test and set the bit before assigning ->rqs[].
*/
if (rq && rq->q == hctx->queue)
- iter_data->fn(hctx, rq, iter_data->data, reserved);
+ return iter_data->fn(hctx, rq, iter_data->data, reserved);
return true;
}
@@ -247,7 +247,8 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* @fn: Pointer to the function that will be called for each request
* associated with @hctx that has been assigned a driver tag.
* @fn will be called as follows: @fn(@hctx, rq, @data, @reserved)
- * where rq is a pointer to a request.
+ * where rq is a pointer to a request. Return true to continue
+ * iterating tags, false to stop.
* @data: Will be passed as third argument to @fn.
* @reserved: Indicates whether @bt is the breserved_tags member or the
* bitmap_tags member of struct blk_mq_tags.
@@ -288,7 +289,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
*/
rq = tags->rqs[bitnr];
if (rq && blk_mq_request_started(rq))
- iter_data->fn(rq, iter_data->data, reserved);
+ return iter_data->fn(rq, iter_data->data, reserved);
return true;
}
@@ -300,7 +301,8 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* or the bitmap_tags member of struct blk_mq_tags.
* @fn: Pointer to the function that will be called for each started
* request. @fn will be called as follows: @fn(rq, @data,
- * @reserved) where rq is a pointer to a request.
+ * @reserved) where rq is a pointer to a request. Return true
+ * to continue iterating tags, false to stop.
* @data: Will be passed as second argument to @fn.
* @reserved: Indicates whether @bt is the breserved_tags member or the
* bitmap_tags member of struct blk_mq_tags.
@@ -325,7 +327,8 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
* @fn: Pointer to the function that will be called for each started
* request. @fn will be called as follows: @fn(rq, @priv,
* reserved) where rq is a pointer to a request. 'reserved'
- * indicates whether or not @rq is a reserved request.
+ * indicates whether or not @rq is a reserved request. Return
+ * true to continue iterating tags, false to stop.
* @priv: Will be passed as second argument to @fn.
*/
static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
@@ -342,7 +345,8 @@ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
* @fn: Pointer to the function that will be called for each started
* request. @fn will be called as follows: @fn(rq, @priv,
* reserved) where rq is a pointer to a request. 'reserved'
- * indicates whether or not @rq is a reserved request.
+ * indicates whether or not @rq is a reserved request. Return
+ * true to continue iterating tags, false to stop.
* @priv: Will be passed as second argument to @fn.
*/
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
@@ -526,16 +530,7 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
*/
u32 blk_mq_unique_tag(struct request *rq)
{
- struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx;
- int hwq = 0;
-
- if (q->mq_ops) {
- hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
- hwq = hctx->queue_num;
- }
-
- return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
+ return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) |
(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);
diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c
index c3afbca11299..370827163835 100644
--- a/block/blk-mq-virtio.c
+++ b/block/blk-mq-virtio.c
@@ -29,7 +29,7 @@
* that maps a queue to the CPUs that have irq affinity for the corresponding
* vector.
*/
-int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set,
+int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap,
struct virtio_device *vdev, int first_vec)
{
const struct cpumask *mask;
@@ -38,17 +38,17 @@ int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set,
if (!vdev->config->get_vq_affinity)
goto fallback;
- for (queue = 0; queue < set->nr_hw_queues; queue++) {
+ for (queue = 0; queue < qmap->nr_queues; queue++) {
mask = vdev->config->get_vq_affinity(vdev, first_vec + queue);
if (!mask)
goto fallback;
for_each_cpu(cpu, mask)
- set->mq_map[cpu] = queue;
+ qmap->mq_map[cpu] = qmap->queue_offset + queue;
}
return 0;
fallback:
- return blk_mq_map_queues(set);
+ return blk_mq_map_queues(qmap);
}
EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6a7566244de3..3ba37b9e15e9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -38,7 +38,6 @@
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
-static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
@@ -75,14 +74,18 @@ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx)
{
- if (!sbitmap_test_bit(&hctx->ctx_map, ctx->index_hw))
- sbitmap_set_bit(&hctx->ctx_map, ctx->index_hw);
+ const int bit = ctx->index_hw[hctx->type];
+
+ if (!sbitmap_test_bit(&hctx->ctx_map, bit))
+ sbitmap_set_bit(&hctx->ctx_map, bit);
}
static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx)
{
- sbitmap_clear_bit(&hctx->ctx_map, ctx->index_hw);
+ const int bit = ctx->index_hw[hctx->type];
+
+ sbitmap_clear_bit(&hctx->ctx_map, bit);
}
struct mq_inflight {
@@ -90,33 +93,33 @@ struct mq_inflight {
unsigned int *inflight;
};
-static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
+static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv,
bool reserved)
{
struct mq_inflight *mi = priv;
/*
- * index[0] counts the specific partition that was asked for. index[1]
- * counts the ones that are active on the whole device, so increment
- * that if mi->part is indeed a partition, and not a whole device.
+ * index[0] counts the specific partition that was asked for.
*/
if (rq->part == mi->part)
mi->inflight[0]++;
- if (mi->part->partno)
- mi->inflight[1]++;
+
+ return true;
}
-void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2])
+unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
{
+ unsigned inflight[2];
struct mq_inflight mi = { .part = part, .inflight = inflight, };
inflight[0] = inflight[1] = 0;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
+
+ return inflight[0];
}
-static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
+static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv,
bool reserved)
{
@@ -124,6 +127,8 @@ static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
if (rq->part == mi->part)
mi->inflight[rq_data_dir(rq)]++;
+
+ return true;
}
void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
@@ -142,7 +147,7 @@ void blk_freeze_queue_start(struct request_queue *q)
freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
if (freeze_depth == 1) {
percpu_ref_kill(&q->q_usage_counter);
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_run_hw_queues(q, false);
}
}
@@ -177,8 +182,6 @@ void blk_freeze_queue(struct request_queue *q)
* exported to drivers as the only user for unfreeze is blk_mq.
*/
blk_freeze_queue_start(q);
- if (!q->mq_ops)
- blk_drain_queue(q);
blk_mq_freeze_queue_wait(q);
}
@@ -275,6 +278,15 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
}
EXPORT_SYMBOL(blk_mq_can_queue);
+/*
+ * Only need start/end time stamping if we have stats enabled, or using
+ * an IO scheduler.
+ */
+static inline bool blk_mq_need_time_stamp(struct request *rq)
+{
+ return (rq->rq_flags & RQF_IO_STAT) || rq->q->elevator;
+}
+
static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
unsigned int tag, unsigned int op)
{
@@ -298,8 +310,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
/* csd/requeue_work/fifo_time is initialized before use */
rq->q = data->q;
rq->mq_ctx = data->ctx;
+ rq->mq_hctx = data->hctx;
rq->rq_flags = rq_flags;
- rq->cpu = -1;
rq->cmd_flags = op;
if (data->flags & BLK_MQ_REQ_PREEMPT)
rq->rq_flags |= RQF_PREEMPT;
@@ -310,7 +322,10 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
RB_CLEAR_NODE(&rq->rb_node);
rq->rq_disk = NULL;
rq->part = NULL;
- rq->start_time_ns = ktime_get_ns();
+ if (blk_mq_need_time_stamp(rq))
+ rq->start_time_ns = ktime_get_ns();
+ else
+ rq->start_time_ns = 0;
rq->io_start_time_ns = 0;
rq->nr_phys_segments = 0;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
@@ -319,27 +334,22 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
rq->special = NULL;
/* tag was already set */
rq->extra_len = 0;
- rq->__deadline = 0;
+ WRITE_ONCE(rq->deadline, 0);
- INIT_LIST_HEAD(&rq->timeout_list);
rq->timeout = 0;
rq->end_io = NULL;
rq->end_io_data = NULL;
rq->next_rq = NULL;
-#ifdef CONFIG_BLK_CGROUP
- rq->rl = NULL;
-#endif
-
data->ctx->rq_dispatched[op_is_sync(op)]++;
refcount_set(&rq->ref, 1);
return rq;
}
static struct request *blk_mq_get_request(struct request_queue *q,
- struct bio *bio, unsigned int op,
- struct blk_mq_alloc_data *data)
+ struct bio *bio,
+ struct blk_mq_alloc_data *data)
{
struct elevator_queue *e = q->elevator;
struct request *rq;
@@ -353,8 +363,9 @@ static struct request *blk_mq_get_request(struct request_queue *q,
put_ctx_on_error = true;
}
if (likely(!data->hctx))
- data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
- if (op & REQ_NOWAIT)
+ data->hctx = blk_mq_map_queue(q, data->cmd_flags,
+ data->ctx->cpu);
+ if (data->cmd_flags & REQ_NOWAIT)
data->flags |= BLK_MQ_REQ_NOWAIT;
if (e) {
@@ -365,9 +376,10 @@ static struct request *blk_mq_get_request(struct request_queue *q,
* dispatch list. Don't include reserved tags in the
* limiting, as it isn't useful.
*/
- if (!op_is_flush(op) && e->type->ops.mq.limit_depth &&
+ if (!op_is_flush(data->cmd_flags) &&
+ e->type->ops.limit_depth &&
!(data->flags & BLK_MQ_REQ_RESERVED))
- e->type->ops.mq.limit_depth(op, data);
+ e->type->ops.limit_depth(data->cmd_flags, data);
} else {
blk_mq_tag_busy(data->hctx);
}
@@ -382,14 +394,14 @@ static struct request *blk_mq_get_request(struct request_queue *q,
return NULL;
}
- rq = blk_mq_rq_ctx_init(data, tag, op);
- if (!op_is_flush(op)) {
+ rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags);
+ if (!op_is_flush(data->cmd_flags)) {
rq->elv.icq = NULL;
- if (e && e->type->ops.mq.prepare_request) {
- if (e->type->icq_cache && rq_ioc(bio))
- blk_mq_sched_assign_ioc(rq, bio);
+ if (e && e->type->ops.prepare_request) {
+ if (e->type->icq_cache)
+ blk_mq_sched_assign_ioc(rq);
- e->type->ops.mq.prepare_request(rq, bio);
+ e->type->ops.prepare_request(rq, bio);
rq->rq_flags |= RQF_ELVPRIV;
}
}
@@ -400,7 +412,7 @@ static struct request *blk_mq_get_request(struct request_queue *q,
struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
blk_mq_req_flags_t flags)
{
- struct blk_mq_alloc_data alloc_data = { .flags = flags };
+ struct blk_mq_alloc_data alloc_data = { .flags = flags, .cmd_flags = op };
struct request *rq;
int ret;
@@ -408,7 +420,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
if (ret)
return ERR_PTR(ret);
- rq = blk_mq_get_request(q, NULL, op, &alloc_data);
+ rq = blk_mq_get_request(q, NULL, &alloc_data);
blk_queue_exit(q);
if (!rq)
@@ -426,7 +438,7 @@ EXPORT_SYMBOL(blk_mq_alloc_request);
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
{
- struct blk_mq_alloc_data alloc_data = { .flags = flags };
+ struct blk_mq_alloc_data alloc_data = { .flags = flags, .cmd_flags = op };
struct request *rq;
unsigned int cpu;
int ret;
@@ -459,7 +471,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
cpu = cpumask_first_and(alloc_data.hctx->cpumask, cpu_online_mask);
alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
- rq = blk_mq_get_request(q, NULL, op, &alloc_data);
+ rq = blk_mq_get_request(q, NULL, &alloc_data);
blk_queue_exit(q);
if (!rq)
@@ -473,10 +485,11 @@ static void __blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
struct blk_mq_ctx *ctx = rq->mq_ctx;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
const int sched_tag = rq->internal_tag;
blk_pm_mark_last_busy(rq);
+ rq->mq_hctx = NULL;
if (rq->tag != -1)
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
if (sched_tag != -1)
@@ -490,11 +503,11 @@ void blk_mq_free_request(struct request *rq)
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = rq->mq_ctx;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
if (rq->rq_flags & RQF_ELVPRIV) {
- if (e && e->type->ops.mq.finish_request)
- e->type->ops.mq.finish_request(rq);
+ if (e && e->type->ops.finish_request)
+ e->type->ops.finish_request(rq);
if (rq->elv.icq) {
put_io_context(rq->elv.icq->ioc);
rq->elv.icq = NULL;
@@ -510,9 +523,6 @@ void blk_mq_free_request(struct request *rq)
rq_qos_done(q, rq);
- if (blk_rq_rl(rq))
- blk_put_rl(blk_rq_rl(rq));
-
WRITE_ONCE(rq->state, MQ_RQ_IDLE);
if (refcount_dec_and_test(&rq->ref))
__blk_mq_free_request(rq);
@@ -521,7 +531,10 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request);
inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
- u64 now = ktime_get_ns();
+ u64 now = 0;
+
+ if (blk_mq_need_time_stamp(rq))
+ now = ktime_get_ns();
if (rq->rq_flags & RQF_STATS) {
blk_mq_poll_stats_start(rq->q);
@@ -555,19 +568,19 @@ EXPORT_SYMBOL(blk_mq_end_request);
static void __blk_mq_complete_request_remote(void *data)
{
struct request *rq = data;
+ struct request_queue *q = rq->q;
- rq->q->softirq_done_fn(rq);
+ q->mq_ops->complete(rq);
}
static void __blk_mq_complete_request(struct request *rq)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
+ struct request_queue *q = rq->q;
bool shared = false;
int cpu;
- if (!blk_mq_mark_complete(rq))
- return;
-
+ WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
/*
* Most of single queue controllers, there is only one irq vector
* for handling IO completion, and the only irq's affinity is set
@@ -577,18 +590,23 @@ static void __blk_mq_complete_request(struct request *rq)
* So complete IO reqeust in softirq context in case of single queue
* for not degrading IO performance by irqsoff latency.
*/
- if (rq->q->nr_hw_queues == 1) {
+ if (q->nr_hw_queues == 1) {
__blk_complete_request(rq);
return;
}
- if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
- rq->q->softirq_done_fn(rq);
+ /*
+ * For a polled request, always complete locallly, it's pointless
+ * to redirect the completion.
+ */
+ if ((rq->cmd_flags & REQ_HIPRI) ||
+ !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
+ q->mq_ops->complete(rq);
return;
}
cpu = get_cpu();
- if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
+ if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
shared = cpus_share_cache(cpu, ctx->cpu);
if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
@@ -597,7 +615,7 @@ static void __blk_mq_complete_request(struct request *rq)
rq->csd.flags = 0;
smp_call_function_single_async(ctx->cpu, &rq->csd);
} else {
- rq->q->softirq_done_fn(rq);
+ q->mq_ops->complete(rq);
}
put_cpu();
}
@@ -630,11 +648,12 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
* Ends all I/O on a request. It does not handle partial completions.
* The actual completion happens out-of-order, through a IPI handler.
**/
-void blk_mq_complete_request(struct request *rq)
+bool blk_mq_complete_request(struct request *rq)
{
if (unlikely(blk_should_fake_timeout(rq->q)))
- return;
+ return false;
__blk_mq_complete_request(rq);
+ return true;
}
EXPORT_SYMBOL(blk_mq_complete_request);
@@ -701,7 +720,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
/* this request will be re-inserted to io scheduler queue */
blk_mq_sched_requeue_request(rq);
- BUG_ON(blk_queued_rq(rq));
+ BUG_ON(!list_empty(&rq->queuelist));
blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
}
EXPORT_SYMBOL(blk_mq_requeue_request);
@@ -786,6 +805,32 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
}
EXPORT_SYMBOL(blk_mq_tag_to_rq);
+static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
+ void *priv, bool reserved)
+{
+ /*
+ * If we find a request that is inflight and the queue matches,
+ * we know the queue is busy. Return false to stop the iteration.
+ */
+ if (rq->state == MQ_RQ_IN_FLIGHT && rq->q == hctx->queue) {
+ bool *busy = priv;
+
+ *busy = true;
+ return false;
+ }
+
+ return true;
+}
+
+bool blk_mq_queue_inflight(struct request_queue *q)
+{
+ bool busy = false;
+
+ blk_mq_queue_tag_busy_iter(q, blk_mq_rq_inflight, &busy);
+ return busy;
+}
+EXPORT_SYMBOL_GPL(blk_mq_queue_inflight);
+
static void blk_mq_rq_timed_out(struct request *req, bool reserved)
{
req->rq_flags |= RQF_TIMED_OUT;
@@ -810,7 +855,7 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
if (rq->rq_flags & RQF_TIMED_OUT)
return false;
- deadline = blk_rq_deadline(rq);
+ deadline = READ_ONCE(rq->deadline);
if (time_after_eq(jiffies, deadline))
return true;
@@ -821,7 +866,7 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
return false;
}
-static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
+static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
unsigned long *next = priv;
@@ -831,7 +876,7 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
* so we're not unnecessarilly synchronizing across CPUs.
*/
if (!blk_mq_req_expired(rq, next))
- return;
+ return true;
/*
* We have reason to believe the request may be expired. Take a
@@ -843,7 +888,7 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
* timeout handler to posting a natural completion.
*/
if (!refcount_inc_not_zero(&rq->ref))
- return;
+ return true;
/*
* The request is now locked and cannot be reallocated underneath the
@@ -855,6 +900,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
blk_mq_rq_timed_out(rq, reserved);
if (refcount_dec_and_test(&rq->ref))
__blk_mq_free_request(rq);
+
+ return true;
}
static void blk_mq_timeout_work(struct work_struct *work)
@@ -911,9 +958,10 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
struct flush_busy_ctx_data *flush_data = data;
struct blk_mq_hw_ctx *hctx = flush_data->hctx;
struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+ enum hctx_type type = hctx->type;
spin_lock(&ctx->lock);
- list_splice_tail_init(&ctx->rq_list, flush_data->list);
+ list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
sbitmap_clear_bit(sb, bitnr);
spin_unlock(&ctx->lock);
return true;
@@ -945,12 +993,13 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
struct dispatch_rq_data *dispatch_data = data;
struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+ enum hctx_type type = hctx->type;
spin_lock(&ctx->lock);
- if (!list_empty(&ctx->rq_list)) {
- dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
+ if (!list_empty(&ctx->rq_lists[type])) {
+ dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
list_del_init(&dispatch_data->rq->queuelist);
- if (list_empty(&ctx->rq_list))
+ if (list_empty(&ctx->rq_lists[type]))
sbitmap_clear_bit(sb, bitnr);
}
spin_unlock(&ctx->lock);
@@ -961,7 +1010,7 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *start)
{
- unsigned off = start ? start->index_hw : 0;
+ unsigned off = start ? start->index_hw[hctx->type] : 0;
struct dispatch_rq_data data = {
.hctx = hctx,
.rq = NULL,
@@ -985,8 +1034,9 @@ bool blk_mq_get_driver_tag(struct request *rq)
{
struct blk_mq_alloc_data data = {
.q = rq->q,
- .hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
+ .hctx = rq->mq_hctx,
.flags = BLK_MQ_REQ_NOWAIT,
+ .cmd_flags = rq->cmd_flags,
};
bool shared;
@@ -1150,7 +1200,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
rq = list_first_entry(list, struct request, queuelist);
- hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+ hctx = rq->mq_hctx;
if (!got_budget && !blk_mq_get_dispatch_budget(hctx))
break;
@@ -1223,6 +1273,14 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
if (!list_empty(list)) {
bool needs_restart;
+ /*
+ * If we didn't flush the entire list, we could have told
+ * the driver there was more coming, but that turned out to
+ * be a lie.
+ */
+ if (q->mq_ops->commit_rqs)
+ q->mq_ops->commit_rqs(hctx);
+
spin_lock(&hctx->lock);
list_splice_init(list, &hctx->dispatch);
spin_unlock(&hctx->lock);
@@ -1552,15 +1610,16 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
bool at_head)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
+ enum hctx_type type = hctx->type;
lockdep_assert_held(&ctx->lock);
trace_block_rq_insert(hctx->queue, rq);
if (at_head)
- list_add(&rq->queuelist, &ctx->rq_list);
+ list_add(&rq->queuelist, &ctx->rq_lists[type]);
else
- list_add_tail(&rq->queuelist, &ctx->rq_list);
+ list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
}
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
@@ -1580,8 +1639,7 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
*/
void blk_mq_request_bypass_insert(struct request *rq, bool run_queue)
{
- struct blk_mq_ctx *ctx = rq->mq_ctx;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
spin_lock(&hctx->lock);
list_add_tail(&rq->queuelist, &hctx->dispatch);
@@ -1596,6 +1654,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
{
struct request *rq;
+ enum hctx_type type = hctx->type;
/*
* preemption doesn't flush plug list, so it's possible ctx->cpu is
@@ -1607,35 +1666,46 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
}
spin_lock(&ctx->lock);
- list_splice_tail_init(list, &ctx->rq_list);
+ list_splice_tail_init(list, &ctx->rq_lists[type]);
blk_mq_hctx_mark_pending(hctx, ctx);
spin_unlock(&ctx->lock);
}
-static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct request *rqa = container_of(a, struct request, queuelist);
struct request *rqb = container_of(b, struct request, queuelist);
- return !(rqa->mq_ctx < rqb->mq_ctx ||
- (rqa->mq_ctx == rqb->mq_ctx &&
- blk_rq_pos(rqa) < blk_rq_pos(rqb)));
+ if (rqa->mq_ctx < rqb->mq_ctx)
+ return -1;
+ else if (rqa->mq_ctx > rqb->mq_ctx)
+ return 1;
+ else if (rqa->mq_hctx < rqb->mq_hctx)
+ return -1;
+ else if (rqa->mq_hctx > rqb->mq_hctx)
+ return 1;
+
+ return blk_rq_pos(rqa) > blk_rq_pos(rqb);
}
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
+ struct blk_mq_hw_ctx *this_hctx;
struct blk_mq_ctx *this_ctx;
struct request_queue *this_q;
struct request *rq;
LIST_HEAD(list);
- LIST_HEAD(ctx_list);
+ LIST_HEAD(rq_list);
unsigned int depth;
list_splice_init(&plug->mq_list, &list);
+ plug->rq_count = 0;
- list_sort(NULL, &list, plug_ctx_cmp);
+ if (plug->rq_count > 2 && plug->multiple_queues)
+ list_sort(NULL, &list, plug_rq_cmp);
this_q = NULL;
+ this_hctx = NULL;
this_ctx = NULL;
depth = 0;
@@ -1643,30 +1713,31 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
rq = list_entry_rq(list.next);
list_del_init(&rq->queuelist);
BUG_ON(!rq->q);
- if (rq->mq_ctx != this_ctx) {
- if (this_ctx) {
+ if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) {
+ if (this_hctx) {
trace_block_unplug(this_q, depth, !from_schedule);
- blk_mq_sched_insert_requests(this_q, this_ctx,
- &ctx_list,
+ blk_mq_sched_insert_requests(this_hctx, this_ctx,
+ &rq_list,
from_schedule);
}
- this_ctx = rq->mq_ctx;
this_q = rq->q;
+ this_ctx = rq->mq_ctx;
+ this_hctx = rq->mq_hctx;
depth = 0;
}
depth++;
- list_add_tail(&rq->queuelist, &ctx_list);
+ list_add_tail(&rq->queuelist, &rq_list);
}
/*
- * If 'this_ctx' is set, we know we have entries to complete
- * on 'ctx_list'. Do those.
+ * If 'this_hctx' is set, we know we have entries to complete
+ * on 'rq_list'. Do those.
*/
- if (this_ctx) {
+ if (this_hctx) {
trace_block_unplug(this_q, depth, !from_schedule);
- blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list,
+ blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
from_schedule);
}
}
@@ -1675,27 +1746,17 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
{
blk_init_request_from_bio(rq, bio);
- blk_rq_set_rl(rq, blk_get_rl(rq->q, bio));
-
blk_account_io_start(rq, true);
}
-static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
-{
- if (rq->tag != -1)
- return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false);
-
- return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
-}
-
static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
- blk_qc_t *cookie)
+ blk_qc_t *cookie, bool last)
{
struct request_queue *q = rq->q;
struct blk_mq_queue_data bd = {
.rq = rq,
- .last = true,
+ .last = last,
};
blk_qc_t new_cookie;
blk_status_t ret;
@@ -1727,77 +1788,74 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
return ret;
}
-static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
blk_qc_t *cookie,
- bool bypass_insert)
+ bool bypass, bool last)
{
struct request_queue *q = rq->q;
bool run_queue = true;
+ blk_status_t ret = BLK_STS_RESOURCE;
+ int srcu_idx;
+ bool force = false;
+ hctx_lock(hctx, &srcu_idx);
/*
- * RCU or SRCU read lock is needed before checking quiesced flag.
+ * hctx_lock is needed before checking quiesced flag.
*
- * When queue is stopped or quiesced, ignore 'bypass_insert' from
- * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller,
- * and avoid driver to try to dispatch again.
+ * When queue is stopped or quiesced, ignore 'bypass', insert
+ * and return BLK_STS_OK to caller, and avoid driver to try to
+ * dispatch again.
*/
- if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
+ if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) {
run_queue = false;
- bypass_insert = false;
- goto insert;
+ bypass = false;
+ goto out_unlock;
}
- if (q->elevator && !bypass_insert)
- goto insert;
+ if (unlikely(q->elevator && !bypass))
+ goto out_unlock;
if (!blk_mq_get_dispatch_budget(hctx))
- goto insert;
+ goto out_unlock;
if (!blk_mq_get_driver_tag(rq)) {
blk_mq_put_dispatch_budget(hctx);
- goto insert;
+ goto out_unlock;
}
- return __blk_mq_issue_directly(hctx, rq, cookie);
-insert:
- if (bypass_insert)
- return BLK_STS_RESOURCE;
-
- blk_mq_request_bypass_insert(rq, run_queue);
- return BLK_STS_OK;
-}
-
-static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
- struct request *rq, blk_qc_t *cookie)
-{
- blk_status_t ret;
- int srcu_idx;
-
- might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
-
- hctx_lock(hctx, &srcu_idx);
-
- ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
- if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
- blk_mq_request_bypass_insert(rq, true);
- else if (ret != BLK_STS_OK)
- blk_mq_end_request(rq, ret);
-
- hctx_unlock(hctx, srcu_idx);
-}
-
-blk_status_t blk_mq_request_issue_directly(struct request *rq)
-{
- blk_status_t ret;
- int srcu_idx;
- blk_qc_t unused_cookie;
- struct blk_mq_ctx *ctx = rq->mq_ctx;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
-
- hctx_lock(hctx, &srcu_idx);
- ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true);
+ /*
+ * Always add a request that has been through
+ *.queue_rq() to the hardware dispatch list.
+ */
+ force = true;
+ ret = __blk_mq_issue_directly(hctx, rq, cookie, last);
+out_unlock:
hctx_unlock(hctx, srcu_idx);
+ switch (ret) {
+ case BLK_STS_OK:
+ break;
+ case BLK_STS_DEV_RESOURCE:
+ case BLK_STS_RESOURCE:
+ if (force) {
+ blk_mq_request_bypass_insert(rq, run_queue);
+ /*
+ * We have to return BLK_STS_OK for the DM
+ * to avoid livelock. Otherwise, we return
+ * the real result to indicate whether the
+ * request is direct-issued successfully.
+ */
+ ret = bypass ? BLK_STS_OK : ret;
+ } else if (!bypass) {
+ blk_mq_sched_insert_request(rq, false,
+ run_queue, false);
+ }
+ break;
+ default:
+ if (!bypass)
+ blk_mq_end_request(rq, ret);
+ break;
+ }
return ret;
}
@@ -1805,22 +1863,42 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq)
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list)
{
+ blk_qc_t unused;
+ blk_status_t ret = BLK_STS_OK;
+
while (!list_empty(list)) {
- blk_status_t ret;
struct request *rq = list_first_entry(list, struct request,
queuelist);
list_del_init(&rq->queuelist);
- ret = blk_mq_request_issue_directly(rq);
- if (ret != BLK_STS_OK) {
- if (ret == BLK_STS_RESOURCE ||
- ret == BLK_STS_DEV_RESOURCE) {
- blk_mq_request_bypass_insert(rq,
+ if (ret == BLK_STS_OK)
+ ret = blk_mq_try_issue_directly(hctx, rq, &unused,
+ false,
list_empty(list));
- break;
- }
- blk_mq_end_request(rq, ret);
- }
+ else
+ blk_mq_sched_insert_request(rq, false, true, false);
+ }
+
+ /*
+ * If we didn't flush the entire list, we could have told
+ * the driver there was more coming, but that turned out to
+ * be a lie.
+ */
+ if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs)
+ hctx->queue->mq_ops->commit_rqs(hctx);
+}
+
+static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
+{
+ list_add_tail(&rq->queuelist, &plug->mq_list);
+ plug->rq_count++;
+ if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) {
+ struct request *tmp;
+
+ tmp = list_first_entry(&plug->mq_list, struct request,
+ queuelist);
+ if (tmp->q != rq->q)
+ plug->multiple_queues = true;
}
}
@@ -1828,9 +1906,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
{
const int is_sync = op_is_sync(bio->bi_opf);
const int is_flush_fua = op_is_flush(bio->bi_opf);
- struct blk_mq_alloc_data data = { .flags = 0 };
+ struct blk_mq_alloc_data data = { .flags = 0, .cmd_flags = bio->bi_opf };
struct request *rq;
- unsigned int request_count = 0;
struct blk_plug *plug;
struct request *same_queue_rq = NULL;
blk_qc_t cookie;
@@ -1843,15 +1920,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
if (!is_flush_fua && !blk_queue_nomerges(q) &&
- blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
+ blk_attempt_plug_merge(q, bio, &same_queue_rq))
return BLK_QC_T_NONE;
if (blk_mq_sched_bio_merge(q, bio))
return BLK_QC_T_NONE;
- rq_qos_throttle(q, bio, NULL);
+ rq_qos_throttle(q, bio);
- rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
+ rq = blk_mq_get_request(q, bio, &data);
if (unlikely(!rq)) {
rq_qos_cleanup(q, bio);
if (bio->bi_opf & REQ_NOWAIT)
@@ -1873,21 +1950,17 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
/* bypass scheduler for flush rq */
blk_insert_flush(rq);
blk_mq_run_hw_queue(data.hctx, true);
- } else if (plug && q->nr_hw_queues == 1) {
+ } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
+ /*
+ * Use plugging if we have a ->commit_rqs() hook as well, as
+ * we know the driver uses bd->last in a smart fashion.
+ */
+ unsigned int request_count = plug->rq_count;
struct request *last = NULL;
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
- /*
- * @request_count may become stale because of schedule
- * out, so check the list again.
- */
- if (list_empty(&plug->mq_list))
- request_count = 0;
- else if (blk_queue_nomerges(q))
- request_count = blk_plug_queued_count(q);
-
if (!request_count)
trace_block_plug(q);
else
@@ -1899,7 +1972,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
trace_block_plug(q);
}
- list_add_tail(&rq->queuelist, &plug->mq_list);
+ blk_add_rq_to_plug(plug, rq);
} else if (plug && !blk_queue_nomerges(q)) {
blk_mq_bio_to_request(rq, bio);
@@ -1912,23 +1985,24 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
*/
if (list_empty(&plug->mq_list))
same_queue_rq = NULL;
- if (same_queue_rq)
+ if (same_queue_rq) {
list_del_init(&same_queue_rq->queuelist);
- list_add_tail(&rq->queuelist, &plug->mq_list);
+ plug->rq_count--;
+ }
+ blk_add_rq_to_plug(plug, rq);
blk_mq_put_ctx(data.ctx);
if (same_queue_rq) {
- data.hctx = blk_mq_map_queue(q,
- same_queue_rq->mq_ctx->cpu);
+ data.hctx = same_queue_rq->mq_hctx;
blk_mq_try_issue_directly(data.hctx, same_queue_rq,
- &cookie);
+ &cookie, false, true);
}
} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
!data.hctx->dispatch_busy)) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
- blk_mq_try_issue_directly(data.hctx, rq, &cookie);
+ blk_mq_try_issue_directly(data.hctx, rq, &cookie, false, true);
} else {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
@@ -1986,7 +2060,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
struct blk_mq_tags *tags;
int node;
- node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+ node = blk_mq_hw_queue_to_node(&set->map[0], hctx_idx);
if (node == NUMA_NO_NODE)
node = set->numa_node;
@@ -2042,7 +2116,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
size_t rq_size, left;
int node;
- node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+ node = blk_mq_hw_queue_to_node(&set->map[0], hctx_idx);
if (node == NUMA_NO_NODE)
node = set->numa_node;
@@ -2122,13 +2196,15 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
LIST_HEAD(tmp);
+ enum hctx_type type;
hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
ctx = __blk_mq_get_ctx(hctx->queue, cpu);
+ type = hctx->type;
spin_lock(&ctx->lock);
- if (!list_empty(&ctx->rq_list)) {
- list_splice_init(&ctx->rq_list, &tmp);
+ if (!list_empty(&ctx->rq_lists[type])) {
+ list_splice_init(&ctx->rq_lists[type], &tmp);
blk_mq_hctx_clear_pending(hctx, ctx);
}
spin_unlock(&ctx->lock);
@@ -2259,24 +2335,30 @@ static int blk_mq_init_hctx(struct request_queue *q,
static void blk_mq_init_cpu_queues(struct request_queue *q,
unsigned int nr_hw_queues)
{
- unsigned int i;
+ struct blk_mq_tag_set *set = q->tag_set;
+ unsigned int i, j;
for_each_possible_cpu(i) {
struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
struct blk_mq_hw_ctx *hctx;
+ int k;
__ctx->cpu = i;
spin_lock_init(&__ctx->lock);
- INIT_LIST_HEAD(&__ctx->rq_list);
+ for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++)
+ INIT_LIST_HEAD(&__ctx->rq_lists[k]);
+
__ctx->queue = q;
/*
* Set local node, IFF we have more than one hw queue. If
* not, we remain on the home node of the device
*/
- hctx = blk_mq_map_queue(q, i);
- if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
- hctx->numa_node = local_memory_node(cpu_to_node(i));
+ for (j = 0; j < set->nr_maps; j++) {
+ hctx = blk_mq_map_queue_type(q, j, i);
+ if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
+ hctx->numa_node = local_memory_node(cpu_to_node(i));
+ }
}
}
@@ -2302,7 +2384,7 @@ static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
unsigned int hctx_idx)
{
- if (set->tags[hctx_idx]) {
+ if (set->tags && set->tags[hctx_idx]) {
blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
blk_mq_free_rq_map(set->tags[hctx_idx]);
set->tags[hctx_idx] = NULL;
@@ -2311,7 +2393,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
static void blk_mq_map_swqueue(struct request_queue *q)
{
- unsigned int i, hctx_idx;
+ unsigned int i, j, hctx_idx;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
struct blk_mq_tag_set *set = q->tag_set;
@@ -2333,7 +2415,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
* If the cpu isn't present, the cpu is mapped to first hctx.
*/
for_each_possible_cpu(i) {
- hctx_idx = q->mq_map[i];
+ hctx_idx = set->map[0].mq_map[i];
/* unmapped hw queue can be remapped after CPU topo changed */
if (!set->tags[hctx_idx] &&
!__blk_mq_alloc_rq_map(set, hctx_idx)) {
@@ -2343,15 +2425,35 @@ static void blk_mq_map_swqueue(struct request_queue *q)
* case, remap the current ctx to hctx[0] which
* is guaranteed to always have tags allocated
*/
- q->mq_map[i] = 0;
+ set->map[0].mq_map[i] = 0;
}
ctx = per_cpu_ptr(q->queue_ctx, i);
- hctx = blk_mq_map_queue(q, i);
+ for (j = 0; j < set->nr_maps; j++) {
+ if (!set->map[j].nr_queues)
+ continue;
+
+ hctx = blk_mq_map_queue_type(q, j, i);
+
+ /*
+ * If the CPU is already set in the mask, then we've
+ * mapped this one already. This can happen if
+ * devices share queues across queue maps.
+ */
+ if (cpumask_test_cpu(i, hctx->cpumask))
+ continue;
+
+ cpumask_set_cpu(i, hctx->cpumask);
+ hctx->type = j;
+ ctx->index_hw[hctx->type] = hctx->nr_ctx;
+ hctx->ctxs[hctx->nr_ctx++] = ctx;
- cpumask_set_cpu(i, hctx->cpumask);
- ctx->index_hw = hctx->nr_ctx;
- hctx->ctxs[hctx->nr_ctx++] = ctx;
+ /*
+ * If the nr_ctx type overflows, we have exceeded the
+ * amount of sw queues we can support.
+ */
+ BUG_ON(!hctx->nr_ctx);
+ }
}
mutex_unlock(&q->sysfs_lock);
@@ -2441,8 +2543,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
struct request_queue *q)
{
- q->tag_set = set;
-
mutex_lock(&set->tag_list_lock);
/*
@@ -2461,6 +2561,34 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
mutex_unlock(&set->tag_list_lock);
}
+/* All allocations will be freed in release handler of q->mq_kobj */
+static int blk_mq_alloc_ctxs(struct request_queue *q)
+{
+ struct blk_mq_ctxs *ctxs;
+ int cpu;
+
+ ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
+ if (!ctxs)
+ return -ENOMEM;
+
+ ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+ if (!ctxs->queue_ctx)
+ goto fail;
+
+ for_each_possible_cpu(cpu) {
+ struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
+ ctx->ctxs = ctxs;
+ }
+
+ q->mq_kobj = &ctxs->kobj;
+ q->queue_ctx = ctxs->queue_ctx;
+
+ return 0;
+ fail:
+ kfree(ctxs);
+ return -ENOMEM;
+}
+
/*
* It is the actual release handler for mq, but we do it from
* request queue's release handler for avoiding use-after-free
@@ -2479,8 +2607,6 @@ void blk_mq_release(struct request_queue *q)
kobject_put(&hctx->kobj);
}
- q->mq_map = NULL;
-
kfree(q->queue_hw_ctx);
/*
@@ -2488,15 +2614,13 @@ void blk_mq_release(struct request_queue *q)
* both share lifetime with request queue.
*/
blk_mq_sysfs_deinit(q);
-
- free_percpu(q->queue_ctx);
}
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{
struct request_queue *uninit_q, *q;
- uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);
+ uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
if (!uninit_q)
return ERR_PTR(-ENOMEM);
@@ -2523,6 +2647,7 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
memset(set, 0, sizeof(*set));
set->ops = ops;
set->nr_hw_queues = 1;
+ set->nr_maps = 1;
set->queue_depth = queue_depth;
set->numa_node = NUMA_NO_NODE;
set->flags = set_flags;
@@ -2600,7 +2725,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
int node;
struct blk_mq_hw_ctx *hctx;
- node = blk_mq_hw_queue_to_node(q->mq_map, i);
+ node = blk_mq_hw_queue_to_node(&set->map[0], i);
/*
* If the hw queue has been mapped to another numa node,
* we need to realloc the hctx. If allocation fails, fallback
@@ -2653,6 +2778,19 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
mutex_unlock(&q->sysfs_lock);
}
+/*
+ * Maximum number of hardware queues we support. For single sets, we'll never
+ * have more than the CPUs (software queues). For multiple sets, the tag_set
+ * user may have set ->nr_hw_queues larger.
+ */
+static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
+{
+ if (set->nr_maps == 1)
+ return nr_cpu_ids;
+
+ return max(set->nr_hw_queues, nr_cpu_ids);
+}
+
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q)
{
@@ -2665,19 +2803,17 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
if (!q->poll_cb)
goto err_exit;
- q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
- if (!q->queue_ctx)
+ if (blk_mq_alloc_ctxs(q))
goto err_exit;
/* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q);
- q->queue_hw_ctx = kcalloc_node(nr_cpu_ids, sizeof(*(q->queue_hw_ctx)),
+ q->nr_queues = nr_hw_queues(set);
+ q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
GFP_KERNEL, set->numa_node);
if (!q->queue_hw_ctx)
- goto err_percpu;
-
- q->mq_map = set->mq_map;
+ goto err_sys_init;
blk_mq_realloc_hw_ctxs(set, q);
if (!q->nr_hw_queues)
@@ -2686,12 +2822,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
- q->nr_queues = nr_cpu_ids;
+ q->tag_set = set;
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
+ if (set->nr_maps > HCTX_TYPE_POLL &&
+ set->map[HCTX_TYPE_POLL].nr_queues)
+ blk_queue_flag_set(QUEUE_FLAG_POLL, q);
if (!(set->flags & BLK_MQ_F_SG_MERGE))
- queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+ blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
q->sg_reserved_size = INT_MAX;
@@ -2700,8 +2839,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
spin_lock_init(&q->requeue_lock);
blk_queue_make_request(q, blk_mq_make_request);
- if (q->mq_ops->poll)
- q->poll_fn = blk_mq_poll;
/*
* Do this after blk_queue_make_request() overrides it...
@@ -2713,9 +2850,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
*/
q->poll_nsec = -1;
- if (set->ops->complete)
- blk_queue_softirq_done(q, set->ops->complete);
-
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
blk_mq_add_queue_tag_set(set, q);
blk_mq_map_swqueue(q);
@@ -2732,8 +2866,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs:
kfree(q->queue_hw_ctx);
-err_percpu:
- free_percpu(q->queue_ctx);
+err_sys_init:
+ blk_mq_sysfs_deinit(q);
err_exit:
q->mq_ops = NULL;
return ERR_PTR(-ENOMEM);
@@ -2802,7 +2936,9 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
{
- if (set->ops->map_queues) {
+ if (set->ops->map_queues && !is_kdump_kernel()) {
+ int i;
+
/*
* transport .map_queues is usually done in the following
* way:
@@ -2810,18 +2946,21 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
* for (queue = 0; queue < set->nr_hw_queues; queue++) {
* mask = get_cpu_mask(queue)
* for_each_cpu(cpu, mask)
- * set->mq_map[cpu] = queue;
+ * set->map[x].mq_map[cpu] = queue;
* }
*
* When we need to remap, the table has to be cleared for
* killing stale mapping since one CPU may not be mapped
* to any hw queue.
*/
- blk_mq_clear_mq_map(set);
+ for (i = 0; i < set->nr_maps; i++)
+ blk_mq_clear_mq_map(&set->map[i]);
return set->ops->map_queues(set);
- } else
- return blk_mq_map_queues(set);
+ } else {
+ BUG_ON(set->nr_maps > 1);
+ return blk_mq_map_queues(&set->map[0]);
+ }
}
/*
@@ -2832,7 +2971,7 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
*/
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
{
- int ret;
+ int i, ret;
BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
@@ -2855,6 +2994,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
set->queue_depth = BLK_MQ_MAX_DEPTH;
}
+ if (!set->nr_maps)
+ set->nr_maps = 1;
+ else if (set->nr_maps > HCTX_MAX_TYPES)
+ return -EINVAL;
+
/*
* If a crashdump is active, then we are potentially in a very
* memory constrained environment. Limit us to 1 queue and
@@ -2862,24 +3006,30 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
*/
if (is_kdump_kernel()) {
set->nr_hw_queues = 1;
+ set->nr_maps = 1;
set->queue_depth = min(64U, set->queue_depth);
}
/*
- * There is no use for more h/w queues than cpus.
+ * There is no use for more h/w queues than cpus if we just have
+ * a single map
*/
- if (set->nr_hw_queues > nr_cpu_ids)
+ if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
set->nr_hw_queues = nr_cpu_ids;
- set->tags = kcalloc_node(nr_cpu_ids, sizeof(struct blk_mq_tags *),
+ set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
if (!set->tags)
return -ENOMEM;
ret = -ENOMEM;
- set->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*set->mq_map),
- GFP_KERNEL, set->numa_node);
- if (!set->mq_map)
- goto out_free_tags;
+ for (i = 0; i < set->nr_maps; i++) {
+ set->map[i].mq_map = kcalloc_node(nr_cpu_ids,
+ sizeof(set->map[i].mq_map[0]),
+ GFP_KERNEL, set->numa_node);
+ if (!set->map[i].mq_map)
+ goto out_free_mq_map;
+ set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
+ }
ret = blk_mq_update_queue_map(set);
if (ret)
@@ -2895,9 +3045,10 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
return 0;
out_free_mq_map:
- kfree(set->mq_map);
- set->mq_map = NULL;
-out_free_tags:
+ for (i = 0; i < set->nr_maps; i++) {
+ kfree(set->map[i].mq_map);
+ set->map[i].mq_map = NULL;
+ }
kfree(set->tags);
set->tags = NULL;
return ret;
@@ -2906,13 +3057,15 @@ EXPORT_SYMBOL(blk_mq_alloc_tag_set);
void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
{
- int i;
+ int i, j;
- for (i = 0; i < nr_cpu_ids; i++)
+ for (i = 0; i < nr_hw_queues(set); i++)
blk_mq_free_map_and_requests(set, i);
- kfree(set->mq_map);
- set->mq_map = NULL;
+ for (j = 0; j < set->nr_maps; j++) {
+ kfree(set->map[j].mq_map);
+ set->map[j].mq_map = NULL;
+ }
kfree(set->tags);
set->tags = NULL;
@@ -3038,7 +3191,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
lockdep_assert_held(&set->tag_list_lock);
- if (nr_hw_queues > nr_cpu_ids)
+ if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids)
nr_hw_queues = nr_cpu_ids;
if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)
return;
@@ -3073,7 +3226,7 @@ fallback:
pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
nr_hw_queues, prev_nr_hw_queues);
set->nr_hw_queues = prev_nr_hw_queues;
- blk_mq_map_queues(set);
+ blk_mq_map_queues(&set->map[0]);
goto fallback;
}
blk_mq_map_swqueue(q);
@@ -3180,15 +3333,12 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
return false;
/*
- * poll_nsec can be:
+ * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
*
- * -1: don't ever hybrid sleep
* 0: use half of prev avg
* >0: use this specific value
*/
- if (q->poll_nsec == -1)
- return false;
- else if (q->poll_nsec > 0)
+ if (q->poll_nsec > 0)
nsecs = q->poll_nsec;
else
nsecs = blk_mq_poll_nsecs(q, hctx, rq);
@@ -3225,11 +3375,57 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
return true;
}
-static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static bool blk_mq_poll_hybrid(struct request_queue *q,
+ struct blk_mq_hw_ctx *hctx, blk_qc_t cookie)
{
- struct request_queue *q = hctx->queue;
+ struct request *rq;
+
+ if (q->poll_nsec == -1)
+ return false;
+
+ if (!blk_qc_t_is_internal(cookie))
+ rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
+ else {
+ rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
+ /*
+ * With scheduling, if the request has completed, we'll
+ * get a NULL return here, as we clear the sched tag when
+ * that happens. The request still remains valid, like always,
+ * so we should be safe with just the NULL check.
+ */
+ if (!rq)
+ return false;
+ }
+
+ return blk_mq_poll_hybrid_sleep(q, hctx, rq);
+}
+
+/**
+ * blk_poll - poll for IO completions
+ * @q: the queue
+ * @cookie: cookie passed back at IO submission time
+ * @spin: whether to spin for completions
+ *
+ * Description:
+ * Poll for completions on the passed in queue. Returns number of
+ * completed entries found. If @spin is true, then blk_poll will continue
+ * looping until at least one completion is found, unless the task is
+ * otherwise marked running (or we need to reschedule).
+ */
+int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
+{
+ struct blk_mq_hw_ctx *hctx;
long state;
+ if (!blk_qc_t_valid(cookie) ||
+ !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ return 0;
+
+ if (current->plug)
+ blk_flush_plug_list(current->plug, false);
+
+ hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
+
/*
* If we sleep, have the caller restart the poll loop to reset
* the state. Like for the other success return cases, the
@@ -3237,63 +3433,44 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
* the IO isn't complete, we'll get called again and will go
* straight to the busy poll loop.
*/
- if (blk_mq_poll_hybrid_sleep(q, hctx, rq))
- return true;
+ if (blk_mq_poll_hybrid(q, hctx, cookie))
+ return 1;
hctx->poll_considered++;
state = current->state;
- while (!need_resched()) {
+ do {
int ret;
hctx->poll_invoked++;
- ret = q->mq_ops->poll(hctx, rq->tag);
+ ret = q->mq_ops->poll(hctx);
if (ret > 0) {
hctx->poll_success++;
- set_current_state(TASK_RUNNING);
- return true;
+ __set_current_state(TASK_RUNNING);
+ return ret;
}
if (signal_pending_state(state, current))
- set_current_state(TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
if (current->state == TASK_RUNNING)
- return true;
- if (ret < 0)
+ return 1;
+ if (ret < 0 || !spin)
break;
cpu_relax();
- }
+ } while (!need_resched());
__set_current_state(TASK_RUNNING);
- return false;
+ return 0;
}
+EXPORT_SYMBOL_GPL(blk_poll);
-static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
+unsigned int blk_mq_rq_cpu(struct request *rq)
{
- struct blk_mq_hw_ctx *hctx;
- struct request *rq;
-
- if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
- return false;
-
- hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
- if (!blk_qc_t_is_internal(cookie))
- rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
- else {
- rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
- /*
- * With scheduling, if the request has completed, we'll
- * get a NULL return here, as we clear the sched tag when
- * that happens. The request still remains valid, like always,
- * so we should be safe with just the NULL check.
- */
- if (!rq)
- return false;
- }
-
- return __blk_mq_poll(hctx, rq);
+ return rq->mq_ctx->cpu;
}
+EXPORT_SYMBOL(blk_mq_rq_cpu);
static int __init blk_mq_init(void)
{
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 9497b47e2526..d943d46b0785 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -7,17 +7,22 @@
struct blk_mq_tag_set;
+struct blk_mq_ctxs {
+ struct kobject kobj;
+ struct blk_mq_ctx __percpu *queue_ctx;
+};
+
/**
* struct blk_mq_ctx - State for a software queue facing the submitting CPUs
*/
struct blk_mq_ctx {
struct {
spinlock_t lock;
- struct list_head rq_list;
- } ____cacheline_aligned_in_smp;
+ struct list_head rq_lists[HCTX_MAX_TYPES];
+ } ____cacheline_aligned_in_smp;
unsigned int cpu;
- unsigned int index_hw;
+ unsigned short index_hw[HCTX_MAX_TYPES];
/* incremented at dispatch time */
unsigned long rq_dispatched[2];
@@ -27,6 +32,7 @@ struct blk_mq_ctx {
unsigned long ____cacheline_aligned_in_smp rq_completed[2];
struct request_queue *queue;
+ struct blk_mq_ctxs *ctxs;
struct kobject kobj;
} ____cacheline_aligned_in_smp;
@@ -62,20 +68,55 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
-/* Used by blk_insert_cloned_request() to issue request directly */
-blk_status_t blk_mq_request_issue_directly(struct request *rq);
+blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+ struct request *rq,
+ blk_qc_t *cookie,
+ bool bypass, bool last);
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list);
/*
* CPU -> queue mappings
*/
-extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
+extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int);
+
+/*
+ * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue
+ * @q: request queue
+ * @type: the hctx type index
+ * @cpu: CPU
+ */
+static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q,
+ enum hctx_type type,
+ unsigned int cpu)
+{
+ return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
+}
+/*
+ * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
+ * @q: request queue
+ * @flags: request command flags
+ * @cpu: CPU
+ */
static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
- int cpu)
+ unsigned int flags,
+ unsigned int cpu)
{
- return q->queue_hw_ctx[q->mq_map[cpu]];
+ enum hctx_type type = HCTX_TYPE_DEFAULT;
+
+ if ((flags & REQ_HIPRI) &&
+ q->tag_set->nr_maps > HCTX_TYPE_POLL &&
+ q->tag_set->map[HCTX_TYPE_POLL].nr_queues &&
+ test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ type = HCTX_TYPE_POLL;
+
+ else if (((flags & REQ_OP_MASK) == REQ_OP_READ) &&
+ q->tag_set->nr_maps > HCTX_TYPE_READ &&
+ q->tag_set->map[HCTX_TYPE_READ].nr_queues)
+ type = HCTX_TYPE_READ;
+
+ return blk_mq_map_queue_type(q, type, cpu);
}
/*
@@ -126,6 +167,7 @@ struct blk_mq_alloc_data {
struct request_queue *q;
blk_mq_req_flags_t flags;
unsigned int shallow_depth;
+ unsigned int cmd_flags;
/* input & output parameter */
struct blk_mq_ctx *ctx;
@@ -150,8 +192,7 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
return hctx->nr_ctx && hctx->tags;
}
-void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2]);
+unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part);
void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2]);
@@ -195,21 +236,18 @@ static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
static inline void blk_mq_put_driver_tag(struct request *rq)
{
- struct blk_mq_hw_ctx *hctx;
-
if (rq->tag == -1 || rq->internal_tag == -1)
return;
- hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
- __blk_mq_put_driver_tag(hctx, rq);
+ __blk_mq_put_driver_tag(rq->mq_hctx, rq);
}
-static inline void blk_mq_clear_mq_map(struct blk_mq_tag_set *set)
+static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
{
int cpu;
for_each_possible_cpu(cpu)
- set->mq_map[cpu] = 0;
+ qmap->mq_map[cpu] = 0;
}
#endif
diff --git a/block/blk-pm.c b/block/blk-pm.c
index f8fdae01bea2..0a028c189897 100644
--- a/block/blk-pm.c
+++ b/block/blk-pm.c
@@ -89,12 +89,12 @@ int blk_pre_runtime_suspend(struct request_queue *q)
/* Switch q_usage_counter back to per-cpu mode. */
blk_mq_unfreeze_queue(q);
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
if (ret < 0)
pm_runtime_mark_last_busy(q->dev);
else
q->rpm_status = RPM_SUSPENDING;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
if (ret)
blk_clear_pm_only(q);
@@ -121,14 +121,14 @@ void blk_post_runtime_suspend(struct request_queue *q, int err)
if (!q->dev)
return;
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
if (!err) {
q->rpm_status = RPM_SUSPENDED;
} else {
q->rpm_status = RPM_ACTIVE;
pm_runtime_mark_last_busy(q->dev);
}
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
if (err)
blk_clear_pm_only(q);
@@ -151,9 +151,9 @@ void blk_pre_runtime_resume(struct request_queue *q)
if (!q->dev)
return;
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
q->rpm_status = RPM_RESUMING;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
}
EXPORT_SYMBOL(blk_pre_runtime_resume);
@@ -176,7 +176,7 @@ void blk_post_runtime_resume(struct request_queue *q, int err)
if (!q->dev)
return;
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
if (!err) {
q->rpm_status = RPM_ACTIVE;
pm_runtime_mark_last_busy(q->dev);
@@ -184,7 +184,7 @@ void blk_post_runtime_resume(struct request_queue *q, int err)
} else {
q->rpm_status = RPM_SUSPENDED;
}
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
if (!err)
blk_clear_pm_only(q);
@@ -207,10 +207,10 @@ EXPORT_SYMBOL(blk_post_runtime_resume);
*/
void blk_set_runtime_active(struct request_queue *q)
{
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
q->rpm_status = RPM_ACTIVE;
pm_runtime_mark_last_busy(q->dev);
pm_request_autosuspend(q->dev);
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
}
EXPORT_SYMBOL(blk_set_runtime_active);
diff --git a/block/blk-pm.h b/block/blk-pm.h
index a8564ea72a41..ea5507d23e75 100644
--- a/block/blk-pm.h
+++ b/block/blk-pm.h
@@ -21,7 +21,7 @@ static inline void blk_pm_mark_last_busy(struct request *rq)
static inline void blk_pm_requeue_request(struct request *rq)
{
- lockdep_assert_held(rq->q->queue_lock);
+ lockdep_assert_held(&rq->q->queue_lock);
if (rq->q->dev && !(rq->rq_flags & RQF_PM))
rq->q->nr_pending--;
@@ -30,7 +30,7 @@ static inline void blk_pm_requeue_request(struct request *rq)
static inline void blk_pm_add_request(struct request_queue *q,
struct request *rq)
{
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
if (q->dev && !(rq->rq_flags & RQF_PM))
q->nr_pending++;
@@ -38,7 +38,7 @@ static inline void blk_pm_add_request(struct request_queue *q,
static inline void blk_pm_put_request(struct request *rq)
{
- lockdep_assert_held(rq->q->queue_lock);
+ lockdep_assert_held(&rq->q->queue_lock);
if (rq->q->dev && !(rq->rq_flags & RQF_PM))
--rq->q->nr_pending;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 0005dfd568dd..d169d7188fa6 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -27,75 +27,67 @@ bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit)
return atomic_inc_below(&rq_wait->inflight, limit);
}
-void rq_qos_cleanup(struct request_queue *q, struct bio *bio)
+void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio)
{
- struct rq_qos *rqos;
-
- for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+ do {
if (rqos->ops->cleanup)
rqos->ops->cleanup(rqos, bio);
- }
+ rqos = rqos->next;
+ } while (rqos);
}
-void rq_qos_done(struct request_queue *q, struct request *rq)
+void __rq_qos_done(struct rq_qos *rqos, struct request *rq)
{
- struct rq_qos *rqos;
-
- for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+ do {
if (rqos->ops->done)
rqos->ops->done(rqos, rq);
- }
+ rqos = rqos->next;
+ } while (rqos);
}
-void rq_qos_issue(struct request_queue *q, struct request *rq)
+void __rq_qos_issue(struct rq_qos *rqos, struct request *rq)
{
- struct rq_qos *rqos;
-
- for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+ do {
if (rqos->ops->issue)
rqos->ops->issue(rqos, rq);
- }
+ rqos = rqos->next;
+ } while (rqos);
}
-void rq_qos_requeue(struct request_queue *q, struct request *rq)
+void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq)
{
- struct rq_qos *rqos;
-
- for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+ do {
if (rqos->ops->requeue)
rqos->ops->requeue(rqos, rq);
- }
+ rqos = rqos->next;
+ } while (rqos);
}
-void rq_qos_throttle(struct request_queue *q, struct bio *bio,
- spinlock_t *lock)
+void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio)
{
- struct rq_qos *rqos;
-
- for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+ do {
if (rqos->ops->throttle)
- rqos->ops->throttle(rqos, bio, lock);
- }
+ rqos->ops->throttle(rqos, bio);
+ rqos = rqos->next;
+ } while (rqos);
}
-void rq_qos_track(struct request_queue *q, struct request *rq, struct bio *bio)
+void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
- struct rq_qos *rqos;
-
- for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+ do {
if (rqos->ops->track)
rqos->ops->track(rqos, rq, bio);
- }
+ rqos = rqos->next;
+ } while (rqos);
}
-void rq_qos_done_bio(struct request_queue *q, struct bio *bio)
+void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
{
- struct rq_qos *rqos;
-
- for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+ do {
if (rqos->ops->done_bio)
rqos->ops->done_bio(rqos, bio);
- }
+ rqos = rqos->next;
+ } while (rqos);
}
/*
@@ -184,8 +176,96 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
rq_depth_calc_max_depth(rqd);
}
+struct rq_qos_wait_data {
+ struct wait_queue_entry wq;
+ struct task_struct *task;
+ struct rq_wait *rqw;
+ acquire_inflight_cb_t *cb;
+ void *private_data;
+ bool got_token;
+};
+
+static int rq_qos_wake_function(struct wait_queue_entry *curr,
+ unsigned int mode, int wake_flags, void *key)
+{
+ struct rq_qos_wait_data *data = container_of(curr,
+ struct rq_qos_wait_data,
+ wq);
+
+ /*
+ * If we fail to get a budget, return -1 to interrupt the wake up loop
+ * in __wake_up_common.
+ */
+ if (!data->cb(data->rqw, data->private_data))
+ return -1;
+
+ data->got_token = true;
+ list_del_init(&curr->entry);
+ wake_up_process(data->task);
+ return 1;
+}
+
+/**
+ * rq_qos_wait - throttle on a rqw if we need to
+ * @private_data - caller provided specific data
+ * @acquire_inflight_cb - inc the rqw->inflight counter if we can
+ * @cleanup_cb - the callback to cleanup in case we race with a waker
+ *
+ * This provides a uniform place for the rq_qos users to do their throttling.
+ * Since you can end up with a lot of things sleeping at once, this manages the
+ * waking up based on the resources available. The acquire_inflight_cb should
+ * inc the rqw->inflight if we have the ability to do so, or return false if not
+ * and then we will sleep until the room becomes available.
+ *
+ * cleanup_cb is in case that we race with a waker and need to cleanup the
+ * inflight count accordingly.
+ */
+void rq_qos_wait(struct rq_wait *rqw, void *private_data,
+ acquire_inflight_cb_t *acquire_inflight_cb,
+ cleanup_cb_t *cleanup_cb)
+{
+ struct rq_qos_wait_data data = {
+ .wq = {
+ .func = rq_qos_wake_function,
+ .entry = LIST_HEAD_INIT(data.wq.entry),
+ },
+ .task = current,
+ .rqw = rqw,
+ .cb = acquire_inflight_cb,
+ .private_data = private_data,
+ };
+ bool has_sleeper;
+
+ has_sleeper = wq_has_sleeper(&rqw->wait);
+ if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
+ return;
+
+ prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
+ do {
+ if (data.got_token)
+ break;
+ if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
+ finish_wait(&rqw->wait, &data.wq);
+
+ /*
+ * We raced with wbt_wake_function() getting a token,
+ * which means we now have two. Put our local token
+ * and wake anyone else potentially waiting for one.
+ */
+ if (data.got_token)
+ cleanup_cb(rqw, private_data);
+ break;
+ }
+ io_schedule();
+ has_sleeper = false;
+ } while (1);
+ finish_wait(&rqw->wait, &data.wq);
+}
+
void rq_qos_exit(struct request_queue *q)
{
+ blk_mq_debugfs_unregister_queue_rqos(q);
+
while (q->rq_qos) {
struct rq_qos *rqos = q->rq_qos;
q->rq_qos = rqos->next;
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 32b02efbfa66..564851889550 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -7,6 +7,10 @@
#include <linux/atomic.h>
#include <linux/wait.h>
+#include "blk-mq-debugfs.h"
+
+struct blk_mq_debugfs_attr;
+
enum rq_qos_id {
RQ_QOS_WBT,
RQ_QOS_CGROUP,
@@ -22,10 +26,13 @@ struct rq_qos {
struct request_queue *q;
enum rq_qos_id id;
struct rq_qos *next;
+#ifdef CONFIG_BLK_DEBUG_FS
+ struct dentry *debugfs_dir;
+#endif
};
struct rq_qos_ops {
- void (*throttle)(struct rq_qos *, struct bio *, spinlock_t *);
+ void (*throttle)(struct rq_qos *, struct bio *);
void (*track)(struct rq_qos *, struct request *, struct bio *);
void (*issue)(struct rq_qos *, struct request *);
void (*requeue)(struct rq_qos *, struct request *);
@@ -33,6 +40,7 @@ struct rq_qos_ops {
void (*done_bio)(struct rq_qos *, struct bio *);
void (*cleanup)(struct rq_qos *, struct bio *);
void (*exit)(struct rq_qos *);
+ const struct blk_mq_debugfs_attr *debugfs_attrs;
};
struct rq_depth {
@@ -66,6 +74,17 @@ static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q)
return rq_qos_id(q, RQ_QOS_CGROUP);
}
+static inline const char *rq_qos_id_to_name(enum rq_qos_id id)
+{
+ switch (id) {
+ case RQ_QOS_WBT:
+ return "wbt";
+ case RQ_QOS_CGROUP:
+ return "cgroup";
+ }
+ return "unknown";
+}
+
static inline void rq_wait_init(struct rq_wait *rq_wait)
{
atomic_set(&rq_wait->inflight, 0);
@@ -76,6 +95,9 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
{
rqos->next = q->rq_qos;
q->rq_qos = rqos;
+
+ if (rqos->ops->debugfs_attrs)
+ blk_mq_debugfs_register_rqos(rqos);
}
static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
@@ -91,19 +113,77 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
}
prev = cur;
}
+
+ blk_mq_debugfs_unregister_rqos(rqos);
}
+typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
+typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
+
+void rq_qos_wait(struct rq_wait *rqw, void *private_data,
+ acquire_inflight_cb_t *acquire_inflight_cb,
+ cleanup_cb_t *cleanup_cb);
bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
void rq_depth_scale_up(struct rq_depth *rqd);
void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
bool rq_depth_calc_max_depth(struct rq_depth *rqd);
-void rq_qos_cleanup(struct request_queue *, struct bio *);
-void rq_qos_done(struct request_queue *, struct request *);
-void rq_qos_issue(struct request_queue *, struct request *);
-void rq_qos_requeue(struct request_queue *, struct request *);
-void rq_qos_done_bio(struct request_queue *q, struct bio *bio);
-void rq_qos_throttle(struct request_queue *, struct bio *, spinlock_t *);
-void rq_qos_track(struct request_queue *q, struct request *, struct bio *);
+void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio);
+void __rq_qos_done(struct rq_qos *rqos, struct request *rq);
+void __rq_qos_issue(struct rq_qos *rqos, struct request *rq);
+void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq);
+void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio);
+void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio);
+void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio);
+
+static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio)
+{
+ if (q->rq_qos)
+ __rq_qos_cleanup(q->rq_qos, bio);
+}
+
+static inline void rq_qos_done(struct request_queue *q, struct request *rq)
+{
+ if (q->rq_qos)
+ __rq_qos_done(q->rq_qos, rq);
+}
+
+static inline void rq_qos_issue(struct request_queue *q, struct request *rq)
+{
+ if (q->rq_qos)
+ __rq_qos_issue(q->rq_qos, rq);
+}
+
+static inline void rq_qos_requeue(struct request_queue *q, struct request *rq)
+{
+ if (q->rq_qos)
+ __rq_qos_requeue(q->rq_qos, rq);
+}
+
+static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio)
+{
+ if (q->rq_qos)
+ __rq_qos_done_bio(q->rq_qos, bio);
+}
+
+static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
+{
+ /*
+ * BIO_TRACKED lets controllers know that a bio went through the
+ * normal rq_qos path.
+ */
+ bio_set_flag(bio, BIO_TRACKED);
+ if (q->rq_qos)
+ __rq_qos_throttle(q->rq_qos, bio);
+}
+
+static inline void rq_qos_track(struct request_queue *q, struct request *rq,
+ struct bio *bio)
+{
+ if (q->rq_qos)
+ __rq_qos_track(q->rq_qos, rq, bio);
+}
+
void rq_qos_exit(struct request_queue *);
+
#endif
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 696c04c1ab6c..3e7038e475ee 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -20,65 +20,12 @@ EXPORT_SYMBOL(blk_max_low_pfn);
unsigned long blk_max_pfn;
-/**
- * blk_queue_prep_rq - set a prepare_request function for queue
- * @q: queue
- * @pfn: prepare_request function
- *
- * It's possible for a queue to register a prepare_request callback which
- * is invoked before the request is handed to the request_fn. The goal of
- * the function is to prepare a request for I/O, it can be used to build a
- * cdb from the request data for instance.
- *
- */
-void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
-{
- q->prep_rq_fn = pfn;
-}
-EXPORT_SYMBOL(blk_queue_prep_rq);
-
-/**
- * blk_queue_unprep_rq - set an unprepare_request function for queue
- * @q: queue
- * @ufn: unprepare_request function
- *
- * It's possible for a queue to register an unprepare_request callback
- * which is invoked before the request is finally completed. The goal
- * of the function is to deallocate any data that was allocated in the
- * prepare_request callback.
- *
- */
-void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn)
-{
- q->unprep_rq_fn = ufn;
-}
-EXPORT_SYMBOL(blk_queue_unprep_rq);
-
-void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
-{
- q->softirq_done_fn = fn;
-}
-EXPORT_SYMBOL(blk_queue_softirq_done);
-
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
q->rq_timeout = timeout;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
-void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
-{
- WARN_ON_ONCE(q->mq_ops);
- q->rq_timed_out_fn = fn;
-}
-EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
-
-void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
-{
- q->lld_busy_fn = fn;
-}
-EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
-
/**
* blk_set_default_limits - reset limits to default values
* @lim: the queue_limits structure to reset
@@ -109,7 +56,6 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->alignment_offset = 0;
lim->io_opt = 0;
lim->misaligned = 0;
- lim->cluster = 1;
lim->zoned = BLK_ZONED_NONE;
}
EXPORT_SYMBOL(blk_set_default_limits);
@@ -169,8 +115,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
q->make_request_fn = mfn;
blk_queue_dma_alignment(q, 511);
- blk_queue_congestion_threshold(q);
- q->nr_batching = BLK_BATCH_REQ;
blk_set_default_limits(&q->limits);
}
@@ -602,8 +546,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->io_min = max(t->io_min, b->io_min);
t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
- t->cluster &= b->cluster;
-
/* Physical block size a multiple of the logical block size? */
if (t->physical_block_size & (t->logical_block_size - 1)) {
t->physical_block_size = t->logical_block_size;
@@ -889,16 +831,14 @@ EXPORT_SYMBOL(blk_set_queue_depth);
*/
void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
{
- spin_lock_irq(q->queue_lock);
if (wc)
- queue_flag_set(QUEUE_FLAG_WC, q);
+ blk_queue_flag_set(QUEUE_FLAG_WC, q);
else
- queue_flag_clear(QUEUE_FLAG_WC, q);
+ blk_queue_flag_clear(QUEUE_FLAG_WC, q);
if (fua)
- queue_flag_set(QUEUE_FLAG_FUA, q);
+ blk_queue_flag_set(QUEUE_FLAG_FUA, q);
else
- queue_flag_clear(QUEUE_FLAG_FUA, q);
- spin_unlock_irq(q->queue_lock);
+ blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
}
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index e47a2f751884..457d9ba3eb20 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -34,7 +34,7 @@ static __latent_entropy void blk_done_softirq(struct softirq_action *h)
rq = list_entry(local_list.next, struct request, ipi_list);
list_del_init(&rq->ipi_list);
- rq->q->softirq_done_fn(rq);
+ rq->q->mq_ops->complete(rq);
}
}
@@ -98,11 +98,11 @@ static int blk_softirq_cpu_dead(unsigned int cpu)
void __blk_complete_request(struct request *req)
{
struct request_queue *q = req->q;
- int cpu, ccpu = q->mq_ops ? req->mq_ctx->cpu : req->cpu;
+ int cpu, ccpu = req->mq_ctx->cpu;
unsigned long flags;
bool shared = false;
- BUG_ON(!q->softirq_done_fn);
+ BUG_ON(!q->mq_ops->complete);
local_irq_save(flags);
cpu = smp_processor_id();
@@ -143,27 +143,6 @@ do_local:
local_irq_restore(flags);
}
-EXPORT_SYMBOL(__blk_complete_request);
-
-/**
- * blk_complete_request - end I/O on a request
- * @req: the request being processed
- *
- * Description:
- * Ends all I/O on a request. It does not handle partial completions,
- * unless the driver actually implements this in its completion callback
- * through requeueing. The actual completion happens out-of-order,
- * through a softirq handler. The user must have registered a completion
- * callback through blk_queue_softirq_done().
- **/
-void blk_complete_request(struct request *req)
-{
- if (unlikely(blk_should_fake_timeout(req->q)))
- return;
- if (!blk_mark_rq_complete(req))
- __blk_complete_request(req);
-}
-EXPORT_SYMBOL(blk_complete_request);
static __init int blk_softirq_init(void)
{
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 90561af85a62..696a04176e4d 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -130,7 +130,6 @@ blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
return cb;
}
-EXPORT_SYMBOL_GPL(blk_stat_alloc_callback);
void blk_stat_add_callback(struct request_queue *q,
struct blk_stat_callback *cb)
@@ -151,7 +150,6 @@ void blk_stat_add_callback(struct request_queue *q,
blk_queue_flag_set(QUEUE_FLAG_STATS, q);
spin_unlock(&q->stats->lock);
}
-EXPORT_SYMBOL_GPL(blk_stat_add_callback);
void blk_stat_remove_callback(struct request_queue *q,
struct blk_stat_callback *cb)
@@ -164,7 +162,6 @@ void blk_stat_remove_callback(struct request_queue *q,
del_timer_sync(&cb->timer);
}
-EXPORT_SYMBOL_GPL(blk_stat_remove_callback);
static void blk_stat_free_callback_rcu(struct rcu_head *head)
{
@@ -181,7 +178,6 @@ void blk_stat_free_callback(struct blk_stat_callback *cb)
if (cb)
call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
}
-EXPORT_SYMBOL_GPL(blk_stat_free_callback);
void blk_stat_enable_accounting(struct request_queue *q)
{
diff --git a/block/blk-stat.h b/block/blk-stat.h
index f4a1568e81a4..17b47a86eefb 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -145,6 +145,11 @@ static inline void blk_stat_activate_nsecs(struct blk_stat_callback *cb,
mod_timer(&cb->timer, jiffies + nsecs_to_jiffies(nsecs));
}
+static inline void blk_stat_deactivate(struct blk_stat_callback *cb)
+{
+ del_timer_sync(&cb->timer);
+}
+
/**
* blk_stat_activate_msecs() - Gather block statistics during a time window in
* milliseconds.
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 844a454a7b3a..590d1ef2f961 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -68,7 +68,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
unsigned long nr;
int ret, err;
- if (!q->request_fn && !q->mq_ops)
+ if (!queue_is_mq(q))
return -EINVAL;
ret = queue_var_store(&nr, page, count);
@@ -78,11 +78,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
if (nr < BLKDEV_MIN_RQ)
nr = BLKDEV_MIN_RQ;
- if (q->request_fn)
- err = blk_update_nr_requests(q, nr);
- else
- err = blk_mq_update_nr_requests(q, nr);
-
+ err = blk_mq_update_nr_requests(q, nr);
if (err)
return err;
@@ -136,10 +132,7 @@ static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *
static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
{
- if (blk_queue_cluster(q))
- return queue_var_show(queue_max_segment_size(q), (page));
-
- return queue_var_show(PAGE_SIZE, (page));
+ return queue_var_show(queue_max_segment_size(q), (page));
}
static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
@@ -242,10 +235,10 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
return -EINVAL;
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
q->limits.max_sectors = max_sectors_kb << 1;
q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
return ret;
}
@@ -320,14 +313,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
if (ret < 0)
return ret;
- spin_lock_irq(q->queue_lock);
- queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
- queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
if (nm == 2)
- queue_flag_set(QUEUE_FLAG_NOMERGES, q);
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
else if (nm)
- queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
- spin_unlock_irq(q->queue_lock);
+ blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
return ret;
}
@@ -351,18 +342,16 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
if (ret < 0)
return ret;
- spin_lock_irq(q->queue_lock);
if (val == 2) {
- queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
- queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+ blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+ blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
} else if (val == 1) {
- queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
- queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+ blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
} else if (val == 0) {
- queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
- queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
}
- spin_unlock_irq(q->queue_lock);
#endif
return ret;
}
@@ -410,7 +399,8 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
unsigned long poll_on;
ssize_t ret;
- if (!q->mq_ops || !q->mq_ops->poll)
+ if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL ||
+ !q->tag_set->map[HCTX_TYPE_POLL].nr_queues)
return -EINVAL;
ret = queue_var_store(&poll_on, page, count);
@@ -425,6 +415,26 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
return ret;
}
+static ssize_t queue_io_timeout_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%u\n", jiffies_to_msecs(q->rq_timeout));
+}
+
+static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
+ size_t count)
+{
+ unsigned int val;
+ int err;
+
+ err = kstrtou32(page, 10, &val);
+ if (err || val == 0)
+ return -EINVAL;
+
+ blk_queue_rq_timeout(q, msecs_to_jiffies(val));
+
+ return count;
+}
+
static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
{
if (!wbt_rq_qos(q))
@@ -463,20 +473,14 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
* ends up either enabling or disabling wbt completely. We can't
* have IO inflight if that happens.
*/
- if (q->mq_ops) {
- blk_mq_freeze_queue(q);
- blk_mq_quiesce_queue(q);
- } else
- blk_queue_bypass_start(q);
+ blk_mq_freeze_queue(q);
+ blk_mq_quiesce_queue(q);
wbt_set_min_lat(q, val);
wbt_update_limits(q);
- if (q->mq_ops) {
- blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
- } else
- blk_queue_bypass_end(q);
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q);
return count;
}
@@ -699,6 +703,12 @@ static struct queue_sysfs_entry queue_dax_entry = {
.show = queue_dax_show,
};
+static struct queue_sysfs_entry queue_io_timeout_entry = {
+ .attr = {.name = "io_timeout", .mode = 0644 },
+ .show = queue_io_timeout_show,
+ .store = queue_io_timeout_store,
+};
+
static struct queue_sysfs_entry queue_wb_lat_entry = {
.attr = {.name = "wbt_lat_usec", .mode = 0644 },
.show = queue_wb_lat_show,
@@ -748,6 +758,7 @@ static struct attribute *default_attrs[] = {
&queue_dax_entry.attr,
&queue_wb_lat_entry.attr,
&queue_poll_delay_entry.attr,
+ &queue_io_timeout_entry.attr,
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
&throtl_sample_time_entry.attr,
#endif
@@ -847,24 +858,14 @@ static void __blk_release_queue(struct work_struct *work)
blk_free_queue_stats(q->stats);
- blk_exit_rl(q, &q->root_rl);
-
- if (q->queue_tags)
- __blk_queue_free_tags(q);
-
blk_queue_free_zone_bitmaps(q);
- if (!q->mq_ops) {
- if (q->exit_rq_fn)
- q->exit_rq_fn(q, q->fq->flush_rq);
- blk_free_flush_queue(q->fq);
- } else {
+ if (queue_is_mq(q))
blk_mq_release(q);
- }
blk_trace_shutdown(q);
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_debugfs_unregister(q);
bioset_exit(&q->bio_split);
@@ -909,7 +910,7 @@ int blk_register_queue(struct gendisk *disk)
WARN_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags),
"%s is registering an already registered queue\n",
kobject_name(&dev->kobj));
- queue_flag_set_unlocked(QUEUE_FLAG_REGISTERED, q);
+ blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
/*
* SCSI probing may synchronously create and destroy a lot of
@@ -921,9 +922,8 @@ int blk_register_queue(struct gendisk *disk)
* request_queues for non-existent devices never get registered.
*/
if (!blk_queue_init_done(q)) {
- queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
+ blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
percpu_ref_switch_to_percpu(&q->q_usage_counter);
- blk_queue_bypass_end(q);
}
ret = blk_trace_init_sysfs(dev);
@@ -939,7 +939,7 @@ int blk_register_queue(struct gendisk *disk)
goto unlock;
}
- if (q->mq_ops) {
+ if (queue_is_mq(q)) {
__blk_mq_register_dev(dev, q);
blk_mq_debugfs_register(q);
}
@@ -950,7 +950,7 @@ int blk_register_queue(struct gendisk *disk)
blk_throtl_register_queue(q);
- if (q->request_fn || (q->mq_ops && q->elevator)) {
+ if (q->elevator) {
ret = elv_register_queue(q);
if (ret) {
mutex_unlock(&q->sysfs_lock);
@@ -999,7 +999,7 @@ void blk_unregister_queue(struct gendisk *disk)
* Remove the sysfs attributes before unregistering the queue data
* structures that can be modified through sysfs.
*/
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_unregister_dev(disk_to_dev(disk), q);
mutex_unlock(&q->sysfs_lock);
@@ -1008,7 +1008,7 @@ void blk_unregister_queue(struct gendisk *disk)
blk_trace_remove_sysfs(disk_to_dev(disk));
mutex_lock(&q->sysfs_lock);
- if (q->request_fn || (q->mq_ops && q->elevator))
+ if (q->elevator)
elv_unregister_queue(q);
mutex_unlock(&q->sysfs_lock);
diff --git a/block/blk-tag.c b/block/blk-tag.c
deleted file mode 100644
index fbc153aef166..000000000000
--- a/block/blk-tag.c
+++ /dev/null
@@ -1,378 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Functions related to tagged command queuing
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/slab.h>
-
-#include "blk.h"
-
-/**
- * blk_queue_find_tag - find a request by its tag and queue
- * @q: The request queue for the device
- * @tag: The tag of the request
- *
- * Notes:
- * Should be used when a device returns a tag and you want to match
- * it with a request.
- *
- * no locks need be held.
- **/
-struct request *blk_queue_find_tag(struct request_queue *q, int tag)
-{
- return blk_map_queue_find_tag(q->queue_tags, tag);
-}
-EXPORT_SYMBOL(blk_queue_find_tag);
-
-/**
- * blk_free_tags - release a given set of tag maintenance info
- * @bqt: the tag map to free
- *
- * Drop the reference count on @bqt and frees it when the last reference
- * is dropped.
- */
-void blk_free_tags(struct blk_queue_tag *bqt)
-{
- if (atomic_dec_and_test(&bqt->refcnt)) {
- BUG_ON(find_first_bit(bqt->tag_map, bqt->max_depth) <
- bqt->max_depth);
-
- kfree(bqt->tag_index);
- bqt->tag_index = NULL;
-
- kfree(bqt->tag_map);
- bqt->tag_map = NULL;
-
- kfree(bqt);
- }
-}
-EXPORT_SYMBOL(blk_free_tags);
-
-/**
- * __blk_queue_free_tags - release tag maintenance info
- * @q: the request queue for the device
- *
- * Notes:
- * blk_cleanup_queue() will take care of calling this function, if tagging
- * has been used. So there's no need to call this directly.
- **/
-void __blk_queue_free_tags(struct request_queue *q)
-{
- struct blk_queue_tag *bqt = q->queue_tags;
-
- if (!bqt)
- return;
-
- blk_free_tags(bqt);
-
- q->queue_tags = NULL;
- queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q);
-}
-
-/**
- * blk_queue_free_tags - release tag maintenance info
- * @q: the request queue for the device
- *
- * Notes:
- * This is used to disable tagged queuing to a device, yet leave
- * queue in function.
- **/
-void blk_queue_free_tags(struct request_queue *q)
-{
- queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q);
-}
-EXPORT_SYMBOL(blk_queue_free_tags);
-
-static int
-init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth)
-{
- struct request **tag_index;
- unsigned long *tag_map;
- int nr_ulongs;
-
- if (q && depth > q->nr_requests * 2) {
- depth = q->nr_requests * 2;
- printk(KERN_ERR "%s: adjusted depth to %d\n",
- __func__, depth);
- }
-
- tag_index = kcalloc(depth, sizeof(struct request *), GFP_ATOMIC);
- if (!tag_index)
- goto fail;
-
- nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
- tag_map = kcalloc(nr_ulongs, sizeof(unsigned long), GFP_ATOMIC);
- if (!tag_map)
- goto fail;
-
- tags->real_max_depth = depth;
- tags->max_depth = depth;
- tags->tag_index = tag_index;
- tags->tag_map = tag_map;
-
- return 0;
-fail:
- kfree(tag_index);
- return -ENOMEM;
-}
-
-static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
- int depth, int alloc_policy)
-{
- struct blk_queue_tag *tags;
-
- tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
- if (!tags)
- goto fail;
-
- if (init_tag_map(q, tags, depth))
- goto fail;
-
- atomic_set(&tags->refcnt, 1);
- tags->alloc_policy = alloc_policy;
- tags->next_tag = 0;
- return tags;
-fail:
- kfree(tags);
- return NULL;
-}
-
-/**
- * blk_init_tags - initialize the tag info for an external tag map
- * @depth: the maximum queue depth supported
- * @alloc_policy: tag allocation policy
- **/
-struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy)
-{
- return __blk_queue_init_tags(NULL, depth, alloc_policy);
-}
-EXPORT_SYMBOL(blk_init_tags);
-
-/**
- * blk_queue_init_tags - initialize the queue tag info
- * @q: the request queue for the device
- * @depth: the maximum queue depth supported
- * @tags: the tag to use
- * @alloc_policy: tag allocation policy
- *
- * Queue lock must be held here if the function is called to resize an
- * existing map.
- **/
-int blk_queue_init_tags(struct request_queue *q, int depth,
- struct blk_queue_tag *tags, int alloc_policy)
-{
- int rc;
-
- BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
-
- if (!tags && !q->queue_tags) {
- tags = __blk_queue_init_tags(q, depth, alloc_policy);
-
- if (!tags)
- return -ENOMEM;
-
- } else if (q->queue_tags) {
- rc = blk_queue_resize_tags(q, depth);
- if (rc)
- return rc;
- queue_flag_set(QUEUE_FLAG_QUEUED, q);
- return 0;
- } else
- atomic_inc(&tags->refcnt);
-
- /*
- * assign it, all done
- */
- q->queue_tags = tags;
- queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
- return 0;
-}
-EXPORT_SYMBOL(blk_queue_init_tags);
-
-/**
- * blk_queue_resize_tags - change the queueing depth
- * @q: the request queue for the device
- * @new_depth: the new max command queueing depth
- *
- * Notes:
- * Must be called with the queue lock held.
- **/
-int blk_queue_resize_tags(struct request_queue *q, int new_depth)
-{
- struct blk_queue_tag *bqt = q->queue_tags;
- struct request **tag_index;
- unsigned long *tag_map;
- int max_depth, nr_ulongs;
-
- if (!bqt)
- return -ENXIO;
-
- /*
- * if we already have large enough real_max_depth. just
- * adjust max_depth. *NOTE* as requests with tag value
- * between new_depth and real_max_depth can be in-flight, tag
- * map can not be shrunk blindly here.
- */
- if (new_depth <= bqt->real_max_depth) {
- bqt->max_depth = new_depth;
- return 0;
- }
-
- /*
- * Currently cannot replace a shared tag map with a new
- * one, so error out if this is the case
- */
- if (atomic_read(&bqt->refcnt) != 1)
- return -EBUSY;
-
- /*
- * save the old state info, so we can copy it back
- */
- tag_index = bqt->tag_index;
- tag_map = bqt->tag_map;
- max_depth = bqt->real_max_depth;
-
- if (init_tag_map(q, bqt, new_depth))
- return -ENOMEM;
-
- memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
- nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;
- memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));
-
- kfree(tag_index);
- kfree(tag_map);
- return 0;
-}
-EXPORT_SYMBOL(blk_queue_resize_tags);
-
-/**
- * blk_queue_end_tag - end tag operations for a request
- * @q: the request queue for the device
- * @rq: the request that has completed
- *
- * Description:
- * Typically called when end_that_request_first() returns %0, meaning
- * all transfers have been done for a request. It's important to call
- * this function before end_that_request_last(), as that will put the
- * request back on the free list thus corrupting the internal tag list.
- **/
-void blk_queue_end_tag(struct request_queue *q, struct request *rq)
-{
- struct blk_queue_tag *bqt = q->queue_tags;
- unsigned tag = rq->tag; /* negative tags invalid */
-
- lockdep_assert_held(q->queue_lock);
-
- BUG_ON(tag >= bqt->real_max_depth);
-
- list_del_init(&rq->queuelist);
- rq->rq_flags &= ~RQF_QUEUED;
- rq->tag = -1;
- rq->internal_tag = -1;
-
- if (unlikely(bqt->tag_index[tag] == NULL))
- printk(KERN_ERR "%s: tag %d is missing\n",
- __func__, tag);
-
- bqt->tag_index[tag] = NULL;
-
- if (unlikely(!test_bit(tag, bqt->tag_map))) {
- printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
- __func__, tag);
- return;
- }
- /*
- * The tag_map bit acts as a lock for tag_index[bit], so we need
- * unlock memory barrier semantics.
- */
- clear_bit_unlock(tag, bqt->tag_map);
-}
-
-/**
- * blk_queue_start_tag - find a free tag and assign it
- * @q: the request queue for the device
- * @rq: the block request that needs tagging
- *
- * Description:
- * This can either be used as a stand-alone helper, or possibly be
- * assigned as the queue &prep_rq_fn (in which case &struct request
- * automagically gets a tag assigned). Note that this function
- * assumes that any type of request can be queued! if this is not
- * true for your device, you must check the request type before
- * calling this function. The request will also be removed from
- * the request queue, so it's the drivers responsibility to readd
- * it if it should need to be restarted for some reason.
- **/
-int blk_queue_start_tag(struct request_queue *q, struct request *rq)
-{
- struct blk_queue_tag *bqt = q->queue_tags;
- unsigned max_depth;
- int tag;
-
- lockdep_assert_held(q->queue_lock);
-
- if (unlikely((rq->rq_flags & RQF_QUEUED))) {
- printk(KERN_ERR
- "%s: request %p for device [%s] already tagged %d",
- __func__, rq,
- rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
- BUG();
- }
-
- /*
- * Protect against shared tag maps, as we may not have exclusive
- * access to the tag map.
- *
- * We reserve a few tags just for sync IO, since we don't want
- * to starve sync IO on behalf of flooding async IO.
- */
- max_depth = bqt->max_depth;
- if (!rq_is_sync(rq) && max_depth > 1) {
- switch (max_depth) {
- case 2:
- max_depth = 1;
- break;
- case 3:
- max_depth = 2;
- break;
- default:
- max_depth -= 2;
- }
- if (q->in_flight[BLK_RW_ASYNC] > max_depth)
- return 1;
- }
-
- do {
- if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) {
- tag = find_first_zero_bit(bqt->tag_map, max_depth);
- if (tag >= max_depth)
- return 1;
- } else {
- int start = bqt->next_tag;
- int size = min_t(int, bqt->max_depth, max_depth + start);
- tag = find_next_zero_bit(bqt->tag_map, size, start);
- if (tag >= size && start + size > bqt->max_depth) {
- size = start + size - bqt->max_depth;
- tag = find_first_zero_bit(bqt->tag_map, size);
- }
- if (tag >= size)
- return 1;
- }
-
- } while (test_and_set_bit_lock(tag, bqt->tag_map));
- /*
- * We need lock ordering semantics given by test_and_set_bit_lock.
- * See blk_queue_end_tag for details.
- */
-
- bqt->next_tag = (tag + 1) % bqt->max_depth;
- rq->rq_flags |= RQF_QUEUED;
- rq->tag = tag;
- bqt->tag_index[tag] = rq;
- blk_start_request(rq);
- return 0;
-}
-EXPORT_SYMBOL(blk_queue_start_tag);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index db1a3a2ae006..1b97a73d2fb1 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1243,7 +1243,7 @@ static void throtl_pending_timer_fn(struct timer_list *t)
bool dispatched;
int ret;
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
if (throtl_can_upgrade(td, NULL))
throtl_upgrade_state(td);
@@ -1266,9 +1266,9 @@ again:
break;
/* this dispatch windows is still open, relax and repeat */
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
cpu_relax();
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
}
if (!dispatched)
@@ -1290,7 +1290,7 @@ again:
queue_work(kthrotld_workqueue, &td->dispatch_work);
}
out_unlock:
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
}
/**
@@ -1314,11 +1314,11 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
bio_list_init(&bio_list_on_stack);
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
for (rw = READ; rw <= WRITE; rw++)
while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))
bio_list_add(&bio_list_on_stack, bio);
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
if (!bio_list_empty(&bio_list_on_stack)) {
blk_start_plug(&plug);
@@ -2115,16 +2115,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
}
#endif
-static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
-{
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
- /* fallback to root_blkg if we fail to get a blkg ref */
- if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
- bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
- bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-#endif
-}
-
bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
struct bio *bio)
{
@@ -2141,14 +2131,10 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw])
goto out;
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
throtl_update_latency_buckets(td);
- if (unlikely(blk_queue_bypass(q)))
- goto out_unlock;
-
- blk_throtl_assoc_bio(tg, bio);
blk_throtl_update_idletime(tg);
sq = &tg->service_queue;
@@ -2227,7 +2213,7 @@ again:
}
out_unlock:
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
out:
bio_set_flag(bio, BIO_THROTTLED);
@@ -2348,7 +2334,7 @@ static void tg_drain_bios(struct throtl_service_queue *parent_sq)
* Dispatch all currently throttled bios on @q through ->make_request_fn().
*/
void blk_throtl_drain(struct request_queue *q)
- __releases(q->queue_lock) __acquires(q->queue_lock)
+ __releases(&q->queue_lock) __acquires(&q->queue_lock)
{
struct throtl_data *td = q->td;
struct blkcg_gq *blkg;
@@ -2356,7 +2342,6 @@ void blk_throtl_drain(struct request_queue *q)
struct bio *bio;
int rw;
- queue_lockdep_assert_held(q);
rcu_read_lock();
/*
@@ -2372,7 +2357,7 @@ void blk_throtl_drain(struct request_queue *q)
tg_drain_bios(&td->service_queue);
rcu_read_unlock();
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
/* all bios now should be in td->service_queue, issue them */
for (rw = READ; rw <= WRITE; rw++)
@@ -2380,7 +2365,7 @@ void blk_throtl_drain(struct request_queue *q)
NULL)))
generic_make_request(bio);
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
}
int blk_throtl_init(struct request_queue *q)
@@ -2460,7 +2445,7 @@ void blk_throtl_register_queue(struct request_queue *q)
td->throtl_slice = DFL_THROTL_SLICE_HD;
#endif
- td->track_bio_latency = !queue_is_rq_based(q);
+ td->track_bio_latency = !queue_is_mq(q);
if (!td->track_bio_latency)
blk_stat_enable_accounting(q);
}
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index f2cfd56e1606..124c26128bf6 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -68,80 +68,6 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
#endif /* CONFIG_FAIL_IO_TIMEOUT */
-/*
- * blk_delete_timer - Delete/cancel timer for a given function.
- * @req: request that we are canceling timer for
- *
- */
-void blk_delete_timer(struct request *req)
-{
- list_del_init(&req->timeout_list);
-}
-
-static void blk_rq_timed_out(struct request *req)
-{
- struct request_queue *q = req->q;
- enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
-
- if (q->rq_timed_out_fn)
- ret = q->rq_timed_out_fn(req);
- switch (ret) {
- case BLK_EH_RESET_TIMER:
- blk_add_timer(req);
- blk_clear_rq_complete(req);
- break;
- case BLK_EH_DONE:
- /*
- * LLD handles this for now but in the future
- * we can send a request msg to abort the command
- * and we can move more of the generic scsi eh code to
- * the blk layer.
- */
- break;
- default:
- printk(KERN_ERR "block: bad eh return: %d\n", ret);
- break;
- }
-}
-
-static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
- unsigned int *next_set)
-{
- const unsigned long deadline = blk_rq_deadline(rq);
-
- if (time_after_eq(jiffies, deadline)) {
- list_del_init(&rq->timeout_list);
-
- /*
- * Check if we raced with end io completion
- */
- if (!blk_mark_rq_complete(rq))
- blk_rq_timed_out(rq);
- } else if (!*next_set || time_after(*next_timeout, deadline)) {
- *next_timeout = deadline;
- *next_set = 1;
- }
-}
-
-void blk_timeout_work(struct work_struct *work)
-{
- struct request_queue *q =
- container_of(work, struct request_queue, timeout_work);
- unsigned long flags, next = 0;
- struct request *rq, *tmp;
- int next_set = 0;
-
- spin_lock_irqsave(q->queue_lock, flags);
-
- list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
- blk_rq_check_expired(rq, &next, &next_set);
-
- if (next_set)
- mod_timer(&q->timeout, round_jiffies_up(next));
-
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
/**
* blk_abort_request -- Request request recovery for the specified command
* @req: pointer to the request of interest
@@ -149,24 +75,17 @@ void blk_timeout_work(struct work_struct *work)
* This function requests that the block layer start recovery for the
* request by deleting the timer and calling the q's timeout function.
* LLDDs who implement their own error recovery MAY ignore the timeout
- * event if they generated blk_abort_req. Must hold queue lock.
+ * event if they generated blk_abort_request.
*/
void blk_abort_request(struct request *req)
{
- if (req->q->mq_ops) {
- /*
- * All we need to ensure is that timeout scan takes place
- * immediately and that scan sees the new timeout value.
- * No need for fancy synchronizations.
- */
- blk_rq_set_deadline(req, jiffies);
- kblockd_schedule_work(&req->q->timeout_work);
- } else {
- if (blk_mark_rq_complete(req))
- return;
- blk_delete_timer(req);
- blk_rq_timed_out(req);
- }
+ /*
+ * All we need to ensure is that timeout scan takes place
+ * immediately and that scan sees the new timeout value.
+ * No need for fancy synchronizations.
+ */
+ WRITE_ONCE(req->deadline, jiffies);
+ kblockd_schedule_work(&req->q->timeout_work);
}
EXPORT_SYMBOL_GPL(blk_abort_request);
@@ -194,15 +113,6 @@ void blk_add_timer(struct request *req)
struct request_queue *q = req->q;
unsigned long expiry;
- if (!q->mq_ops)
- lockdep_assert_held(q->queue_lock);
-
- /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
- if (!q->mq_ops && !q->rq_timed_out_fn)
- return;
-
- BUG_ON(!list_empty(&req->timeout_list));
-
/*
* Some LLDs, like scsi, peek at the timeout to prevent a
* command from being retried forever.
@@ -211,21 +121,16 @@ void blk_add_timer(struct request *req)
req->timeout = q->rq_timeout;
req->rq_flags &= ~RQF_TIMED_OUT;
- blk_rq_set_deadline(req, jiffies + req->timeout);
- /*
- * Only the non-mq case needs to add the request to a protected list.
- * For the mq case we simply scan the tag map.
- */
- if (!q->mq_ops)
- list_add_tail(&req->timeout_list, &req->q->timeout_list);
+ expiry = jiffies + req->timeout;
+ WRITE_ONCE(req->deadline, expiry);
/*
* If the timer isn't already pending or this timeout is earlier
* than an existing one, modify the timer. Round up to next nearest
* second.
*/
- expiry = blk_rq_timeout(round_jiffies_up(blk_rq_deadline(req)));
+ expiry = blk_rq_timeout(round_jiffies_up(expiry));
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires)) {
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8ac93fcbaa2e..f0c56649775f 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -489,31 +489,21 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
}
struct wbt_wait_data {
- struct wait_queue_entry wq;
- struct task_struct *task;
struct rq_wb *rwb;
- struct rq_wait *rqw;
+ enum wbt_flags wb_acct;
unsigned long rw;
- bool got_token;
};
-static int wbt_wake_function(struct wait_queue_entry *curr, unsigned int mode,
- int wake_flags, void *key)
+static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
{
- struct wbt_wait_data *data = container_of(curr, struct wbt_wait_data,
- wq);
-
- /*
- * If we fail to get a budget, return -1 to interrupt the wake up
- * loop in __wake_up_common.
- */
- if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw)))
- return -1;
+ struct wbt_wait_data *data = private_data;
+ return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
+}
- data->got_token = true;
- list_del_init(&curr->entry);
- wake_up_process(data->task);
- return 1;
+static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
+{
+ struct wbt_wait_data *data = private_data;
+ wbt_rqw_done(data->rwb, rqw, data->wb_acct);
}
/*
@@ -521,57 +511,16 @@ static int wbt_wake_function(struct wait_queue_entry *curr, unsigned int mode,
* the timer to kick off queuing again.
*/
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
- unsigned long rw, spinlock_t *lock)
- __releases(lock)
- __acquires(lock)
+ unsigned long rw)
{
struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
struct wbt_wait_data data = {
- .wq = {
- .func = wbt_wake_function,
- .entry = LIST_HEAD_INIT(data.wq.entry),
- },
- .task = current,
.rwb = rwb,
- .rqw = rqw,
+ .wb_acct = wb_acct,
.rw = rw,
};
- bool has_sleeper;
-
- has_sleeper = wq_has_sleeper(&rqw->wait);
- if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
- return;
- prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
- do {
- if (data.got_token)
- break;
-
- if (!has_sleeper &&
- rq_wait_inc_below(rqw, get_limit(rwb, rw))) {
- finish_wait(&rqw->wait, &data.wq);
-
- /*
- * We raced with wbt_wake_function() getting a token,
- * which means we now have two. Put our local token
- * and wake anyone else potentially waiting for one.
- */
- if (data.got_token)
- wbt_rqw_done(rwb, rqw, wb_acct);
- break;
- }
-
- if (lock) {
- spin_unlock_irq(lock);
- io_schedule();
- spin_lock_irq(lock);
- } else
- io_schedule();
-
- has_sleeper = false;
- } while (1);
-
- finish_wait(&rqw->wait, &data.wq);
+ rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
}
static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
@@ -624,7 +573,7 @@ static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
* in an irq held spinlock, if it holds one when calling this function.
* If we do sleep, we'll release and re-grab it.
*/
-static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock)
+static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
{
struct rq_wb *rwb = RQWB(rqos);
enum wbt_flags flags;
@@ -636,7 +585,7 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock)
return;
}
- __wbt_wait(rwb, flags, bio->bi_opf, lock);
+ __wbt_wait(rwb, flags, bio->bi_opf);
if (!blk_stat_is_active(rwb->cb))
rwb_arm_timer(rwb);
@@ -709,8 +658,7 @@ void wbt_enable_default(struct request_queue *q)
if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
return;
- if ((q->mq_ops && IS_ENABLED(CONFIG_BLK_WBT_MQ)) ||
- (q->request_fn && IS_ENABLED(CONFIG_BLK_WBT_SQ)))
+ if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
wbt_init(q);
}
EXPORT_SYMBOL_GPL(wbt_enable_default);
@@ -760,11 +708,100 @@ void wbt_disable_default(struct request_queue *q)
if (!rqos)
return;
rwb = RQWB(rqos);
- if (rwb->enable_state == WBT_STATE_ON_DEFAULT)
+ if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
+ blk_stat_deactivate(rwb->cb);
rwb->wb_normal = 0;
+ }
}
EXPORT_SYMBOL_GPL(wbt_disable_default);
+#ifdef CONFIG_BLK_DEBUG_FS
+static int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%llu\n", rwb->cur_win_nsec);
+ return 0;
+}
+
+static int wbt_enabled_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%d\n", rwb->enable_state);
+ return 0;
+}
+
+static int wbt_id_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+
+ seq_printf(m, "%u\n", rqos->id);
+ return 0;
+}
+
+static int wbt_inflight_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+ int i;
+
+ for (i = 0; i < WBT_NUM_RWQ; i++)
+ seq_printf(m, "%d: inflight %d\n", i,
+ atomic_read(&rwb->rq_wait[i].inflight));
+ return 0;
+}
+
+static int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%lu\n", rwb->min_lat_nsec);
+ return 0;
+}
+
+static int wbt_unknown_cnt_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%u\n", rwb->unknown_cnt);
+ return 0;
+}
+
+static int wbt_normal_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%u\n", rwb->wb_normal);
+ return 0;
+}
+
+static int wbt_background_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%u\n", rwb->wb_background);
+ return 0;
+}
+
+static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
+ {"curr_win_nsec", 0400, wbt_curr_win_nsec_show},
+ {"enabled", 0400, wbt_enabled_show},
+ {"id", 0400, wbt_id_show},
+ {"inflight", 0400, wbt_inflight_show},
+ {"min_lat_nsec", 0400, wbt_min_lat_nsec_show},
+ {"unknown_cnt", 0400, wbt_unknown_cnt_show},
+ {"wb_normal", 0400, wbt_normal_show},
+ {"wb_background", 0400, wbt_background_show},
+ {},
+};
+#endif
static struct rq_qos_ops wbt_rqos_ops = {
.throttle = wbt_wait,
@@ -774,6 +811,9 @@ static struct rq_qos_ops wbt_rqos_ops = {
.done = wbt_done,
.cleanup = wbt_cleanup,
.exit = wbt_exit,
+#ifdef CONFIG_BLK_DEBUG_FS
+ .debugfs_attrs = wbt_debugfs_attrs,
+#endif
};
int wbt_init(struct request_queue *q)
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index a327bef07642..2d98803faec2 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -421,7 +421,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
* BIO based queues do not use a scheduler so only q->nr_zones
* needs to be updated so that the sysfs exposed value is correct.
*/
- if (!queue_is_rq_based(q)) {
+ if (!queue_is_mq(q)) {
q->nr_zones = nr_zones;
return 0;
}
diff --git a/block/blk.h b/block/blk.h
index 0089fefdf771..848278c52030 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -7,12 +7,6 @@
#include <xen/xen.h>
#include "blk-mq.h"
-/* Amount of time in which a process may batch requests */
-#define BLK_BATCH_TIME (HZ/50UL)
-
-/* Number of requests a "batching" process may submit */
-#define BLK_BATCH_REQ 32
-
/* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT (5 * HZ)
@@ -38,85 +32,13 @@ struct blk_flush_queue {
};
extern struct kmem_cache *blk_requestq_cachep;
-extern struct kmem_cache *request_cachep;
extern struct kobj_type blk_queue_ktype;
extern struct ida blk_queue_ida;
-/*
- * @q->queue_lock is set while a queue is being initialized. Since we know
- * that no other threads access the queue object before @q->queue_lock has
- * been set, it is safe to manipulate queue flags without holding the
- * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
- * blk_init_allocated_queue().
- */
-static inline void queue_lockdep_assert_held(struct request_queue *q)
-{
- if (q->queue_lock)
- lockdep_assert_held(q->queue_lock);
-}
-
-static inline void queue_flag_set_unlocked(unsigned int flag,
- struct request_queue *q)
-{
- if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) &&
- kref_read(&q->kobj.kref))
- lockdep_assert_held(q->queue_lock);
- __set_bit(flag, &q->queue_flags);
-}
-
-static inline void queue_flag_clear_unlocked(unsigned int flag,
- struct request_queue *q)
-{
- if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) &&
- kref_read(&q->kobj.kref))
- lockdep_assert_held(q->queue_lock);
- __clear_bit(flag, &q->queue_flags);
-}
-
-static inline int queue_flag_test_and_clear(unsigned int flag,
- struct request_queue *q)
-{
- queue_lockdep_assert_held(q);
-
- if (test_bit(flag, &q->queue_flags)) {
- __clear_bit(flag, &q->queue_flags);
- return 1;
- }
-
- return 0;
-}
-
-static inline int queue_flag_test_and_set(unsigned int flag,
- struct request_queue *q)
-{
- queue_lockdep_assert_held(q);
-
- if (!test_bit(flag, &q->queue_flags)) {
- __set_bit(flag, &q->queue_flags);
- return 0;
- }
-
- return 1;
-}
-
-static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
-{
- queue_lockdep_assert_held(q);
- __set_bit(flag, &q->queue_flags);
-}
-
-static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
-{
- queue_lockdep_assert_held(q);
- __clear_bit(flag, &q->queue_flags);
-}
-
-static inline struct blk_flush_queue *blk_get_flush_queue(
- struct request_queue *q, struct blk_mq_ctx *ctx)
+static inline struct blk_flush_queue *
+blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
{
- if (q->mq_ops)
- return blk_mq_map_queue(q, ctx->cpu)->fq;
- return q->fq;
+ return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx->cpu)->fq;
}
static inline void __blk_get_queue(struct request_queue *q)
@@ -128,15 +50,9 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
int node, int cmd_size, gfp_t flags);
void blk_free_flush_queue(struct blk_flush_queue *q);
-int blk_init_rl(struct request_list *rl, struct request_queue *q,
- gfp_t gfp_mask);
-void blk_exit_rl(struct request_queue *q, struct request_list *rl);
void blk_exit_queue(struct request_queue *q);
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio);
-void blk_queue_bypass_start(struct request_queue *q);
-void blk_queue_bypass_end(struct request_queue *q);
-void __blk_queue_free_tags(struct request_queue *q);
void blk_freeze_queue(struct request_queue *q);
static inline void blk_queue_enter_live(struct request_queue *q)
@@ -235,11 +151,8 @@ static inline bool bio_integrity_endio(struct bio *bio)
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
-void blk_timeout_work(struct work_struct *work);
unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req);
-void blk_delete_timer(struct request *);
-
bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
struct bio *bio);
@@ -248,58 +161,19 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
struct bio *bio);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
- unsigned int *request_count,
struct request **same_queue_rq);
-unsigned int blk_plug_queued_count(struct request_queue *q);
void blk_account_io_start(struct request *req, bool new_io);
void blk_account_io_completion(struct request *req, unsigned int bytes);
void blk_account_io_done(struct request *req, u64 now);
/*
- * EH timer and IO completion will both attempt to 'grab' the request, make
- * sure that only one of them succeeds. Steal the bottom bit of the
- * __deadline field for this.
- */
-static inline int blk_mark_rq_complete(struct request *rq)
-{
- return test_and_set_bit(0, &rq->__deadline);
-}
-
-static inline void blk_clear_rq_complete(struct request *rq)
-{
- clear_bit(0, &rq->__deadline);
-}
-
-static inline bool blk_rq_is_complete(struct request *rq)
-{
- return test_bit(0, &rq->__deadline);
-}
-
-/*
* Internal elevator interface
*/
#define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)
void blk_insert_flush(struct request *rq);
-static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e->type->ops.sq.elevator_activate_req_fn)
- e->type->ops.sq.elevator_activate_req_fn(q, rq);
-}
-
-static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e->type->ops.sq.elevator_deactivate_req_fn)
- e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
-}
-
-int elevator_init(struct request_queue *);
int elevator_init_mq(struct request_queue *q);
int elevator_switch_mq(struct request_queue *q,
struct elevator_type *new_e);
@@ -334,31 +208,8 @@ void blk_rq_set_mixed_merge(struct request *rq);
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
-void blk_queue_congestion_threshold(struct request_queue *q);
-
int blk_dev_init(void);
-
-/*
- * Return the threshold (number of used requests) at which the queue is
- * considered to be congested. It include a little hysteresis to keep the
- * context switch rate down.
- */
-static inline int queue_congestion_on_threshold(struct request_queue *q)
-{
- return q->nr_congestion_on;
-}
-
-/*
- * The threshold at which a queue is considered to be uncongested
- */
-static inline int queue_congestion_off_threshold(struct request_queue *q)
-{
- return q->nr_congestion_off;
-}
-
-extern int blk_update_nr_requests(struct request_queue *, unsigned int);
-
/*
* Contribute to IO statistics IFF:
*
@@ -381,21 +232,6 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
}
/*
- * Steal a bit from this field for legacy IO path atomic IO marking. Note that
- * setting the deadline clears the bottom bit, potentially clearing the
- * completed bit. The user has to be OK with this (current ones are fine).
- */
-static inline void blk_rq_set_deadline(struct request *rq, unsigned long time)
-{
- rq->__deadline = time & ~0x1UL;
-}
-
-static inline unsigned long blk_rq_deadline(struct request *rq)
-{
- return rq->__deadline & ~0x1UL;
-}
-
-/*
* The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size
* is defined as 'unsigned int', meantime it has to aligned to with logical
* block size which is the minimum accepted unit by hardware.
@@ -417,22 +253,6 @@ void ioc_clear_queue(struct request_queue *q);
int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
/**
- * rq_ioc - determine io_context for request allocation
- * @bio: request being allocated is for this bio (can be %NULL)
- *
- * Determine io_context to use for request allocation for @bio. May return
- * %NULL if %current->io_context doesn't exist.
- */
-static inline struct io_context *rq_ioc(struct bio *bio)
-{
-#ifdef CONFIG_BLK_CGROUP
- if (bio && bio->bi_ioc)
- return bio->bi_ioc;
-#endif
- return current->io_context;
-}
-
-/**
* create_io_context - try to create task->io_context
* @gfp_mask: allocation mask
* @node: allocation node
@@ -490,8 +310,6 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
}
#endif /* CONFIG_BOUNCE */
-extern void blk_drain_queue(struct request_queue *q);
-
#ifdef CONFIG_BLK_CGROUP_IOLATENCY
extern int blk_iolatency_init(struct request_queue *q);
#else
diff --git a/block/bounce.c b/block/bounce.c
index 559c55bda040..ffb9e9ecfa7e 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -277,7 +277,8 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
}
}
- bio_clone_blkcg_association(bio, bio_src);
+ bio_clone_blkg_association(bio, bio_src);
+ blkcg_bio_issue_init(bio);
return bio;
}
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index f3501cdaf1a6..192129856342 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -21,7 +21,7 @@
*
*/
#include <linux/slab.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/delay.h>
#include <linux/scatterlist.h>
#include <linux/bsg-lib.h>
@@ -31,6 +31,12 @@
#define uptr64(val) ((void __user *)(uintptr_t)(val))
+struct bsg_set {
+ struct blk_mq_tag_set tag_set;
+ bsg_job_fn *job_fn;
+ bsg_timeout_fn *timeout_fn;
+};
+
static int bsg_transport_check_proto(struct sg_io_v4 *hdr)
{
if (hdr->protocol != BSG_PROTOCOL_SCSI ||
@@ -129,7 +135,7 @@ static void bsg_teardown_job(struct kref *kref)
kfree(job->request_payload.sg_list);
kfree(job->reply_payload.sg_list);
- blk_end_request_all(rq, BLK_STS_OK);
+ blk_mq_end_request(rq, BLK_STS_OK);
}
void bsg_job_put(struct bsg_job *job)
@@ -157,15 +163,15 @@ void bsg_job_done(struct bsg_job *job, int result,
{
job->result = result;
job->reply_payload_rcv_len = reply_payload_rcv_len;
- blk_complete_request(blk_mq_rq_from_pdu(job));
+ blk_mq_complete_request(blk_mq_rq_from_pdu(job));
}
EXPORT_SYMBOL_GPL(bsg_job_done);
/**
- * bsg_softirq_done - softirq done routine for destroying the bsg requests
+ * bsg_complete - softirq done routine for destroying the bsg requests
* @rq: BSG request that holds the job to be destroyed
*/
-static void bsg_softirq_done(struct request *rq)
+static void bsg_complete(struct request *rq)
{
struct bsg_job *job = blk_mq_rq_to_pdu(rq);
@@ -224,54 +230,48 @@ failjob_rls_job:
}
/**
- * bsg_request_fn - generic handler for bsg requests
- * @q: request queue to manage
+ * bsg_queue_rq - generic handler for bsg requests
+ * @hctx: hardware queue
+ * @bd: queue data
*
* On error the create_bsg_job function should return a -Exyz error value
* that will be set to ->result.
*
* Drivers/subsys should pass this to the queue init function.
*/
-static void bsg_request_fn(struct request_queue *q)
- __releases(q->queue_lock)
- __acquires(q->queue_lock)
+static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
+ struct request_queue *q = hctx->queue;
struct device *dev = q->queuedata;
- struct request *req;
+ struct request *req = bd->rq;
+ struct bsg_set *bset =
+ container_of(q->tag_set, struct bsg_set, tag_set);
int ret;
+ blk_mq_start_request(req);
+
if (!get_device(dev))
- return;
-
- while (1) {
- req = blk_fetch_request(q);
- if (!req)
- break;
- spin_unlock_irq(q->queue_lock);
-
- if (!bsg_prepare_job(dev, req)) {
- blk_end_request_all(req, BLK_STS_OK);
- spin_lock_irq(q->queue_lock);
- continue;
- }
-
- ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req));
- spin_lock_irq(q->queue_lock);
- if (ret)
- break;
- }
+ return BLK_STS_IOERR;
+
+ if (!bsg_prepare_job(dev, req))
+ return BLK_STS_IOERR;
+
+ ret = bset->job_fn(blk_mq_rq_to_pdu(req));
+ if (ret)
+ return BLK_STS_IOERR;
- spin_unlock_irq(q->queue_lock);
put_device(dev);
- spin_lock_irq(q->queue_lock);
+ return BLK_STS_OK;
}
/* called right after the request is allocated for the request_queue */
-static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp)
+static int bsg_init_rq(struct blk_mq_tag_set *set, struct request *req,
+ unsigned int hctx_idx, unsigned int numa_node)
{
struct bsg_job *job = blk_mq_rq_to_pdu(req);
- job->reply = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp);
+ job->reply = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL);
if (!job->reply)
return -ENOMEM;
return 0;
@@ -289,13 +289,47 @@ static void bsg_initialize_rq(struct request *req)
job->dd_data = job + 1;
}
-static void bsg_exit_rq(struct request_queue *q, struct request *req)
+static void bsg_exit_rq(struct blk_mq_tag_set *set, struct request *req,
+ unsigned int hctx_idx)
{
struct bsg_job *job = blk_mq_rq_to_pdu(req);
kfree(job->reply);
}
+void bsg_remove_queue(struct request_queue *q)
+{
+ if (q) {
+ struct bsg_set *bset =
+ container_of(q->tag_set, struct bsg_set, tag_set);
+
+ bsg_unregister_queue(q);
+ blk_cleanup_queue(q);
+ blk_mq_free_tag_set(&bset->tag_set);
+ kfree(bset);
+ }
+}
+EXPORT_SYMBOL_GPL(bsg_remove_queue);
+
+static enum blk_eh_timer_return bsg_timeout(struct request *rq, bool reserved)
+{
+ struct bsg_set *bset =
+ container_of(rq->q->tag_set, struct bsg_set, tag_set);
+
+ if (!bset->timeout_fn)
+ return BLK_EH_DONE;
+ return bset->timeout_fn(rq);
+}
+
+static const struct blk_mq_ops bsg_mq_ops = {
+ .queue_rq = bsg_queue_rq,
+ .init_request = bsg_init_rq,
+ .exit_request = bsg_exit_rq,
+ .initialize_rq_fn = bsg_initialize_rq,
+ .complete = bsg_complete,
+ .timeout = bsg_timeout,
+};
+
/**
* bsg_setup_queue - Create and add the bsg hooks so we can receive requests
* @dev: device to attach bsg device to
@@ -304,28 +338,38 @@ static void bsg_exit_rq(struct request_queue *q, struct request *req)
* @dd_job_size: size of LLD data needed for each job
*/
struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
- bsg_job_fn *job_fn, int dd_job_size)
+ bsg_job_fn *job_fn, bsg_timeout_fn *timeout, int dd_job_size)
{
+ struct bsg_set *bset;
+ struct blk_mq_tag_set *set;
struct request_queue *q;
- int ret;
+ int ret = -ENOMEM;
- q = blk_alloc_queue(GFP_KERNEL);
- if (!q)
+ bset = kzalloc(sizeof(*bset), GFP_KERNEL);
+ if (!bset)
return ERR_PTR(-ENOMEM);
- q->cmd_size = sizeof(struct bsg_job) + dd_job_size;
- q->init_rq_fn = bsg_init_rq;
- q->exit_rq_fn = bsg_exit_rq;
- q->initialize_rq_fn = bsg_initialize_rq;
- q->request_fn = bsg_request_fn;
- ret = blk_init_allocated_queue(q);
- if (ret)
- goto out_cleanup_queue;
+ bset->job_fn = job_fn;
+ bset->timeout_fn = timeout;
+
+ set = &bset->tag_set;
+ set->ops = &bsg_mq_ops,
+ set->nr_hw_queues = 1;
+ set->queue_depth = 128;
+ set->numa_node = NUMA_NO_NODE;
+ set->cmd_size = sizeof(struct bsg_job) + dd_job_size;
+ set->flags = BLK_MQ_F_NO_SCHED | BLK_MQ_F_BLOCKING;
+ if (blk_mq_alloc_tag_set(set))
+ goto out_tag_set;
+
+ q = blk_mq_init_queue(set);
+ if (IS_ERR(q)) {
+ ret = PTR_ERR(q);
+ goto out_queue;
+ }
q->queuedata = dev;
- q->bsg_job_fn = job_fn;
blk_queue_flag_set(QUEUE_FLAG_BIDI, q);
- blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
ret = bsg_register_queue(q, dev, name, &bsg_transport_ops);
@@ -338,6 +382,10 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
return q;
out_cleanup_queue:
blk_cleanup_queue(q);
+out_queue:
+ blk_mq_free_tag_set(set);
+out_tag_set:
+ kfree(bset);
return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(bsg_setup_queue);
diff --git a/block/bsg.c b/block/bsg.c
index 9a442c23a715..44f6028b9567 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -471,7 +471,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
/*
* we need a proper transport to send commands, not a stacked device
*/
- if (!queue_is_rq_based(q))
+ if (!queue_is_mq(q))
return 0;
bcd = &q->bsg_dev;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
deleted file mode 100644
index ed41aa978c4a..000000000000
--- a/block/cfq-iosched.c
+++ /dev/null
@@ -1,4916 +0,0 @@
-/*
- * CFQ, or complete fairness queueing, disk scheduler.
- *
- * Based on ideas from a previously unfinished io
- * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
- *
- * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sched/clock.h>
-#include <linux/blkdev.h>
-#include <linux/elevator.h>
-#include <linux/ktime.h>
-#include <linux/rbtree.h>
-#include <linux/ioprio.h>
-#include <linux/blktrace_api.h>
-#include <linux/blk-cgroup.h>
-#include "blk.h"
-#include "blk-wbt.h"
-
-/*
- * tunables
- */
-/* max queue in one round of service */
-static const int cfq_quantum = 8;
-static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
-/* maximum backwards seek, in KiB */
-static const int cfq_back_max = 16 * 1024;
-/* penalty of a backwards seek */
-static const int cfq_back_penalty = 2;
-static const u64 cfq_slice_sync = NSEC_PER_SEC / 10;
-static u64 cfq_slice_async = NSEC_PER_SEC / 25;
-static const int cfq_slice_async_rq = 2;
-static u64 cfq_slice_idle = NSEC_PER_SEC / 125;
-static u64 cfq_group_idle = NSEC_PER_SEC / 125;
-static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
-static const int cfq_hist_divisor = 4;
-
-/*
- * offset from end of queue service tree for idle class
- */
-#define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5)
-/* offset from end of group service tree under time slice mode */
-#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5)
-/* offset from end of group service under IOPS mode */
-#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5)
-
-/*
- * below this threshold, we consider thinktime immediate
- */
-#define CFQ_MIN_TT (2 * NSEC_PER_SEC / HZ)
-
-#define CFQ_SLICE_SCALE (5)
-#define CFQ_HW_QUEUE_MIN (5)
-#define CFQ_SERVICE_SHIFT 12
-
-#define CFQQ_SEEK_THR (sector_t)(8 * 100)
-#define CFQQ_CLOSE_THR (sector_t)(8 * 1024)
-#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
-#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
-
-#define RQ_CIC(rq) icq_to_cic((rq)->elv.icq)
-#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elv.priv[0])
-#define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elv.priv[1])
-
-static struct kmem_cache *cfq_pool;
-
-#define CFQ_PRIO_LISTS IOPRIO_BE_NR
-#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
-#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
-
-#define sample_valid(samples) ((samples) > 80)
-#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
-
-/* blkio-related constants */
-#define CFQ_WEIGHT_LEGACY_MIN 10
-#define CFQ_WEIGHT_LEGACY_DFL 500
-#define CFQ_WEIGHT_LEGACY_MAX 1000
-
-struct cfq_ttime {
- u64 last_end_request;
-
- u64 ttime_total;
- u64 ttime_mean;
- unsigned long ttime_samples;
-};
-
-/*
- * Most of our rbtree usage is for sorting with min extraction, so
- * if we cache the leftmost node we don't have to walk down the tree
- * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should
- * move this into the elevator for the rq sorting as well.
- */
-struct cfq_rb_root {
- struct rb_root_cached rb;
- struct rb_node *rb_rightmost;
- unsigned count;
- u64 min_vdisktime;
- struct cfq_ttime ttime;
-};
-#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT_CACHED, \
- .rb_rightmost = NULL, \
- .ttime = {.last_end_request = ktime_get_ns(),},}
-
-/*
- * Per process-grouping structure
- */
-struct cfq_queue {
- /* reference count */
- int ref;
- /* various state flags, see below */
- unsigned int flags;
- /* parent cfq_data */
- struct cfq_data *cfqd;
- /* service_tree member */
- struct rb_node rb_node;
- /* service_tree key */
- u64 rb_key;
- /* prio tree member */
- struct rb_node p_node;
- /* prio tree root we belong to, if any */
- struct rb_root *p_root;
- /* sorted list of pending requests */
- struct rb_root sort_list;
- /* if fifo isn't expired, next request to serve */
- struct request *next_rq;
- /* requests queued in sort_list */
- int queued[2];
- /* currently allocated requests */
- int allocated[2];
- /* fifo list of requests in sort_list */
- struct list_head fifo;
-
- /* time when queue got scheduled in to dispatch first request. */
- u64 dispatch_start;
- u64 allocated_slice;
- u64 slice_dispatch;
- /* time when first request from queue completed and slice started. */
- u64 slice_start;
- u64 slice_end;
- s64 slice_resid;
-
- /* pending priority requests */
- int prio_pending;
- /* number of requests that are on the dispatch list or inside driver */
- int dispatched;
-
- /* io prio of this group */
- unsigned short ioprio, org_ioprio;
- unsigned short ioprio_class, org_ioprio_class;
-
- pid_t pid;
-
- u32 seek_history;
- sector_t last_request_pos;
-
- struct cfq_rb_root *service_tree;
- struct cfq_queue *new_cfqq;
- struct cfq_group *cfqg;
- /* Number of sectors dispatched from queue in single dispatch round */
- unsigned long nr_sectors;
-};
-
-/*
- * First index in the service_trees.
- * IDLE is handled separately, so it has negative index
- */
-enum wl_class_t {
- BE_WORKLOAD = 0,
- RT_WORKLOAD = 1,
- IDLE_WORKLOAD = 2,
- CFQ_PRIO_NR,
-};
-
-/*
- * Second index in the service_trees.
- */
-enum wl_type_t {
- ASYNC_WORKLOAD = 0,
- SYNC_NOIDLE_WORKLOAD = 1,
- SYNC_WORKLOAD = 2
-};
-
-struct cfqg_stats {
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- /* number of ios merged */
- struct blkg_rwstat merged;
- /* total time spent on device in ns, may not be accurate w/ queueing */
- struct blkg_rwstat service_time;
- /* total time spent waiting in scheduler queue in ns */
- struct blkg_rwstat wait_time;
- /* number of IOs queued up */
- struct blkg_rwstat queued;
- /* total disk time and nr sectors dispatched by this group */
- struct blkg_stat time;
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- /* time not charged to this cgroup */
- struct blkg_stat unaccounted_time;
- /* sum of number of ios queued across all samples */
- struct blkg_stat avg_queue_size_sum;
- /* count of samples taken for average */
- struct blkg_stat avg_queue_size_samples;
- /* how many times this group has been removed from service tree */
- struct blkg_stat dequeue;
- /* total time spent waiting for it to be assigned a timeslice. */
- struct blkg_stat group_wait_time;
- /* time spent idling for this blkcg_gq */
- struct blkg_stat idle_time;
- /* total time with empty current active q with other requests queued */
- struct blkg_stat empty_time;
- /* fields after this shouldn't be cleared on stat reset */
- u64 start_group_wait_time;
- u64 start_idle_time;
- u64 start_empty_time;
- uint16_t flags;
-#endif /* CONFIG_DEBUG_BLK_CGROUP */
-#endif /* CONFIG_CFQ_GROUP_IOSCHED */
-};
-
-/* Per-cgroup data */
-struct cfq_group_data {
- /* must be the first member */
- struct blkcg_policy_data cpd;
-
- unsigned int weight;
- unsigned int leaf_weight;
-};
-
-/* This is per cgroup per device grouping structure */
-struct cfq_group {
- /* must be the first member */
- struct blkg_policy_data pd;
-
- /* group service_tree member */
- struct rb_node rb_node;
-
- /* group service_tree key */
- u64 vdisktime;
-
- /*
- * The number of active cfqgs and sum of their weights under this
- * cfqg. This covers this cfqg's leaf_weight and all children's
- * weights, but does not cover weights of further descendants.
- *
- * If a cfqg is on the service tree, it's active. An active cfqg
- * also activates its parent and contributes to the children_weight
- * of the parent.
- */
- int nr_active;
- unsigned int children_weight;
-
- /*
- * vfraction is the fraction of vdisktime that the tasks in this
- * cfqg are entitled to. This is determined by compounding the
- * ratios walking up from this cfqg to the root.
- *
- * It is in fixed point w/ CFQ_SERVICE_SHIFT and the sum of all
- * vfractions on a service tree is approximately 1. The sum may
- * deviate a bit due to rounding errors and fluctuations caused by
- * cfqgs entering and leaving the service tree.
- */
- unsigned int vfraction;
-
- /*
- * There are two weights - (internal) weight is the weight of this
- * cfqg against the sibling cfqgs. leaf_weight is the wight of
- * this cfqg against the child cfqgs. For the root cfqg, both
- * weights are kept in sync for backward compatibility.
- */
- unsigned int weight;
- unsigned int new_weight;
- unsigned int dev_weight;
-
- unsigned int leaf_weight;
- unsigned int new_leaf_weight;
- unsigned int dev_leaf_weight;
-
- /* number of cfqq currently on this group */
- int nr_cfqq;
-
- /*
- * Per group busy queues average. Useful for workload slice calc. We
- * create the array for each prio class but at run time it is used
- * only for RT and BE class and slot for IDLE class remains unused.
- * This is primarily done to avoid confusion and a gcc warning.
- */
- unsigned int busy_queues_avg[CFQ_PRIO_NR];
- /*
- * rr lists of queues with requests. We maintain service trees for
- * RT and BE classes. These trees are subdivided in subclasses
- * of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE
- * class there is no subclassification and all the cfq queues go on
- * a single tree service_tree_idle.
- * Counts are embedded in the cfq_rb_root
- */
- struct cfq_rb_root service_trees[2][3];
- struct cfq_rb_root service_tree_idle;
-
- u64 saved_wl_slice;
- enum wl_type_t saved_wl_type;
- enum wl_class_t saved_wl_class;
-
- /* number of requests that are on the dispatch list or inside driver */
- int dispatched;
- struct cfq_ttime ttime;
- struct cfqg_stats stats; /* stats for this cfqg */
-
- /* async queue for each priority case */
- struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
- struct cfq_queue *async_idle_cfqq;
-
-};
-
-struct cfq_io_cq {
- struct io_cq icq; /* must be the first member */
- struct cfq_queue *cfqq[2];
- struct cfq_ttime ttime;
- int ioprio; /* the current ioprio */
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- uint64_t blkcg_serial_nr; /* the current blkcg serial */
-#endif
-};
-
-/*
- * Per block device queue structure
- */
-struct cfq_data {
- struct request_queue *queue;
- /* Root service tree for cfq_groups */
- struct cfq_rb_root grp_service_tree;
- struct cfq_group *root_group;
-
- /*
- * The priority currently being served
- */
- enum wl_class_t serving_wl_class;
- enum wl_type_t serving_wl_type;
- u64 workload_expires;
- struct cfq_group *serving_group;
-
- /*
- * Each priority tree is sorted by next_request position. These
- * trees are used when determining if two or more queues are
- * interleaving requests (see cfq_close_cooperator).
- */
- struct rb_root prio_trees[CFQ_PRIO_LISTS];
-
- unsigned int busy_queues;
- unsigned int busy_sync_queues;
-
- int rq_in_driver;
- int rq_in_flight[2];
-
- /*
- * queue-depth detection
- */
- int rq_queued;
- int hw_tag;
- /*
- * hw_tag can be
- * -1 => indeterminate, (cfq will behave as if NCQ is present, to allow better detection)
- * 1 => NCQ is present (hw_tag_est_depth is the estimated max depth)
- * 0 => no NCQ
- */
- int hw_tag_est_depth;
- unsigned int hw_tag_samples;
-
- /*
- * idle window management
- */
- struct hrtimer idle_slice_timer;
- struct work_struct unplug_work;
-
- struct cfq_queue *active_queue;
- struct cfq_io_cq *active_cic;
-
- sector_t last_position;
-
- /*
- * tunables, see top of file
- */
- unsigned int cfq_quantum;
- unsigned int cfq_back_penalty;
- unsigned int cfq_back_max;
- unsigned int cfq_slice_async_rq;
- unsigned int cfq_latency;
- u64 cfq_fifo_expire[2];
- u64 cfq_slice[2];
- u64 cfq_slice_idle;
- u64 cfq_group_idle;
- u64 cfq_target_latency;
-
- /*
- * Fallback dummy cfqq for extreme OOM conditions
- */
- struct cfq_queue oom_cfqq;
-
- u64 last_delayed_sync;
-};
-
-static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
-static void cfq_put_queue(struct cfq_queue *cfqq);
-
-static struct cfq_rb_root *st_for(struct cfq_group *cfqg,
- enum wl_class_t class,
- enum wl_type_t type)
-{
- if (!cfqg)
- return NULL;
-
- if (class == IDLE_WORKLOAD)
- return &cfqg->service_tree_idle;
-
- return &cfqg->service_trees[class][type];
-}
-
-enum cfqq_state_flags {
- CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
- CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
- CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */
- CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
- CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
- CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */
- CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
- CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
- CFQ_CFQQ_FLAG_sync, /* synchronous queue */
- CFQ_CFQQ_FLAG_coop, /* cfqq is shared */
- CFQ_CFQQ_FLAG_split_coop, /* shared cfqq will be splitted */
- CFQ_CFQQ_FLAG_deep, /* sync cfqq experienced large depth */
- CFQ_CFQQ_FLAG_wait_busy, /* Waiting for next request */
-};
-
-#define CFQ_CFQQ_FNS(name) \
-static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \
-{ \
- (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \
-} \
-static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \
-{ \
- (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \
-} \
-static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
-{ \
- return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
-}
-
-CFQ_CFQQ_FNS(on_rr);
-CFQ_CFQQ_FNS(wait_request);
-CFQ_CFQQ_FNS(must_dispatch);
-CFQ_CFQQ_FNS(must_alloc_slice);
-CFQ_CFQQ_FNS(fifo_expire);
-CFQ_CFQQ_FNS(idle_window);
-CFQ_CFQQ_FNS(prio_changed);
-CFQ_CFQQ_FNS(slice_new);
-CFQ_CFQQ_FNS(sync);
-CFQ_CFQQ_FNS(coop);
-CFQ_CFQQ_FNS(split_coop);
-CFQ_CFQQ_FNS(deep);
-CFQ_CFQQ_FNS(wait_busy);
-#undef CFQ_CFQQ_FNS
-
-#if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
-
-/* cfqg stats flags */
-enum cfqg_stats_flags {
- CFQG_stats_waiting = 0,
- CFQG_stats_idling,
- CFQG_stats_empty,
-};
-
-#define CFQG_FLAG_FNS(name) \
-static inline void cfqg_stats_mark_##name(struct cfqg_stats *stats) \
-{ \
- stats->flags |= (1 << CFQG_stats_##name); \
-} \
-static inline void cfqg_stats_clear_##name(struct cfqg_stats *stats) \
-{ \
- stats->flags &= ~(1 << CFQG_stats_##name); \
-} \
-static inline int cfqg_stats_##name(struct cfqg_stats *stats) \
-{ \
- return (stats->flags & (1 << CFQG_stats_##name)) != 0; \
-} \
-
-CFQG_FLAG_FNS(waiting)
-CFQG_FLAG_FNS(idling)
-CFQG_FLAG_FNS(empty)
-#undef CFQG_FLAG_FNS
-
-/* This should be called with the queue_lock held. */
-static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats)
-{
- u64 now;
-
- if (!cfqg_stats_waiting(stats))
- return;
-
- now = ktime_get_ns();
- if (now > stats->start_group_wait_time)
- blkg_stat_add(&stats->group_wait_time,
- now - stats->start_group_wait_time);
- cfqg_stats_clear_waiting(stats);
-}
-
-/* This should be called with the queue_lock held. */
-static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg,
- struct cfq_group *curr_cfqg)
-{
- struct cfqg_stats *stats = &cfqg->stats;
-
- if (cfqg_stats_waiting(stats))
- return;
- if (cfqg == curr_cfqg)
- return;
- stats->start_group_wait_time = ktime_get_ns();
- cfqg_stats_mark_waiting(stats);
-}
-
-/* This should be called with the queue_lock held. */
-static void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
-{
- u64 now;
-
- if (!cfqg_stats_empty(stats))
- return;
-
- now = ktime_get_ns();
- if (now > stats->start_empty_time)
- blkg_stat_add(&stats->empty_time,
- now - stats->start_empty_time);
- cfqg_stats_clear_empty(stats);
-}
-
-static void cfqg_stats_update_dequeue(struct cfq_group *cfqg)
-{
- blkg_stat_add(&cfqg->stats.dequeue, 1);
-}
-
-static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg)
-{
- struct cfqg_stats *stats = &cfqg->stats;
-
- if (blkg_rwstat_total(&stats->queued))
- return;
-
- /*
- * group is already marked empty. This can happen if cfqq got new
- * request in parent group and moved to this group while being added
- * to service tree. Just ignore the event and move on.
- */
- if (cfqg_stats_empty(stats))
- return;
-
- stats->start_empty_time = ktime_get_ns();
- cfqg_stats_mark_empty(stats);
-}
-
-static void cfqg_stats_update_idle_time(struct cfq_group *cfqg)
-{
- struct cfqg_stats *stats = &cfqg->stats;
-
- if (cfqg_stats_idling(stats)) {
- u64 now = ktime_get_ns();
-
- if (now > stats->start_idle_time)
- blkg_stat_add(&stats->idle_time,
- now - stats->start_idle_time);
- cfqg_stats_clear_idling(stats);
- }
-}
-
-static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg)
-{
- struct cfqg_stats *stats = &cfqg->stats;
-
- BUG_ON(cfqg_stats_idling(stats));
-
- stats->start_idle_time = ktime_get_ns();
- cfqg_stats_mark_idling(stats);
-}
-
-static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg)
-{
- struct cfqg_stats *stats = &cfqg->stats;
-
- blkg_stat_add(&stats->avg_queue_size_sum,
- blkg_rwstat_total(&stats->queued));
- blkg_stat_add(&stats->avg_queue_size_samples, 1);
- cfqg_stats_update_group_wait_time(stats);
-}
-
-#else /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
-
-static inline void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, struct cfq_group *curr_cfqg) { }
-static inline void cfqg_stats_end_empty_time(struct cfqg_stats *stats) { }
-static inline void cfqg_stats_update_dequeue(struct cfq_group *cfqg) { }
-static inline void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { }
-static inline void cfqg_stats_update_idle_time(struct cfq_group *cfqg) { }
-static inline void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) { }
-static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { }
-
-#endif /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-
-static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
-{
- return pd ? container_of(pd, struct cfq_group, pd) : NULL;
-}
-
-static struct cfq_group_data
-*cpd_to_cfqgd(struct blkcg_policy_data *cpd)
-{
- return cpd ? container_of(cpd, struct cfq_group_data, cpd) : NULL;
-}
-
-static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
-{
- return pd_to_blkg(&cfqg->pd);
-}
-
-static struct blkcg_policy blkcg_policy_cfq;
-
-static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
-{
- return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
-}
-
-static struct cfq_group_data *blkcg_to_cfqgd(struct blkcg *blkcg)
-{
- return cpd_to_cfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_cfq));
-}
-
-static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
-{
- struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent;
-
- return pblkg ? blkg_to_cfqg(pblkg) : NULL;
-}
-
-static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
- struct cfq_group *ancestor)
-{
- return cgroup_is_descendant(cfqg_to_blkg(cfqg)->blkcg->css.cgroup,
- cfqg_to_blkg(ancestor)->blkcg->css.cgroup);
-}
-
-static inline void cfqg_get(struct cfq_group *cfqg)
-{
- return blkg_get(cfqg_to_blkg(cfqg));
-}
-
-static inline void cfqg_put(struct cfq_group *cfqg)
-{
- return blkg_put(cfqg_to_blkg(cfqg));
-}
-
-#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) do { \
- blk_add_cgroup_trace_msg((cfqd)->queue, \
- cfqg_to_blkg((cfqq)->cfqg)->blkcg, \
- "cfq%d%c%c " fmt, (cfqq)->pid, \
- cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
- cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\
- ##args); \
-} while (0)
-
-#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do { \
- blk_add_cgroup_trace_msg((cfqd)->queue, \
- cfqg_to_blkg(cfqg)->blkcg, fmt, ##args); \
-} while (0)
-
-static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
- struct cfq_group *curr_cfqg,
- unsigned int op)
-{
- blkg_rwstat_add(&cfqg->stats.queued, op, 1);
- cfqg_stats_end_empty_time(&cfqg->stats);
- cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg);
-}
-
-static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
- uint64_t time, unsigned long unaccounted_time)
-{
- blkg_stat_add(&cfqg->stats.time, time);
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- blkg_stat_add(&cfqg->stats.unaccounted_time, unaccounted_time);
-#endif
-}
-
-static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
- unsigned int op)
-{
- blkg_rwstat_add(&cfqg->stats.queued, op, -1);
-}
-
-static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
- unsigned int op)
-{
- blkg_rwstat_add(&cfqg->stats.merged, op, 1);
-}
-
-static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- u64 start_time_ns,
- u64 io_start_time_ns,
- unsigned int op)
-{
- struct cfqg_stats *stats = &cfqg->stats;
- u64 now = ktime_get_ns();
-
- if (now > io_start_time_ns)
- blkg_rwstat_add(&stats->service_time, op,
- now - io_start_time_ns);
- if (io_start_time_ns > start_time_ns)
- blkg_rwstat_add(&stats->wait_time, op,
- io_start_time_ns - start_time_ns);
-}
-
-/* @stats = 0 */
-static void cfqg_stats_reset(struct cfqg_stats *stats)
-{
- /* queued stats shouldn't be cleared */
- blkg_rwstat_reset(&stats->merged);
- blkg_rwstat_reset(&stats->service_time);
- blkg_rwstat_reset(&stats->wait_time);
- blkg_stat_reset(&stats->time);
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- blkg_stat_reset(&stats->unaccounted_time);
- blkg_stat_reset(&stats->avg_queue_size_sum);
- blkg_stat_reset(&stats->avg_queue_size_samples);
- blkg_stat_reset(&stats->dequeue);
- blkg_stat_reset(&stats->group_wait_time);
- blkg_stat_reset(&stats->idle_time);
- blkg_stat_reset(&stats->empty_time);
-#endif
-}
-
-/* @to += @from */
-static void cfqg_stats_add_aux(struct cfqg_stats *to, struct cfqg_stats *from)
-{
- /* queued stats shouldn't be cleared */
- blkg_rwstat_add_aux(&to->merged, &from->merged);
- blkg_rwstat_add_aux(&to->service_time, &from->service_time);
- blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
- blkg_stat_add_aux(&from->time, &from->time);
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
- blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
- blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
- blkg_stat_add_aux(&to->dequeue, &from->dequeue);
- blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
- blkg_stat_add_aux(&to->idle_time, &from->idle_time);
- blkg_stat_add_aux(&to->empty_time, &from->empty_time);
-#endif
-}
-
-/*
- * Transfer @cfqg's stats to its parent's aux counts so that the ancestors'
- * recursive stats can still account for the amount used by this cfqg after
- * it's gone.
- */
-static void cfqg_stats_xfer_dead(struct cfq_group *cfqg)
-{
- struct cfq_group *parent = cfqg_parent(cfqg);
-
- lockdep_assert_held(cfqg_to_blkg(cfqg)->q->queue_lock);
-
- if (unlikely(!parent))
- return;
-
- cfqg_stats_add_aux(&parent->stats, &cfqg->stats);
- cfqg_stats_reset(&cfqg->stats);
-}
-
-#else /* CONFIG_CFQ_GROUP_IOSCHED */
-
-static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; }
-static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
- struct cfq_group *ancestor)
-{
- return true;
-}
-static inline void cfqg_get(struct cfq_group *cfqg) { }
-static inline void cfqg_put(struct cfq_group *cfqg) { }
-
-#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
- blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c " fmt, (cfqq)->pid, \
- cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
- cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\
- ##args)
-#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0)
-
-static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
- struct cfq_group *curr_cfqg, unsigned int op) { }
-static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
- uint64_t time, unsigned long unaccounted_time) { }
-static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
- unsigned int op) { }
-static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
- unsigned int op) { }
-static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- u64 start_time_ns,
- u64 io_start_time_ns,
- unsigned int op) { }
-
-#endif /* CONFIG_CFQ_GROUP_IOSCHED */
-
-#define cfq_log(cfqd, fmt, args...) \
- blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
-
-/* Traverses through cfq group service trees */
-#define for_each_cfqg_st(cfqg, i, j, st) \
- for (i = 0; i <= IDLE_WORKLOAD; i++) \
- for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\
- : &cfqg->service_tree_idle; \
- (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \
- (i == IDLE_WORKLOAD && j == 0); \
- j++, st = i < IDLE_WORKLOAD ? \
- &cfqg->service_trees[i][j]: NULL) \
-
-static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
- struct cfq_ttime *ttime, bool group_idle)
-{
- u64 slice;
- if (!sample_valid(ttime->ttime_samples))
- return false;
- if (group_idle)
- slice = cfqd->cfq_group_idle;
- else
- slice = cfqd->cfq_slice_idle;
- return ttime->ttime_mean > slice;
-}
-
-static inline bool iops_mode(struct cfq_data *cfqd)
-{
- /*
- * If we are not idling on queues and it is a NCQ drive, parallel
- * execution of requests is on and measuring time is not possible
- * in most of the cases until and unless we drive shallower queue
- * depths and that becomes a performance bottleneck. In such cases
- * switch to start providing fairness in terms of number of IOs.
- */
- if (!cfqd->cfq_slice_idle && cfqd->hw_tag)
- return true;
- else
- return false;
-}
-
-static inline enum wl_class_t cfqq_class(struct cfq_queue *cfqq)
-{
- if (cfq_class_idle(cfqq))
- return IDLE_WORKLOAD;
- if (cfq_class_rt(cfqq))
- return RT_WORKLOAD;
- return BE_WORKLOAD;
-}
-
-
-static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
-{
- if (!cfq_cfqq_sync(cfqq))
- return ASYNC_WORKLOAD;
- if (!cfq_cfqq_idle_window(cfqq))
- return SYNC_NOIDLE_WORKLOAD;
- return SYNC_WORKLOAD;
-}
-
-static inline int cfq_group_busy_queues_wl(enum wl_class_t wl_class,
- struct cfq_data *cfqd,
- struct cfq_group *cfqg)
-{
- if (wl_class == IDLE_WORKLOAD)
- return cfqg->service_tree_idle.count;
-
- return cfqg->service_trees[wl_class][ASYNC_WORKLOAD].count +
- cfqg->service_trees[wl_class][SYNC_NOIDLE_WORKLOAD].count +
- cfqg->service_trees[wl_class][SYNC_WORKLOAD].count;
-}
-
-static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
- struct cfq_group *cfqg)
-{
- return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count +
- cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
-}
-
-static void cfq_dispatch_insert(struct request_queue *, struct request *);
-static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, bool is_sync,
- struct cfq_io_cq *cic, struct bio *bio);
-
-static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq)
-{
- /* cic->icq is the first member, %NULL will convert to %NULL */
- return container_of(icq, struct cfq_io_cq, icq);
-}
-
-static inline struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *cfqd,
- struct io_context *ioc)
-{
- if (ioc)
- return icq_to_cic(ioc_lookup_icq(ioc, cfqd->queue));
- return NULL;
-}
-
-static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync)
-{
- return cic->cfqq[is_sync];
-}
-
-static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq,
- bool is_sync)
-{
- cic->cfqq[is_sync] = cfqq;
-}
-
-static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
-{
- return cic->icq.q->elevator->elevator_data;
-}
-
-/*
- * scheduler run of queue, if there are requests pending and no one in the
- * driver that will restart queueing
- */
-static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
-{
- if (cfqd->busy_queues) {
- cfq_log(cfqd, "schedule dispatch");
- kblockd_schedule_work(&cfqd->unplug_work);
- }
-}
-
-/*
- * Scale schedule slice based on io priority. Use the sync time slice only
- * if a queue is marked sync and has sync io queued. A sync queue with async
- * io only, should not get full sync slice length.
- */
-static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync,
- unsigned short prio)
-{
- u64 base_slice = cfqd->cfq_slice[sync];
- u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE);
-
- WARN_ON(prio >= IOPRIO_BE_NR);
-
- return base_slice + (slice * (4 - prio));
-}
-
-static inline u64
-cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
-}
-
-/**
- * cfqg_scale_charge - scale disk time charge according to cfqg weight
- * @charge: disk time being charged
- * @vfraction: vfraction of the cfqg, fixed point w/ CFQ_SERVICE_SHIFT
- *
- * Scale @charge according to @vfraction, which is in range (0, 1]. The
- * scaling is inversely proportional.
- *
- * scaled = charge / vfraction
- *
- * The result is also in fixed point w/ CFQ_SERVICE_SHIFT.
- */
-static inline u64 cfqg_scale_charge(u64 charge,
- unsigned int vfraction)
-{
- u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */
-
- /* charge / vfraction */
- c <<= CFQ_SERVICE_SHIFT;
- return div_u64(c, vfraction);
-}
-
-static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
-{
- s64 delta = (s64)(vdisktime - min_vdisktime);
- if (delta > 0)
- min_vdisktime = vdisktime;
-
- return min_vdisktime;
-}
-
-static void update_min_vdisktime(struct cfq_rb_root *st)
-{
- if (!RB_EMPTY_ROOT(&st->rb.rb_root)) {
- struct cfq_group *cfqg = rb_entry_cfqg(st->rb.rb_leftmost);
-
- st->min_vdisktime = max_vdisktime(st->min_vdisktime,
- cfqg->vdisktime);
- }
-}
-
-/*
- * get averaged number of queues of RT/BE priority.
- * average is updated, with a formula that gives more weight to higher numbers,
- * to quickly follows sudden increases and decrease slowly
- */
-
-static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
- struct cfq_group *cfqg, bool rt)
-{
- unsigned min_q, max_q;
- unsigned mult = cfq_hist_divisor - 1;
- unsigned round = cfq_hist_divisor / 2;
- unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
-
- min_q = min(cfqg->busy_queues_avg[rt], busy);
- max_q = max(cfqg->busy_queues_avg[rt], busy);
- cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
- cfq_hist_divisor;
- return cfqg->busy_queues_avg[rt];
-}
-
-static inline u64
-cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
-{
- return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT;
-}
-
-static inline u64
-cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- u64 slice = cfq_prio_to_slice(cfqd, cfqq);
- if (cfqd->cfq_latency) {
- /*
- * interested queues (we consider only the ones with the same
- * priority class in the cfq group)
- */
- unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
- cfq_class_rt(cfqq));
- u64 sync_slice = cfqd->cfq_slice[1];
- u64 expect_latency = sync_slice * iq;
- u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
-
- if (expect_latency > group_slice) {
- u64 base_low_slice = 2 * cfqd->cfq_slice_idle;
- u64 low_slice;
-
- /* scale low_slice according to IO priority
- * and sync vs async */
- low_slice = div64_u64(base_low_slice*slice, sync_slice);
- low_slice = min(slice, low_slice);
- /* the adapted slice value is scaled to fit all iqs
- * into the target latency */
- slice = div64_u64(slice*group_slice, expect_latency);
- slice = max(slice, low_slice);
- }
- }
- return slice;
-}
-
-static inline void
-cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
- u64 now = ktime_get_ns();
-
- cfqq->slice_start = now;
- cfqq->slice_end = now + slice;
- cfqq->allocated_slice = slice;
- cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now);
-}
-
-/*
- * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
- * isn't valid until the first request from the dispatch is activated
- * and the slice time set.
- */
-static inline bool cfq_slice_used(struct cfq_queue *cfqq)
-{
- if (cfq_cfqq_slice_new(cfqq))
- return false;
- if (ktime_get_ns() < cfqq->slice_end)
- return false;
-
- return true;
-}
-
-/*
- * Lifted from AS - choose which of rq1 and rq2 that is best served now.
- * We choose the request that is closest to the head right now. Distance
- * behind the head is penalized and only allowed to a certain extent.
- */
-static struct request *
-cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last)
-{
- sector_t s1, s2, d1 = 0, d2 = 0;
- unsigned long back_max;
-#define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */
-#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
- unsigned wrap = 0; /* bit mask: requests behind the disk head? */
-
- if (rq1 == NULL || rq1 == rq2)
- return rq2;
- if (rq2 == NULL)
- return rq1;
-
- if (rq_is_sync(rq1) != rq_is_sync(rq2))
- return rq_is_sync(rq1) ? rq1 : rq2;
-
- if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
- return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
-
- s1 = blk_rq_pos(rq1);
- s2 = blk_rq_pos(rq2);
-
- /*
- * by definition, 1KiB is 2 sectors
- */
- back_max = cfqd->cfq_back_max * 2;
-
- /*
- * Strict one way elevator _except_ in the case where we allow
- * short backward seeks which are biased as twice the cost of a
- * similar forward seek.
- */
- if (s1 >= last)
- d1 = s1 - last;
- else if (s1 + back_max >= last)
- d1 = (last - s1) * cfqd->cfq_back_penalty;
- else
- wrap |= CFQ_RQ1_WRAP;
-
- if (s2 >= last)
- d2 = s2 - last;
- else if (s2 + back_max >= last)
- d2 = (last - s2) * cfqd->cfq_back_penalty;
- else
- wrap |= CFQ_RQ2_WRAP;
-
- /* Found required data */
-
- /*
- * By doing switch() on the bit mask "wrap" we avoid having to
- * check two variables for all permutations: --> faster!
- */
- switch (wrap) {
- case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
- if (d1 < d2)
- return rq1;
- else if (d2 < d1)
- return rq2;
- else {
- if (s1 >= s2)
- return rq1;
- else
- return rq2;
- }
-
- case CFQ_RQ2_WRAP:
- return rq1;
- case CFQ_RQ1_WRAP:
- return rq2;
- case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
- default:
- /*
- * Since both rqs are wrapped,
- * start with the one that's further behind head
- * (--> only *one* back seek required),
- * since back seek takes more time than forward.
- */
- if (s1 <= s2)
- return rq1;
- else
- return rq2;
- }
-}
-
-static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
-{
- /* Service tree is empty */
- if (!root->count)
- return NULL;
-
- return rb_entry(rb_first_cached(&root->rb), struct cfq_queue, rb_node);
-}
-
-static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
-{
- return rb_entry_cfqg(rb_first_cached(&root->rb));
-}
-
-static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
-{
- if (root->rb_rightmost == n)
- root->rb_rightmost = rb_prev(n);
-
- rb_erase_cached(n, &root->rb);
- RB_CLEAR_NODE(n);
-
- --root->count;
-}
-
-/*
- * would be nice to take fifo expire time into account as well
- */
-static struct request *
-cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct request *last)
-{
- struct rb_node *rbnext = rb_next(&last->rb_node);
- struct rb_node *rbprev = rb_prev(&last->rb_node);
- struct request *next = NULL, *prev = NULL;
-
- BUG_ON(RB_EMPTY_NODE(&last->rb_node));
-
- if (rbprev)
- prev = rb_entry_rq(rbprev);
-
- if (rbnext)
- next = rb_entry_rq(rbnext);
- else {
- rbnext = rb_first(&cfqq->sort_list);
- if (rbnext && rbnext != &last->rb_node)
- next = rb_entry_rq(rbnext);
- }
-
- return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
-}
-
-static u64 cfq_slice_offset(struct cfq_data *cfqd,
- struct cfq_queue *cfqq)
-{
- /*
- * just an approximation, should be ok.
- */
- return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) -
- cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
-}
-
-static inline s64
-cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
-{
- return cfqg->vdisktime - st->min_vdisktime;
-}
-
-static void
-__cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
-{
- struct rb_node **node = &st->rb.rb_root.rb_node;
- struct rb_node *parent = NULL;
- struct cfq_group *__cfqg;
- s64 key = cfqg_key(st, cfqg);
- bool leftmost = true, rightmost = true;
-
- while (*node != NULL) {
- parent = *node;
- __cfqg = rb_entry_cfqg(parent);
-
- if (key < cfqg_key(st, __cfqg)) {
- node = &parent->rb_left;
- rightmost = false;
- } else {
- node = &parent->rb_right;
- leftmost = false;
- }
- }
-
- if (rightmost)
- st->rb_rightmost = &cfqg->rb_node;
-
- rb_link_node(&cfqg->rb_node, parent, node);
- rb_insert_color_cached(&cfqg->rb_node, &st->rb, leftmost);
-}
-
-/*
- * This has to be called only on activation of cfqg
- */
-static void
-cfq_update_group_weight(struct cfq_group *cfqg)
-{
- if (cfqg->new_weight) {
- cfqg->weight = cfqg->new_weight;
- cfqg->new_weight = 0;
- }
-}
-
-static void
-cfq_update_group_leaf_weight(struct cfq_group *cfqg)
-{
- BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
-
- if (cfqg->new_leaf_weight) {
- cfqg->leaf_weight = cfqg->new_leaf_weight;
- cfqg->new_leaf_weight = 0;
- }
-}
-
-static void
-cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
-{
- unsigned int vfr = 1 << CFQ_SERVICE_SHIFT; /* start with 1 */
- struct cfq_group *pos = cfqg;
- struct cfq_group *parent;
- bool propagate;
-
- /* add to the service tree */
- BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
-
- /*
- * Update leaf_weight. We cannot update weight at this point
- * because cfqg might already have been activated and is
- * contributing its current weight to the parent's child_weight.
- */
- cfq_update_group_leaf_weight(cfqg);
- __cfq_group_service_tree_add(st, cfqg);
-
- /*
- * Activate @cfqg and calculate the portion of vfraction @cfqg is
- * entitled to. vfraction is calculated by walking the tree
- * towards the root calculating the fraction it has at each level.
- * The compounded ratio is how much vfraction @cfqg owns.
- *
- * Start with the proportion tasks in this cfqg has against active
- * children cfqgs - its leaf_weight against children_weight.
- */
- propagate = !pos->nr_active++;
- pos->children_weight += pos->leaf_weight;
- vfr = vfr * pos->leaf_weight / pos->children_weight;
-
- /*
- * Compound ->weight walking up the tree. Both activation and
- * vfraction calculation are done in the same loop. Propagation
- * stops once an already activated node is met. vfraction
- * calculation should always continue to the root.
- */
- while ((parent = cfqg_parent(pos))) {
- if (propagate) {
- cfq_update_group_weight(pos);
- propagate = !parent->nr_active++;
- parent->children_weight += pos->weight;
- }
- vfr = vfr * pos->weight / parent->children_weight;
- pos = parent;
- }
-
- cfqg->vfraction = max_t(unsigned, vfr, 1);
-}
-
-static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd)
-{
- if (!iops_mode(cfqd))
- return CFQ_SLICE_MODE_GROUP_DELAY;
- else
- return CFQ_IOPS_MODE_GROUP_DELAY;
-}
-
-static void
-cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
-{
- struct cfq_rb_root *st = &cfqd->grp_service_tree;
- struct cfq_group *__cfqg;
- struct rb_node *n;
-
- cfqg->nr_cfqq++;
- if (!RB_EMPTY_NODE(&cfqg->rb_node))
- return;
-
- /*
- * Currently put the group at the end. Later implement something
- * so that groups get lesser vtime based on their weights, so that
- * if group does not loose all if it was not continuously backlogged.
- */
- n = st->rb_rightmost;
- if (n) {
- __cfqg = rb_entry_cfqg(n);
- cfqg->vdisktime = __cfqg->vdisktime +
- cfq_get_cfqg_vdisktime_delay(cfqd);
- } else
- cfqg->vdisktime = st->min_vdisktime;
- cfq_group_service_tree_add(st, cfqg);
-}
-
-static void
-cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
-{
- struct cfq_group *pos = cfqg;
- bool propagate;
-
- /*
- * Undo activation from cfq_group_service_tree_add(). Deactivate
- * @cfqg and propagate deactivation upwards.
- */
- propagate = !--pos->nr_active;
- pos->children_weight -= pos->leaf_weight;
-
- while (propagate) {
- struct cfq_group *parent = cfqg_parent(pos);
-
- /* @pos has 0 nr_active at this point */
- WARN_ON_ONCE(pos->children_weight);
- pos->vfraction = 0;
-
- if (!parent)
- break;
-
- propagate = !--parent->nr_active;
- parent->children_weight -= pos->weight;
- pos = parent;
- }
-
- /* remove from the service tree */
- if (!RB_EMPTY_NODE(&cfqg->rb_node))
- cfq_rb_erase(&cfqg->rb_node, st);
-}
-
-static void
-cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
-{
- struct cfq_rb_root *st = &cfqd->grp_service_tree;
-
- BUG_ON(cfqg->nr_cfqq < 1);
- cfqg->nr_cfqq--;
-
- /* If there are other cfq queues under this group, don't delete it */
- if (cfqg->nr_cfqq)
- return;
-
- cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
- cfq_group_service_tree_del(st, cfqg);
- cfqg->saved_wl_slice = 0;
- cfqg_stats_update_dequeue(cfqg);
-}
-
-static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
- u64 *unaccounted_time)
-{
- u64 slice_used;
- u64 now = ktime_get_ns();
-
- /*
- * Queue got expired before even a single request completed or
- * got expired immediately after first request completion.
- */
- if (!cfqq->slice_start || cfqq->slice_start == now) {
- /*
- * Also charge the seek time incurred to the group, otherwise
- * if there are mutiple queues in the group, each can dispatch
- * a single request on seeky media and cause lots of seek time
- * and group will never know it.
- */
- slice_used = max_t(u64, (now - cfqq->dispatch_start),
- jiffies_to_nsecs(1));
- } else {
- slice_used = now - cfqq->slice_start;
- if (slice_used > cfqq->allocated_slice) {
- *unaccounted_time = slice_used - cfqq->allocated_slice;
- slice_used = cfqq->allocated_slice;
- }
- if (cfqq->slice_start > cfqq->dispatch_start)
- *unaccounted_time += cfqq->slice_start -
- cfqq->dispatch_start;
- }
-
- return slice_used;
-}
-
-static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
- struct cfq_queue *cfqq)
-{
- struct cfq_rb_root *st = &cfqd->grp_service_tree;
- u64 used_sl, charge, unaccounted_sl = 0;
- int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
- - cfqg->service_tree_idle.count;
- unsigned int vfr;
- u64 now = ktime_get_ns();
-
- BUG_ON(nr_sync < 0);
- used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
-
- if (iops_mode(cfqd))
- charge = cfqq->slice_dispatch;
- else if (!cfq_cfqq_sync(cfqq) && !nr_sync)
- charge = cfqq->allocated_slice;
-
- /*
- * Can't update vdisktime while on service tree and cfqg->vfraction
- * is valid only while on it. Cache vfr, leave the service tree,
- * update vdisktime and go back on. The re-addition to the tree
- * will also update the weights as necessary.
- */
- vfr = cfqg->vfraction;
- cfq_group_service_tree_del(st, cfqg);
- cfqg->vdisktime += cfqg_scale_charge(charge, vfr);
- cfq_group_service_tree_add(st, cfqg);
-
- /* This group is being expired. Save the context */
- if (cfqd->workload_expires > now) {
- cfqg->saved_wl_slice = cfqd->workload_expires - now;
- cfqg->saved_wl_type = cfqd->serving_wl_type;
- cfqg->saved_wl_class = cfqd->serving_wl_class;
- } else
- cfqg->saved_wl_slice = 0;
-
- cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
- st->min_vdisktime);
- cfq_log_cfqq(cfqq->cfqd, cfqq,
- "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu",
- used_sl, cfqq->slice_dispatch, charge,
- iops_mode(cfqd), cfqq->nr_sectors);
- cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl);
- cfqg_stats_set_start_empty_time(cfqg);
-}
-
-/**
- * cfq_init_cfqg_base - initialize base part of a cfq_group
- * @cfqg: cfq_group to initialize
- *
- * Initialize the base part which is used whether %CONFIG_CFQ_GROUP_IOSCHED
- * is enabled or not.
- */
-static void cfq_init_cfqg_base(struct cfq_group *cfqg)
-{
- struct cfq_rb_root *st;
- int i, j;
-
- for_each_cfqg_st(cfqg, i, j, st)
- *st = CFQ_RB_ROOT;
- RB_CLEAR_NODE(&cfqg->rb_node);
-
- cfqg->ttime.last_end_request = ktime_get_ns();
-}
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-static int __cfq_set_weight(struct cgroup_subsys_state *css, u64 val,
- bool on_dfl, bool reset_dev, bool is_leaf_weight);
-
-static void cfqg_stats_exit(struct cfqg_stats *stats)
-{
- blkg_rwstat_exit(&stats->merged);
- blkg_rwstat_exit(&stats->service_time);
- blkg_rwstat_exit(&stats->wait_time);
- blkg_rwstat_exit(&stats->queued);
- blkg_stat_exit(&stats->time);
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- blkg_stat_exit(&stats->unaccounted_time);
- blkg_stat_exit(&stats->avg_queue_size_sum);
- blkg_stat_exit(&stats->avg_queue_size_samples);
- blkg_stat_exit(&stats->dequeue);
- blkg_stat_exit(&stats->group_wait_time);
- blkg_stat_exit(&stats->idle_time);
- blkg_stat_exit(&stats->empty_time);
-#endif
-}
-
-static int cfqg_stats_init(struct cfqg_stats *stats, gfp_t gfp)
-{
- if (blkg_rwstat_init(&stats->merged, gfp) ||
- blkg_rwstat_init(&stats->service_time, gfp) ||
- blkg_rwstat_init(&stats->wait_time, gfp) ||
- blkg_rwstat_init(&stats->queued, gfp) ||
- blkg_stat_init(&stats->time, gfp))
- goto err;
-
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- if (blkg_stat_init(&stats->unaccounted_time, gfp) ||
- blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
- blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
- blkg_stat_init(&stats->dequeue, gfp) ||
- blkg_stat_init(&stats->group_wait_time, gfp) ||
- blkg_stat_init(&stats->idle_time, gfp) ||
- blkg_stat_init(&stats->empty_time, gfp))
- goto err;
-#endif
- return 0;
-err:
- cfqg_stats_exit(stats);
- return -ENOMEM;
-}
-
-static struct blkcg_policy_data *cfq_cpd_alloc(gfp_t gfp)
-{
- struct cfq_group_data *cgd;
-
- cgd = kzalloc(sizeof(*cgd), gfp);
- if (!cgd)
- return NULL;
- return &cgd->cpd;
-}
-
-static void cfq_cpd_init(struct blkcg_policy_data *cpd)
-{
- struct cfq_group_data *cgd = cpd_to_cfqgd(cpd);
- unsigned int weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
- CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL;
-
- if (cpd_to_blkcg(cpd) == &blkcg_root)
- weight *= 2;
-
- cgd->weight = weight;
- cgd->leaf_weight = weight;
-}
-
-static void cfq_cpd_free(struct blkcg_policy_data *cpd)
-{
- kfree(cpd_to_cfqgd(cpd));
-}
-
-static void cfq_cpd_bind(struct blkcg_policy_data *cpd)
-{
- struct blkcg *blkcg = cpd_to_blkcg(cpd);
- bool on_dfl = cgroup_subsys_on_dfl(io_cgrp_subsys);
- unsigned int weight = on_dfl ? CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL;
-
- if (blkcg == &blkcg_root)
- weight *= 2;
-
- WARN_ON_ONCE(__cfq_set_weight(&blkcg->css, weight, on_dfl, true, false));
- WARN_ON_ONCE(__cfq_set_weight(&blkcg->css, weight, on_dfl, true, true));
-}
-
-static struct blkg_policy_data *cfq_pd_alloc(gfp_t gfp, int node)
-{
- struct cfq_group *cfqg;
-
- cfqg = kzalloc_node(sizeof(*cfqg), gfp, node);
- if (!cfqg)
- return NULL;
-
- cfq_init_cfqg_base(cfqg);
- if (cfqg_stats_init(&cfqg->stats, gfp)) {
- kfree(cfqg);
- return NULL;
- }
-
- return &cfqg->pd;
-}
-
-static void cfq_pd_init(struct blkg_policy_data *pd)
-{
- struct cfq_group *cfqg = pd_to_cfqg(pd);
- struct cfq_group_data *cgd = blkcg_to_cfqgd(pd->blkg->blkcg);
-
- cfqg->weight = cgd->weight;
- cfqg->leaf_weight = cgd->leaf_weight;
-}
-
-static void cfq_pd_offline(struct blkg_policy_data *pd)
-{
- struct cfq_group *cfqg = pd_to_cfqg(pd);
- int i;
-
- for (i = 0; i < IOPRIO_BE_NR; i++) {
- if (cfqg->async_cfqq[0][i]) {
- cfq_put_queue(cfqg->async_cfqq[0][i]);
- cfqg->async_cfqq[0][i] = NULL;
- }
- if (cfqg->async_cfqq[1][i]) {
- cfq_put_queue(cfqg->async_cfqq[1][i]);
- cfqg->async_cfqq[1][i] = NULL;
- }
- }
-
- if (cfqg->async_idle_cfqq) {
- cfq_put_queue(cfqg->async_idle_cfqq);
- cfqg->async_idle_cfqq = NULL;
- }
-
- /*
- * @blkg is going offline and will be ignored by
- * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so
- * that they don't get lost. If IOs complete after this point, the
- * stats for them will be lost. Oh well...
- */
- cfqg_stats_xfer_dead(cfqg);
-}
-
-static void cfq_pd_free(struct blkg_policy_data *pd)
-{
- struct cfq_group *cfqg = pd_to_cfqg(pd);
-
- cfqg_stats_exit(&cfqg->stats);
- return kfree(cfqg);
-}
-
-static void cfq_pd_reset_stats(struct blkg_policy_data *pd)
-{
- struct cfq_group *cfqg = pd_to_cfqg(pd);
-
- cfqg_stats_reset(&cfqg->stats);
-}
-
-static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd,
- struct blkcg *blkcg)
-{
- struct blkcg_gq *blkg;
-
- blkg = blkg_lookup(blkcg, cfqd->queue);
- if (likely(blkg))
- return blkg_to_cfqg(blkg);
- return NULL;
-}
-
-static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
-{
- cfqq->cfqg = cfqg;
- /* cfqq reference on cfqg */
- cfqg_get(cfqg);
-}
-
-static u64 cfqg_prfill_weight_device(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- struct cfq_group *cfqg = pd_to_cfqg(pd);
-
- if (!cfqg->dev_weight)
- return 0;
- return __blkg_prfill_u64(sf, pd, cfqg->dev_weight);
-}
-
-static int cfqg_print_weight_device(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- cfqg_prfill_weight_device, &blkcg_policy_cfq,
- 0, false);
- return 0;
-}
-
-static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- struct cfq_group *cfqg = pd_to_cfqg(pd);
-
- if (!cfqg->dev_leaf_weight)
- return 0;
- return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
-}
-
-static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq,
- 0, false);
- return 0;
-}
-
-static int cfq_print_weight(struct seq_file *sf, void *v)
-{
- struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
- struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
- unsigned int val = 0;
-
- if (cgd)
- val = cgd->weight;
-
- seq_printf(sf, "%u\n", val);
- return 0;
-}
-
-static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
-{
- struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
- struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
- unsigned int val = 0;
-
- if (cgd)
- val = cgd->leaf_weight;
-
- seq_printf(sf, "%u\n", val);
- return 0;
-}
-
-static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off,
- bool on_dfl, bool is_leaf_weight)
-{
- unsigned int min = on_dfl ? CGROUP_WEIGHT_MIN : CFQ_WEIGHT_LEGACY_MIN;
- unsigned int max = on_dfl ? CGROUP_WEIGHT_MAX : CFQ_WEIGHT_LEGACY_MAX;
- struct blkcg *blkcg = css_to_blkcg(of_css(of));
- struct blkg_conf_ctx ctx;
- struct cfq_group *cfqg;
- struct cfq_group_data *cfqgd;
- int ret;
- u64 v;
-
- ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx);
- if (ret)
- return ret;
-
- if (sscanf(ctx.body, "%llu", &v) == 1) {
- /* require "default" on dfl */
- ret = -ERANGE;
- if (!v && on_dfl)
- goto out_finish;
- } else if (!strcmp(strim(ctx.body), "default")) {
- v = 0;
- } else {
- ret = -EINVAL;
- goto out_finish;
- }
-
- cfqg = blkg_to_cfqg(ctx.blkg);
- cfqgd = blkcg_to_cfqgd(blkcg);
-
- ret = -ERANGE;
- if (!v || (v >= min && v <= max)) {
- if (!is_leaf_weight) {
- cfqg->dev_weight = v;
- cfqg->new_weight = v ?: cfqgd->weight;
- } else {
- cfqg->dev_leaf_weight = v;
- cfqg->new_leaf_weight = v ?: cfqgd->leaf_weight;
- }
- ret = 0;
- }
-out_finish:
- blkg_conf_finish(&ctx);
- return ret ?: nbytes;
-}
-
-static ssize_t cfqg_set_weight_device(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- return __cfqg_set_weight_device(of, buf, nbytes, off, false, false);
-}
-
-static ssize_t cfqg_set_leaf_weight_device(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- return __cfqg_set_weight_device(of, buf, nbytes, off, false, true);
-}
-
-static int __cfq_set_weight(struct cgroup_subsys_state *css, u64 val,
- bool on_dfl, bool reset_dev, bool is_leaf_weight)
-{
- unsigned int min = on_dfl ? CGROUP_WEIGHT_MIN : CFQ_WEIGHT_LEGACY_MIN;
- unsigned int max = on_dfl ? CGROUP_WEIGHT_MAX : CFQ_WEIGHT_LEGACY_MAX;
- struct blkcg *blkcg = css_to_blkcg(css);
- struct blkcg_gq *blkg;
- struct cfq_group_data *cfqgd;
- int ret = 0;
-
- if (val < min || val > max)
- return -ERANGE;
-
- spin_lock_irq(&blkcg->lock);
- cfqgd = blkcg_to_cfqgd(blkcg);
- if (!cfqgd) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!is_leaf_weight)
- cfqgd->weight = val;
- else
- cfqgd->leaf_weight = val;
-
- hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
- struct cfq_group *cfqg = blkg_to_cfqg(blkg);
-
- if (!cfqg)
- continue;
-
- if (!is_leaf_weight) {
- if (reset_dev)
- cfqg->dev_weight = 0;
- if (!cfqg->dev_weight)
- cfqg->new_weight = cfqgd->weight;
- } else {
- if (reset_dev)
- cfqg->dev_leaf_weight = 0;
- if (!cfqg->dev_leaf_weight)
- cfqg->new_leaf_weight = cfqgd->leaf_weight;
- }
- }
-
-out:
- spin_unlock_irq(&blkcg->lock);
- return ret;
-}
-
-static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
- u64 val)
-{
- return __cfq_set_weight(css, val, false, false, false);
-}
-
-static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
- struct cftype *cft, u64 val)
-{
- return __cfq_set_weight(css, val, false, false, true);
-}
-
-static int cfqg_print_stat(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
- &blkcg_policy_cfq, seq_cft(sf)->private, false);
- return 0;
-}
-
-static int cfqg_print_rwstat(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
- &blkcg_policy_cfq, seq_cft(sf)->private, true);
- return 0;
-}
-
-static u64 cfqg_prfill_stat_recursive(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd),
- &blkcg_policy_cfq, off);
- return __blkg_prfill_u64(sf, pd, sum);
-}
-
-static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd),
- &blkcg_policy_cfq, off);
- return __blkg_prfill_rwstat(sf, pd, &sum);
-}
-
-static int cfqg_print_stat_recursive(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- cfqg_prfill_stat_recursive, &blkcg_policy_cfq,
- seq_cft(sf)->private, false);
- return 0;
-}
-
-static int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq,
- seq_cft(sf)->private, true);
- return 0;
-}
-
-static u64 cfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
- int off)
-{
- u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
-
- return __blkg_prfill_u64(sf, pd, sum >> 9);
-}
-
-static int cfqg_print_stat_sectors(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- cfqg_prfill_sectors, &blkcg_policy_cfq, 0, false);
- return 0;
-}
-
-static u64 cfqg_prfill_sectors_recursive(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL,
- offsetof(struct blkcg_gq, stat_bytes));
- u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
- atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
-
- return __blkg_prfill_u64(sf, pd, sum >> 9);
-}
-
-static int cfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- cfqg_prfill_sectors_recursive, &blkcg_policy_cfq, 0,
- false);
- return 0;
-}
-
-#ifdef CONFIG_DEBUG_BLK_CGROUP
-static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- struct cfq_group *cfqg = pd_to_cfqg(pd);
- u64 samples = blkg_stat_read(&cfqg->stats.avg_queue_size_samples);
- u64 v = 0;
-
- if (samples) {
- v = blkg_stat_read(&cfqg->stats.avg_queue_size_sum);
- v = div64_u64(v, samples);
- }
- __blkg_prfill_u64(sf, pd, v);
- return 0;
-}
-
-/* print avg_queue_size */
-static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- cfqg_prfill_avg_queue_size, &blkcg_policy_cfq,
- 0, false);
- return 0;
-}
-#endif /* CONFIG_DEBUG_BLK_CGROUP */
-
-static struct cftype cfq_blkcg_legacy_files[] = {
- /* on root, weight is mapped to leaf_weight */
- {
- .name = "weight_device",
- .flags = CFTYPE_ONLY_ON_ROOT,
- .seq_show = cfqg_print_leaf_weight_device,
- .write = cfqg_set_leaf_weight_device,
- },
- {
- .name = "weight",
- .flags = CFTYPE_ONLY_ON_ROOT,
- .seq_show = cfq_print_leaf_weight,
- .write_u64 = cfq_set_leaf_weight,
- },
-
- /* no such mapping necessary for !roots */
- {
- .name = "weight_device",
- .flags = CFTYPE_NOT_ON_ROOT,
- .seq_show = cfqg_print_weight_device,
- .write = cfqg_set_weight_device,
- },
- {
- .name = "weight",
- .flags = CFTYPE_NOT_ON_ROOT,
- .seq_show = cfq_print_weight,
- .write_u64 = cfq_set_weight,
- },
-
- {
- .name = "leaf_weight_device",
- .seq_show = cfqg_print_leaf_weight_device,
- .write = cfqg_set_leaf_weight_device,
- },
- {
- .name = "leaf_weight",
- .seq_show = cfq_print_leaf_weight,
- .write_u64 = cfq_set_leaf_weight,
- },
-
- /* statistics, covers only the tasks in the cfqg */
- {
- .name = "time",
- .private = offsetof(struct cfq_group, stats.time),
- .seq_show = cfqg_print_stat,
- },
- {
- .name = "sectors",
- .seq_show = cfqg_print_stat_sectors,
- },
- {
- .name = "io_service_bytes",
- .private = (unsigned long)&blkcg_policy_cfq,
- .seq_show = blkg_print_stat_bytes,
- },
- {
- .name = "io_serviced",
- .private = (unsigned long)&blkcg_policy_cfq,
- .seq_show = blkg_print_stat_ios,
- },
- {
- .name = "io_service_time",
- .private = offsetof(struct cfq_group, stats.service_time),
- .seq_show = cfqg_print_rwstat,
- },
- {
- .name = "io_wait_time",
- .private = offsetof(struct cfq_group, stats.wait_time),
- .seq_show = cfqg_print_rwstat,
- },
- {
- .name = "io_merged",
- .private = offsetof(struct cfq_group, stats.merged),
- .seq_show = cfqg_print_rwstat,
- },
- {
- .name = "io_queued",
- .private = offsetof(struct cfq_group, stats.queued),
- .seq_show = cfqg_print_rwstat,
- },
-
- /* the same statictics which cover the cfqg and its descendants */
- {
- .name = "time_recursive",
- .private = offsetof(struct cfq_group, stats.time),
- .seq_show = cfqg_print_stat_recursive,
- },
- {
- .name = "sectors_recursive",
- .seq_show = cfqg_print_stat_sectors_recursive,
- },
- {
- .name = "io_service_bytes_recursive",
- .private = (unsigned long)&blkcg_policy_cfq,
- .seq_show = blkg_print_stat_bytes_recursive,
- },
- {
- .name = "io_serviced_recursive",
- .private = (unsigned long)&blkcg_policy_cfq,
- .seq_show = blkg_print_stat_ios_recursive,
- },
- {
- .name = "io_service_time_recursive",
- .private = offsetof(struct cfq_group, stats.service_time),
- .seq_show = cfqg_print_rwstat_recursive,
- },
- {
- .name = "io_wait_time_recursive",
- .private = offsetof(struct cfq_group, stats.wait_time),
- .seq_show = cfqg_print_rwstat_recursive,
- },
- {
- .name = "io_merged_recursive",
- .private = offsetof(struct cfq_group, stats.merged),
- .seq_show = cfqg_print_rwstat_recursive,
- },
- {
- .name = "io_queued_recursive",
- .private = offsetof(struct cfq_group, stats.queued),
- .seq_show = cfqg_print_rwstat_recursive,
- },
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- {
- .name = "avg_queue_size",
- .seq_show = cfqg_print_avg_queue_size,
- },
- {
- .name = "group_wait_time",
- .private = offsetof(struct cfq_group, stats.group_wait_time),
- .seq_show = cfqg_print_stat,
- },
- {
- .name = "idle_time",
- .private = offsetof(struct cfq_group, stats.idle_time),
- .seq_show = cfqg_print_stat,
- },
- {
- .name = "empty_time",
- .private = offsetof(struct cfq_group, stats.empty_time),
- .seq_show = cfqg_print_stat,
- },
- {
- .name = "dequeue",
- .private = offsetof(struct cfq_group, stats.dequeue),
- .seq_show = cfqg_print_stat,
- },
- {
- .name = "unaccounted_time",
- .private = offsetof(struct cfq_group, stats.unaccounted_time),
- .seq_show = cfqg_print_stat,
- },
-#endif /* CONFIG_DEBUG_BLK_CGROUP */
- { } /* terminate */
-};
-
-static int cfq_print_weight_on_dfl(struct seq_file *sf, void *v)
-{
- struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
- struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
-
- seq_printf(sf, "default %u\n", cgd->weight);
- blkcg_print_blkgs(sf, blkcg, cfqg_prfill_weight_device,
- &blkcg_policy_cfq, 0, false);
- return 0;
-}
-
-static ssize_t cfq_set_weight_on_dfl(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- char *endp;
- int ret;
- u64 v;
-
- buf = strim(buf);
-
- /* "WEIGHT" or "default WEIGHT" sets the default weight */
- v = simple_strtoull(buf, &endp, 0);
- if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
- ret = __cfq_set_weight(of_css(of), v, true, false, false);
- return ret ?: nbytes;
- }
-
- /* "MAJ:MIN WEIGHT" */
- return __cfqg_set_weight_device(of, buf, nbytes, off, true, false);
-}
-
-static struct cftype cfq_blkcg_files[] = {
- {
- .name = "weight",
- .flags = CFTYPE_NOT_ON_ROOT,
- .seq_show = cfq_print_weight_on_dfl,
- .write = cfq_set_weight_on_dfl,
- },
- { } /* terminate */
-};
-
-#else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd,
- struct blkcg *blkcg)
-{
- return cfqd->root_group;
-}
-
-static inline void
-cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
- cfqq->cfqg = cfqg;
-}
-
-#endif /* GROUP_IOSCHED */
-
-/*
- * The cfqd->service_trees holds all pending cfq_queue's that have
- * requests waiting to be processed. It is sorted in the order that
- * we will service the queues.
- */
-static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- bool add_front)
-{
- struct rb_node **p, *parent;
- struct cfq_queue *__cfqq;
- u64 rb_key;
- struct cfq_rb_root *st;
- bool leftmost = true;
- int new_cfqq = 1;
- u64 now = ktime_get_ns();
-
- st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
- if (cfq_class_idle(cfqq)) {
- rb_key = CFQ_IDLE_DELAY;
- parent = st->rb_rightmost;
- if (parent && parent != &cfqq->rb_node) {
- __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
- rb_key += __cfqq->rb_key;
- } else
- rb_key += now;
- } else if (!add_front) {
- /*
- * Get our rb key offset. Subtract any residual slice
- * value carried from last service. A negative resid
- * count indicates slice overrun, and this should position
- * the next service time further away in the tree.
- */
- rb_key = cfq_slice_offset(cfqd, cfqq) + now;
- rb_key -= cfqq->slice_resid;
- cfqq->slice_resid = 0;
- } else {
- rb_key = -NSEC_PER_SEC;
- __cfqq = cfq_rb_first(st);
- rb_key += __cfqq ? __cfqq->rb_key : now;
- }
-
- if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
- new_cfqq = 0;
- /*
- * same position, nothing more to do
- */
- if (rb_key == cfqq->rb_key && cfqq->service_tree == st)
- return;
-
- cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
- cfqq->service_tree = NULL;
- }
-
- parent = NULL;
- cfqq->service_tree = st;
- p = &st->rb.rb_root.rb_node;
- while (*p) {
- parent = *p;
- __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
-
- /*
- * sort by key, that represents service time.
- */
- if (rb_key < __cfqq->rb_key)
- p = &parent->rb_left;
- else {
- p = &parent->rb_right;
- leftmost = false;
- }
- }
-
- cfqq->rb_key = rb_key;
- rb_link_node(&cfqq->rb_node, parent, p);
- rb_insert_color_cached(&cfqq->rb_node, &st->rb, leftmost);
- st->count++;
- if (add_front || !new_cfqq)
- return;
- cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
-}
-
-static struct cfq_queue *
-cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
- sector_t sector, struct rb_node **ret_parent,
- struct rb_node ***rb_link)
-{
- struct rb_node **p, *parent;
- struct cfq_queue *cfqq = NULL;
-
- parent = NULL;
- p = &root->rb_node;
- while (*p) {
- struct rb_node **n;
-
- parent = *p;
- cfqq = rb_entry(parent, struct cfq_queue, p_node);
-
- /*
- * Sort strictly based on sector. Smallest to the left,
- * largest to the right.
- */
- if (sector > blk_rq_pos(cfqq->next_rq))
- n = &(*p)->rb_right;
- else if (sector < blk_rq_pos(cfqq->next_rq))
- n = &(*p)->rb_left;
- else
- break;
- p = n;
- cfqq = NULL;
- }
-
- *ret_parent = parent;
- if (rb_link)
- *rb_link = p;
- return cfqq;
-}
-
-static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- struct rb_node **p, *parent;
- struct cfq_queue *__cfqq;
-
- if (cfqq->p_root) {
- rb_erase(&cfqq->p_node, cfqq->p_root);
- cfqq->p_root = NULL;
- }
-
- if (cfq_class_idle(cfqq))
- return;
- if (!cfqq->next_rq)
- return;
-
- cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
- __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
- blk_rq_pos(cfqq->next_rq), &parent, &p);
- if (!__cfqq) {
- rb_link_node(&cfqq->p_node, parent, p);
- rb_insert_color(&cfqq->p_node, cfqq->p_root);
- } else
- cfqq->p_root = NULL;
-}
-
-/*
- * Update cfqq's position in the service tree.
- */
-static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- /*
- * Resorting requires the cfqq to be on the RR list already.
- */
- if (cfq_cfqq_on_rr(cfqq)) {
- cfq_service_tree_add(cfqd, cfqq, 0);
- cfq_prio_tree_add(cfqd, cfqq);
- }
-}
-
-/*
- * add to busy list of queues for service, trying to be fair in ordering
- * the pending list according to last request service
- */
-static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
- BUG_ON(cfq_cfqq_on_rr(cfqq));
- cfq_mark_cfqq_on_rr(cfqq);
- cfqd->busy_queues++;
- if (cfq_cfqq_sync(cfqq))
- cfqd->busy_sync_queues++;
-
- cfq_resort_rr_list(cfqd, cfqq);
-}
-
-/*
- * Called when the cfqq no longer has requests pending, remove it from
- * the service tree.
- */
-static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
- BUG_ON(!cfq_cfqq_on_rr(cfqq));
- cfq_clear_cfqq_on_rr(cfqq);
-
- if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
- cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
- cfqq->service_tree = NULL;
- }
- if (cfqq->p_root) {
- rb_erase(&cfqq->p_node, cfqq->p_root);
- cfqq->p_root = NULL;
- }
-
- cfq_group_notify_queue_del(cfqd, cfqq->cfqg);
- BUG_ON(!cfqd->busy_queues);
- cfqd->busy_queues--;
- if (cfq_cfqq_sync(cfqq))
- cfqd->busy_sync_queues--;
-}
-
-/*
- * rb tree support functions
- */
-static void cfq_del_rq_rb(struct request *rq)
-{
- struct cfq_queue *cfqq = RQ_CFQQ(rq);
- const int sync = rq_is_sync(rq);
-
- BUG_ON(!cfqq->queued[sync]);
- cfqq->queued[sync]--;
-
- elv_rb_del(&cfqq->sort_list, rq);
-
- if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) {
- /*
- * Queue will be deleted from service tree when we actually
- * expire it later. Right now just remove it from prio tree
- * as it is empty.
- */
- if (cfqq->p_root) {
- rb_erase(&cfqq->p_node, cfqq->p_root);
- cfqq->p_root = NULL;
- }
- }
-}
-
-static void cfq_add_rq_rb(struct request *rq)
-{
- struct cfq_queue *cfqq = RQ_CFQQ(rq);
- struct cfq_data *cfqd = cfqq->cfqd;
- struct request *prev;
-
- cfqq->queued[rq_is_sync(rq)]++;
-
- elv_rb_add(&cfqq->sort_list, rq);
-
- if (!cfq_cfqq_on_rr(cfqq))
- cfq_add_cfqq_rr(cfqd, cfqq);
-
- /*
- * check if this request is a better next-serve candidate
- */
- prev = cfqq->next_rq;
- cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position);
-
- /*
- * adjust priority tree position, if ->next_rq changes
- */
- if (prev != cfqq->next_rq)
- cfq_prio_tree_add(cfqd, cfqq);
-
- BUG_ON(!cfqq->next_rq);
-}
-
-static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
-{
- elv_rb_del(&cfqq->sort_list, rq);
- cfqq->queued[rq_is_sync(rq)]--;
- cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
- cfq_add_rq_rb(rq);
- cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group,
- rq->cmd_flags);
-}
-
-static struct request *
-cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
-{
- struct task_struct *tsk = current;
- struct cfq_io_cq *cic;
- struct cfq_queue *cfqq;
-
- cic = cfq_cic_lookup(cfqd, tsk->io_context);
- if (!cic)
- return NULL;
-
- cfqq = cic_to_cfqq(cic, op_is_sync(bio->bi_opf));
- if (cfqq)
- return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio));
-
- return NULL;
-}
-
-static void cfq_activate_request(struct request_queue *q, struct request *rq)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
-
- cfqd->rq_in_driver++;
- cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
- cfqd->rq_in_driver);
-
- cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
-}
-
-static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
-
- WARN_ON(!cfqd->rq_in_driver);
- cfqd->rq_in_driver--;
- cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
- cfqd->rq_in_driver);
-}
-
-static void cfq_remove_request(struct request *rq)
-{
- struct cfq_queue *cfqq = RQ_CFQQ(rq);
-
- if (cfqq->next_rq == rq)
- cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
-
- list_del_init(&rq->queuelist);
- cfq_del_rq_rb(rq);
-
- cfqq->cfqd->rq_queued--;
- cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
- if (rq->cmd_flags & REQ_PRIO) {
- WARN_ON(!cfqq->prio_pending);
- cfqq->prio_pending--;
- }
-}
-
-static enum elv_merge cfq_merge(struct request_queue *q, struct request **req,
- struct bio *bio)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- struct request *__rq;
-
- __rq = cfq_find_rq_fmerge(cfqd, bio);
- if (__rq && elv_bio_merge_ok(__rq, bio)) {
- *req = __rq;
- return ELEVATOR_FRONT_MERGE;
- }
-
- return ELEVATOR_NO_MERGE;
-}
-
-static void cfq_merged_request(struct request_queue *q, struct request *req,
- enum elv_merge type)
-{
- if (type == ELEVATOR_FRONT_MERGE) {
- struct cfq_queue *cfqq = RQ_CFQQ(req);
-
- cfq_reposition_rq_rb(cfqq, req);
- }
-}
-
-static void cfq_bio_merged(struct request_queue *q, struct request *req,
- struct bio *bio)
-{
- cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_opf);
-}
-
-static void
-cfq_merged_requests(struct request_queue *q, struct request *rq,
- struct request *next)
-{
- struct cfq_queue *cfqq = RQ_CFQQ(rq);
- struct cfq_data *cfqd = q->elevator->elevator_data;
-
- /*
- * reposition in fifo if next is older than rq
- */
- if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
- next->fifo_time < rq->fifo_time &&
- cfqq == RQ_CFQQ(next)) {
- list_move(&rq->queuelist, &next->queuelist);
- rq->fifo_time = next->fifo_time;
- }
-
- if (cfqq->next_rq == next)
- cfqq->next_rq = rq;
- cfq_remove_request(next);
- cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags);
-
- cfqq = RQ_CFQQ(next);
- /*
- * all requests of this queue are merged to other queues, delete it
- * from the service tree. If it's the active_queue,
- * cfq_dispatch_requests() will choose to expire it or do idle
- */
- if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) &&
- cfqq != cfqd->active_queue)
- cfq_del_cfqq_rr(cfqd, cfqq);
-}
-
-static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq,
- struct bio *bio)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- bool is_sync = op_is_sync(bio->bi_opf);
- struct cfq_io_cq *cic;
- struct cfq_queue *cfqq;
-
- /*
- * Disallow merge of a sync bio into an async request.
- */
- if (is_sync && !rq_is_sync(rq))
- return false;
-
- /*
- * Lookup the cfqq that this bio will be queued with and allow
- * merge only if rq is queued there.
- */
- cic = cfq_cic_lookup(cfqd, current->io_context);
- if (!cic)
- return false;
-
- cfqq = cic_to_cfqq(cic, is_sync);
- return cfqq == RQ_CFQQ(rq);
-}
-
-static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq,
- struct request *next)
-{
- return RQ_CFQQ(rq) == RQ_CFQQ(next);
-}
-
-static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- hrtimer_try_to_cancel(&cfqd->idle_slice_timer);
- cfqg_stats_update_idle_time(cfqq->cfqg);
-}
-
-static void __cfq_set_active_queue(struct cfq_data *cfqd,
- struct cfq_queue *cfqq)
-{
- if (cfqq) {
- cfq_log_cfqq(cfqd, cfqq, "set_active wl_class:%d wl_type:%d",
- cfqd->serving_wl_class, cfqd->serving_wl_type);
- cfqg_stats_update_avg_queue_size(cfqq->cfqg);
- cfqq->slice_start = 0;
- cfqq->dispatch_start = ktime_get_ns();
- cfqq->allocated_slice = 0;
- cfqq->slice_end = 0;
- cfqq->slice_dispatch = 0;
- cfqq->nr_sectors = 0;
-
- cfq_clear_cfqq_wait_request(cfqq);
- cfq_clear_cfqq_must_dispatch(cfqq);
- cfq_clear_cfqq_must_alloc_slice(cfqq);
- cfq_clear_cfqq_fifo_expire(cfqq);
- cfq_mark_cfqq_slice_new(cfqq);
-
- cfq_del_timer(cfqd, cfqq);
- }
-
- cfqd->active_queue = cfqq;
-}
-
-/*
- * current cfqq expired its slice (or was too idle), select new one
- */
-static void
-__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- bool timed_out)
-{
- cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
-
- if (cfq_cfqq_wait_request(cfqq))
- cfq_del_timer(cfqd, cfqq);
-
- cfq_clear_cfqq_wait_request(cfqq);
- cfq_clear_cfqq_wait_busy(cfqq);
-
- /*
- * If this cfqq is shared between multiple processes, check to
- * make sure that those processes are still issuing I/Os within
- * the mean seek distance. If not, it may be time to break the
- * queues apart again.
- */
- if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
- cfq_mark_cfqq_split_coop(cfqq);
-
- /*
- * store what was left of this slice, if the queue idled/timed out
- */
- if (timed_out) {
- if (cfq_cfqq_slice_new(cfqq))
- cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
- else
- cfqq->slice_resid = cfqq->slice_end - ktime_get_ns();
- cfq_log_cfqq(cfqd, cfqq, "resid=%lld", cfqq->slice_resid);
- }
-
- cfq_group_served(cfqd, cfqq->cfqg, cfqq);
-
- if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
- cfq_del_cfqq_rr(cfqd, cfqq);
-
- cfq_resort_rr_list(cfqd, cfqq);
-
- if (cfqq == cfqd->active_queue)
- cfqd->active_queue = NULL;
-
- if (cfqd->active_cic) {
- put_io_context(cfqd->active_cic->icq.ioc);
- cfqd->active_cic = NULL;
- }
-}
-
-static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
-{
- struct cfq_queue *cfqq = cfqd->active_queue;
-
- if (cfqq)
- __cfq_slice_expired(cfqd, cfqq, timed_out);
-}
-
-/*
- * Get next queue for service. Unless we have a queue preemption,
- * we'll simply select the first cfqq in the service tree.
- */
-static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
-{
- struct cfq_rb_root *st = st_for(cfqd->serving_group,
- cfqd->serving_wl_class, cfqd->serving_wl_type);
-
- if (!cfqd->rq_queued)
- return NULL;
-
- /* There is nothing to dispatch */
- if (!st)
- return NULL;
- if (RB_EMPTY_ROOT(&st->rb.rb_root))
- return NULL;
- return cfq_rb_first(st);
-}
-
-static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
-{
- struct cfq_group *cfqg;
- struct cfq_queue *cfqq;
- int i, j;
- struct cfq_rb_root *st;
-
- if (!cfqd->rq_queued)
- return NULL;
-
- cfqg = cfq_get_next_cfqg(cfqd);
- if (!cfqg)
- return NULL;
-
- for_each_cfqg_st(cfqg, i, j, st) {
- cfqq = cfq_rb_first(st);
- if (cfqq)
- return cfqq;
- }
- return NULL;
-}
-
-/*
- * Get and set a new active queue for service.
- */
-static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
- struct cfq_queue *cfqq)
-{
- if (!cfqq)
- cfqq = cfq_get_next_queue(cfqd);
-
- __cfq_set_active_queue(cfqd, cfqq);
- return cfqq;
-}
-
-static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
- struct request *rq)
-{
- if (blk_rq_pos(rq) >= cfqd->last_position)
- return blk_rq_pos(rq) - cfqd->last_position;
- else
- return cfqd->last_position - blk_rq_pos(rq);
-}
-
-static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct request *rq)
-{
- return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
-}
-
-static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
- struct cfq_queue *cur_cfqq)
-{
- struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio];
- struct rb_node *parent, *node;
- struct cfq_queue *__cfqq;
- sector_t sector = cfqd->last_position;
-
- if (RB_EMPTY_ROOT(root))
- return NULL;
-
- /*
- * First, if we find a request starting at the end of the last
- * request, choose it.
- */
- __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
- if (__cfqq)
- return __cfqq;
-
- /*
- * If the exact sector wasn't found, the parent of the NULL leaf
- * will contain the closest sector.
- */
- __cfqq = rb_entry(parent, struct cfq_queue, p_node);
- if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
- return __cfqq;
-
- if (blk_rq_pos(__cfqq->next_rq) < sector)
- node = rb_next(&__cfqq->p_node);
- else
- node = rb_prev(&__cfqq->p_node);
- if (!node)
- return NULL;
-
- __cfqq = rb_entry(node, struct cfq_queue, p_node);
- if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
- return __cfqq;
-
- return NULL;
-}
-
-/*
- * cfqd - obvious
- * cur_cfqq - passed in so that we don't decide that the current queue is
- * closely cooperating with itself.
- *
- * So, basically we're assuming that that cur_cfqq has dispatched at least
- * one request, and that cfqd->last_position reflects a position on the disk
- * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid
- * assumption.
- */
-static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
- struct cfq_queue *cur_cfqq)
-{
- struct cfq_queue *cfqq;
-
- if (cfq_class_idle(cur_cfqq))
- return NULL;
- if (!cfq_cfqq_sync(cur_cfqq))
- return NULL;
- if (CFQQ_SEEKY(cur_cfqq))
- return NULL;
-
- /*
- * Don't search priority tree if it's the only queue in the group.
- */
- if (cur_cfqq->cfqg->nr_cfqq == 1)
- return NULL;
-
- /*
- * We should notice if some of the queues are cooperating, eg
- * working closely on the same area of the disk. In that case,
- * we can group them together and don't waste time idling.
- */
- cfqq = cfqq_close(cfqd, cur_cfqq);
- if (!cfqq)
- return NULL;
-
- /* If new queue belongs to different cfq_group, don't choose it */
- if (cur_cfqq->cfqg != cfqq->cfqg)
- return NULL;
-
- /*
- * It only makes sense to merge sync queues.
- */
- if (!cfq_cfqq_sync(cfqq))
- return NULL;
- if (CFQQ_SEEKY(cfqq))
- return NULL;
-
- /*
- * Do not merge queues of different priority classes
- */
- if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq))
- return NULL;
-
- return cfqq;
-}
-
-/*
- * Determine whether we should enforce idle window for this queue.
- */
-
-static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- enum wl_class_t wl_class = cfqq_class(cfqq);
- struct cfq_rb_root *st = cfqq->service_tree;
-
- BUG_ON(!st);
- BUG_ON(!st->count);
-
- if (!cfqd->cfq_slice_idle)
- return false;
-
- /* We never do for idle class queues. */
- if (wl_class == IDLE_WORKLOAD)
- return false;
-
- /* We do for queues that were marked with idle window flag. */
- if (cfq_cfqq_idle_window(cfqq) &&
- !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag))
- return true;
-
- /*
- * Otherwise, we do only if they are the last ones
- * in their service tree.
- */
- if (st->count == 1 && cfq_cfqq_sync(cfqq) &&
- !cfq_io_thinktime_big(cfqd, &st->ttime, false))
- return true;
- cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", st->count);
- return false;
-}
-
-static void cfq_arm_slice_timer(struct cfq_data *cfqd)
-{
- struct cfq_queue *cfqq = cfqd->active_queue;
- struct cfq_rb_root *st = cfqq->service_tree;
- struct cfq_io_cq *cic;
- u64 sl, group_idle = 0;
- u64 now = ktime_get_ns();
-
- /*
- * SSD device without seek penalty, disable idling. But only do so
- * for devices that support queuing, otherwise we still have a problem
- * with sync vs async workloads.
- */
- if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag &&
- !cfqd->cfq_group_idle)
- return;
-
- WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
- WARN_ON(cfq_cfqq_slice_new(cfqq));
-
- /*
- * idle is disabled, either manually or by past process history
- */
- if (!cfq_should_idle(cfqd, cfqq)) {
- /* no queue idling. Check for group idling */
- if (cfqd->cfq_group_idle)
- group_idle = cfqd->cfq_group_idle;
- else
- return;
- }
-
- /*
- * still active requests from this queue, don't idle
- */
- if (cfqq->dispatched)
- return;
-
- /*
- * task has exited, don't wait
- */
- cic = cfqd->active_cic;
- if (!cic || !atomic_read(&cic->icq.ioc->active_ref))
- return;
-
- /*
- * If our average think time is larger than the remaining time
- * slice, then don't idle. This avoids overrunning the allotted
- * time slice.
- */
- if (sample_valid(cic->ttime.ttime_samples) &&
- (cfqq->slice_end - now < cic->ttime.ttime_mean)) {
- cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu",
- cic->ttime.ttime_mean);
- return;
- }
-
- /*
- * There are other queues in the group or this is the only group and
- * it has too big thinktime, don't do group idle.
- */
- if (group_idle &&
- (cfqq->cfqg->nr_cfqq > 1 ||
- cfq_io_thinktime_big(cfqd, &st->ttime, true)))
- return;
-
- cfq_mark_cfqq_wait_request(cfqq);
-
- if (group_idle)
- sl = cfqd->cfq_group_idle;
- else
- sl = cfqd->cfq_slice_idle;
-
- hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl),
- HRTIMER_MODE_REL);
- cfqg_stats_set_start_idle_time(cfqq->cfqg);
- cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl,
- group_idle ? 1 : 0);
-}
-
-/*
- * Move request from internal lists to the request queue dispatch list.
- */
-static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_queue *cfqq = RQ_CFQQ(rq);
-
- cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
-
- cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
- cfq_remove_request(rq);
- cfqq->dispatched++;
- (RQ_CFQG(rq))->dispatched++;
- elv_dispatch_sort(q, rq);
-
- cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
- cfqq->nr_sectors += blk_rq_sectors(rq);
-}
-
-/*
- * return expired entry, or NULL to just start from scratch in rbtree
- */
-static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
-{
- struct request *rq = NULL;
-
- if (cfq_cfqq_fifo_expire(cfqq))
- return NULL;
-
- cfq_mark_cfqq_fifo_expire(cfqq);
-
- if (list_empty(&cfqq->fifo))
- return NULL;
-
- rq = rq_entry_fifo(cfqq->fifo.next);
- if (ktime_get_ns() < rq->fifo_time)
- rq = NULL;
-
- return rq;
-}
-
-static inline int
-cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- const int base_rq = cfqd->cfq_slice_async_rq;
-
- WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
-
- return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
-}
-
-/*
- * Must be called with the queue_lock held.
- */
-static int cfqq_process_refs(struct cfq_queue *cfqq)
-{
- int process_refs, io_refs;
-
- io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
- process_refs = cfqq->ref - io_refs;
- BUG_ON(process_refs < 0);
- return process_refs;
-}
-
-static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
-{
- int process_refs, new_process_refs;
- struct cfq_queue *__cfqq;
-
- /*
- * If there are no process references on the new_cfqq, then it is
- * unsafe to follow the ->new_cfqq chain as other cfqq's in the
- * chain may have dropped their last reference (not just their
- * last process reference).
- */
- if (!cfqq_process_refs(new_cfqq))
- return;
-
- /* Avoid a circular list and skip interim queue merges */
- while ((__cfqq = new_cfqq->new_cfqq)) {
- if (__cfqq == cfqq)
- return;
- new_cfqq = __cfqq;
- }
-
- process_refs = cfqq_process_refs(cfqq);
- new_process_refs = cfqq_process_refs(new_cfqq);
- /*
- * If the process for the cfqq has gone away, there is no
- * sense in merging the queues.
- */
- if (process_refs == 0 || new_process_refs == 0)
- return;
-
- /*
- * Merge in the direction of the lesser amount of work.
- */
- if (new_process_refs >= process_refs) {
- cfqq->new_cfqq = new_cfqq;
- new_cfqq->ref += process_refs;
- } else {
- new_cfqq->new_cfqq = cfqq;
- cfqq->ref += new_process_refs;
- }
-}
-
-static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
- struct cfq_group *cfqg, enum wl_class_t wl_class)
-{
- struct cfq_queue *queue;
- int i;
- bool key_valid = false;
- u64 lowest_key = 0;
- enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
-
- for (i = 0; i <= SYNC_WORKLOAD; ++i) {
- /* select the one with lowest rb_key */
- queue = cfq_rb_first(st_for(cfqg, wl_class, i));
- if (queue &&
- (!key_valid || queue->rb_key < lowest_key)) {
- lowest_key = queue->rb_key;
- cur_best = i;
- key_valid = true;
- }
- }
-
- return cur_best;
-}
-
-static void
-choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
-{
- u64 slice;
- unsigned count;
- struct cfq_rb_root *st;
- u64 group_slice;
- enum wl_class_t original_class = cfqd->serving_wl_class;
- u64 now = ktime_get_ns();
-
- /* Choose next priority. RT > BE > IDLE */
- if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
- cfqd->serving_wl_class = RT_WORKLOAD;
- else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
- cfqd->serving_wl_class = BE_WORKLOAD;
- else {
- cfqd->serving_wl_class = IDLE_WORKLOAD;
- cfqd->workload_expires = now + jiffies_to_nsecs(1);
- return;
- }
-
- if (original_class != cfqd->serving_wl_class)
- goto new_workload;
-
- /*
- * For RT and BE, we have to choose also the type
- * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
- * expiration time
- */
- st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type);
- count = st->count;
-
- /*
- * check workload expiration, and that we still have other queues ready
- */
- if (count && !(now > cfqd->workload_expires))
- return;
-
-new_workload:
- /* otherwise select new workload type */
- cfqd->serving_wl_type = cfq_choose_wl_type(cfqd, cfqg,
- cfqd->serving_wl_class);
- st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type);
- count = st->count;
-
- /*
- * the workload slice is computed as a fraction of target latency
- * proportional to the number of queues in that workload, over
- * all the queues in the same priority class
- */
- group_slice = cfq_group_slice(cfqd, cfqg);
-
- slice = div_u64(group_slice * count,
- max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class],
- cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd,
- cfqg)));
-
- if (cfqd->serving_wl_type == ASYNC_WORKLOAD) {
- u64 tmp;
-
- /*
- * Async queues are currently system wide. Just taking
- * proportion of queues with-in same group will lead to higher
- * async ratio system wide as generally root group is going
- * to have higher weight. A more accurate thing would be to
- * calculate system wide asnc/sync ratio.
- */
- tmp = cfqd->cfq_target_latency *
- cfqg_busy_async_queues(cfqd, cfqg);
- tmp = div_u64(tmp, cfqd->busy_queues);
- slice = min_t(u64, slice, tmp);
-
- /* async workload slice is scaled down according to
- * the sync/async slice ratio. */
- slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]);
- } else
- /* sync workload slice is at least 2 * cfq_slice_idle */
- slice = max(slice, 2 * cfqd->cfq_slice_idle);
-
- slice = max_t(u64, slice, CFQ_MIN_TT);
- cfq_log(cfqd, "workload slice:%llu", slice);
- cfqd->workload_expires = now + slice;
-}
-
-static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
-{
- struct cfq_rb_root *st = &cfqd->grp_service_tree;
- struct cfq_group *cfqg;
-
- if (RB_EMPTY_ROOT(&st->rb.rb_root))
- return NULL;
- cfqg = cfq_rb_first_group(st);
- update_min_vdisktime(st);
- return cfqg;
-}
-
-static void cfq_choose_cfqg(struct cfq_data *cfqd)
-{
- struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
- u64 now = ktime_get_ns();
-
- cfqd->serving_group = cfqg;
-
- /* Restore the workload type data */
- if (cfqg->saved_wl_slice) {
- cfqd->workload_expires = now + cfqg->saved_wl_slice;
- cfqd->serving_wl_type = cfqg->saved_wl_type;
- cfqd->serving_wl_class = cfqg->saved_wl_class;
- } else
- cfqd->workload_expires = now - 1;
-
- choose_wl_class_and_type(cfqd, cfqg);
-}
-
-/*
- * Select a queue for service. If we have a current active queue,
- * check whether to continue servicing it, or retrieve and set a new one.
- */
-static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
-{
- struct cfq_queue *cfqq, *new_cfqq = NULL;
- u64 now = ktime_get_ns();
-
- cfqq = cfqd->active_queue;
- if (!cfqq)
- goto new_queue;
-
- if (!cfqd->rq_queued)
- return NULL;
-
- /*
- * We were waiting for group to get backlogged. Expire the queue
- */
- if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list))
- goto expire;
-
- /*
- * The active queue has run out of time, expire it and select new.
- */
- if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) {
- /*
- * If slice had not expired at the completion of last request
- * we might not have turned on wait_busy flag. Don't expire
- * the queue yet. Allow the group to get backlogged.
- *
- * The very fact that we have used the slice, that means we
- * have been idling all along on this queue and it should be
- * ok to wait for this request to complete.
- */
- if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list)
- && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
- cfqq = NULL;
- goto keep_queue;
- } else
- goto check_group_idle;
- }
-
- /*
- * The active queue has requests and isn't expired, allow it to
- * dispatch.
- */
- if (!RB_EMPTY_ROOT(&cfqq->sort_list))
- goto keep_queue;
-
- /*
- * If another queue has a request waiting within our mean seek
- * distance, let it run. The expire code will check for close
- * cooperators and put the close queue at the front of the service
- * tree. If possible, merge the expiring queue with the new cfqq.
- */
- new_cfqq = cfq_close_cooperator(cfqd, cfqq);
- if (new_cfqq) {
- if (!cfqq->new_cfqq)
- cfq_setup_merge(cfqq, new_cfqq);
- goto expire;
- }
-
- /*
- * No requests pending. If the active queue still has requests in
- * flight or is idling for a new request, allow either of these
- * conditions to happen (or time out) before selecting a new queue.
- */
- if (hrtimer_active(&cfqd->idle_slice_timer)) {
- cfqq = NULL;
- goto keep_queue;
- }
-
- /*
- * This is a deep seek queue, but the device is much faster than
- * the queue can deliver, don't idle
- **/
- if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
- (cfq_cfqq_slice_new(cfqq) ||
- (cfqq->slice_end - now > now - cfqq->slice_start))) {
- cfq_clear_cfqq_deep(cfqq);
- cfq_clear_cfqq_idle_window(cfqq);
- }
-
- if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
- cfqq = NULL;
- goto keep_queue;
- }
-
- /*
- * If group idle is enabled and there are requests dispatched from
- * this group, wait for requests to complete.
- */
-check_group_idle:
- if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
- cfqq->cfqg->dispatched &&
- !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
- cfqq = NULL;
- goto keep_queue;
- }
-
-expire:
- cfq_slice_expired(cfqd, 0);
-new_queue:
- /*
- * Current queue expired. Check if we have to switch to a new
- * service tree
- */
- if (!new_cfqq)
- cfq_choose_cfqg(cfqd);
-
- cfqq = cfq_set_active_queue(cfqd, new_cfqq);
-keep_queue:
- return cfqq;
-}
-
-static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
-{
- int dispatched = 0;
-
- while (cfqq->next_rq) {
- cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
- dispatched++;
- }
-
- BUG_ON(!list_empty(&cfqq->fifo));
-
- /* By default cfqq is not expired if it is empty. Do it explicitly */
- __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
- return dispatched;
-}
-
-/*
- * Drain our current requests. Used for barriers and when switching
- * io schedulers on-the-fly.
- */
-static int cfq_forced_dispatch(struct cfq_data *cfqd)
-{
- struct cfq_queue *cfqq;
- int dispatched = 0;
-
- /* Expire the timeslice of the current active queue first */
- cfq_slice_expired(cfqd, 0);
- while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
- __cfq_set_active_queue(cfqd, cfqq);
- dispatched += __cfq_forced_dispatch_cfqq(cfqq);
- }
-
- BUG_ON(cfqd->busy_queues);
-
- cfq_log(cfqd, "forced_dispatch=%d", dispatched);
- return dispatched;
-}
-
-static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
- struct cfq_queue *cfqq)
-{
- u64 now = ktime_get_ns();
-
- /* the queue hasn't finished any request, can't estimate */
- if (cfq_cfqq_slice_new(cfqq))
- return true;
- if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end)
- return true;
-
- return false;
-}
-
-static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- unsigned int max_dispatch;
-
- if (cfq_cfqq_must_dispatch(cfqq))
- return true;
-
- /*
- * Drain async requests before we start sync IO
- */
- if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC])
- return false;
-
- /*
- * If this is an async queue and we have sync IO in flight, let it wait
- */
- if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
- return false;
-
- max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1);
- if (cfq_class_idle(cfqq))
- max_dispatch = 1;
-
- /*
- * Does this cfqq already have too much IO in flight?
- */
- if (cfqq->dispatched >= max_dispatch) {
- bool promote_sync = false;
- /*
- * idle queue must always only have a single IO in flight
- */
- if (cfq_class_idle(cfqq))
- return false;
-
- /*
- * If there is only one sync queue
- * we can ignore async queue here and give the sync
- * queue no dispatch limit. The reason is a sync queue can
- * preempt async queue, limiting the sync queue doesn't make
- * sense. This is useful for aiostress test.
- */
- if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1)
- promote_sync = true;
-
- /*
- * We have other queues, don't allow more IO from this one
- */
- if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) &&
- !promote_sync)
- return false;
-
- /*
- * Sole queue user, no limit
- */
- if (cfqd->busy_queues == 1 || promote_sync)
- max_dispatch = -1;
- else
- /*
- * Normally we start throttling cfqq when cfq_quantum/2
- * requests have been dispatched. But we can drive
- * deeper queue depths at the beginning of slice
- * subjected to upper limit of cfq_quantum.
- * */
- max_dispatch = cfqd->cfq_quantum;
- }
-
- /*
- * Async queues must wait a bit before being allowed dispatch.
- * We also ramp up the dispatch depth gradually for async IO,
- * based on the last sync IO we serviced
- */
- if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
- u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync;
- unsigned int depth;
-
- depth = div64_u64(last_sync, cfqd->cfq_slice[1]);
- if (!depth && !cfqq->dispatched)
- depth = 1;
- if (depth < max_dispatch)
- max_dispatch = depth;
- }
-
- /*
- * If we're below the current max, allow a dispatch
- */
- return cfqq->dispatched < max_dispatch;
-}
-
-/*
- * Dispatch a request from cfqq, moving them to the request queue
- * dispatch list.
- */
-static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- struct request *rq;
-
- BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
-
- rq = cfq_check_fifo(cfqq);
- if (rq)
- cfq_mark_cfqq_must_dispatch(cfqq);
-
- if (!cfq_may_dispatch(cfqd, cfqq))
- return false;
-
- /*
- * follow expired path, else get first next available
- */
- if (!rq)
- rq = cfqq->next_rq;
- else
- cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
-
- /*
- * insert request into driver dispatch list
- */
- cfq_dispatch_insert(cfqd->queue, rq);
-
- if (!cfqd->active_cic) {
- struct cfq_io_cq *cic = RQ_CIC(rq);
-
- atomic_long_inc(&cic->icq.ioc->refcount);
- cfqd->active_cic = cic;
- }
-
- return true;
-}
-
-/*
- * Find the cfqq that we need to service and move a request from that to the
- * dispatch list
- */
-static int cfq_dispatch_requests(struct request_queue *q, int force)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_queue *cfqq;
-
- if (!cfqd->busy_queues)
- return 0;
-
- if (unlikely(force))
- return cfq_forced_dispatch(cfqd);
-
- cfqq = cfq_select_queue(cfqd);
- if (!cfqq)
- return 0;
-
- /*
- * Dispatch a request from this cfqq, if it is allowed
- */
- if (!cfq_dispatch_request(cfqd, cfqq))
- return 0;
-
- cfqq->slice_dispatch++;
- cfq_clear_cfqq_must_dispatch(cfqq);
-
- /*
- * expire an async queue immediately if it has used up its slice. idle
- * queue always expire after 1 dispatch round.
- */
- if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
- cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
- cfq_class_idle(cfqq))) {
- cfqq->slice_end = ktime_get_ns() + 1;
- cfq_slice_expired(cfqd, 0);
- }
-
- cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
- return 1;
-}
-
-/*
- * task holds one reference to the queue, dropped when task exits. each rq
- * in-flight on this queue also holds a reference, dropped when rq is freed.
- *
- * Each cfq queue took a reference on the parent group. Drop it now.
- * queue lock must be held here.
- */
-static void cfq_put_queue(struct cfq_queue *cfqq)
-{
- struct cfq_data *cfqd = cfqq->cfqd;
- struct cfq_group *cfqg;
-
- BUG_ON(cfqq->ref <= 0);
-
- cfqq->ref--;
- if (cfqq->ref)
- return;
-
- cfq_log_cfqq(cfqd, cfqq, "put_queue");
- BUG_ON(rb_first(&cfqq->sort_list));
- BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
- cfqg = cfqq->cfqg;
-
- if (unlikely(cfqd->active_queue == cfqq)) {
- __cfq_slice_expired(cfqd, cfqq, 0);
- cfq_schedule_dispatch(cfqd);
- }
-
- BUG_ON(cfq_cfqq_on_rr(cfqq));
- kmem_cache_free(cfq_pool, cfqq);
- cfqg_put(cfqg);
-}
-
-static void cfq_put_cooperator(struct cfq_queue *cfqq)
-{
- struct cfq_queue *__cfqq, *next;
-
- /*
- * If this queue was scheduled to merge with another queue, be
- * sure to drop the reference taken on that queue (and others in
- * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs.
- */
- __cfqq = cfqq->new_cfqq;
- while (__cfqq) {
- if (__cfqq == cfqq) {
- WARN(1, "cfqq->new_cfqq loop detected\n");
- break;
- }
- next = __cfqq->new_cfqq;
- cfq_put_queue(__cfqq);
- __cfqq = next;
- }
-}
-
-static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- if (unlikely(cfqq == cfqd->active_queue)) {
- __cfq_slice_expired(cfqd, cfqq, 0);
- cfq_schedule_dispatch(cfqd);
- }
-
- cfq_put_cooperator(cfqq);
-
- cfq_put_queue(cfqq);
-}
-
-static void cfq_init_icq(struct io_cq *icq)
-{
- struct cfq_io_cq *cic = icq_to_cic(icq);
-
- cic->ttime.last_end_request = ktime_get_ns();
-}
-
-static void cfq_exit_icq(struct io_cq *icq)
-{
- struct cfq_io_cq *cic = icq_to_cic(icq);
- struct cfq_data *cfqd = cic_to_cfqd(cic);
-
- if (cic_to_cfqq(cic, false)) {
- cfq_exit_cfqq(cfqd, cic_to_cfqq(cic, false));
- cic_set_cfqq(cic, NULL, false);
- }
-
- if (cic_to_cfqq(cic, true)) {
- cfq_exit_cfqq(cfqd, cic_to_cfqq(cic, true));
- cic_set_cfqq(cic, NULL, true);
- }
-}
-
-static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic)
-{
- struct task_struct *tsk = current;
- int ioprio_class;
-
- if (!cfq_cfqq_prio_changed(cfqq))
- return;
-
- ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
- switch (ioprio_class) {
- default:
- printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
- /* fall through */
- case IOPRIO_CLASS_NONE:
- /*
- * no prio set, inherit CPU scheduling settings
- */
- cfqq->ioprio = task_nice_ioprio(tsk);
- cfqq->ioprio_class = task_nice_ioclass(tsk);
- break;
- case IOPRIO_CLASS_RT:
- cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
- cfqq->ioprio_class = IOPRIO_CLASS_RT;
- break;
- case IOPRIO_CLASS_BE:
- cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
- cfqq->ioprio_class = IOPRIO_CLASS_BE;
- break;
- case IOPRIO_CLASS_IDLE:
- cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
- cfqq->ioprio = 7;
- cfq_clear_cfqq_idle_window(cfqq);
- break;
- }
-
- /*
- * keep track of original prio settings in case we have to temporarily
- * elevate the priority of this queue
- */
- cfqq->org_ioprio = cfqq->ioprio;
- cfqq->org_ioprio_class = cfqq->ioprio_class;
- cfq_clear_cfqq_prio_changed(cfqq);
-}
-
-static void check_ioprio_changed(struct cfq_io_cq *cic, struct bio *bio)
-{
- int ioprio = cic->icq.ioc->ioprio;
- struct cfq_data *cfqd = cic_to_cfqd(cic);
- struct cfq_queue *cfqq;
-
- /*
- * Check whether ioprio has changed. The condition may trigger
- * spuriously on a newly created cic but there's no harm.
- */
- if (unlikely(!cfqd) || likely(cic->ioprio == ioprio))
- return;
-
- cfqq = cic_to_cfqq(cic, false);
- if (cfqq) {
- cfq_put_queue(cfqq);
- cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, bio);
- cic_set_cfqq(cic, cfqq, false);
- }
-
- cfqq = cic_to_cfqq(cic, true);
- if (cfqq)
- cfq_mark_cfqq_prio_changed(cfqq);
-
- cic->ioprio = ioprio;
-}
-
-static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- pid_t pid, bool is_sync)
-{
- RB_CLEAR_NODE(&cfqq->rb_node);
- RB_CLEAR_NODE(&cfqq->p_node);
- INIT_LIST_HEAD(&cfqq->fifo);
-
- cfqq->ref = 0;
- cfqq->cfqd = cfqd;
-
- cfq_mark_cfqq_prio_changed(cfqq);
-
- if (is_sync) {
- if (!cfq_class_idle(cfqq))
- cfq_mark_cfqq_idle_window(cfqq);
- cfq_mark_cfqq_sync(cfqq);
- }
- cfqq->pid = pid;
-}
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
-{
- struct cfq_data *cfqd = cic_to_cfqd(cic);
- struct cfq_queue *cfqq;
- uint64_t serial_nr;
-
- rcu_read_lock();
- serial_nr = bio_blkcg(bio)->css.serial_nr;
- rcu_read_unlock();
-
- /*
- * Check whether blkcg has changed. The condition may trigger
- * spuriously on a newly created cic but there's no harm.
- */
- if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
- return;
-
- /*
- * Drop reference to queues. New queues will be assigned in new
- * group upon arrival of fresh requests.
- */
- cfqq = cic_to_cfqq(cic, false);
- if (cfqq) {
- cfq_log_cfqq(cfqd, cfqq, "changed cgroup");
- cic_set_cfqq(cic, NULL, false);
- cfq_put_queue(cfqq);
- }
-
- cfqq = cic_to_cfqq(cic, true);
- if (cfqq) {
- cfq_log_cfqq(cfqd, cfqq, "changed cgroup");
- cic_set_cfqq(cic, NULL, true);
- cfq_put_queue(cfqq);
- }
-
- cic->blkcg_serial_nr = serial_nr;
-}
-#else
-static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
-{
-}
-#endif /* CONFIG_CFQ_GROUP_IOSCHED */
-
-static struct cfq_queue **
-cfq_async_queue_prio(struct cfq_group *cfqg, int ioprio_class, int ioprio)
-{
- switch (ioprio_class) {
- case IOPRIO_CLASS_RT:
- return &cfqg->async_cfqq[0][ioprio];
- case IOPRIO_CLASS_NONE:
- ioprio = IOPRIO_NORM;
- /* fall through */
- case IOPRIO_CLASS_BE:
- return &cfqg->async_cfqq[1][ioprio];
- case IOPRIO_CLASS_IDLE:
- return &cfqg->async_idle_cfqq;
- default:
- BUG();
- }
-}
-
-static struct cfq_queue *
-cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
- struct bio *bio)
-{
- int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
- int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
- struct cfq_queue **async_cfqq = NULL;
- struct cfq_queue *cfqq;
- struct cfq_group *cfqg;
-
- rcu_read_lock();
- cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
- if (!cfqg) {
- cfqq = &cfqd->oom_cfqq;
- goto out;
- }
-
- if (!is_sync) {
- if (!ioprio_valid(cic->ioprio)) {
- struct task_struct *tsk = current;
- ioprio = task_nice_ioprio(tsk);
- ioprio_class = task_nice_ioclass(tsk);
- }
- async_cfqq = cfq_async_queue_prio(cfqg, ioprio_class, ioprio);
- cfqq = *async_cfqq;
- if (cfqq)
- goto out;
- }
-
- cfqq = kmem_cache_alloc_node(cfq_pool,
- GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN,
- cfqd->queue->node);
- if (!cfqq) {
- cfqq = &cfqd->oom_cfqq;
- goto out;
- }
-
- /* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */
- cfqq->ioprio_class = IOPRIO_CLASS_NONE;
- cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
- cfq_init_prio_data(cfqq, cic);
- cfq_link_cfqq_cfqg(cfqq, cfqg);
- cfq_log_cfqq(cfqd, cfqq, "alloced");
-
- if (async_cfqq) {
- /* a new async queue is created, pin and remember */
- cfqq->ref++;
- *async_cfqq = cfqq;
- }
-out:
- cfqq->ref++;
- rcu_read_unlock();
- return cfqq;
-}
-
-static void
-__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle)
-{
- u64 elapsed = ktime_get_ns() - ttime->last_end_request;
- elapsed = min(elapsed, 2UL * slice_idle);
-
- ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
- ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
- ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
- ttime->ttime_samples);
-}
-
-static void
-cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct cfq_io_cq *cic)
-{
- if (cfq_cfqq_sync(cfqq)) {
- __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
- __cfq_update_io_thinktime(&cfqq->service_tree->ttime,
- cfqd->cfq_slice_idle);
- }
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
-#endif
-}
-
-static void
-cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct request *rq)
-{
- sector_t sdist = 0;
- sector_t n_sec = blk_rq_sectors(rq);
- if (cfqq->last_request_pos) {
- if (cfqq->last_request_pos < blk_rq_pos(rq))
- sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
- else
- sdist = cfqq->last_request_pos - blk_rq_pos(rq);
- }
-
- cfqq->seek_history <<= 1;
- if (blk_queue_nonrot(cfqd->queue))
- cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT);
- else
- cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
-}
-
-static inline bool req_noidle(struct request *req)
-{
- return req_op(req) == REQ_OP_WRITE &&
- (req->cmd_flags & (REQ_SYNC | REQ_IDLE)) == REQ_SYNC;
-}
-
-/*
- * Disable idle window if the process thinks too long or seeks so much that
- * it doesn't matter
- */
-static void
-cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct cfq_io_cq *cic)
-{
- int old_idle, enable_idle;
-
- /*
- * Don't idle for async or idle io prio class
- */
- if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
- return;
-
- enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
-
- if (cfqq->queued[0] + cfqq->queued[1] >= 4)
- cfq_mark_cfqq_deep(cfqq);
-
- if (cfqq->next_rq && req_noidle(cfqq->next_rq))
- enable_idle = 0;
- else if (!atomic_read(&cic->icq.ioc->active_ref) ||
- !cfqd->cfq_slice_idle ||
- (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
- enable_idle = 0;
- else if (sample_valid(cic->ttime.ttime_samples)) {
- if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
- enable_idle = 0;
- else
- enable_idle = 1;
- }
-
- if (old_idle != enable_idle) {
- cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
- if (enable_idle)
- cfq_mark_cfqq_idle_window(cfqq);
- else
- cfq_clear_cfqq_idle_window(cfqq);
- }
-}
-
-/*
- * Check if new_cfqq should preempt the currently active queue. Return 0 for
- * no or if we aren't sure, a 1 will cause a preempt.
- */
-static bool
-cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
- struct request *rq)
-{
- struct cfq_queue *cfqq;
-
- cfqq = cfqd->active_queue;
- if (!cfqq)
- return false;
-
- if (cfq_class_idle(new_cfqq))
- return false;
-
- if (cfq_class_idle(cfqq))
- return true;
-
- /*
- * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice.
- */
- if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq))
- return false;
-
- /*
- * if the new request is sync, but the currently running queue is
- * not, let the sync request have priority.
- */
- if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq) && !cfq_cfqq_must_dispatch(cfqq))
- return true;
-
- /*
- * Treat ancestors of current cgroup the same way as current cgroup.
- * For anybody else we disallow preemption to guarantee service
- * fairness among cgroups.
- */
- if (!cfqg_is_descendant(cfqq->cfqg, new_cfqq->cfqg))
- return false;
-
- if (cfq_slice_used(cfqq))
- return true;
-
- /*
- * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
- */
- if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
- return true;
-
- WARN_ON_ONCE(cfqq->ioprio_class != new_cfqq->ioprio_class);
- /* Allow preemption only if we are idling on sync-noidle tree */
- if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD &&
- cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
- RB_EMPTY_ROOT(&cfqq->sort_list))
- return true;
-
- /*
- * So both queues are sync. Let the new request get disk time if
- * it's a metadata request and the current queue is doing regular IO.
- */
- if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
- return true;
-
- /* An idle queue should not be idle now for some reason */
- if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
- return true;
-
- if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
- return false;
-
- /*
- * if this request is as-good as one we would expect from the
- * current cfqq, let it preempt
- */
- if (cfq_rq_close(cfqd, cfqq, rq))
- return true;
-
- return false;
-}
-
-/*
- * cfqq preempts the active queue. if we allowed preempt with no slice left,
- * let it have half of its nominal slice.
- */
-static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- enum wl_type_t old_type = cfqq_type(cfqd->active_queue);
-
- cfq_log_cfqq(cfqd, cfqq, "preempt");
- cfq_slice_expired(cfqd, 1);
-
- /*
- * workload type is changed, don't save slice, otherwise preempt
- * doesn't happen
- */
- if (old_type != cfqq_type(cfqq))
- cfqq->cfqg->saved_wl_slice = 0;
-
- /*
- * Put the new queue at the front of the of the current list,
- * so we know that it will be selected next.
- */
- BUG_ON(!cfq_cfqq_on_rr(cfqq));
-
- cfq_service_tree_add(cfqd, cfqq, 1);
-
- cfqq->slice_end = 0;
- cfq_mark_cfqq_slice_new(cfqq);
-}
-
-/*
- * Called when a new fs request (rq) is added (to cfqq). Check if there's
- * something we should do about it
- */
-static void
-cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct request *rq)
-{
- struct cfq_io_cq *cic = RQ_CIC(rq);
-
- cfqd->rq_queued++;
- if (rq->cmd_flags & REQ_PRIO)
- cfqq->prio_pending++;
-
- cfq_update_io_thinktime(cfqd, cfqq, cic);
- cfq_update_io_seektime(cfqd, cfqq, rq);
- cfq_update_idle_window(cfqd, cfqq, cic);
-
- cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
-
- if (cfqq == cfqd->active_queue) {
- /*
- * Remember that we saw a request from this process, but
- * don't start queuing just yet. Otherwise we risk seeing lots
- * of tiny requests, because we disrupt the normal plugging
- * and merging. If the request is already larger than a single
- * page, let it rip immediately. For that case we assume that
- * merging is already done. Ditto for a busy system that
- * has other work pending, don't risk delaying until the
- * idle timer unplug to continue working.
- */
- if (cfq_cfqq_wait_request(cfqq)) {
- if (blk_rq_bytes(rq) > PAGE_SIZE ||
- cfqd->busy_queues > 1) {
- cfq_del_timer(cfqd, cfqq);
- cfq_clear_cfqq_wait_request(cfqq);
- __blk_run_queue(cfqd->queue);
- } else {
- cfqg_stats_update_idle_time(cfqq->cfqg);
- cfq_mark_cfqq_must_dispatch(cfqq);
- }
- }
- } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
- /*
- * not the active queue - expire current slice if it is
- * idle and has expired it's mean thinktime or this new queue
- * has some old slice time left and is of higher priority or
- * this new queue is RT and the current one is BE
- */
- cfq_preempt_queue(cfqd, cfqq);
- __blk_run_queue(cfqd->queue);
- }
-}
-
-static void cfq_insert_request(struct request_queue *q, struct request *rq)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_queue *cfqq = RQ_CFQQ(rq);
-
- cfq_log_cfqq(cfqd, cfqq, "insert_request");
- cfq_init_prio_data(cfqq, RQ_CIC(rq));
-
- rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
- list_add_tail(&rq->queuelist, &cfqq->fifo);
- cfq_add_rq_rb(rq);
- cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group,
- rq->cmd_flags);
- cfq_rq_enqueued(cfqd, cfqq, rq);
-}
-
-/*
- * Update hw_tag based on peak queue depth over 50 samples under
- * sufficient load.
- */
-static void cfq_update_hw_tag(struct cfq_data *cfqd)
-{
- struct cfq_queue *cfqq = cfqd->active_queue;
-
- if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth)
- cfqd->hw_tag_est_depth = cfqd->rq_in_driver;
-
- if (cfqd->hw_tag == 1)
- return;
-
- if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
- cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
- return;
-
- /*
- * If active queue hasn't enough requests and can idle, cfq might not
- * dispatch sufficient requests to hardware. Don't zero hw_tag in this
- * case
- */
- if (cfqq && cfq_cfqq_idle_window(cfqq) &&
- cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
- CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN)
- return;
-
- if (cfqd->hw_tag_samples++ < 50)
- return;
-
- if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN)
- cfqd->hw_tag = 1;
- else
- cfqd->hw_tag = 0;
-}
-
-static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
- struct cfq_io_cq *cic = cfqd->active_cic;
- u64 now = ktime_get_ns();
-
- /* If the queue already has requests, don't wait */
- if (!RB_EMPTY_ROOT(&cfqq->sort_list))
- return false;
-
- /* If there are other queues in the group, don't wait */
- if (cfqq->cfqg->nr_cfqq > 1)
- return false;
-
- /* the only queue in the group, but think time is big */
- if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
- return false;
-
- if (cfq_slice_used(cfqq))
- return true;
-
- /* if slice left is less than think time, wait busy */
- if (cic && sample_valid(cic->ttime.ttime_samples)
- && (cfqq->slice_end - now < cic->ttime.ttime_mean))
- return true;
-
- /*
- * If think times is less than a jiffy than ttime_mean=0 and above
- * will not be true. It might happen that slice has not expired yet
- * but will expire soon (4-5 ns) during select_queue(). To cover the
- * case where think time is less than a jiffy, mark the queue wait
- * busy if only 1 jiffy is left in the slice.
- */
- if (cfqq->slice_end - now <= jiffies_to_nsecs(1))
- return true;
-
- return false;
-}
-
-static void cfq_completed_request(struct request_queue *q, struct request *rq)
-{
- struct cfq_queue *cfqq = RQ_CFQQ(rq);
- struct cfq_data *cfqd = cfqq->cfqd;
- const int sync = rq_is_sync(rq);
- u64 now = ktime_get_ns();
-
- cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", req_noidle(rq));
-
- cfq_update_hw_tag(cfqd);
-
- WARN_ON(!cfqd->rq_in_driver);
- WARN_ON(!cfqq->dispatched);
- cfqd->rq_in_driver--;
- cfqq->dispatched--;
- (RQ_CFQG(rq))->dispatched--;
- cfqg_stats_update_completion(cfqq->cfqg, rq->start_time_ns,
- rq->io_start_time_ns, rq->cmd_flags);
-
- cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
-
- if (sync) {
- struct cfq_rb_root *st;
-
- RQ_CIC(rq)->ttime.last_end_request = now;
-
- if (cfq_cfqq_on_rr(cfqq))
- st = cfqq->service_tree;
- else
- st = st_for(cfqq->cfqg, cfqq_class(cfqq),
- cfqq_type(cfqq));
-
- st->ttime.last_end_request = now;
- if (rq->start_time_ns + cfqd->cfq_fifo_expire[1] <= now)
- cfqd->last_delayed_sync = now;
- }
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- cfqq->cfqg->ttime.last_end_request = now;
-#endif
-
- /*
- * If this is the active queue, check if it needs to be expired,
- * or if we want to idle in case it has no pending requests.
- */
- if (cfqd->active_queue == cfqq) {
- const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
-
- if (cfq_cfqq_slice_new(cfqq)) {
- cfq_set_prio_slice(cfqd, cfqq);
- cfq_clear_cfqq_slice_new(cfqq);
- }
-
- /*
- * Should we wait for next request to come in before we expire
- * the queue.
- */
- if (cfq_should_wait_busy(cfqd, cfqq)) {
- u64 extend_sl = cfqd->cfq_slice_idle;
- if (!cfqd->cfq_slice_idle)
- extend_sl = cfqd->cfq_group_idle;
- cfqq->slice_end = now + extend_sl;
- cfq_mark_cfqq_wait_busy(cfqq);
- cfq_log_cfqq(cfqd, cfqq, "will busy wait");
- }
-
- /*
- * Idling is not enabled on:
- * - expired queues
- * - idle-priority queues
- * - async queues
- * - queues with still some requests queued
- * - when there is a close cooperator
- */
- if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
- cfq_slice_expired(cfqd, 1);
- else if (sync && cfqq_empty &&
- !cfq_close_cooperator(cfqd, cfqq)) {
- cfq_arm_slice_timer(cfqd);
- }
- }
-
- if (!cfqd->rq_in_driver)
- cfq_schedule_dispatch(cfqd);
-}
-
-static void cfqq_boost_on_prio(struct cfq_queue *cfqq, unsigned int op)
-{
- /*
- * If REQ_PRIO is set, boost class and prio level, if it's below
- * BE/NORM. If prio is not set, restore the potentially boosted
- * class/prio level.
- */
- if (!(op & REQ_PRIO)) {
- cfqq->ioprio_class = cfqq->org_ioprio_class;
- cfqq->ioprio = cfqq->org_ioprio;
- } else {
- if (cfq_class_idle(cfqq))
- cfqq->ioprio_class = IOPRIO_CLASS_BE;
- if (cfqq->ioprio > IOPRIO_NORM)
- cfqq->ioprio = IOPRIO_NORM;
- }
-}
-
-static inline int __cfq_may_queue(struct cfq_queue *cfqq)
-{
- if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
- cfq_mark_cfqq_must_alloc_slice(cfqq);
- return ELV_MQUEUE_MUST;
- }
-
- return ELV_MQUEUE_MAY;
-}
-
-static int cfq_may_queue(struct request_queue *q, unsigned int op)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- struct task_struct *tsk = current;
- struct cfq_io_cq *cic;
- struct cfq_queue *cfqq;
-
- /*
- * don't force setup of a queue from here, as a call to may_queue
- * does not necessarily imply that a request actually will be queued.
- * so just lookup a possibly existing queue, or return 'may queue'
- * if that fails
- */
- cic = cfq_cic_lookup(cfqd, tsk->io_context);
- if (!cic)
- return ELV_MQUEUE_MAY;
-
- cfqq = cic_to_cfqq(cic, op_is_sync(op));
- if (cfqq) {
- cfq_init_prio_data(cfqq, cic);
- cfqq_boost_on_prio(cfqq, op);
-
- return __cfq_may_queue(cfqq);
- }
-
- return ELV_MQUEUE_MAY;
-}
-
-/*
- * queue lock held here
- */
-static void cfq_put_request(struct request *rq)
-{
- struct cfq_queue *cfqq = RQ_CFQQ(rq);
-
- if (cfqq) {
- const int rw = rq_data_dir(rq);
-
- BUG_ON(!cfqq->allocated[rw]);
- cfqq->allocated[rw]--;
-
- /* Put down rq reference on cfqg */
- cfqg_put(RQ_CFQG(rq));
- rq->elv.priv[0] = NULL;
- rq->elv.priv[1] = NULL;
-
- cfq_put_queue(cfqq);
- }
-}
-
-static struct cfq_queue *
-cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic,
- struct cfq_queue *cfqq)
-{
- cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
- cic_set_cfqq(cic, cfqq->new_cfqq, 1);
- cfq_mark_cfqq_coop(cfqq->new_cfqq);
- cfq_put_queue(cfqq);
- return cic_to_cfqq(cic, 1);
-}
-
-/*
- * Returns NULL if a new cfqq should be allocated, or the old cfqq if this
- * was the last process referring to said cfqq.
- */
-static struct cfq_queue *
-split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
-{
- if (cfqq_process_refs(cfqq) == 1) {
- cfqq->pid = current->pid;
- cfq_clear_cfqq_coop(cfqq);
- cfq_clear_cfqq_split_coop(cfqq);
- return cfqq;
- }
-
- cic_set_cfqq(cic, NULL, 1);
-
- cfq_put_cooperator(cfqq);
-
- cfq_put_queue(cfqq);
- return NULL;
-}
-/*
- * Allocate cfq data structures associated with this request.
- */
-static int
-cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
- gfp_t gfp_mask)
-{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
- const int rw = rq_data_dir(rq);
- const bool is_sync = rq_is_sync(rq);
- struct cfq_queue *cfqq;
-
- spin_lock_irq(q->queue_lock);
-
- check_ioprio_changed(cic, bio);
- check_blkcg_changed(cic, bio);
-new_queue:
- cfqq = cic_to_cfqq(cic, is_sync);
- if (!cfqq || cfqq == &cfqd->oom_cfqq) {
- if (cfqq)
- cfq_put_queue(cfqq);
- cfqq = cfq_get_queue(cfqd, is_sync, cic, bio);
- cic_set_cfqq(cic, cfqq, is_sync);
- } else {
- /*
- * If the queue was seeky for too long, break it apart.
- */
- if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) {
- cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
- cfqq = split_cfqq(cic, cfqq);
- if (!cfqq)
- goto new_queue;
- }
-
- /*
- * Check to see if this queue is scheduled to merge with
- * another, closely cooperating queue. The merging of
- * queues happens here as it must be done in process context.
- * The reference on new_cfqq was taken in merge_cfqqs.
- */
- if (cfqq->new_cfqq)
- cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq);
- }
-
- cfqq->allocated[rw]++;
-
- cfqq->ref++;
- cfqg_get(cfqq->cfqg);
- rq->elv.priv[0] = cfqq;
- rq->elv.priv[1] = cfqq->cfqg;
- spin_unlock_irq(q->queue_lock);
-
- return 0;
-}
-
-static void cfq_kick_queue(struct work_struct *work)
-{
- struct cfq_data *cfqd =
- container_of(work, struct cfq_data, unplug_work);
- struct request_queue *q = cfqd->queue;
-
- spin_lock_irq(q->queue_lock);
- __blk_run_queue(cfqd->queue);
- spin_unlock_irq(q->queue_lock);
-}
-
-/*
- * Timer running if the active_queue is currently idling inside its time slice
- */
-static enum hrtimer_restart cfq_idle_slice_timer(struct hrtimer *timer)
-{
- struct cfq_data *cfqd = container_of(timer, struct cfq_data,
- idle_slice_timer);
- struct cfq_queue *cfqq;
- unsigned long flags;
- int timed_out = 1;
-
- cfq_log(cfqd, "idle timer fired");
-
- spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-
- cfqq = cfqd->active_queue;
- if (cfqq) {
- timed_out = 0;
-
- /*
- * We saw a request before the queue expired, let it through
- */
- if (cfq_cfqq_must_dispatch(cfqq))
- goto out_kick;
-
- /*
- * expired
- */
- if (cfq_slice_used(cfqq))
- goto expire;
-
- /*
- * only expire and reinvoke request handler, if there are
- * other queues with pending requests
- */
- if (!cfqd->busy_queues)
- goto out_cont;
-
- /*
- * not expired and it has a request pending, let it dispatch
- */
- if (!RB_EMPTY_ROOT(&cfqq->sort_list))
- goto out_kick;
-
- /*
- * Queue depth flag is reset only when the idle didn't succeed
- */
- cfq_clear_cfqq_deep(cfqq);
- }
-expire:
- cfq_slice_expired(cfqd, timed_out);
-out_kick:
- cfq_schedule_dispatch(cfqd);
-out_cont:
- spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
- return HRTIMER_NORESTART;
-}
-
-static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
-{
- hrtimer_cancel(&cfqd->idle_slice_timer);
- cancel_work_sync(&cfqd->unplug_work);
-}
-
-static void cfq_exit_queue(struct elevator_queue *e)
-{
- struct cfq_data *cfqd = e->elevator_data;
- struct request_queue *q = cfqd->queue;
-
- cfq_shutdown_timer_wq(cfqd);
-
- spin_lock_irq(q->queue_lock);
-
- if (cfqd->active_queue)
- __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
-
- spin_unlock_irq(q->queue_lock);
-
- cfq_shutdown_timer_wq(cfqd);
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- blkcg_deactivate_policy(q, &blkcg_policy_cfq);
-#else
- kfree(cfqd->root_group);
-#endif
- kfree(cfqd);
-}
-
-static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
-{
- struct cfq_data *cfqd;
- struct blkcg_gq *blkg __maybe_unused;
- int i, ret;
- struct elevator_queue *eq;
-
- eq = elevator_alloc(q, e);
- if (!eq)
- return -ENOMEM;
-
- cfqd = kzalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
- if (!cfqd) {
- kobject_put(&eq->kobj);
- return -ENOMEM;
- }
- eq->elevator_data = cfqd;
-
- cfqd->queue = q;
- spin_lock_irq(q->queue_lock);
- q->elevator = eq;
- spin_unlock_irq(q->queue_lock);
-
- /* Init root service tree */
- cfqd->grp_service_tree = CFQ_RB_ROOT;
-
- /* Init root group and prefer root group over other groups by default */
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- ret = blkcg_activate_policy(q, &blkcg_policy_cfq);
- if (ret)
- goto out_free;
-
- cfqd->root_group = blkg_to_cfqg(q->root_blkg);
-#else
- ret = -ENOMEM;
- cfqd->root_group = kzalloc_node(sizeof(*cfqd->root_group),
- GFP_KERNEL, cfqd->queue->node);
- if (!cfqd->root_group)
- goto out_free;
-
- cfq_init_cfqg_base(cfqd->root_group);
- cfqd->root_group->weight = 2 * CFQ_WEIGHT_LEGACY_DFL;
- cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_LEGACY_DFL;
-#endif
-
- /*
- * Not strictly needed (since RB_ROOT just clears the node and we
- * zeroed cfqd on alloc), but better be safe in case someone decides
- * to add magic to the rb code
- */
- for (i = 0; i < CFQ_PRIO_LISTS; i++)
- cfqd->prio_trees[i] = RB_ROOT;
-
- /*
- * Our fallback cfqq if cfq_get_queue() runs into OOM issues.
- * Grab a permanent reference to it, so that the normal code flow
- * will not attempt to free it. oom_cfqq is linked to root_group
- * but shouldn't hold a reference as it'll never be unlinked. Lose
- * the reference from linking right away.
- */
- cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
- cfqd->oom_cfqq.ref++;
-
- spin_lock_irq(q->queue_lock);
- cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group);
- cfqg_put(cfqd->root_group);
- spin_unlock_irq(q->queue_lock);
-
- hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
- cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
-
- INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
-
- cfqd->cfq_quantum = cfq_quantum;
- cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
- cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
- cfqd->cfq_back_max = cfq_back_max;
- cfqd->cfq_back_penalty = cfq_back_penalty;
- cfqd->cfq_slice[0] = cfq_slice_async;
- cfqd->cfq_slice[1] = cfq_slice_sync;
- cfqd->cfq_target_latency = cfq_target_latency;
- cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
- cfqd->cfq_slice_idle = cfq_slice_idle;
- cfqd->cfq_group_idle = cfq_group_idle;
- cfqd->cfq_latency = 1;
- cfqd->hw_tag = -1;
- /*
- * we optimistically start assuming sync ops weren't delayed in last
- * second, in order to have larger depth for async operations.
- */
- cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC;
- return 0;
-
-out_free:
- kfree(cfqd);
- kobject_put(&eq->kobj);
- return ret;
-}
-
-static void cfq_registered_queue(struct request_queue *q)
-{
- struct elevator_queue *e = q->elevator;
- struct cfq_data *cfqd = e->elevator_data;
-
- /*
- * Default to IOPS mode with no idling for SSDs
- */
- if (blk_queue_nonrot(q))
- cfqd->cfq_slice_idle = 0;
- wbt_disable_default(q);
-}
-
-/*
- * sysfs parts below -->
- */
-static ssize_t
-cfq_var_show(unsigned int var, char *page)
-{
- return sprintf(page, "%u\n", var);
-}
-
-static void
-cfq_var_store(unsigned int *var, const char *page)
-{
- char *p = (char *) page;
-
- *var = simple_strtoul(p, &p, 10);
-}
-
-#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
-static ssize_t __FUNC(struct elevator_queue *e, char *page) \
-{ \
- struct cfq_data *cfqd = e->elevator_data; \
- u64 __data = __VAR; \
- if (__CONV) \
- __data = div_u64(__data, NSEC_PER_MSEC); \
- return cfq_var_show(__data, (page)); \
-}
-SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
-SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
-SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
-SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
-SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
-SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
-SHOW_FUNCTION(cfq_group_idle_show, cfqd->cfq_group_idle, 1);
-SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
-SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
-SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
-SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
-SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
-#undef SHOW_FUNCTION
-
-#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \
-static ssize_t __FUNC(struct elevator_queue *e, char *page) \
-{ \
- struct cfq_data *cfqd = e->elevator_data; \
- u64 __data = __VAR; \
- __data = div_u64(__data, NSEC_PER_USEC); \
- return cfq_var_show(__data, (page)); \
-}
-USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle);
-USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle);
-USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]);
-USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]);
-USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency);
-#undef USEC_SHOW_FUNCTION
-
-#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
-static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
-{ \
- struct cfq_data *cfqd = e->elevator_data; \
- unsigned int __data, __min = (MIN), __max = (MAX); \
- \
- cfq_var_store(&__data, (page)); \
- if (__data < __min) \
- __data = __min; \
- else if (__data > __max) \
- __data = __max; \
- if (__CONV) \
- *(__PTR) = (u64)__data * NSEC_PER_MSEC; \
- else \
- *(__PTR) = __data; \
- return count; \
-}
-STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
-STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1,
- UINT_MAX, 1);
-STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1,
- UINT_MAX, 1);
-STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
-STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1,
- UINT_MAX, 0);
-STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
-STORE_FUNCTION(cfq_group_idle_store, &cfqd->cfq_group_idle, 0, UINT_MAX, 1);
-STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
-STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
-STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
- UINT_MAX, 0);
-STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
-STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
-#undef STORE_FUNCTION
-
-#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
-static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
-{ \
- struct cfq_data *cfqd = e->elevator_data; \
- unsigned int __data, __min = (MIN), __max = (MAX); \
- \
- cfq_var_store(&__data, (page)); \
- if (__data < __min) \
- __data = __min; \
- else if (__data > __max) \
- __data = __max; \
- *(__PTR) = (u64)__data * NSEC_PER_USEC; \
- return count; \
-}
-USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX);
-USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX);
-USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX);
-USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX);
-USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, UINT_MAX);
-#undef USEC_STORE_FUNCTION
-
-#define CFQ_ATTR(name) \
- __ATTR(name, 0644, cfq_##name##_show, cfq_##name##_store)
-
-static struct elv_fs_entry cfq_attrs[] = {
- CFQ_ATTR(quantum),
- CFQ_ATTR(fifo_expire_sync),
- CFQ_ATTR(fifo_expire_async),
- CFQ_ATTR(back_seek_max),
- CFQ_ATTR(back_seek_penalty),
- CFQ_ATTR(slice_sync),
- CFQ_ATTR(slice_sync_us),
- CFQ_ATTR(slice_async),
- CFQ_ATTR(slice_async_us),
- CFQ_ATTR(slice_async_rq),
- CFQ_ATTR(slice_idle),
- CFQ_ATTR(slice_idle_us),
- CFQ_ATTR(group_idle),
- CFQ_ATTR(group_idle_us),
- CFQ_ATTR(low_latency),
- CFQ_ATTR(target_latency),
- CFQ_ATTR(target_latency_us),
- __ATTR_NULL
-};
-
-static struct elevator_type iosched_cfq = {
- .ops.sq = {
- .elevator_merge_fn = cfq_merge,
- .elevator_merged_fn = cfq_merged_request,
- .elevator_merge_req_fn = cfq_merged_requests,
- .elevator_allow_bio_merge_fn = cfq_allow_bio_merge,
- .elevator_allow_rq_merge_fn = cfq_allow_rq_merge,
- .elevator_bio_merged_fn = cfq_bio_merged,
- .elevator_dispatch_fn = cfq_dispatch_requests,
- .elevator_add_req_fn = cfq_insert_request,
- .elevator_activate_req_fn = cfq_activate_request,
- .elevator_deactivate_req_fn = cfq_deactivate_request,
- .elevator_completed_req_fn = cfq_completed_request,
- .elevator_former_req_fn = elv_rb_former_request,
- .elevator_latter_req_fn = elv_rb_latter_request,
- .elevator_init_icq_fn = cfq_init_icq,
- .elevator_exit_icq_fn = cfq_exit_icq,
- .elevator_set_req_fn = cfq_set_request,
- .elevator_put_req_fn = cfq_put_request,
- .elevator_may_queue_fn = cfq_may_queue,
- .elevator_init_fn = cfq_init_queue,
- .elevator_exit_fn = cfq_exit_queue,
- .elevator_registered_fn = cfq_registered_queue,
- },
- .icq_size = sizeof(struct cfq_io_cq),
- .icq_align = __alignof__(struct cfq_io_cq),
- .elevator_attrs = cfq_attrs,
- .elevator_name = "cfq",
- .elevator_owner = THIS_MODULE,
-};
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-static struct blkcg_policy blkcg_policy_cfq = {
- .dfl_cftypes = cfq_blkcg_files,
- .legacy_cftypes = cfq_blkcg_legacy_files,
-
- .cpd_alloc_fn = cfq_cpd_alloc,
- .cpd_init_fn = cfq_cpd_init,
- .cpd_free_fn = cfq_cpd_free,
- .cpd_bind_fn = cfq_cpd_bind,
-
- .pd_alloc_fn = cfq_pd_alloc,
- .pd_init_fn = cfq_pd_init,
- .pd_offline_fn = cfq_pd_offline,
- .pd_free_fn = cfq_pd_free,
- .pd_reset_stats_fn = cfq_pd_reset_stats,
-};
-#endif
-
-static int __init cfq_init(void)
-{
- int ret;
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- ret = blkcg_policy_register(&blkcg_policy_cfq);
- if (ret)
- return ret;
-#else
- cfq_group_idle = 0;
-#endif
-
- ret = -ENOMEM;
- cfq_pool = KMEM_CACHE(cfq_queue, 0);
- if (!cfq_pool)
- goto err_pol_unreg;
-
- ret = elv_register(&iosched_cfq);
- if (ret)
- goto err_free_pool;
-
- return 0;
-
-err_free_pool:
- kmem_cache_destroy(cfq_pool);
-err_pol_unreg:
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- blkcg_policy_unregister(&blkcg_policy_cfq);
-#endif
- return ret;
-}
-
-static void __exit cfq_exit(void)
-{
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- blkcg_policy_unregister(&blkcg_policy_cfq);
-#endif
- elv_unregister(&iosched_cfq);
- kmem_cache_destroy(cfq_pool);
-}
-
-module_init(cfq_init);
-module_exit(cfq_exit);
-
-MODULE_AUTHOR("Jens Axboe");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
deleted file mode 100644
index ef2f1f09e9b3..000000000000
--- a/block/deadline-iosched.c
+++ /dev/null
@@ -1,560 +0,0 @@
-/*
- * Deadline i/o scheduler.
- *
- * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
- */
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/blkdev.h>
-#include <linux/elevator.h>
-#include <linux/bio.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/compiler.h>
-#include <linux/rbtree.h>
-
-/*
- * See Documentation/block/deadline-iosched.txt
- */
-static const int read_expire = HZ / 2; /* max time before a read is submitted. */
-static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
-static const int writes_starved = 2; /* max times reads can starve a write */
-static const int fifo_batch = 16; /* # of sequential requests treated as one
- by the above parameters. For throughput. */
-
-struct deadline_data {
- /*
- * run time data
- */
-
- /*
- * requests (deadline_rq s) are present on both sort_list and fifo_list
- */
- struct rb_root sort_list[2];
- struct list_head fifo_list[2];
-
- /*
- * next in sort order. read, write or both are NULL
- */
- struct request *next_rq[2];
- unsigned int batching; /* number of sequential requests made */
- unsigned int starved; /* times reads have starved writes */
-
- /*
- * settings that change how the i/o scheduler behaves
- */
- int fifo_expire[2];
- int fifo_batch;
- int writes_starved;
- int front_merges;
-};
-
-static inline struct rb_root *
-deadline_rb_root(struct deadline_data *dd, struct request *rq)
-{
- return &dd->sort_list[rq_data_dir(rq)];
-}
-
-/*
- * get the request after `rq' in sector-sorted order
- */
-static inline struct request *
-deadline_latter_request(struct request *rq)
-{
- struct rb_node *node = rb_next(&rq->rb_node);
-
- if (node)
- return rb_entry_rq(node);
-
- return NULL;
-}
-
-static void
-deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
-{
- struct rb_root *root = deadline_rb_root(dd, rq);
-
- elv_rb_add(root, rq);
-}
-
-static inline void
-deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
-{
- const int data_dir = rq_data_dir(rq);
-
- if (dd->next_rq[data_dir] == rq)
- dd->next_rq[data_dir] = deadline_latter_request(rq);
-
- elv_rb_del(deadline_rb_root(dd, rq), rq);
-}
-
-/*
- * add rq to rbtree and fifo
- */
-static void
-deadline_add_request(struct request_queue *q, struct request *rq)
-{
- struct deadline_data *dd = q->elevator->elevator_data;
- const int data_dir = rq_data_dir(rq);
-
- /*
- * This may be a requeue of a write request that has locked its
- * target zone. If it is the case, this releases the zone lock.
- */
- blk_req_zone_write_unlock(rq);
-
- deadline_add_rq_rb(dd, rq);
-
- /*
- * set expire time and add to fifo list
- */
- rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
- list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
-}
-
-/*
- * remove rq from rbtree and fifo.
- */
-static void deadline_remove_request(struct request_queue *q, struct request *rq)
-{
- struct deadline_data *dd = q->elevator->elevator_data;
-
- rq_fifo_clear(rq);
- deadline_del_rq_rb(dd, rq);
-}
-
-static enum elv_merge
-deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
-{
- struct deadline_data *dd = q->elevator->elevator_data;
- struct request *__rq;
-
- /*
- * check for front merge
- */
- if (dd->front_merges) {
- sector_t sector = bio_end_sector(bio);
-
- __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
- if (__rq) {
- BUG_ON(sector != blk_rq_pos(__rq));
-
- if (elv_bio_merge_ok(__rq, bio)) {
- *req = __rq;
- return ELEVATOR_FRONT_MERGE;
- }
- }
- }
-
- return ELEVATOR_NO_MERGE;
-}
-
-static void deadline_merged_request(struct request_queue *q,
- struct request *req, enum elv_merge type)
-{
- struct deadline_data *dd = q->elevator->elevator_data;
-
- /*
- * if the merge was a front merge, we need to reposition request
- */
- if (type == ELEVATOR_FRONT_MERGE) {
- elv_rb_del(deadline_rb_root(dd, req), req);
- deadline_add_rq_rb(dd, req);
- }
-}
-
-static void
-deadline_merged_requests(struct request_queue *q, struct request *req,
- struct request *next)
-{
- /*
- * if next expires before rq, assign its expire time to rq
- * and move into next position (next will be deleted) in fifo
- */
- if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
- if (time_before((unsigned long)next->fifo_time,
- (unsigned long)req->fifo_time)) {
- list_move(&req->queuelist, &next->queuelist);
- req->fifo_time = next->fifo_time;
- }
- }
-
- /*
- * kill knowledge of next, this one is a goner
- */
- deadline_remove_request(q, next);
-}
-
-/*
- * move request from sort list to dispatch queue.
- */
-static inline void
-deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
-{
- struct request_queue *q = rq->q;
-
- /*
- * For a zoned block device, write requests must write lock their
- * target zone.
- */
- blk_req_zone_write_lock(rq);
-
- deadline_remove_request(q, rq);
- elv_dispatch_add_tail(q, rq);
-}
-
-/*
- * move an entry to dispatch queue
- */
-static void
-deadline_move_request(struct deadline_data *dd, struct request *rq)
-{
- const int data_dir = rq_data_dir(rq);
-
- dd->next_rq[READ] = NULL;
- dd->next_rq[WRITE] = NULL;
- dd->next_rq[data_dir] = deadline_latter_request(rq);
-
- /*
- * take it off the sort and fifo list, move
- * to dispatch queue
- */
- deadline_move_to_dispatch(dd, rq);
-}
-
-/*
- * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
- * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
- */
-static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
-{
- struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
-
- /*
- * rq is expired!
- */
- if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
- return 1;
-
- return 0;
-}
-
-/*
- * For the specified data direction, return the next request to dispatch using
- * arrival ordered lists.
- */
-static struct request *
-deadline_fifo_request(struct deadline_data *dd, int data_dir)
-{
- struct request *rq;
-
- if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
- return NULL;
-
- if (list_empty(&dd->fifo_list[data_dir]))
- return NULL;
-
- rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
- if (data_dir == READ || !blk_queue_is_zoned(rq->q))
- return rq;
-
- /*
- * Look for a write request that can be dispatched, that is one with
- * an unlocked target zone.
- */
- list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
- if (blk_req_can_dispatch_to_zone(rq))
- return rq;
- }
-
- return NULL;
-}
-
-/*
- * For the specified data direction, return the next request to dispatch using
- * sector position sorted lists.
- */
-static struct request *
-deadline_next_request(struct deadline_data *dd, int data_dir)
-{
- struct request *rq;
-
- if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
- return NULL;
-
- rq = dd->next_rq[data_dir];
- if (!rq)
- return NULL;
-
- if (data_dir == READ || !blk_queue_is_zoned(rq->q))
- return rq;
-
- /*
- * Look for a write request that can be dispatched, that is one with
- * an unlocked target zone.
- */
- while (rq) {
- if (blk_req_can_dispatch_to_zone(rq))
- return rq;
- rq = deadline_latter_request(rq);
- }
-
- return NULL;
-}
-
-/*
- * deadline_dispatch_requests selects the best request according to
- * read/write expire, fifo_batch, etc
- */
-static int deadline_dispatch_requests(struct request_queue *q, int force)
-{
- struct deadline_data *dd = q->elevator->elevator_data;
- const int reads = !list_empty(&dd->fifo_list[READ]);
- const int writes = !list_empty(&dd->fifo_list[WRITE]);
- struct request *rq, *next_rq;
- int data_dir;
-
- /*
- * batches are currently reads XOR writes
- */
- rq = deadline_next_request(dd, WRITE);
- if (!rq)
- rq = deadline_next_request(dd, READ);
-
- if (rq && dd->batching < dd->fifo_batch)
- /* we have a next request are still entitled to batch */
- goto dispatch_request;
-
- /*
- * at this point we are not running a batch. select the appropriate
- * data direction (read / write)
- */
-
- if (reads) {
- BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
-
- if (deadline_fifo_request(dd, WRITE) &&
- (dd->starved++ >= dd->writes_starved))
- goto dispatch_writes;
-
- data_dir = READ;
-
- goto dispatch_find_request;
- }
-
- /*
- * there are either no reads or writes have been starved
- */
-
- if (writes) {
-dispatch_writes:
- BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
-
- dd->starved = 0;
-
- data_dir = WRITE;
-
- goto dispatch_find_request;
- }
-
- return 0;
-
-dispatch_find_request:
- /*
- * we are not running a batch, find best request for selected data_dir
- */
- next_rq = deadline_next_request(dd, data_dir);
- if (deadline_check_fifo(dd, data_dir) || !next_rq) {
- /*
- * A deadline has expired, the last request was in the other
- * direction, or we have run out of higher-sectored requests.
- * Start again from the request with the earliest expiry time.
- */
- rq = deadline_fifo_request(dd, data_dir);
- } else {
- /*
- * The last req was the same dir and we have a next request in
- * sort order. No expired requests so continue on from here.
- */
- rq = next_rq;
- }
-
- /*
- * For a zoned block device, if we only have writes queued and none of
- * them can be dispatched, rq will be NULL.
- */
- if (!rq)
- return 0;
-
- dd->batching = 0;
-
-dispatch_request:
- /*
- * rq is the selected appropriate request.
- */
- dd->batching++;
- deadline_move_request(dd, rq);
-
- return 1;
-}
-
-/*
- * For zoned block devices, write unlock the target zone of completed
- * write requests.
- */
-static void
-deadline_completed_request(struct request_queue *q, struct request *rq)
-{
- blk_req_zone_write_unlock(rq);
-}
-
-static void deadline_exit_queue(struct elevator_queue *e)
-{
- struct deadline_data *dd = e->elevator_data;
-
- BUG_ON(!list_empty(&dd->fifo_list[READ]));
- BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
-
- kfree(dd);
-}
-
-/*
- * initialize elevator private data (deadline_data).
- */
-static int deadline_init_queue(struct request_queue *q, struct elevator_type *e)
-{
- struct deadline_data *dd;
- struct elevator_queue *eq;
-
- eq = elevator_alloc(q, e);
- if (!eq)
- return -ENOMEM;
-
- dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
- if (!dd) {
- kobject_put(&eq->kobj);
- return -ENOMEM;
- }
- eq->elevator_data = dd;
-
- INIT_LIST_HEAD(&dd->fifo_list[READ]);
- INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
- dd->sort_list[READ] = RB_ROOT;
- dd->sort_list[WRITE] = RB_ROOT;
- dd->fifo_expire[READ] = read_expire;
- dd->fifo_expire[WRITE] = write_expire;
- dd->writes_starved = writes_starved;
- dd->front_merges = 1;
- dd->fifo_batch = fifo_batch;
-
- spin_lock_irq(q->queue_lock);
- q->elevator = eq;
- spin_unlock_irq(q->queue_lock);
- return 0;
-}
-
-/*
- * sysfs parts below
- */
-
-static ssize_t
-deadline_var_show(int var, char *page)
-{
- return sprintf(page, "%d\n", var);
-}
-
-static void
-deadline_var_store(int *var, const char *page)
-{
- char *p = (char *) page;
-
- *var = simple_strtol(p, &p, 10);
-}
-
-#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
-static ssize_t __FUNC(struct elevator_queue *e, char *page) \
-{ \
- struct deadline_data *dd = e->elevator_data; \
- int __data = __VAR; \
- if (__CONV) \
- __data = jiffies_to_msecs(__data); \
- return deadline_var_show(__data, (page)); \
-}
-SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
-SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
-SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
-SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
-SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
-#undef SHOW_FUNCTION
-
-#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
-static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
-{ \
- struct deadline_data *dd = e->elevator_data; \
- int __data; \
- deadline_var_store(&__data, (page)); \
- if (__data < (MIN)) \
- __data = (MIN); \
- else if (__data > (MAX)) \
- __data = (MAX); \
- if (__CONV) \
- *(__PTR) = msecs_to_jiffies(__data); \
- else \
- *(__PTR) = __data; \
- return count; \
-}
-STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
-STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
-STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
-STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
-STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
-#undef STORE_FUNCTION
-
-#define DD_ATTR(name) \
- __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
-
-static struct elv_fs_entry deadline_attrs[] = {
- DD_ATTR(read_expire),
- DD_ATTR(write_expire),
- DD_ATTR(writes_starved),
- DD_ATTR(front_merges),
- DD_ATTR(fifo_batch),
- __ATTR_NULL
-};
-
-static struct elevator_type iosched_deadline = {
- .ops.sq = {
- .elevator_merge_fn = deadline_merge,
- .elevator_merged_fn = deadline_merged_request,
- .elevator_merge_req_fn = deadline_merged_requests,
- .elevator_dispatch_fn = deadline_dispatch_requests,
- .elevator_completed_req_fn = deadline_completed_request,
- .elevator_add_req_fn = deadline_add_request,
- .elevator_former_req_fn = elv_rb_former_request,
- .elevator_latter_req_fn = elv_rb_latter_request,
- .elevator_init_fn = deadline_init_queue,
- .elevator_exit_fn = deadline_exit_queue,
- },
-
- .elevator_attrs = deadline_attrs,
- .elevator_name = "deadline",
- .elevator_owner = THIS_MODULE,
-};
-
-static int __init deadline_init(void)
-{
- return elv_register(&iosched_deadline);
-}
-
-static void __exit deadline_exit(void)
-{
- elv_unregister(&iosched_deadline);
-}
-
-module_init(deadline_init);
-module_exit(deadline_exit);
-
-MODULE_AUTHOR("Jens Axboe");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("deadline IO scheduler");
diff --git a/block/elevator.c b/block/elevator.c
index 8fdcd64ae12e..f05e90d4e695 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -61,10 +61,8 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
- if (e->uses_mq && e->type->ops.mq.allow_merge)
- return e->type->ops.mq.allow_merge(q, rq, bio);
- else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
- return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);
+ if (e->type->ops.allow_merge)
+ return e->type->ops.allow_merge(q, rq, bio);
return 1;
}
@@ -95,14 +93,14 @@ static bool elevator_match(const struct elevator_type *e, const char *name)
}
/*
- * Return scheduler with name 'name' and with matching 'mq capability
+ * Return scheduler with name 'name'
*/
-static struct elevator_type *elevator_find(const char *name, bool mq)
+static struct elevator_type *elevator_find(const char *name)
{
struct elevator_type *e;
list_for_each_entry(e, &elv_list, list) {
- if (elevator_match(e, name) && (mq == e->uses_mq))
+ if (elevator_match(e, name))
return e;
}
@@ -121,12 +119,12 @@ static struct elevator_type *elevator_get(struct request_queue *q,
spin_lock(&elv_list_lock);
- e = elevator_find(name, q->mq_ops != NULL);
+ e = elevator_find(name);
if (!e && try_loading) {
spin_unlock(&elv_list_lock);
request_module("%s-iosched", name);
spin_lock(&elv_list_lock);
- e = elevator_find(name, q->mq_ops != NULL);
+ e = elevator_find(name);
}
if (e && !try_module_get(e->elevator_owner))
@@ -150,26 +148,6 @@ static int __init elevator_setup(char *str)
__setup("elevator=", elevator_setup);
-/* called during boot to load the elevator chosen by the elevator param */
-void __init load_default_elevator_module(void)
-{
- struct elevator_type *e;
-
- if (!chosen_elevator[0])
- return;
-
- /*
- * Boot parameter is deprecated, we haven't supported that for MQ.
- * Only look for non-mq schedulers from here.
- */
- spin_lock(&elv_list_lock);
- e = elevator_find(chosen_elevator, false);
- spin_unlock(&elv_list_lock);
-
- if (!e)
- request_module("%s-iosched", chosen_elevator);
-}
-
static struct kobj_type elv_ktype;
struct elevator_queue *elevator_alloc(struct request_queue *q,
@@ -185,7 +163,6 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
kobject_init(&eq->kobj, &elv_ktype);
mutex_init(&eq->sysfs_lock);
hash_init(eq->hash);
- eq->uses_mq = e->uses_mq;
return eq;
}
@@ -200,54 +177,11 @@ static void elevator_release(struct kobject *kobj)
kfree(e);
}
-/*
- * Use the default elevator specified by config boot param for non-mq devices,
- * or by config option. Don't try to load modules as we could be running off
- * async and request_module() isn't allowed from async.
- */
-int elevator_init(struct request_queue *q)
-{
- struct elevator_type *e = NULL;
- int err = 0;
-
- /*
- * q->sysfs_lock must be held to provide mutual exclusion between
- * elevator_switch() and here.
- */
- mutex_lock(&q->sysfs_lock);
- if (unlikely(q->elevator))
- goto out_unlock;
-
- if (*chosen_elevator) {
- e = elevator_get(q, chosen_elevator, false);
- if (!e)
- printk(KERN_ERR "I/O scheduler %s not found\n",
- chosen_elevator);
- }
-
- if (!e)
- e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
- if (!e) {
- printk(KERN_ERR
- "Default I/O scheduler not found. Using noop.\n");
- e = elevator_get(q, "noop", false);
- }
-
- err = e->ops.sq.elevator_init_fn(q, e);
- if (err)
- elevator_put(e);
-out_unlock:
- mutex_unlock(&q->sysfs_lock);
- return err;
-}
-
void elevator_exit(struct request_queue *q, struct elevator_queue *e)
{
mutex_lock(&e->sysfs_lock);
- if (e->uses_mq && e->type->ops.mq.exit_sched)
+ if (e->type->ops.exit_sched)
blk_mq_exit_sched(q, e);
- else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
- e->type->ops.sq.elevator_exit_fn(e);
mutex_unlock(&e->sysfs_lock);
kobject_put(&e->kobj);
@@ -356,68 +290,6 @@ struct request *elv_rb_find(struct rb_root *root, sector_t sector)
}
EXPORT_SYMBOL(elv_rb_find);
-/*
- * Insert rq into dispatch queue of q. Queue lock must be held on
- * entry. rq is sort instead into the dispatch queue. To be used by
- * specific elevators.
- */
-void elv_dispatch_sort(struct request_queue *q, struct request *rq)
-{
- sector_t boundary;
- struct list_head *entry;
-
- if (q->last_merge == rq)
- q->last_merge = NULL;
-
- elv_rqhash_del(q, rq);
-
- q->nr_sorted--;
-
- boundary = q->end_sector;
- list_for_each_prev(entry, &q->queue_head) {
- struct request *pos = list_entry_rq(entry);
-
- if (req_op(rq) != req_op(pos))
- break;
- if (rq_data_dir(rq) != rq_data_dir(pos))
- break;
- if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
- break;
- if (blk_rq_pos(rq) >= boundary) {
- if (blk_rq_pos(pos) < boundary)
- continue;
- } else {
- if (blk_rq_pos(pos) >= boundary)
- break;
- }
- if (blk_rq_pos(rq) >= blk_rq_pos(pos))
- break;
- }
-
- list_add(&rq->queuelist, entry);
-}
-EXPORT_SYMBOL(elv_dispatch_sort);
-
-/*
- * Insert rq into dispatch queue of q. Queue lock must be held on
- * entry. rq is added to the back of the dispatch queue. To be used by
- * specific elevators.
- */
-void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
-{
- if (q->last_merge == rq)
- q->last_merge = NULL;
-
- elv_rqhash_del(q, rq);
-
- q->nr_sorted--;
-
- q->end_sector = rq_end_sector(rq);
- q->boundary_rq = rq;
- list_add_tail(&rq->queuelist, &q->queue_head);
-}
-EXPORT_SYMBOL(elv_dispatch_add_tail);
-
enum elv_merge elv_merge(struct request_queue *q, struct request **req,
struct bio *bio)
{
@@ -457,10 +329,8 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req,
return ELEVATOR_BACK_MERGE;
}
- if (e->uses_mq && e->type->ops.mq.request_merge)
- return e->type->ops.mq.request_merge(q, req, bio);
- else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
- return e->type->ops.sq.elevator_merge_fn(q, req, bio);
+ if (e->type->ops.request_merge)
+ return e->type->ops.request_merge(q, req, bio);
return ELEVATOR_NO_MERGE;
}
@@ -511,10 +381,8 @@ void elv_merged_request(struct request_queue *q, struct request *rq,
{
struct elevator_queue *e = q->elevator;
- if (e->uses_mq && e->type->ops.mq.request_merged)
- e->type->ops.mq.request_merged(q, rq, type);
- else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
- e->type->ops.sq.elevator_merged_fn(q, rq, type);
+ if (e->type->ops.request_merged)
+ e->type->ops.request_merged(q, rq, type);
if (type == ELEVATOR_BACK_MERGE)
elv_rqhash_reposition(q, rq);
@@ -526,176 +394,20 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
struct request *next)
{
struct elevator_queue *e = q->elevator;
- bool next_sorted = false;
-
- if (e->uses_mq && e->type->ops.mq.requests_merged)
- e->type->ops.mq.requests_merged(q, rq, next);
- else if (e->type->ops.sq.elevator_merge_req_fn) {
- next_sorted = (__force bool)(next->rq_flags & RQF_SORTED);
- if (next_sorted)
- e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
- }
- elv_rqhash_reposition(q, rq);
-
- if (next_sorted) {
- elv_rqhash_del(q, next);
- q->nr_sorted--;
- }
+ if (e->type->ops.requests_merged)
+ e->type->ops.requests_merged(q, rq, next);
+ elv_rqhash_reposition(q, rq);
q->last_merge = rq;
}
-void elv_bio_merged(struct request_queue *q, struct request *rq,
- struct bio *bio)
-{
- struct elevator_queue *e = q->elevator;
-
- if (WARN_ON_ONCE(e->uses_mq))
- return;
-
- if (e->type->ops.sq.elevator_bio_merged_fn)
- e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
-}
-
-void elv_requeue_request(struct request_queue *q, struct request *rq)
-{
- /*
- * it already went through dequeue, we need to decrement the
- * in_flight count again
- */
- if (blk_account_rq(rq)) {
- q->in_flight[rq_is_sync(rq)]--;
- if (rq->rq_flags & RQF_SORTED)
- elv_deactivate_rq(q, rq);
- }
-
- rq->rq_flags &= ~RQF_STARTED;
-
- blk_pm_requeue_request(rq);
-
- __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
-}
-
-void elv_drain_elevator(struct request_queue *q)
-{
- struct elevator_queue *e = q->elevator;
- static int printed;
-
- if (WARN_ON_ONCE(e->uses_mq))
- return;
-
- lockdep_assert_held(q->queue_lock);
-
- while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
- ;
- if (q->nr_sorted && !blk_queue_is_zoned(q) && printed++ < 10 ) {
- printk(KERN_ERR "%s: forced dispatching is broken "
- "(nr_sorted=%u), please report this\n",
- q->elevator->type->elevator_name, q->nr_sorted);
- }
-}
-
-void __elv_add_request(struct request_queue *q, struct request *rq, int where)
-{
- trace_block_rq_insert(q, rq);
-
- blk_pm_add_request(q, rq);
-
- rq->q = q;
-
- if (rq->rq_flags & RQF_SOFTBARRIER) {
- /* barriers are scheduling boundary, update end_sector */
- if (!blk_rq_is_passthrough(rq)) {
- q->end_sector = rq_end_sector(rq);
- q->boundary_rq = rq;
- }
- } else if (!(rq->rq_flags & RQF_ELVPRIV) &&
- (where == ELEVATOR_INSERT_SORT ||
- where == ELEVATOR_INSERT_SORT_MERGE))
- where = ELEVATOR_INSERT_BACK;
-
- switch (where) {
- case ELEVATOR_INSERT_REQUEUE:
- case ELEVATOR_INSERT_FRONT:
- rq->rq_flags |= RQF_SOFTBARRIER;
- list_add(&rq->queuelist, &q->queue_head);
- break;
-
- case ELEVATOR_INSERT_BACK:
- rq->rq_flags |= RQF_SOFTBARRIER;
- elv_drain_elevator(q);
- list_add_tail(&rq->queuelist, &q->queue_head);
- /*
- * We kick the queue here for the following reasons.
- * - The elevator might have returned NULL previously
- * to delay requests and returned them now. As the
- * queue wasn't empty before this request, ll_rw_blk
- * won't run the queue on return, resulting in hang.
- * - Usually, back inserted requests won't be merged
- * with anything. There's no point in delaying queue
- * processing.
- */
- __blk_run_queue(q);
- break;
-
- case ELEVATOR_INSERT_SORT_MERGE:
- /*
- * If we succeed in merging this request with one in the
- * queue already, we are done - rq has now been freed,
- * so no need to do anything further.
- */
- if (elv_attempt_insert_merge(q, rq))
- break;
- /* fall through */
- case ELEVATOR_INSERT_SORT:
- BUG_ON(blk_rq_is_passthrough(rq));
- rq->rq_flags |= RQF_SORTED;
- q->nr_sorted++;
- if (rq_mergeable(rq)) {
- elv_rqhash_add(q, rq);
- if (!q->last_merge)
- q->last_merge = rq;
- }
-
- /*
- * Some ioscheds (cfq) run q->request_fn directly, so
- * rq cannot be accessed after calling
- * elevator_add_req_fn.
- */
- q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
- break;
-
- case ELEVATOR_INSERT_FLUSH:
- rq->rq_flags |= RQF_SOFTBARRIER;
- blk_insert_flush(rq);
- break;
- default:
- printk(KERN_ERR "%s: bad insertion point %d\n",
- __func__, where);
- BUG();
- }
-}
-EXPORT_SYMBOL(__elv_add_request);
-
-void elv_add_request(struct request_queue *q, struct request *rq, int where)
-{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- __elv_add_request(q, rq, where);
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(elv_add_request);
-
struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
- if (e->uses_mq && e->type->ops.mq.next_request)
- return e->type->ops.mq.next_request(q, rq);
- else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
- return e->type->ops.sq.elevator_latter_req_fn(q, rq);
+ if (e->type->ops.next_request)
+ return e->type->ops.next_request(q, rq);
return NULL;
}
@@ -704,66 +416,10 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
- if (e->uses_mq && e->type->ops.mq.former_request)
- return e->type->ops.mq.former_request(q, rq);
- if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
- return e->type->ops.sq.elevator_former_req_fn(q, rq);
- return NULL;
-}
-
-int elv_set_request(struct request_queue *q, struct request *rq,
- struct bio *bio, gfp_t gfp_mask)
-{
- struct elevator_queue *e = q->elevator;
-
- if (WARN_ON_ONCE(e->uses_mq))
- return 0;
-
- if (e->type->ops.sq.elevator_set_req_fn)
- return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
- return 0;
-}
-
-void elv_put_request(struct request_queue *q, struct request *rq)
-{
- struct elevator_queue *e = q->elevator;
-
- if (WARN_ON_ONCE(e->uses_mq))
- return;
-
- if (e->type->ops.sq.elevator_put_req_fn)
- e->type->ops.sq.elevator_put_req_fn(rq);
-}
-
-int elv_may_queue(struct request_queue *q, unsigned int op)
-{
- struct elevator_queue *e = q->elevator;
-
- if (WARN_ON_ONCE(e->uses_mq))
- return 0;
-
- if (e->type->ops.sq.elevator_may_queue_fn)
- return e->type->ops.sq.elevator_may_queue_fn(q, op);
-
- return ELV_MQUEUE_MAY;
-}
-
-void elv_completed_request(struct request_queue *q, struct request *rq)
-{
- struct elevator_queue *e = q->elevator;
-
- if (WARN_ON_ONCE(e->uses_mq))
- return;
+ if (e->type->ops.former_request)
+ return e->type->ops.former_request(q, rq);
- /*
- * request is released from the driver, io must be done
- */
- if (blk_account_rq(rq)) {
- q->in_flight[rq_is_sync(rq)]--;
- if ((rq->rq_flags & RQF_SORTED) &&
- e->type->ops.sq.elevator_completed_req_fn)
- e->type->ops.sq.elevator_completed_req_fn(q, rq);
- }
+ return NULL;
}
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
@@ -832,8 +488,6 @@ int elv_register_queue(struct request_queue *q)
}
kobject_uevent(&e->kobj, KOBJ_ADD);
e->registered = 1;
- if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
- e->type->ops.sq.elevator_registered_fn(q);
}
return error;
}
@@ -873,7 +527,7 @@ int elv_register(struct elevator_type *e)
/* register, don't allow duplicate names */
spin_lock(&elv_list_lock);
- if (elevator_find(e->elevator_name, e->uses_mq)) {
+ if (elevator_find(e->elevator_name)) {
spin_unlock(&elv_list_lock);
kmem_cache_destroy(e->icq_cache);
return -EBUSY;
@@ -881,12 +535,6 @@ int elv_register(struct elevator_type *e)
list_add_tail(&e->list, &elv_list);
spin_unlock(&elv_list_lock);
- /* print pretty message */
- if (elevator_match(e, chosen_elevator) ||
- (!*chosen_elevator &&
- elevator_match(e, CONFIG_DEFAULT_IOSCHED)))
- def = " (default)";
-
printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
def);
return 0;
@@ -989,71 +637,17 @@ out_unlock:
*/
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
- struct elevator_queue *old = q->elevator;
- bool old_registered = false;
int err;
lockdep_assert_held(&q->sysfs_lock);
- if (q->mq_ops) {
- blk_mq_freeze_queue(q);
- blk_mq_quiesce_queue(q);
-
- err = elevator_switch_mq(q, new_e);
-
- blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
-
- return err;
- }
-
- /*
- * Turn on BYPASS and drain all requests w/ elevator private data.
- * Block layer doesn't call into a quiesced elevator - all requests
- * are directly put on the dispatch list without elevator data
- * using INSERT_BACK. All requests have SOFTBARRIER set and no
- * merge happens either.
- */
- if (old) {
- old_registered = old->registered;
-
- blk_queue_bypass_start(q);
-
- /* unregister and clear all auxiliary data of the old elevator */
- if (old_registered)
- elv_unregister_queue(q);
-
- ioc_clear_queue(q);
- }
+ blk_mq_freeze_queue(q);
+ blk_mq_quiesce_queue(q);
- /* allocate, init and register new elevator */
- err = new_e->ops.sq.elevator_init_fn(q, new_e);
- if (err)
- goto fail_init;
-
- err = elv_register_queue(q);
- if (err)
- goto fail_register;
-
- /* done, kill the old one and finish */
- if (old) {
- elevator_exit(q, old);
- blk_queue_bypass_end(q);
- }
+ err = elevator_switch_mq(q, new_e);
- blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
-
- return 0;
-
-fail_register:
- elevator_exit(q, q->elevator);
-fail_init:
- /* switch failed, restore and re-register old elevator */
- if (old) {
- q->elevator = old;
- elv_register_queue(q);
- blk_queue_bypass_end(q);
- }
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q);
return err;
}
@@ -1073,7 +667,7 @@ static int __elevator_change(struct request_queue *q, const char *name)
/*
* Special case for mq, turn off scheduling
*/
- if (q->mq_ops && !strncmp(name, "none", 4))
+ if (!strncmp(name, "none", 4))
return elevator_switch(q, NULL);
strlcpy(elevator_name, name, sizeof(elevator_name));
@@ -1091,8 +685,7 @@ static int __elevator_change(struct request_queue *q, const char *name)
static inline bool elv_support_iosched(struct request_queue *q)
{
- if (q->mq_ops && q->tag_set && (q->tag_set->flags &
- BLK_MQ_F_NO_SCHED))
+ if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
return false;
return true;
}
@@ -1102,7 +695,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
{
int ret;
- if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q))
+ if (!queue_is_mq(q) || !elv_support_iosched(q))
return count;
ret = __elevator_change(q, name);
@@ -1117,10 +710,9 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
struct elevator_queue *e = q->elevator;
struct elevator_type *elv = NULL;
struct elevator_type *__e;
- bool uses_mq = q->mq_ops != NULL;
int len = 0;
- if (!queue_is_rq_based(q))
+ if (!queue_is_mq(q))
return sprintf(name, "none\n");
if (!q->elevator)
@@ -1130,19 +722,16 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
spin_lock(&elv_list_lock);
list_for_each_entry(__e, &elv_list, list) {
- if (elv && elevator_match(elv, __e->elevator_name) &&
- (__e->uses_mq == uses_mq)) {
+ if (elv && elevator_match(elv, __e->elevator_name)) {
len += sprintf(name+len, "[%s] ", elv->elevator_name);
continue;
}
- if (__e->uses_mq && q->mq_ops && elv_support_iosched(q))
- len += sprintf(name+len, "%s ", __e->elevator_name);
- else if (!__e->uses_mq && !q->mq_ops)
+ if (elv_support_iosched(q))
len += sprintf(name+len, "%s ", __e->elevator_name);
}
spin_unlock(&elv_list_lock);
- if (q->mq_ops && q->elevator)
+ if (q->elevator)
len += sprintf(name+len, "none");
len += sprintf(len+name, "\n");
diff --git a/block/genhd.c b/block/genhd.c
index cff6bdf27226..1dd8fd6613b8 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -47,51 +47,64 @@ static void disk_release_events(struct gendisk *disk);
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
{
- if (q->mq_ops)
+ if (queue_is_mq(q))
return;
- atomic_inc(&part->in_flight[rw]);
+ part_stat_local_inc(part, in_flight[rw]);
if (part->partno)
- atomic_inc(&part_to_disk(part)->part0.in_flight[rw]);
+ part_stat_local_inc(&part_to_disk(part)->part0, in_flight[rw]);
}
void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
{
- if (q->mq_ops)
+ if (queue_is_mq(q))
return;
- atomic_dec(&part->in_flight[rw]);
+ part_stat_local_dec(part, in_flight[rw]);
if (part->partno)
- atomic_dec(&part_to_disk(part)->part0.in_flight[rw]);
+ part_stat_local_dec(&part_to_disk(part)->part0, in_flight[rw]);
}
-void part_in_flight(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2])
+unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
{
- if (q->mq_ops) {
- blk_mq_in_flight(q, part, inflight);
- return;
+ int cpu;
+ unsigned int inflight;
+
+ if (queue_is_mq(q)) {
+ return blk_mq_in_flight(q, part);
}
- inflight[0] = atomic_read(&part->in_flight[0]) +
- atomic_read(&part->in_flight[1]);
- if (part->partno) {
- part = &part_to_disk(part)->part0;
- inflight[1] = atomic_read(&part->in_flight[0]) +
- atomic_read(&part->in_flight[1]);
+ inflight = 0;
+ for_each_possible_cpu(cpu) {
+ inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
+ part_stat_local_read_cpu(part, in_flight[1], cpu);
}
+ if ((int)inflight < 0)
+ inflight = 0;
+
+ return inflight;
}
void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2])
{
- if (q->mq_ops) {
+ int cpu;
+
+ if (queue_is_mq(q)) {
blk_mq_in_flight_rw(q, part, inflight);
return;
}
- inflight[0] = atomic_read(&part->in_flight[0]);
- inflight[1] = atomic_read(&part->in_flight[1]);
+ inflight[0] = 0;
+ inflight[1] = 0;
+ for_each_possible_cpu(cpu) {
+ inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
+ inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
+ }
+ if ((int)inflight[0] < 0)
+ inflight[0] = 0;
+ if ((int)inflight[1] < 0)
+ inflight[1] = 0;
}
struct hd_struct *__disk_get_part(struct gendisk *disk, int partno)
@@ -1325,8 +1338,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
struct disk_part_iter piter;
struct hd_struct *hd;
char buf[BDEVNAME_SIZE];
- unsigned int inflight[2];
- int cpu;
+ unsigned int inflight;
/*
if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
@@ -1338,10 +1350,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
- cpu = part_stat_lock();
- part_round_stats(gp->queue, cpu, hd);
- part_stat_unlock();
- part_in_flight(gp->queue, hd, inflight);
+ inflight = part_in_flight(gp->queue, hd);
seq_printf(seqf, "%4d %7d %s "
"%lu %lu %lu %u "
"%lu %lu %lu %u "
@@ -1357,7 +1366,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
part_stat_read(hd, merges[STAT_WRITE]),
part_stat_read(hd, sectors[STAT_WRITE]),
(unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
- inflight[0],
+ inflight,
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
part_stat_read(hd, ios[STAT_DISCARD]),
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index eccac01a10b6..ec6a04e01bc1 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -195,7 +195,7 @@ struct kyber_hctx_data {
unsigned int batching;
struct kyber_ctx_queue *kcqs;
struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
- wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
+ struct sbq_wait domain_wait[KYBER_NUM_DOMAINS];
struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
atomic_t wait_index[KYBER_NUM_DOMAINS];
};
@@ -501,10 +501,11 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
INIT_LIST_HEAD(&khd->rqs[i]);
- init_waitqueue_func_entry(&khd->domain_wait[i],
+ khd->domain_wait[i].sbq = NULL;
+ init_waitqueue_func_entry(&khd->domain_wait[i].wait,
kyber_domain_wake);
- khd->domain_wait[i].private = hctx;
- INIT_LIST_HEAD(&khd->domain_wait[i].entry);
+ khd->domain_wait[i].wait.private = hctx;
+ INIT_LIST_HEAD(&khd->domain_wait[i].wait.entry);
atomic_set(&khd->wait_index[i], 0);
}
@@ -576,7 +577,7 @@ static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
{
struct kyber_hctx_data *khd = hctx->sched_data;
struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
- struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw];
+ struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
struct list_head *rq_list = &kcq->rq_list[sched_domain];
bool merged;
@@ -602,7 +603,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
list_for_each_entry_safe(rq, next, rq_list, queuelist) {
unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
- struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw];
+ struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]];
struct list_head *head = &kcq->rq_list[sched_domain];
spin_lock(&kcq->lock);
@@ -611,7 +612,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
else
list_move_tail(&rq->queuelist, head);
sbitmap_set_bit(&khd->kcq_map[sched_domain],
- rq->mq_ctx->index_hw);
+ rq->mq_ctx->index_hw[hctx->type]);
blk_mq_sched_request_inserted(rq);
spin_unlock(&kcq->lock);
}
@@ -698,12 +699,13 @@ static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
flush_busy_kcq, &data);
}
-static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
+static int kyber_domain_wake(wait_queue_entry_t *wqe, unsigned mode, int flags,
void *key)
{
- struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);
+ struct blk_mq_hw_ctx *hctx = READ_ONCE(wqe->private);
+ struct sbq_wait *wait = container_of(wqe, struct sbq_wait, wait);
- list_del_init(&wait->entry);
+ sbitmap_del_wait_queue(wait);
blk_mq_run_hw_queue(hctx, true);
return 1;
}
@@ -714,7 +716,7 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
{
unsigned int sched_domain = khd->cur_domain;
struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
- wait_queue_entry_t *wait = &khd->domain_wait[sched_domain];
+ struct sbq_wait *wait = &khd->domain_wait[sched_domain];
struct sbq_wait_state *ws;
int nr;
@@ -725,11 +727,11 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
* run when one becomes available. Note that this is serialized on
* khd->lock, but we still need to be careful about the waker.
*/
- if (nr < 0 && list_empty_careful(&wait->entry)) {
+ if (nr < 0 && list_empty_careful(&wait->wait.entry)) {
ws = sbq_wait_ptr(domain_tokens,
&khd->wait_index[sched_domain]);
khd->domain_ws[sched_domain] = ws;
- add_wait_queue(&ws->wait, wait);
+ sbitmap_add_wait_queue(domain_tokens, ws, wait);
/*
* Try again in case a token was freed before we got on the wait
@@ -745,10 +747,10 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
* between the !list_empty_careful() check and us grabbing the lock, but
* list_del_init() is okay with that.
*/
- if (nr >= 0 && !list_empty_careful(&wait->entry)) {
+ if (nr >= 0 && !list_empty_careful(&wait->wait.entry)) {
ws = khd->domain_ws[sched_domain];
spin_lock_irq(&ws->wait.lock);
- list_del_init(&wait->entry);
+ sbitmap_del_wait_queue(wait);
spin_unlock_irq(&ws->wait.lock);
}
@@ -951,7 +953,7 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \
{ \
struct blk_mq_hw_ctx *hctx = data; \
struct kyber_hctx_data *khd = hctx->sched_data; \
- wait_queue_entry_t *wait = &khd->domain_wait[domain]; \
+ wait_queue_entry_t *wait = &khd->domain_wait[domain].wait; \
\
seq_printf(m, "%d\n", !list_empty_careful(&wait->entry)); \
return 0; \
@@ -1017,7 +1019,7 @@ static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
#endif
static struct elevator_type kyber_sched = {
- .ops.mq = {
+ .ops = {
.init_sched = kyber_init_sched,
.exit_sched = kyber_exit_sched,
.init_hctx = kyber_init_hctx,
@@ -1032,7 +1034,6 @@ static struct elevator_type kyber_sched = {
.dispatch_request = kyber_dispatch_request,
.has_work = kyber_has_work,
},
- .uses_mq = true,
#ifdef CONFIG_BLK_DEBUG_FS
.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
.hctx_debugfs_attrs = kyber_hctx_debugfs_attrs,
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 099a9e05854c..14288f864e94 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -373,9 +373,16 @@ done:
/*
* One confusing aspect here is that we get called for a specific
- * hardware queue, but we return a request that may not be for a
+ * hardware queue, but we may return a request that is for a
* different hardware queue. This is because mq-deadline has shared
* state for all hardware queues, in terms of sorting, FIFOs, etc.
+ *
+ * For a zoned block device, __dd_dispatch_request() may return NULL
+ * if all the queued write requests are directed at zones that are already
+ * locked due to on-going write requests. In this case, make sure to mark
+ * the queue as needing a restart to ensure that the queue is run again
+ * and the pending writes dispatched once the target zones for the ongoing
+ * write requests are unlocked in dd_finish_request().
*/
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
@@ -384,6 +391,9 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
spin_lock(&dd->lock);
rq = __dd_dispatch_request(dd);
+ if (!rq && blk_queue_is_zoned(hctx->queue) &&
+ !list_empty(&dd->fifo_list[WRITE]))
+ blk_mq_sched_mark_restart_hctx(hctx);
spin_unlock(&dd->lock);
return rq;
@@ -761,7 +771,7 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
#endif
static struct elevator_type mq_deadline = {
- .ops.mq = {
+ .ops = {
.insert_requests = dd_insert_requests,
.dispatch_request = dd_dispatch_request,
.prepare_request = dd_prepare_request,
@@ -777,7 +787,6 @@ static struct elevator_type mq_deadline = {
.exit_sched = dd_exit_queue,
},
- .uses_mq = true,
#ifdef CONFIG_BLK_DEBUG_FS
.queue_debugfs_attrs = deadline_queue_debugfs_attrs,
#endif
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
deleted file mode 100644
index 2d1b15d89b45..000000000000
--- a/block/noop-iosched.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * elevator noop
- */
-#include <linux/blkdev.h>
-#include <linux/elevator.h>
-#include <linux/bio.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-
-struct noop_data {
- struct list_head queue;
-};
-
-static void noop_merged_requests(struct request_queue *q, struct request *rq,
- struct request *next)
-{
- list_del_init(&next->queuelist);
-}
-
-static int noop_dispatch(struct request_queue *q, int force)
-{
- struct noop_data *nd = q->elevator->elevator_data;
- struct request *rq;
-
- rq = list_first_entry_or_null(&nd->queue, struct request, queuelist);
- if (rq) {
- list_del_init(&rq->queuelist);
- elv_dispatch_sort(q, rq);
- return 1;
- }
- return 0;
-}
-
-static void noop_add_request(struct request_queue *q, struct request *rq)
-{
- struct noop_data *nd = q->elevator->elevator_data;
-
- list_add_tail(&rq->queuelist, &nd->queue);
-}
-
-static struct request *
-noop_former_request(struct request_queue *q, struct request *rq)
-{
- struct noop_data *nd = q->elevator->elevator_data;
-
- if (rq->queuelist.prev == &nd->queue)
- return NULL;
- return list_prev_entry(rq, queuelist);
-}
-
-static struct request *
-noop_latter_request(struct request_queue *q, struct request *rq)
-{
- struct noop_data *nd = q->elevator->elevator_data;
-
- if (rq->queuelist.next == &nd->queue)
- return NULL;
- return list_next_entry(rq, queuelist);
-}
-
-static int noop_init_queue(struct request_queue *q, struct elevator_type *e)
-{
- struct noop_data *nd;
- struct elevator_queue *eq;
-
- eq = elevator_alloc(q, e);
- if (!eq)
- return -ENOMEM;
-
- nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
- if (!nd) {
- kobject_put(&eq->kobj);
- return -ENOMEM;
- }
- eq->elevator_data = nd;
-
- INIT_LIST_HEAD(&nd->queue);
-
- spin_lock_irq(q->queue_lock);
- q->elevator = eq;
- spin_unlock_irq(q->queue_lock);
- return 0;
-}
-
-static void noop_exit_queue(struct elevator_queue *e)
-{
- struct noop_data *nd = e->elevator_data;
-
- BUG_ON(!list_empty(&nd->queue));
- kfree(nd);
-}
-
-static struct elevator_type elevator_noop = {
- .ops.sq = {
- .elevator_merge_req_fn = noop_merged_requests,
- .elevator_dispatch_fn = noop_dispatch,
- .elevator_add_req_fn = noop_add_request,
- .elevator_former_req_fn = noop_former_request,
- .elevator_latter_req_fn = noop_latter_request,
- .elevator_init_fn = noop_init_queue,
- .elevator_exit_fn = noop_exit_queue,
- },
- .elevator_name = "noop",
- .elevator_owner = THIS_MODULE,
-};
-
-static int __init noop_init(void)
-{
- return elv_register(&elevator_noop);
-}
-
-static void __exit noop_exit(void)
-{
- elv_unregister(&elevator_noop);
-}
-
-module_init(noop_init);
-module_exit(noop_exit);
-
-
-MODULE_AUTHOR("Jens Axboe");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("No-op IO scheduler");
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d3d14e81fb12..8e596a8dff32 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -120,13 +120,9 @@ ssize_t part_stat_show(struct device *dev,
{
struct hd_struct *p = dev_to_part(dev);
struct request_queue *q = part_to_disk(p)->queue;
- unsigned int inflight[2];
- int cpu;
+ unsigned int inflight;
- cpu = part_stat_lock();
- part_round_stats(q, cpu, p);
- part_stat_unlock();
- part_in_flight(q, p, inflight);
+ inflight = part_in_flight(q, p);
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
@@ -141,7 +137,7 @@ ssize_t part_stat_show(struct device *dev,
part_stat_read(p, merges[STAT_WRITE]),
(unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
(unsigned int)part_stat_read_msecs(p, STAT_WRITE),
- inflight[0],
+ inflight,
jiffies_to_msecs(part_stat_read(p, io_ticks)),
jiffies_to_msecs(part_stat_read(p, time_in_queue)),
part_stat_read(p, ios[STAT_DISCARD]),
@@ -249,9 +245,10 @@ struct device_type part_type = {
.uevent = part_uevent,
};
-static void delete_partition_rcu_cb(struct rcu_head *head)
+static void delete_partition_work_fn(struct work_struct *work)
{
- struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
+ struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct,
+ rcu_work);
part->start_sect = 0;
part->nr_sects = 0;
@@ -262,7 +259,8 @@ static void delete_partition_rcu_cb(struct rcu_head *head)
void __delete_partition(struct percpu_ref *ref)
{
struct hd_struct *part = container_of(ref, struct hd_struct, ref);
- call_rcu(&part->rcu_head, delete_partition_rcu_cb);
+ INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn);
+ queue_rcu_work(system_wq, &part->rcu_work);
}
/*
diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig
index f7c57e33499e..52eefd732cf2 100644
--- a/drivers/acpi/nfit/Kconfig
+++ b/drivers/acpi/nfit/Kconfig
@@ -13,3 +13,14 @@ config ACPI_NFIT
To compile this driver as a module, choose M here:
the module will be called nfit.
+
+config NFIT_SECURITY_DEBUG
+ bool "Enable debug for NVDIMM security commands"
+ depends on ACPI_NFIT
+ help
+ Some NVDIMM devices and controllers support encryption and
+ other security features. The payloads for the commands that
+ enable those features may contain sensitive clear-text
+ security material. Disable debug of those command payloads
+ by default. If you are a kernel developer actively working
+ on NVDIMM security enabling say Y, otherwise say N.
diff --git a/drivers/acpi/nfit/Makefile b/drivers/acpi/nfit/Makefile
index a407e769f103..751081c47886 100644
--- a/drivers/acpi/nfit/Makefile
+++ b/drivers/acpi/nfit/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_ACPI_NFIT) := nfit.o
nfit-y := core.o
+nfit-y += intel.o
nfit-$(CONFIG_X86_MCE) += mce.o
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 5912d30020c7..011d3db19c80 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -24,6 +24,7 @@
#include <linux/nd.h>
#include <asm/cacheflush.h>
#include <acpi/nfit.h>
+#include "intel.h"
#include "nfit.h"
#include "intel.h"
@@ -380,6 +381,16 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func)
[NVDIMM_INTEL_QUERY_FWUPDATE] = 2,
[NVDIMM_INTEL_SET_THRESHOLD] = 2,
[NVDIMM_INTEL_INJECT_ERROR] = 2,
+ [NVDIMM_INTEL_GET_SECURITY_STATE] = 2,
+ [NVDIMM_INTEL_SET_PASSPHRASE] = 2,
+ [NVDIMM_INTEL_DISABLE_PASSPHRASE] = 2,
+ [NVDIMM_INTEL_UNLOCK_UNIT] = 2,
+ [NVDIMM_INTEL_FREEZE_LOCK] = 2,
+ [NVDIMM_INTEL_SECURE_ERASE] = 2,
+ [NVDIMM_INTEL_OVERWRITE] = 2,
+ [NVDIMM_INTEL_QUERY_OVERWRITE] = 2,
+ [NVDIMM_INTEL_SET_MASTER_PASSPHRASE] = 2,
+ [NVDIMM_INTEL_MASTER_SECURE_ERASE] = 2,
},
};
u8 id;
@@ -394,6 +405,17 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func)
return id;
}
+static bool payload_dumpable(struct nvdimm *nvdimm, unsigned int func)
+{
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+ if (nfit_mem && nfit_mem->family == NVDIMM_FAMILY_INTEL
+ && func >= NVDIMM_INTEL_GET_SECURITY_STATE
+ && func <= NVDIMM_INTEL_MASTER_SECURE_ERASE)
+ return IS_ENABLED(CONFIG_NFIT_SECURITY_DEBUG);
+ return true;
+}
+
int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
{
@@ -478,9 +500,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n",
dimm_name, cmd, func, in_buf.buffer.length);
- print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4,
- in_buf.buffer.pointer,
- min_t(u32, 256, in_buf.buffer.length), true);
+ if (payload_dumpable(nvdimm, func))
+ print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4,
+ in_buf.buffer.pointer,
+ min_t(u32, 256, in_buf.buffer.length), true);
/* call the BIOS, prefer the named methods over _DSM if available */
if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE
@@ -1573,18 +1596,10 @@ static DEVICE_ATTR_RO(flags);
static ssize_t id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
- if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
- return sprintf(buf, "%04x-%02x-%04x-%08x\n",
- be16_to_cpu(dcr->vendor_id),
- dcr->manufacturing_location,
- be16_to_cpu(dcr->manufacturing_date),
- be32_to_cpu(dcr->serial_number));
- else
- return sprintf(buf, "%04x-%08x\n",
- be16_to_cpu(dcr->vendor_id),
- be32_to_cpu(dcr->serial_number));
+ return sprintf(buf, "%s\n", nfit_mem->id);
}
static DEVICE_ATTR_RO(id);
@@ -1780,10 +1795,23 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
const guid_t *guid;
int i;
int family = -1;
+ struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
/* nfit test assumes 1:1 relationship between commands and dsms */
nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
nfit_mem->family = NVDIMM_FAMILY_INTEL;
+
+ if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
+ sprintf(nfit_mem->id, "%04x-%02x-%04x-%08x",
+ be16_to_cpu(dcr->vendor_id),
+ dcr->manufacturing_location,
+ be16_to_cpu(dcr->manufacturing_date),
+ be32_to_cpu(dcr->serial_number));
+ else
+ sprintf(nfit_mem->id, "%04x-%08x",
+ be16_to_cpu(dcr->vendor_id),
+ be32_to_cpu(dcr->serial_number));
+
adev = to_acpi_dev(acpi_desc);
if (!adev) {
/* unit test case */
@@ -1904,6 +1932,16 @@ static void shutdown_dimm_notify(void *data)
mutex_unlock(&acpi_desc->init_mutex);
}
+static const struct nvdimm_security_ops *acpi_nfit_get_security_ops(int family)
+{
+ switch (family) {
+ case NVDIMM_FAMILY_INTEL:
+ return intel_security_ops;
+ default:
+ return NULL;
+ }
+}
+
static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
{
struct nfit_mem *nfit_mem;
@@ -1970,10 +2008,11 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
: NULL;
- nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
+ nvdimm = __nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
acpi_nfit_dimm_attribute_groups,
flags, cmd_mask, flush ? flush->hint_count : 0,
- nfit_mem->flush_wpq);
+ nfit_mem->flush_wpq, &nfit_mem->id[0],
+ acpi_nfit_get_security_ops(nfit_mem->family));
if (!nvdimm)
return -ENOMEM;
@@ -2008,6 +2047,11 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
if (!nvdimm)
continue;
+ rc = nvdimm_security_setup_events(nvdimm);
+ if (rc < 0)
+ dev_warn(acpi_desc->dev,
+ "security event setup failed: %d\n", rc);
+
nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
if (nfit_kernfs)
nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
@@ -3337,7 +3381,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
return 0;
}
-static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
+static int __acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd)
{
struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
@@ -3359,6 +3403,23 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
return 0;
}
+/* prevent security commands from being issued via ioctl */
+static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
+ struct nvdimm *nvdimm, unsigned int cmd, void *buf)
+{
+ struct nd_cmd_pkg *call_pkg = buf;
+ unsigned int func;
+
+ if (nvdimm && cmd == ND_CMD_CALL &&
+ call_pkg->nd_family == NVDIMM_FAMILY_INTEL) {
+ func = call_pkg->nd_command;
+ if ((1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK)
+ return -EOPNOTSUPP;
+ }
+
+ return __acpi_nfit_clear_to_send(nd_desc, nvdimm, cmd);
+}
+
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
enum nfit_ars_state req_type)
{
@@ -3474,7 +3535,13 @@ static int acpi_nfit_add(struct acpi_device *adev)
status = acpi_get_table(ACPI_SIG_NFIT, 0, &tbl);
if (ACPI_FAILURE(status)) {
- /* This is ok, we could have an nvdimm hotplugged later */
+ /* The NVDIMM root device allows OS to trigger enumeration of
+ * NVDIMMs through NFIT at boot time and re-enumeration at
+ * root level via the _FIT method during runtime.
+ * This is ok to return 0 here, we could have an nvdimm
+ * hotplugged later and evaluate _FIT method which returns
+ * data in the format of a series of NFIT Structures.
+ */
dev_dbg(dev, "failed to find NFIT at startup\n");
return 0;
}
diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c
new file mode 100644
index 000000000000..850b2927b4e7
--- /dev/null
+++ b/drivers/acpi/nfit/intel.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. All rights reserved. */
+#include <linux/libnvdimm.h>
+#include <linux/ndctl.h>
+#include <linux/acpi.h>
+#include <asm/smp.h>
+#include "intel.h"
+#include "nfit.h"
+
+static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm,
+ enum nvdimm_passphrase_type ptype)
+{
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ struct {
+ struct nd_cmd_pkg pkg;
+ struct nd_intel_get_security_state cmd;
+ } nd_cmd = {
+ .pkg = {
+ .nd_command = NVDIMM_INTEL_GET_SECURITY_STATE,
+ .nd_family = NVDIMM_FAMILY_INTEL,
+ .nd_size_out =
+ sizeof(struct nd_intel_get_security_state),
+ .nd_fw_size =
+ sizeof(struct nd_intel_get_security_state),
+ },
+ };
+ int rc;
+
+ if (!test_bit(NVDIMM_INTEL_GET_SECURITY_STATE, &nfit_mem->dsm_mask))
+ return -ENXIO;
+
+ /*
+ * Short circuit the state retrieval while we are doing overwrite.
+ * The DSM spec states that the security state is indeterminate
+ * until the overwrite DSM completes.
+ */
+ if (nvdimm_in_overwrite(nvdimm) && ptype == NVDIMM_USER)
+ return NVDIMM_SECURITY_OVERWRITE;
+
+ rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
+ if (rc < 0)
+ return rc;
+ if (nd_cmd.cmd.status)
+ return -EIO;
+
+ /* check and see if security is enabled and locked */
+ if (ptype == NVDIMM_MASTER) {
+ if (nd_cmd.cmd.extended_state & ND_INTEL_SEC_ESTATE_ENABLED)
+ return NVDIMM_SECURITY_UNLOCKED;
+ else if (nd_cmd.cmd.extended_state &
+ ND_INTEL_SEC_ESTATE_PLIMIT)
+ return NVDIMM_SECURITY_FROZEN;
+ } else {
+ if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_UNSUPPORTED)
+ return -ENXIO;
+ else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_ENABLED) {
+ if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_LOCKED)
+ return NVDIMM_SECURITY_LOCKED;
+ else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_FROZEN
+ || nd_cmd.cmd.state &
+ ND_INTEL_SEC_STATE_PLIMIT)
+ return NVDIMM_SECURITY_FROZEN;
+ else
+ return NVDIMM_SECURITY_UNLOCKED;
+ }
+ }
+
+ /* this should cover master security disabled as well */
+ return NVDIMM_SECURITY_DISABLED;
+}
+
+static int intel_security_freeze(struct nvdimm *nvdimm)
+{
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ struct {
+ struct nd_cmd_pkg pkg;
+ struct nd_intel_freeze_lock cmd;
+ } nd_cmd = {
+ .pkg = {
+ .nd_command = NVDIMM_INTEL_FREEZE_LOCK,
+ .nd_family = NVDIMM_FAMILY_INTEL,
+ .nd_size_out = ND_INTEL_STATUS_SIZE,
+ .nd_fw_size = ND_INTEL_STATUS_SIZE,
+ },
+ };
+ int rc;
+
+ if (!test_bit(NVDIMM_INTEL_FREEZE_LOCK, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
+ if (rc < 0)
+ return rc;
+ if (nd_cmd.cmd.status)
+ return -EIO;
+ return 0;
+}
+
+static int intel_security_change_key(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *old_data,
+ const struct nvdimm_key_data *new_data,
+ enum nvdimm_passphrase_type ptype)
+{
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ unsigned int cmd = ptype == NVDIMM_MASTER ?
+ NVDIMM_INTEL_SET_MASTER_PASSPHRASE :
+ NVDIMM_INTEL_SET_PASSPHRASE;
+ struct {
+ struct nd_cmd_pkg pkg;
+ struct nd_intel_set_passphrase cmd;
+ } nd_cmd = {
+ .pkg = {
+ .nd_family = NVDIMM_FAMILY_INTEL,
+ .nd_size_in = ND_INTEL_PASSPHRASE_SIZE * 2,
+ .nd_size_out = ND_INTEL_STATUS_SIZE,
+ .nd_fw_size = ND_INTEL_STATUS_SIZE,
+ .nd_command = cmd,
+ },
+ };
+ int rc;
+
+ if (!test_bit(cmd, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ if (old_data)
+ memcpy(nd_cmd.cmd.old_pass, old_data->data,
+ sizeof(nd_cmd.cmd.old_pass));
+ memcpy(nd_cmd.cmd.new_pass, new_data->data,
+ sizeof(nd_cmd.cmd.new_pass));
+ rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
+ if (rc < 0)
+ return rc;
+
+ switch (nd_cmd.cmd.status) {
+ case 0:
+ return 0;
+ case ND_INTEL_STATUS_INVALID_PASS:
+ return -EINVAL;
+ case ND_INTEL_STATUS_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ case ND_INTEL_STATUS_INVALID_STATE:
+ default:
+ return -EIO;
+ }
+}
+
+static void nvdimm_invalidate_cache(void);
+
+static int intel_security_unlock(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data)
+{
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ struct {
+ struct nd_cmd_pkg pkg;
+ struct nd_intel_unlock_unit cmd;
+ } nd_cmd = {
+ .pkg = {
+ .nd_command = NVDIMM_INTEL_UNLOCK_UNIT,
+ .nd_family = NVDIMM_FAMILY_INTEL,
+ .nd_size_in = ND_INTEL_PASSPHRASE_SIZE,
+ .nd_size_out = ND_INTEL_STATUS_SIZE,
+ .nd_fw_size = ND_INTEL_STATUS_SIZE,
+ },
+ };
+ int rc;
+
+ if (!test_bit(NVDIMM_INTEL_UNLOCK_UNIT, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ memcpy(nd_cmd.cmd.passphrase, key_data->data,
+ sizeof(nd_cmd.cmd.passphrase));
+ rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
+ if (rc < 0)
+ return rc;
+ switch (nd_cmd.cmd.status) {
+ case 0:
+ break;
+ case ND_INTEL_STATUS_INVALID_PASS:
+ return -EINVAL;
+ default:
+ return -EIO;
+ }
+
+ /* DIMM unlocked, invalidate all CPU caches before we read it */
+ nvdimm_invalidate_cache();
+
+ return 0;
+}
+
+static int intel_security_disable(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data)
+{
+ int rc;
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ struct {
+ struct nd_cmd_pkg pkg;
+ struct nd_intel_disable_passphrase cmd;
+ } nd_cmd = {
+ .pkg = {
+ .nd_command = NVDIMM_INTEL_DISABLE_PASSPHRASE,
+ .nd_family = NVDIMM_FAMILY_INTEL,
+ .nd_size_in = ND_INTEL_PASSPHRASE_SIZE,
+ .nd_size_out = ND_INTEL_STATUS_SIZE,
+ .nd_fw_size = ND_INTEL_STATUS_SIZE,
+ },
+ };
+
+ if (!test_bit(NVDIMM_INTEL_DISABLE_PASSPHRASE, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ memcpy(nd_cmd.cmd.passphrase, key_data->data,
+ sizeof(nd_cmd.cmd.passphrase));
+ rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
+ if (rc < 0)
+ return rc;
+
+ switch (nd_cmd.cmd.status) {
+ case 0:
+ break;
+ case ND_INTEL_STATUS_INVALID_PASS:
+ return -EINVAL;
+ case ND_INTEL_STATUS_INVALID_STATE:
+ default:
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int intel_security_erase(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key,
+ enum nvdimm_passphrase_type ptype)
+{
+ int rc;
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ unsigned int cmd = ptype == NVDIMM_MASTER ?
+ NVDIMM_INTEL_MASTER_SECURE_ERASE : NVDIMM_INTEL_SECURE_ERASE;
+ struct {
+ struct nd_cmd_pkg pkg;
+ struct nd_intel_secure_erase cmd;
+ } nd_cmd = {
+ .pkg = {
+ .nd_family = NVDIMM_FAMILY_INTEL,
+ .nd_size_in = ND_INTEL_PASSPHRASE_SIZE,
+ .nd_size_out = ND_INTEL_STATUS_SIZE,
+ .nd_fw_size = ND_INTEL_STATUS_SIZE,
+ .nd_command = cmd,
+ },
+ };
+
+ if (!test_bit(cmd, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ /* flush all cache before we erase DIMM */
+ nvdimm_invalidate_cache();
+ memcpy(nd_cmd.cmd.passphrase, key->data,
+ sizeof(nd_cmd.cmd.passphrase));
+ rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
+ if (rc < 0)
+ return rc;
+
+ switch (nd_cmd.cmd.status) {
+ case 0:
+ break;
+ case ND_INTEL_STATUS_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ case ND_INTEL_STATUS_INVALID_PASS:
+ return -EINVAL;
+ case ND_INTEL_STATUS_INVALID_STATE:
+ default:
+ return -ENXIO;
+ }
+
+ /* DIMM erased, invalidate all CPU caches before we read it */
+ nvdimm_invalidate_cache();
+ return 0;
+}
+
+static int intel_security_query_overwrite(struct nvdimm *nvdimm)
+{
+ int rc;
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ struct {
+ struct nd_cmd_pkg pkg;
+ struct nd_intel_query_overwrite cmd;
+ } nd_cmd = {
+ .pkg = {
+ .nd_command = NVDIMM_INTEL_QUERY_OVERWRITE,
+ .nd_family = NVDIMM_FAMILY_INTEL,
+ .nd_size_out = ND_INTEL_STATUS_SIZE,
+ .nd_fw_size = ND_INTEL_STATUS_SIZE,
+ },
+ };
+
+ if (!test_bit(NVDIMM_INTEL_QUERY_OVERWRITE, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
+ if (rc < 0)
+ return rc;
+
+ switch (nd_cmd.cmd.status) {
+ case 0:
+ break;
+ case ND_INTEL_STATUS_OQUERY_INPROGRESS:
+ return -EBUSY;
+ default:
+ return -ENXIO;
+ }
+
+ /* flush all cache before we make the nvdimms available */
+ nvdimm_invalidate_cache();
+ return 0;
+}
+
+static int intel_security_overwrite(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *nkey)
+{
+ int rc;
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ struct {
+ struct nd_cmd_pkg pkg;
+ struct nd_intel_overwrite cmd;
+ } nd_cmd = {
+ .pkg = {
+ .nd_command = NVDIMM_INTEL_OVERWRITE,
+ .nd_family = NVDIMM_FAMILY_INTEL,
+ .nd_size_in = ND_INTEL_PASSPHRASE_SIZE,
+ .nd_size_out = ND_INTEL_STATUS_SIZE,
+ .nd_fw_size = ND_INTEL_STATUS_SIZE,
+ },
+ };
+
+ if (!test_bit(NVDIMM_INTEL_OVERWRITE, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ /* flush all cache before we erase DIMM */
+ nvdimm_invalidate_cache();
+ if (nkey)
+ memcpy(nd_cmd.cmd.passphrase, nkey->data,
+ sizeof(nd_cmd.cmd.passphrase));
+ rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
+ if (rc < 0)
+ return rc;
+
+ switch (nd_cmd.cmd.status) {
+ case 0:
+ return 0;
+ case ND_INTEL_STATUS_OVERWRITE_UNSUPPORTED:
+ return -ENOTSUPP;
+ case ND_INTEL_STATUS_INVALID_PASS:
+ return -EINVAL;
+ case ND_INTEL_STATUS_INVALID_STATE:
+ default:
+ return -ENXIO;
+ }
+}
+
+/*
+ * TODO: define a cross arch wbinvd equivalent when/if
+ * NVDIMM_FAMILY_INTEL command support arrives on another arch.
+ */
+#ifdef CONFIG_X86
+static void nvdimm_invalidate_cache(void)
+{
+ wbinvd_on_all_cpus();
+}
+#else
+static void nvdimm_invalidate_cache(void)
+{
+ WARN_ON_ONCE("cache invalidation required after unlock\n");
+}
+#endif
+
+static const struct nvdimm_security_ops __intel_security_ops = {
+ .state = intel_security_state,
+ .freeze = intel_security_freeze,
+ .change_key = intel_security_change_key,
+ .disable = intel_security_disable,
+#ifdef CONFIG_X86
+ .unlock = intel_security_unlock,
+ .erase = intel_security_erase,
+ .overwrite = intel_security_overwrite,
+ .query_overwrite = intel_security_query_overwrite,
+#endif
+};
+
+const struct nvdimm_security_ops *intel_security_ops = &__intel_security_ops;
diff --git a/drivers/acpi/nfit/intel.h b/drivers/acpi/nfit/intel.h
index 86746312381f..0aca682ab9d7 100644
--- a/drivers/acpi/nfit/intel.h
+++ b/drivers/acpi/nfit/intel.h
@@ -35,4 +35,80 @@ struct nd_intel_smart {
};
} __packed;
+extern const struct nvdimm_security_ops *intel_security_ops;
+
+#define ND_INTEL_STATUS_SIZE 4
+#define ND_INTEL_PASSPHRASE_SIZE 32
+
+#define ND_INTEL_STATUS_NOT_SUPPORTED 1
+#define ND_INTEL_STATUS_RETRY 5
+#define ND_INTEL_STATUS_NOT_READY 9
+#define ND_INTEL_STATUS_INVALID_STATE 10
+#define ND_INTEL_STATUS_INVALID_PASS 11
+#define ND_INTEL_STATUS_OVERWRITE_UNSUPPORTED 0x10007
+#define ND_INTEL_STATUS_OQUERY_INPROGRESS 0x10007
+#define ND_INTEL_STATUS_OQUERY_SEQUENCE_ERR 0x20007
+
+#define ND_INTEL_SEC_STATE_ENABLED 0x02
+#define ND_INTEL_SEC_STATE_LOCKED 0x04
+#define ND_INTEL_SEC_STATE_FROZEN 0x08
+#define ND_INTEL_SEC_STATE_PLIMIT 0x10
+#define ND_INTEL_SEC_STATE_UNSUPPORTED 0x20
+#define ND_INTEL_SEC_STATE_OVERWRITE 0x40
+
+#define ND_INTEL_SEC_ESTATE_ENABLED 0x01
+#define ND_INTEL_SEC_ESTATE_PLIMIT 0x02
+
+struct nd_intel_get_security_state {
+ u32 status;
+ u8 extended_state;
+ u8 reserved[3];
+ u8 state;
+ u8 reserved1[3];
+} __packed;
+
+struct nd_intel_set_passphrase {
+ u8 old_pass[ND_INTEL_PASSPHRASE_SIZE];
+ u8 new_pass[ND_INTEL_PASSPHRASE_SIZE];
+ u32 status;
+} __packed;
+
+struct nd_intel_unlock_unit {
+ u8 passphrase[ND_INTEL_PASSPHRASE_SIZE];
+ u32 status;
+} __packed;
+
+struct nd_intel_disable_passphrase {
+ u8 passphrase[ND_INTEL_PASSPHRASE_SIZE];
+ u32 status;
+} __packed;
+
+struct nd_intel_freeze_lock {
+ u32 status;
+} __packed;
+
+struct nd_intel_secure_erase {
+ u8 passphrase[ND_INTEL_PASSPHRASE_SIZE];
+ u32 status;
+} __packed;
+
+struct nd_intel_overwrite {
+ u8 passphrase[ND_INTEL_PASSPHRASE_SIZE];
+ u32 status;
+} __packed;
+
+struct nd_intel_query_overwrite {
+ u32 status;
+} __packed;
+
+struct nd_intel_set_master_passphrase {
+ u8 old_pass[ND_INTEL_PASSPHRASE_SIZE];
+ u8 new_pass[ND_INTEL_PASSPHRASE_SIZE];
+ u32 status;
+} __packed;
+
+struct nd_intel_master_secure_erase {
+ u8 passphrase[ND_INTEL_PASSPHRASE_SIZE];
+ u32 status;
+} __packed;
#endif
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index df0f6b8407e7..33691aecfcee 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -60,14 +60,33 @@ enum nvdimm_family_cmds {
NVDIMM_INTEL_QUERY_FWUPDATE = 16,
NVDIMM_INTEL_SET_THRESHOLD = 17,
NVDIMM_INTEL_INJECT_ERROR = 18,
+ NVDIMM_INTEL_GET_SECURITY_STATE = 19,
+ NVDIMM_INTEL_SET_PASSPHRASE = 20,
+ NVDIMM_INTEL_DISABLE_PASSPHRASE = 21,
+ NVDIMM_INTEL_UNLOCK_UNIT = 22,
+ NVDIMM_INTEL_FREEZE_LOCK = 23,
+ NVDIMM_INTEL_SECURE_ERASE = 24,
+ NVDIMM_INTEL_OVERWRITE = 25,
+ NVDIMM_INTEL_QUERY_OVERWRITE = 26,
+ NVDIMM_INTEL_SET_MASTER_PASSPHRASE = 27,
+ NVDIMM_INTEL_MASTER_SECURE_ERASE = 28,
};
+#define NVDIMM_INTEL_SECURITY_CMDMASK \
+(1 << NVDIMM_INTEL_GET_SECURITY_STATE | 1 << NVDIMM_INTEL_SET_PASSPHRASE \
+| 1 << NVDIMM_INTEL_DISABLE_PASSPHRASE | 1 << NVDIMM_INTEL_UNLOCK_UNIT \
+| 1 << NVDIMM_INTEL_FREEZE_LOCK | 1 << NVDIMM_INTEL_SECURE_ERASE \
+| 1 << NVDIMM_INTEL_OVERWRITE | 1 << NVDIMM_INTEL_QUERY_OVERWRITE \
+| 1 << NVDIMM_INTEL_SET_MASTER_PASSPHRASE \
+| 1 << NVDIMM_INTEL_MASTER_SECURE_ERASE)
+
#define NVDIMM_INTEL_CMDMASK \
(NVDIMM_STANDARD_CMDMASK | 1 << NVDIMM_INTEL_GET_MODES \
| 1 << NVDIMM_INTEL_GET_FWINFO | 1 << NVDIMM_INTEL_START_FWUPDATE \
| 1 << NVDIMM_INTEL_SEND_FWUPDATE | 1 << NVDIMM_INTEL_FINISH_FWUPDATE \
| 1 << NVDIMM_INTEL_QUERY_FWUPDATE | 1 << NVDIMM_INTEL_SET_THRESHOLD \
- | 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN)
+ | 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN \
+ | NVDIMM_INTEL_SECURITY_CMDMASK)
enum nfit_uuids {
/* for simplicity alias the uuid index with the family id */
@@ -164,6 +183,8 @@ enum nfit_mem_flags {
NFIT_MEM_DIRTY_COUNT,
};
+#define NFIT_DIMM_ID_LEN 22
+
/* assembled tables for a given dimm/memory-device */
struct nfit_mem {
struct nvdimm *nvdimm;
@@ -181,6 +202,7 @@ struct nfit_mem {
struct list_head list;
struct acpi_device *adev;
struct acpi_nfit_desc *acpi_desc;
+ char id[NFIT_DIMM_ID_LEN+1];
struct resource *flush_wpq;
unsigned long dsm_mask;
unsigned long flags;
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e9eda5558c1f..5efd4219f112 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1456,6 +1456,11 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
const struct iommu_ops *iommu;
u64 dma_addr = 0, size = 0;
+ if (attr == DEV_DMA_NOT_SUPPORTED) {
+ set_dma_ops(dev, &dma_dummy_ops);
+ return 0;
+ }
+
iort_dma_setup(dev, &dma_addr, &size);
iommu = iort_iommu_configure(dev);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 01306c018398..938ed513b070 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -919,8 +919,6 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
- struct request_queue *q = qc->scsicmd->device->request_queue;
- unsigned long flags;
WARN_ON(!ap->ops->error_handler);
@@ -932,9 +930,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
* Note that ATA_QCFLAG_FAILED is unconditionally set after
* this function completes.
*/
- spin_lock_irqsave(q->queue_lock, flags);
blk_abort_request(qc->scsicmd->request);
- spin_unlock_irqrestore(q->queue_lock, flags);
}
/**
diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c
index d071ab6864a8..26817fd91700 100644
--- a/drivers/ata/pata_palmld.c
+++ b/drivers/ata/pata_palmld.c
@@ -26,16 +26,17 @@
#include <linux/irq.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <scsi/scsi_host.h>
#include <mach/palmld.h>
#define DRV_NAME "pata_palmld"
-static struct gpio palmld_hdd_gpios[] = {
- { GPIO_NR_PALMLD_IDE_PWEN, GPIOF_INIT_HIGH, "HDD Power" },
- { GPIO_NR_PALMLD_IDE_RESET, GPIOF_INIT_LOW, "HDD Reset" },
+struct palmld_pata {
+ struct ata_host *host;
+ struct gpio_desc *power;
+ struct gpio_desc *reset;
};
static struct scsi_host_template palmld_sht = {
@@ -50,39 +51,44 @@ static struct ata_port_operations palmld_port_ops = {
static int palmld_pata_probe(struct platform_device *pdev)
{
- struct ata_host *host;
+ struct palmld_pata *lda;
struct ata_port *ap;
void __iomem *mem;
+ struct device *dev = &pdev->dev;
int ret;
+ lda = devm_kzalloc(dev, sizeof(*lda), GFP_KERNEL);
+ if (!lda)
+ return -ENOMEM;
+
/* allocate host */
- host = ata_host_alloc(&pdev->dev, 1);
- if (!host) {
- ret = -ENOMEM;
- goto err1;
- }
+ lda->host = ata_host_alloc(dev, 1);
+ if (!lda->host)
+ return -ENOMEM;
/* remap drive's physical memory address */
- mem = devm_ioremap(&pdev->dev, PALMLD_IDE_PHYS, 0x1000);
- if (!mem) {
- ret = -ENOMEM;
- goto err1;
+ mem = devm_ioremap(dev, PALMLD_IDE_PHYS, 0x1000);
+ if (!mem)
+ return -ENOMEM;
+
+ /* request and activate power and reset GPIOs */
+ lda->power = devm_gpiod_get(dev, "power", GPIOD_OUT_HIGH);
+ if (IS_ERR(lda->power))
+ return PTR_ERR(lda->power);
+ lda->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(lda->reset)) {
+ gpiod_set_value(lda->power, 0);
+ return PTR_ERR(lda->reset);
}
- /* request and activate power GPIO, IRQ GPIO */
- ret = gpio_request_array(palmld_hdd_gpios,
- ARRAY_SIZE(palmld_hdd_gpios));
- if (ret)
- goto err1;
-
- /* reset the drive */
- gpio_set_value(GPIO_NR_PALMLD_IDE_RESET, 0);
+ /* Assert reset to reset the drive */
+ gpiod_set_value(lda->reset, 1);
msleep(30);
- gpio_set_value(GPIO_NR_PALMLD_IDE_RESET, 1);
+ gpiod_set_value(lda->reset, 0);
msleep(30);
/* setup the ata port */
- ap = host->ports[0];
+ ap = lda->host->ports[0];
ap->ops = &palmld_port_ops;
ap->pio_mask = ATA_PIO4;
ap->flags |= ATA_FLAG_PIO_POLLING;
@@ -96,27 +102,26 @@ static int palmld_pata_probe(struct platform_device *pdev)
ata_sff_std_ports(&ap->ioaddr);
/* activate host */
- ret = ata_host_activate(host, 0, NULL, IRQF_TRIGGER_RISING,
- &palmld_sht);
- if (ret)
- goto err2;
-
- return ret;
+ ret = ata_host_activate(lda->host, 0, NULL, IRQF_TRIGGER_RISING,
+ &palmld_sht);
+ /* power down on failure */
+ if (ret) {
+ gpiod_set_value(lda->power, 0);
+ return ret;
+ }
-err2:
- gpio_free_array(palmld_hdd_gpios, ARRAY_SIZE(palmld_hdd_gpios));
-err1:
- return ret;
+ platform_set_drvdata(pdev, lda);
+ return 0;
}
-static int palmld_pata_remove(struct platform_device *dev)
+static int palmld_pata_remove(struct platform_device *pdev)
{
- ata_platform_remove_one(dev);
+ struct palmld_pata *lda = platform_get_drvdata(pdev);
- /* power down the HDD */
- gpio_set_value(GPIO_NR_PALMLD_IDE_PWEN, 0);
+ ata_platform_remove_one(pdev);
- gpio_free_array(palmld_hdd_gpios, ARRAY_SIZE(palmld_hdd_gpios));
+ /* power down the HDD */
+ gpiod_set_value(lda->power, 0);
return 0;
}
diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c
index e8b6a2e464c9..4b9b9e120188 100644
--- a/drivers/ata/pata_pxa.c
+++ b/drivers/ata/pata_pxa.c
@@ -25,7 +25,6 @@
#include <linux/libata.h>
#include <linux/platform_device.h>
#include <linux/dmaengine.h>
-#include <linux/gpio.h>
#include <linux/slab.h>
#include <linux/completion.h>
diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index 653b9a0bf727..66bb5bff126b 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c
@@ -27,7 +27,7 @@
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/libata.h>
#include <scsi/scsi_host.h>
@@ -49,7 +49,7 @@
struct rb532_cf_info {
void __iomem *iobase;
- unsigned int gpio_line;
+ struct gpio_desc *gpio_line;
unsigned int irq;
};
@@ -60,7 +60,7 @@ static irqreturn_t rb532_pata_irq_handler(int irq, void *dev_instance)
struct ata_host *ah = dev_instance;
struct rb532_cf_info *info = ah->private_data;
- if (gpio_get_value(info->gpio_line)) {
+ if (gpiod_get_value(info->gpio_line)) {
irq_set_irq_type(info->irq, IRQ_TYPE_LEVEL_LOW);
ata_sff_interrupt(info->irq, dev_instance);
} else {
@@ -106,10 +106,9 @@ static void rb532_pata_setup_ports(struct ata_host *ah)
static int rb532_pata_driver_probe(struct platform_device *pdev)
{
int irq;
- int gpio;
+ struct gpio_desc *gpiod;
struct resource *res;
struct ata_host *ah;
- struct cf_device *pdata;
struct rb532_cf_info *info;
int ret;
@@ -125,23 +124,12 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
return -ENOENT;
}
- pdata = dev_get_platdata(&pdev->dev);
- if (!pdata) {
- dev_err(&pdev->dev, "no platform data specified\n");
- return -EINVAL;
- }
-
- gpio = pdata->gpio_pin;
- if (gpio < 0) {
+ gpiod = devm_gpiod_get(&pdev->dev, NULL, GPIOD_IN);
+ if (IS_ERR(gpiod)) {
dev_err(&pdev->dev, "no GPIO found for irq%d\n", irq);
- return -ENOENT;
- }
-
- ret = gpio_request(gpio, DRV_NAME);
- if (ret) {
- dev_err(&pdev->dev, "GPIO request failed\n");
- return ret;
+ return PTR_ERR(gpiod);
}
+ gpiod_set_consumer_name(gpiod, DRV_NAME);
/* allocate host */
ah = ata_host_alloc(&pdev->dev, RB500_CF_MAXPORTS);
@@ -153,7 +141,7 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
return -ENOMEM;
ah->private_data = info;
- info->gpio_line = gpio;
+ info->gpio_line = gpiod;
info->irq = irq;
info->iobase = devm_ioremap_nocache(&pdev->dev, res->start,
@@ -161,26 +149,14 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
if (!info->iobase)
return -ENOMEM;
- ret = gpio_direction_input(gpio);
- if (ret) {
- dev_err(&pdev->dev, "unable to set GPIO direction, err=%d\n",
- ret);
- goto err_free_gpio;
- }
-
rb532_pata_setup_ports(ah);
ret = ata_host_activate(ah, irq, rb532_pata_irq_handler,
IRQF_TRIGGER_LOW, &rb532_pata_sht);
if (ret)
- goto err_free_gpio;
+ return ret;
return 0;
-
-err_free_gpio:
- gpio_free(gpio);
-
- return ret;
}
static int rb532_pata_driver_remove(struct platform_device *pdev)
@@ -189,7 +165,6 @@ static int rb532_pata_driver_remove(struct platform_device *pdev)
struct rb532_cf_info *info = ah->private_data;
ata_host_detach(ah);
- gpio_free(info->gpio_line);
return 0;
}
diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c
index e67815b896fc..c8fc9280d6e4 100644
--- a/drivers/ata/sata_highbank.c
+++ b/drivers/ata/sata_highbank.c
@@ -31,8 +31,7 @@
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
-#include <linux/gpio.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
#include "ahci.h"
@@ -85,7 +84,7 @@ struct ecx_plat_data {
/* number of extra clocks that the SGPIO PIC controller expects */
u32 pre_clocks;
u32 post_clocks;
- unsigned sgpio_gpio[SGPIO_PINS];
+ struct gpio_desc *sgpio_gpiod[SGPIO_PINS];
u32 sgpio_pattern;
u32 port_to_sgpio[SGPIO_PORTS];
};
@@ -131,9 +130,9 @@ static void ecx_parse_sgpio(struct ecx_plat_data *pdata, u32 port, u32 state)
*/
static void ecx_led_cycle_clock(struct ecx_plat_data *pdata)
{
- gpio_set_value(pdata->sgpio_gpio[SCLOCK], 1);
+ gpiod_set_value(pdata->sgpio_gpiod[SCLOCK], 1);
udelay(50);
- gpio_set_value(pdata->sgpio_gpio[SCLOCK], 0);
+ gpiod_set_value(pdata->sgpio_gpiod[SCLOCK], 0);
udelay(50);
}
@@ -164,15 +163,15 @@ static ssize_t ecx_transmit_led_message(struct ata_port *ap, u32 state,
for (i = 0; i < pdata->pre_clocks; i++)
ecx_led_cycle_clock(pdata);
- gpio_set_value(pdata->sgpio_gpio[SLOAD], 1);
+ gpiod_set_value(pdata->sgpio_gpiod[SLOAD], 1);
ecx_led_cycle_clock(pdata);
- gpio_set_value(pdata->sgpio_gpio[SLOAD], 0);
+ gpiod_set_value(pdata->sgpio_gpiod[SLOAD], 0);
/*
* bit-bang out the SGPIO pattern, by consuming a bit and then
* clocking it out.
*/
for (i = 0; i < (SGPIO_SIGNALS * pdata->n_ports); i++) {
- gpio_set_value(pdata->sgpio_gpio[SDATA], sgpio_out & 1);
+ gpiod_set_value(pdata->sgpio_gpiod[SDATA], sgpio_out & 1);
sgpio_out >>= 1;
ecx_led_cycle_clock(pdata);
}
@@ -193,21 +192,19 @@ static void highbank_set_em_messages(struct device *dev,
struct device_node *np = dev->of_node;
struct ecx_plat_data *pdata = hpriv->plat_data;
int i;
- int err;
for (i = 0; i < SGPIO_PINS; i++) {
- err = of_get_named_gpio(np, "calxeda,sgpio-gpio", i);
- if (err < 0)
- return;
-
- pdata->sgpio_gpio[i] = err;
- err = gpio_request(pdata->sgpio_gpio[i], "CX SGPIO");
- if (err) {
- pr_err("sata_highbank gpio_request %d failed: %d\n",
- i, err);
- return;
+ struct gpio_desc *gpiod;
+
+ gpiod = devm_gpiod_get_index(dev, "calxeda,sgpio", i,
+ GPIOD_OUT_HIGH);
+ if (IS_ERR(gpiod)) {
+ dev_err(dev, "failed to get GPIO %d\n", i);
+ continue;
}
- gpio_direction_output(pdata->sgpio_gpio[i], 1);
+ gpiod_set_consumer_name(gpiod, "CX SGPIO");
+
+ pdata->sgpio_gpiod[i] = gpiod;
}
of_property_read_u32_array(np, "calxeda,led-order",
pdata->port_to_sgpio,
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 4b1ff5bc256a..59b2317acea9 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -891,7 +891,9 @@ static int sata_rcar_probe(struct platform_device *pdev)
int ret = 0;
irq = platform_get_irq(pdev, 0);
- if (irq <= 0)
+ if (irq < 0)
+ return irq;
+ if (!irq)
return -EINVAL;
priv = devm_kzalloc(dev, sizeof(struct sata_rcar_priv), GFP_KERNEL);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 0fb5f140f1b0..445cbd8266ca 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1137,8 +1137,7 @@ int platform_dma_configure(struct device *dev)
ret = of_dma_configure(dev, dev->of_node, true);
} else if (has_acpi_companion(dev)) {
attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode));
- if (attr != DEV_DMA_NOT_SUPPORTED)
- ret = acpi_dma_configure(dev, attr);
+ ret = acpi_dma_configure(dev, attr);
}
return ret;
@@ -1178,37 +1177,6 @@ int __init platform_bus_init(void)
return error;
}
-#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK
-static u64 dma_default_get_required_mask(struct device *dev)
-{
- u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
- u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
- u64 mask;
-
- if (!high_totalram) {
- /* convert to mask just covering totalram */
- low_totalram = (1 << (fls(low_totalram) - 1));
- low_totalram += low_totalram - 1;
- mask = low_totalram;
- } else {
- high_totalram = (1 << (fls(high_totalram) - 1));
- high_totalram += high_totalram - 1;
- mask = (((u64)high_totalram) << 32) + 0xffffffff;
- }
- return mask;
-}
-
-u64 dma_get_required_mask(struct device *dev)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (ops->get_required_mask)
- return ops->get_required_mask(dev);
- return dma_default_get_required_mask(dev);
-}
-EXPORT_SYMBOL_GPL(dma_get_required_mask);
-#endif
-
static __initdata LIST_HEAD(early_platform_driver_list);
static __initdata LIST_HEAD(early_platform_device_list);
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 7ca76ed2e71a..84d0fcebd6af 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -100,6 +100,10 @@ enum {
MAX_TAINT = 1000, /* cap on aoetgt taint */
};
+struct aoe_req {
+ unsigned long nr_bios;
+};
+
struct buf {
ulong nframesout;
struct bio *bio;
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index ed26b7287256..e2c6aae2d636 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -387,6 +387,7 @@ aoeblk_gdalloc(void *vp)
set = &d->tag_set;
set->ops = &aoeblk_mq_ops;
+ set->cmd_size = sizeof(struct aoe_req);
set->nr_hw_queues = 1;
set->queue_depth = 128;
set->numa_node = NUMA_NO_NODE;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index bb2fba651bd2..3cf9bc5d8d95 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -822,17 +822,6 @@ out:
spin_unlock_irqrestore(&d->lock, flags);
}
-static unsigned long
-rqbiocnt(struct request *r)
-{
- struct bio *bio;
- unsigned long n = 0;
-
- __rq_for_each_bio(bio, r)
- n++;
- return n;
-}
-
static void
bufinit(struct buf *buf, struct request *rq, struct bio *bio)
{
@@ -847,6 +836,7 @@ nextbuf(struct aoedev *d)
{
struct request *rq;
struct request_queue *q;
+ struct aoe_req *req;
struct buf *buf;
struct bio *bio;
@@ -865,7 +855,11 @@ nextbuf(struct aoedev *d)
blk_mq_start_request(rq);
d->ip.rq = rq;
d->ip.nxbio = rq->bio;
- rq->special = (void *) rqbiocnt(rq);
+
+ req = blk_mq_rq_to_pdu(rq);
+ req->nr_bios = 0;
+ __rq_for_each_bio(bio, rq)
+ req->nr_bios++;
}
buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
if (buf == NULL) {
@@ -1069,16 +1063,13 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
static void
aoe_end_buf(struct aoedev *d, struct buf *buf)
{
- struct request *rq;
- unsigned long n;
+ struct request *rq = buf->rq;
+ struct aoe_req *req = blk_mq_rq_to_pdu(rq);
if (buf == d->ip.buf)
d->ip.buf = NULL;
- rq = buf->rq;
mempool_free(buf, d->bufpool);
- n = (unsigned long) rq->special;
- rq->special = (void *) --n;
- if (n == 0)
+ if (--req->nr_bios == 0)
aoe_end_request(d, rq, 0);
}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 9063f8efbd3b..5b49f1b33ebe 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -160,21 +160,22 @@ static void
aoe_failip(struct aoedev *d)
{
struct request *rq;
+ struct aoe_req *req;
struct bio *bio;
- unsigned long n;
aoe_failbuf(d, d->ip.buf);
-
rq = d->ip.rq;
if (rq == NULL)
return;
+
+ req = blk_mq_rq_to_pdu(rq);
while ((bio = d->ip.nxbio)) {
bio->bi_status = BLK_STS_IOERR;
d->ip.nxbio = bio->bi_next;
- n = (unsigned long) rq->special;
- rq->special = (void *) --n;
+ req->nr_bios--;
}
- if ((unsigned long) rq->special == 0)
+
+ if (!req->nr_bios)
aoe_end_request(d, rq, 0);
}
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index 251482066977..1e4e2971171c 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -24,7 +24,7 @@ static void discover_timer(struct timer_list *t)
aoecmd_cfg(0xffff, 0xff);
}
-static void
+static void __exit
aoe_exit(void)
{
del_timer_sync(&timer);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index f88b4c26d422..b0dbbdfeb33e 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1471,6 +1471,15 @@ static void setup_req_params( int drive )
ReqTrack, ReqSector, (unsigned long)ReqData ));
}
+static void ataflop_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+ spin_lock_irq(&ataflop_lock);
+ atari_disable_irq(IRQ_MFP_FDC);
+ finish_fdc();
+ atari_enable_irq(IRQ_MFP_FDC);
+ spin_unlock_irq(&ataflop_lock);
+}
+
static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -1947,6 +1956,7 @@ static const struct block_device_operations floppy_fops = {
static const struct blk_mq_ops ataflop_mq_ops = {
.queue_rq = ataflop_queue_rq,
+ .commit_rqs = ataflop_commit_rqs,
};
static struct kobject *floppy_find(dev_t dev, int *part, void *data)
@@ -1982,6 +1992,7 @@ static int __init atari_floppy_init (void)
&ataflop_mq_ops, 2,
BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(unit[i].disk->queue)) {
+ put_disk(unit[i].disk);
ret = PTR_ERR(unit[i].disk->queue);
unit[i].disk->queue = NULL;
goto err;
@@ -2033,18 +2044,13 @@ static int __init atari_floppy_init (void)
return 0;
err:
- do {
+ while (--i >= 0) {
struct gendisk *disk = unit[i].disk;
- if (disk) {
- if (disk->queue) {
- blk_cleanup_queue(disk->queue);
- disk->queue = NULL;
- }
- blk_mq_free_tag_set(&unit[i].tag_set);
- put_disk(unit[i].disk);
- }
- } while (i--);
+ blk_cleanup_queue(disk->queue);
+ blk_mq_free_tag_set(&unit[i].tag_set);
+ put_disk(unit[i].disk);
+ }
unregister_blkdev(FLOPPY_MAJOR, "fd");
return ret;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index fa8204214ac0..f973a2a845c8 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2792,7 +2792,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_init_set_defaults(device);
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
if (!q)
goto out_no_q;
device->rq_queue = q;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index fb23578e9a41..6f2856c6d0f2 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2231,7 +2231,6 @@ static void request_done(int uptodate)
{
struct request *req = current_req;
struct request_queue *q;
- unsigned long flags;
int block;
char msg[sizeof("request done ") + sizeof(int) * 3];
@@ -2254,10 +2253,7 @@ static void request_done(int uptodate)
if (block > _floppy->sect)
DRS->maxtrack = 1;
- /* unlock chained buffers */
- spin_lock_irqsave(q->queue_lock, flags);
floppy_end_request(req, 0);
- spin_unlock_irqrestore(q->queue_lock, flags);
} else {
if (rq_data_dir(req) == WRITE) {
/* record write error information */
@@ -2269,9 +2265,7 @@ static void request_done(int uptodate)
DRWE->last_error_sector = blk_rq_pos(req);
DRWE->last_error_generation = DRS->generation;
}
- spin_lock_irqsave(q->queue_lock, flags);
floppy_end_request(req, BLK_STS_IOERR);
- spin_unlock_irqrestore(q->queue_lock, flags);
}
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cb0cc8685076..0939f36548c9 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -77,13 +77,14 @@
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/ioprio.h>
+#include <linux/blk-cgroup.h>
#include "loop.h"
#include <linux/uaccess.h>
static DEFINE_IDR(loop_index_idr);
-static DEFINE_MUTEX(loop_index_mutex);
+static DEFINE_MUTEX(loop_ctl_mutex);
static int max_part;
static int part_shift;
@@ -630,18 +631,7 @@ static void loop_reread_partitions(struct loop_device *lo,
{
int rc;
- /*
- * bd_mutex has been held already in release path, so don't
- * acquire it if this function is called in such case.
- *
- * If the reread partition isn't from release path, lo_refcnt
- * must be at least one and it can only become zero when the
- * current holder is released.
- */
- if (!atomic_read(&lo->lo_refcnt))
- rc = __blkdev_reread_part(bdev);
- else
- rc = blkdev_reread_part(bdev);
+ rc = blkdev_reread_part(bdev);
if (rc)
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -688,26 +678,30 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
unsigned int arg)
{
- struct file *file, *old_file;
+ struct file *file = NULL, *old_file;
int error;
+ bool partscan;
+ error = mutex_lock_killable(&loop_ctl_mutex);
+ if (error)
+ return error;
error = -ENXIO;
if (lo->lo_state != Lo_bound)
- goto out;
+ goto out_err;
/* the loop device has to be read-only */
error = -EINVAL;
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
- goto out;
+ goto out_err;
error = -EBADF;
file = fget(arg);
if (!file)
- goto out;
+ goto out_err;
error = loop_validate_file(file, bdev);
if (error)
- goto out_putf;
+ goto out_err;
old_file = lo->lo_backing_file;
@@ -715,7 +709,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
/* size of the new backing store needs to be the same */
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
- goto out_putf;
+ goto out_err;
/* and ... switch */
blk_mq_freeze_queue(lo->lo_queue);
@@ -726,15 +720,22 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue);
-
+ partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
+ mutex_unlock(&loop_ctl_mutex);
+ /*
+ * We must drop file reference outside of loop_ctl_mutex as dropping
+ * the file ref can take bd_mutex which creates circular locking
+ * dependency.
+ */
fput(old_file);
- if (lo->lo_flags & LO_FLAGS_PARTSCAN)
+ if (partscan)
loop_reread_partitions(lo, bdev);
return 0;
- out_putf:
- fput(file);
- out:
+out_err:
+ mutex_unlock(&loop_ctl_mutex);
+ if (file)
+ fput(file);
return error;
}
@@ -909,6 +910,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
int lo_flags = 0;
int error;
loff_t size;
+ bool partscan;
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
@@ -918,13 +920,17 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (!file)
goto out;
+ error = mutex_lock_killable(&loop_ctl_mutex);
+ if (error)
+ goto out_putf;
+
error = -EBUSY;
if (lo->lo_state != Lo_unbound)
- goto out_putf;
+ goto out_unlock;
error = loop_validate_file(file, bdev);
if (error)
- goto out_putf;
+ goto out_unlock;
mapping = file->f_mapping;
inode = mapping->host;
@@ -936,10 +942,10 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
error = -EFBIG;
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
- goto out_putf;
+ goto out_unlock;
error = loop_prepare_queue(lo);
if (error)
- goto out_putf;
+ goto out_unlock;
error = 0;
@@ -971,18 +977,22 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
lo->lo_state = Lo_bound;
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
- if (lo->lo_flags & LO_FLAGS_PARTSCAN)
- loop_reread_partitions(lo, bdev);
+ partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
/* Grab the block_device to prevent its destruction after we
- * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
+ * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
*/
bdgrab(bdev);
+ mutex_unlock(&loop_ctl_mutex);
+ if (partscan)
+ loop_reread_partitions(lo, bdev);
return 0;
- out_putf:
+out_unlock:
+ mutex_unlock(&loop_ctl_mutex);
+out_putf:
fput(file);
- out:
+out:
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return error;
@@ -1025,39 +1035,31 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
return err;
}
-static int loop_clr_fd(struct loop_device *lo)
+static int __loop_clr_fd(struct loop_device *lo, bool release)
{
- struct file *filp = lo->lo_backing_file;
+ struct file *filp = NULL;
gfp_t gfp = lo->old_gfp_mask;
struct block_device *bdev = lo->lo_device;
+ int err = 0;
+ bool partscan = false;
+ int lo_number;
- if (lo->lo_state != Lo_bound)
- return -ENXIO;
-
- /*
- * If we've explicitly asked to tear down the loop device,
- * and it has an elevated reference count, set it for auto-teardown when
- * the last reference goes away. This stops $!~#$@ udev from
- * preventing teardown because it decided that it needs to run blkid on
- * the loopback device whenever they appear. xfstests is notorious for
- * failing tests because blkid via udev races with a losetup
- * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
- * command to fail with EBUSY.
- */
- if (atomic_read(&lo->lo_refcnt) > 1) {
- lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
- mutex_unlock(&lo->lo_ctl_mutex);
- return 0;
+ mutex_lock(&loop_ctl_mutex);
+ if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
+ err = -ENXIO;
+ goto out_unlock;
}
- if (filp == NULL)
- return -EINVAL;
+ filp = lo->lo_backing_file;
+ if (filp == NULL) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
/* freeze request queue during the transition */
blk_mq_freeze_queue(lo->lo_queue);
spin_lock_irq(&lo->lo_lock);
- lo->lo_state = Lo_rundown;
lo->lo_backing_file = NULL;
spin_unlock_irq(&lo->lo_lock);
@@ -1093,21 +1095,73 @@ static int loop_clr_fd(struct loop_device *lo)
module_put(THIS_MODULE);
blk_mq_unfreeze_queue(lo->lo_queue);
- if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
- loop_reread_partitions(lo, bdev);
+ partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev;
+ lo_number = lo->lo_number;
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
loop_unprepare_queue(lo);
- mutex_unlock(&lo->lo_ctl_mutex);
+out_unlock:
+ mutex_unlock(&loop_ctl_mutex);
+ if (partscan) {
+ /*
+ * bd_mutex has been held already in release path, so don't
+ * acquire it if this function is called in such case.
+ *
+ * If the reread partition isn't from release path, lo_refcnt
+ * must be at least one and it can only become zero when the
+ * current holder is released.
+ */
+ if (release)
+ err = __blkdev_reread_part(bdev);
+ else
+ err = blkdev_reread_part(bdev);
+ pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
+ __func__, lo_number, err);
+ /* Device is gone, no point in returning error */
+ err = 0;
+ }
/*
- * Need not hold lo_ctl_mutex to fput backing file.
- * Calling fput holding lo_ctl_mutex triggers a circular
+ * Need not hold loop_ctl_mutex to fput backing file.
+ * Calling fput holding loop_ctl_mutex triggers a circular
* lock dependency possibility warning as fput can take
- * bd_mutex which is usually taken before lo_ctl_mutex.
+ * bd_mutex which is usually taken before loop_ctl_mutex.
*/
- fput(filp);
- return 0;
+ if (filp)
+ fput(filp);
+ return err;
+}
+
+static int loop_clr_fd(struct loop_device *lo)
+{
+ int err;
+
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
+ if (lo->lo_state != Lo_bound) {
+ mutex_unlock(&loop_ctl_mutex);
+ return -ENXIO;
+ }
+ /*
+ * If we've explicitly asked to tear down the loop device,
+ * and it has an elevated reference count, set it for auto-teardown when
+ * the last reference goes away. This stops $!~#$@ udev from
+ * preventing teardown because it decided that it needs to run blkid on
+ * the loopback device whenever they appear. xfstests is notorious for
+ * failing tests because blkid via udev races with a losetup
+ * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
+ * command to fail with EBUSY.
+ */
+ if (atomic_read(&lo->lo_refcnt) > 1) {
+ lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
+ mutex_unlock(&loop_ctl_mutex);
+ return 0;
+ }
+ lo->lo_state = Lo_rundown;
+ mutex_unlock(&loop_ctl_mutex);
+
+ return __loop_clr_fd(lo, false);
}
static int
@@ -1116,47 +1170,58 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
+ struct block_device *bdev;
+ bool partscan = false;
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (lo->lo_state != Lo_bound)
- return -ENXIO;
- if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
- return -EINVAL;
+ !capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out_unlock;
+ }
+ if (lo->lo_state != Lo_bound) {
+ err = -ENXIO;
+ goto out_unlock;
+ }
+ if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
err = loop_release_xfer(lo);
if (err)
- goto exit;
+ goto out_unfreeze;
if (info->lo_encrypt_type) {
unsigned int type = info->lo_encrypt_type;
if (type >= MAX_LO_CRYPT) {
err = -EINVAL;
- goto exit;
+ goto out_unfreeze;
}
xfer = xfer_funcs[type];
if (xfer == NULL) {
err = -EINVAL;
- goto exit;
+ goto out_unfreeze;
}
} else
xfer = NULL;
err = loop_init_xfer(lo, xfer, info);
if (err)
- goto exit;
+ goto out_unfreeze;
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
- goto exit;
+ goto out_unfreeze;
}
}
@@ -1188,15 +1253,20 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* update dio if lo_offset or transfer is changed */
__loop_update_dio(lo, lo->use_dio);
- exit:
+out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
!(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_flags |= LO_FLAGS_PARTSCAN;
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
- loop_reread_partitions(lo, lo->lo_device);
+ bdev = lo->lo_device;
+ partscan = true;
}
+out_unlock:
+ mutex_unlock(&loop_ctl_mutex);
+ if (partscan)
+ loop_reread_partitions(lo, bdev);
return err;
}
@@ -1204,12 +1274,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
static int
loop_get_status(struct loop_device *lo, struct loop_info64 *info)
{
- struct file *file;
+ struct path path;
struct kstat stat;
int ret;
+ ret = mutex_lock_killable(&loop_ctl_mutex);
+ if (ret)
+ return ret;
if (lo->lo_state != Lo_bound) {
- mutex_unlock(&lo->lo_ctl_mutex);
+ mutex_unlock(&loop_ctl_mutex);
return -ENXIO;
}
@@ -1228,17 +1301,17 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
lo->lo_encrypt_key_size);
}
- /* Drop lo_ctl_mutex while we call into the filesystem. */
- file = get_file(lo->lo_backing_file);
- mutex_unlock(&lo->lo_ctl_mutex);
- ret = vfs_getattr(&file->f_path, &stat, STATX_INO,
- AT_STATX_SYNC_AS_STAT);
+ /* Drop loop_ctl_mutex while we call into the filesystem. */
+ path = lo->lo_backing_file->f_path;
+ path_get(&path);
+ mutex_unlock(&loop_ctl_mutex);
+ ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
if (!ret) {
info->lo_device = huge_encode_dev(stat.dev);
info->lo_inode = stat.ino;
info->lo_rdevice = huge_encode_dev(stat.rdev);
}
- fput(file);
+ path_put(&path);
return ret;
}
@@ -1322,10 +1395,8 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
struct loop_info64 info64;
int err;
- if (!arg) {
- mutex_unlock(&lo->lo_ctl_mutex);
+ if (!arg)
return -EINVAL;
- }
err = loop_get_status(lo, &info64);
if (!err)
err = loop_info64_to_old(&info64, &info);
@@ -1340,10 +1411,8 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
struct loop_info64 info64;
int err;
- if (!arg) {
- mutex_unlock(&lo->lo_ctl_mutex);
+ if (!arg)
return -EINVAL;
- }
err = loop_get_status(lo, &info64);
if (!err && copy_to_user(arg, &info64, sizeof(info64)))
err = -EFAULT;
@@ -1393,70 +1462,73 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
return 0;
}
+static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
+ unsigned long arg)
+{
+ int err;
+
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
+ switch (cmd) {
+ case LOOP_SET_CAPACITY:
+ err = loop_set_capacity(lo);
+ break;
+ case LOOP_SET_DIRECT_IO:
+ err = loop_set_dio(lo, arg);
+ break;
+ case LOOP_SET_BLOCK_SIZE:
+ err = loop_set_block_size(lo, arg);
+ break;
+ default:
+ err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
+ }
+ mutex_unlock(&loop_ctl_mutex);
+ return err;
+}
+
static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct loop_device *lo = bdev->bd_disk->private_data;
int err;
- err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1);
- if (err)
- goto out_unlocked;
-
switch (cmd) {
case LOOP_SET_FD:
- err = loop_set_fd(lo, mode, bdev, arg);
- break;
+ return loop_set_fd(lo, mode, bdev, arg);
case LOOP_CHANGE_FD:
- err = loop_change_fd(lo, bdev, arg);
- break;
+ return loop_change_fd(lo, bdev, arg);
case LOOP_CLR_FD:
- /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
- err = loop_clr_fd(lo);
- if (!err)
- goto out_unlocked;
- break;
+ return loop_clr_fd(lo);
case LOOP_SET_STATUS:
err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+ if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
err = loop_set_status_old(lo,
(struct loop_info __user *)arg);
+ }
break;
case LOOP_GET_STATUS:
- err = loop_get_status_old(lo, (struct loop_info __user *) arg);
- /* loop_get_status() unlocks lo_ctl_mutex */
- goto out_unlocked;
+ return loop_get_status_old(lo, (struct loop_info __user *) arg);
case LOOP_SET_STATUS64:
err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+ if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
err = loop_set_status64(lo,
(struct loop_info64 __user *) arg);
+ }
break;
case LOOP_GET_STATUS64:
- err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
- /* loop_get_status() unlocks lo_ctl_mutex */
- goto out_unlocked;
+ return loop_get_status64(lo, (struct loop_info64 __user *) arg);
case LOOP_SET_CAPACITY:
- err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_capacity(lo);
- break;
case LOOP_SET_DIRECT_IO:
- err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_dio(lo, arg);
- break;
case LOOP_SET_BLOCK_SIZE:
- err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_block_size(lo, arg);
- break;
+ if (!(mode & FMODE_WRITE) && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ /* Fall through */
default:
- err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
+ err = lo_simple_ioctl(lo, cmd, arg);
+ break;
}
- mutex_unlock(&lo->lo_ctl_mutex);
-out_unlocked:
return err;
}
@@ -1570,10 +1642,8 @@ loop_get_status_compat(struct loop_device *lo,
struct loop_info64 info64;
int err;
- if (!arg) {
- mutex_unlock(&lo->lo_ctl_mutex);
+ if (!arg)
return -EINVAL;
- }
err = loop_get_status(lo, &info64);
if (!err)
err = loop_info64_to_compat(&info64, arg);
@@ -1588,20 +1658,12 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
switch(cmd) {
case LOOP_SET_STATUS:
- err = mutex_lock_killable(&lo->lo_ctl_mutex);
- if (!err) {
- err = loop_set_status_compat(lo,
- (const struct compat_loop_info __user *)arg);
- mutex_unlock(&lo->lo_ctl_mutex);
- }
+ err = loop_set_status_compat(lo,
+ (const struct compat_loop_info __user *)arg);
break;
case LOOP_GET_STATUS:
- err = mutex_lock_killable(&lo->lo_ctl_mutex);
- if (!err) {
- err = loop_get_status_compat(lo,
- (struct compat_loop_info __user *)arg);
- /* loop_get_status() unlocks lo_ctl_mutex */
- }
+ err = loop_get_status_compat(lo,
+ (struct compat_loop_info __user *)arg);
break;
case LOOP_SET_CAPACITY:
case LOOP_CLR_FD:
@@ -1625,9 +1687,11 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
static int lo_open(struct block_device *bdev, fmode_t mode)
{
struct loop_device *lo;
- int err = 0;
+ int err;
- mutex_lock(&loop_index_mutex);
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
lo = bdev->bd_disk->private_data;
if (!lo) {
err = -ENXIO;
@@ -1636,26 +1700,30 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
atomic_inc(&lo->lo_refcnt);
out:
- mutex_unlock(&loop_index_mutex);
+ mutex_unlock(&loop_ctl_mutex);
return err;
}
-static void __lo_release(struct loop_device *lo)
+static void lo_release(struct gendisk *disk, fmode_t mode)
{
- int err;
+ struct loop_device *lo;
+ mutex_lock(&loop_ctl_mutex);
+ lo = disk->private_data;
if (atomic_dec_return(&lo->lo_refcnt))
- return;
+ goto out_unlock;
- mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
+ if (lo->lo_state != Lo_bound)
+ goto out_unlock;
+ lo->lo_state = Lo_rundown;
+ mutex_unlock(&loop_ctl_mutex);
/*
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
- err = loop_clr_fd(lo);
- if (!err)
- return;
+ __loop_clr_fd(lo, true);
+ return;
} else if (lo->lo_state == Lo_bound) {
/*
* Otherwise keep thread (if running) and config,
@@ -1665,14 +1733,8 @@ static void __lo_release(struct loop_device *lo)
blk_mq_unfreeze_queue(lo->lo_queue);
}
- mutex_unlock(&lo->lo_ctl_mutex);
-}
-
-static void lo_release(struct gendisk *disk, fmode_t mode)
-{
- mutex_lock(&loop_index_mutex);
- __lo_release(disk->private_data);
- mutex_unlock(&loop_index_mutex);
+out_unlock:
+ mutex_unlock(&loop_ctl_mutex);
}
static const struct block_device_operations lo_fops = {
@@ -1711,10 +1773,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data)
struct loop_device *lo = ptr;
struct loop_func_table *xfer = data;
- mutex_lock(&lo->lo_ctl_mutex);
+ mutex_lock(&loop_ctl_mutex);
if (lo->lo_encryption == xfer)
loop_release_xfer(lo);
- mutex_unlock(&lo->lo_ctl_mutex);
+ mutex_unlock(&loop_ctl_mutex);
return 0;
}
@@ -1759,8 +1821,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
/* always use the first bio's css */
#ifdef CONFIG_BLK_CGROUP
- if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
- cmd->css = rq->bio->bi_css;
+ if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
+ cmd->css = &bio_blkcg(rq->bio)->css;
css_get(cmd->css);
} else
#endif
@@ -1853,7 +1915,7 @@ static int loop_add(struct loop_device **l, int i)
goto out_free_idr;
lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
- if (IS_ERR_OR_NULL(lo->lo_queue)) {
+ if (IS_ERR(lo->lo_queue)) {
err = PTR_ERR(lo->lo_queue);
goto out_cleanup_tags;
}
@@ -1895,7 +1957,6 @@ static int loop_add(struct loop_device **l, int i)
if (!part_shift)
disk->flags |= GENHD_FL_NO_PART_SCAN;
disk->flags |= GENHD_FL_EXT_DEVT;
- mutex_init(&lo->lo_ctl_mutex);
atomic_set(&lo->lo_refcnt, 0);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
@@ -1974,7 +2035,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
struct kobject *kobj;
int err;
- mutex_lock(&loop_index_mutex);
+ mutex_lock(&loop_ctl_mutex);
err = loop_lookup(&lo, MINOR(dev) >> part_shift);
if (err < 0)
err = loop_add(&lo, MINOR(dev) >> part_shift);
@@ -1982,7 +2043,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
kobj = NULL;
else
kobj = get_disk_and_module(lo->lo_disk);
- mutex_unlock(&loop_index_mutex);
+ mutex_unlock(&loop_ctl_mutex);
*part = 0;
return kobj;
@@ -1992,9 +2053,13 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
unsigned long parm)
{
struct loop_device *lo;
- int ret = -ENOSYS;
+ int ret;
- mutex_lock(&loop_index_mutex);
+ ret = mutex_lock_killable(&loop_ctl_mutex);
+ if (ret)
+ return ret;
+
+ ret = -ENOSYS;
switch (cmd) {
case LOOP_CTL_ADD:
ret = loop_lookup(&lo, parm);
@@ -2008,21 +2073,15 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
ret = loop_lookup(&lo, parm);
if (ret < 0)
break;
- ret = mutex_lock_killable(&lo->lo_ctl_mutex);
- if (ret)
- break;
if (lo->lo_state != Lo_unbound) {
ret = -EBUSY;
- mutex_unlock(&lo->lo_ctl_mutex);
break;
}
if (atomic_read(&lo->lo_refcnt) > 0) {
ret = -EBUSY;
- mutex_unlock(&lo->lo_ctl_mutex);
break;
}
lo->lo_disk->private_data = NULL;
- mutex_unlock(&lo->lo_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
loop_remove(lo);
break;
@@ -2032,7 +2091,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
break;
ret = loop_add(&lo, -1);
}
- mutex_unlock(&loop_index_mutex);
+ mutex_unlock(&loop_ctl_mutex);
return ret;
}
@@ -2116,10 +2175,10 @@ static int __init loop_init(void)
THIS_MODULE, loop_probe, NULL, NULL);
/* pre-create number of devices given by config or max_loop */
- mutex_lock(&loop_index_mutex);
+ mutex_lock(&loop_ctl_mutex);
for (i = 0; i < nr; i++)
loop_add(&lo, i);
- mutex_unlock(&loop_index_mutex);
+ mutex_unlock(&loop_ctl_mutex);
printk(KERN_INFO "loop: module loaded\n");
return 0;
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 4d42c7af7de7..af75a5ee4094 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -54,7 +54,6 @@ struct loop_device {
spinlock_t lo_lock;
int lo_state;
- struct mutex lo_ctl_mutex;
struct kthread_worker worker;
struct task_struct *worker_task;
bool use_dio;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index a7daa8acbab3..88e8440e75c3 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -168,41 +168,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
return false; /* device present */
}
-/* we have to use runtime tag to setup command header */
-static void mtip_init_cmd_header(struct request *rq)
-{
- struct driver_data *dd = rq->q->queuedata;
- struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-
- /* Point the command headers at the command tables. */
- cmd->command_header = dd->port->command_list +
- (sizeof(struct mtip_cmd_hdr) * rq->tag);
- cmd->command_header_dma = dd->port->command_list_dma +
- (sizeof(struct mtip_cmd_hdr) * rq->tag);
-
- if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
- cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
-
- cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
-}
-
-static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
-{
- struct request *rq;
-
- if (mtip_check_surprise_removal(dd->pdev))
- return NULL;
-
- rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED);
- if (IS_ERR(rq))
- return NULL;
-
- /* Internal cmd isn't submitted via .queue_rq */
- mtip_init_cmd_header(rq);
-
- return blk_mq_rq_to_pdu(rq);
-}
-
static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
unsigned int tag)
{
@@ -1023,13 +988,14 @@ static int mtip_exec_internal_command(struct mtip_port *port,
return -EFAULT;
}
- int_cmd = mtip_get_int_command(dd);
- if (!int_cmd) {
+ if (mtip_check_surprise_removal(dd->pdev))
+ return -EFAULT;
+
+ rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED);
+ if (IS_ERR(rq)) {
dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n");
return -EFAULT;
}
- rq = blk_mq_rq_from_pdu(int_cmd);
- rq->special = &icmd;
set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
@@ -1050,6 +1016,8 @@ static int mtip_exec_internal_command(struct mtip_port *port,
}
/* Copy the command to the command table */
+ int_cmd = blk_mq_rq_to_pdu(rq);
+ int_cmd->icmd = &icmd;
memcpy(int_cmd->command, fis, fis_len*4);
rq->timeout = timeout;
@@ -1423,23 +1391,19 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
* @dd pointer to driver_data structure
* @lba starting lba
* @len # of 512b sectors to trim
- *
- * return value
- * -ENOMEM Out of dma memory
- * -EINVAL Invalid parameters passed in, trim not supported
- * -EIO Error submitting trim request to hw
*/
-static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
- unsigned int len)
+static blk_status_t mtip_send_trim(struct driver_data *dd, unsigned int lba,
+ unsigned int len)
{
- int i, rv = 0;
u64 tlba, tlen, sect_left;
struct mtip_trim_entry *buf;
dma_addr_t dma_addr;
struct host_to_dev_fis fis;
+ blk_status_t ret = BLK_STS_OK;
+ int i;
if (!len || dd->trim_supp == false)
- return -EINVAL;
+ return BLK_STS_IOERR;
/* Trim request too big */
WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES));
@@ -1454,7 +1418,7 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr,
GFP_KERNEL);
if (!buf)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
memset(buf, 0, ATA_SECT_SIZE);
for (i = 0, sect_left = len, tlba = lba;
@@ -1463,8 +1427,8 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ?
MTIP_MAX_TRIM_ENTRY_LEN :
sect_left);
- buf[i].lba = __force_bit2int cpu_to_le32(tlba);
- buf[i].range = __force_bit2int cpu_to_le16(tlen);
+ buf[i].lba = cpu_to_le32(tlba);
+ buf[i].range = cpu_to_le16(tlen);
tlba += tlen;
sect_left -= tlen;
}
@@ -1486,10 +1450,10 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
ATA_SECT_SIZE,
0,
MTIP_TRIM_TIMEOUT_MS) < 0)
- rv = -EIO;
+ ret = BLK_STS_IOERR;
dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr);
- return rv;
+ return ret;
}
/*
@@ -1585,23 +1549,20 @@ static inline void fill_command_sg(struct driver_data *dd,
int n;
unsigned int dma_len;
struct mtip_cmd_sg *command_sg;
- struct scatterlist *sg = command->sg;
+ struct scatterlist *sg;
command_sg = command->command + AHCI_CMD_TBL_HDR_SZ;
- for (n = 0; n < nents; n++) {
+ for_each_sg(command->sg, sg, nents, n) {
dma_len = sg_dma_len(sg);
if (dma_len > 0x400000)
dev_err(&dd->pdev->dev,
"DMA segment length truncated\n");
- command_sg->info = __force_bit2int
- cpu_to_le32((dma_len-1) & 0x3FFFFF);
- command_sg->dba = __force_bit2int
- cpu_to_le32(sg_dma_address(sg));
- command_sg->dba_upper = __force_bit2int
+ command_sg->info = cpu_to_le32((dma_len-1) & 0x3FFFFF);
+ command_sg->dba = cpu_to_le32(sg_dma_address(sg));
+ command_sg->dba_upper =
cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
command_sg++;
- sg++;
}
}
@@ -2171,7 +2132,6 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
* @dd Pointer to the driver data structure.
* @start First sector to read.
* @nsect Number of sectors to read.
- * @nents Number of entries in scatter list for the read command.
* @tag The tag of this read command.
* @callback Pointer to the function that should be called
* when the read completes.
@@ -2183,16 +2143,20 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
* None
*/
static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
- struct mtip_cmd *command, int nents,
+ struct mtip_cmd *command,
struct blk_mq_hw_ctx *hctx)
{
+ struct mtip_cmd_hdr *hdr =
+ dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag;
struct host_to_dev_fis *fis;
struct mtip_port *port = dd->port;
int dma_dir = rq_data_dir(rq) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
u64 start = blk_rq_pos(rq);
unsigned int nsect = blk_rq_sectors(rq);
+ unsigned int nents;
/* Map the scatter list for DMA access */
+ nents = blk_rq_map_sg(hctx->queue, rq, command->sg);
nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
prefetch(&port->flags);
@@ -2233,10 +2197,11 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
fis->device |= 1 << 7;
/* Populate the command header */
- command->command_header->opts =
- __force_bit2int cpu_to_le32(
- (nents << 16) | 5 | AHCI_CMD_PREFETCH);
- command->command_header->byte_count = 0;
+ hdr->ctba = cpu_to_le32(command->command_dma & 0xFFFFFFFF);
+ if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
+ hdr->ctbau = cpu_to_le32((command->command_dma >> 16) >> 16);
+ hdr->opts = cpu_to_le32((nents << 16) | 5 | AHCI_CMD_PREFETCH);
+ hdr->byte_count = 0;
command->direction = dma_dir;
@@ -2715,12 +2680,12 @@ static void mtip_softirq_done_fn(struct request *rq)
cmd->direction);
if (unlikely(cmd->unaligned))
- up(&dd->port->cmd_slot_unal);
+ atomic_inc(&dd->port->cmd_slot_unal);
blk_mq_end_request(rq, cmd->status);
}
-static void mtip_abort_cmd(struct request *req, void *data, bool reserved)
+static bool mtip_abort_cmd(struct request *req, void *data, bool reserved)
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
struct driver_data *dd = data;
@@ -2730,14 +2695,16 @@ static void mtip_abort_cmd(struct request *req, void *data, bool reserved)
clear_bit(req->tag, dd->port->cmds_to_issue);
cmd->status = BLK_STS_IOERR;
mtip_softirq_done_fn(req);
+ return true;
}
-static void mtip_queue_cmd(struct request *req, void *data, bool reserved)
+static bool mtip_queue_cmd(struct request *req, void *data, bool reserved)
{
struct driver_data *dd = data;
set_bit(req->tag, dd->port->cmds_to_issue);
blk_abort_request(req);
+ return true;
}
/*
@@ -2803,10 +2770,7 @@ restart_eh:
blk_mq_quiesce_queue(dd->queue);
- spin_lock(dd->queue->queue_lock);
- blk_mq_tagset_busy_iter(&dd->tags,
- mtip_queue_cmd, dd);
- spin_unlock(dd->queue->queue_lock);
+ blk_mq_tagset_busy_iter(&dd->tags, mtip_queue_cmd, dd);
set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags);
@@ -3026,7 +2990,7 @@ static int mtip_hw_init(struct driver_data *dd)
else
dd->unal_qdepth = 0;
- sema_init(&dd->port->cmd_slot_unal, dd->unal_qdepth);
+ atomic_set(&dd->port->cmd_slot_unal, dd->unal_qdepth);
/* Spinlock to prevent concurrent issue */
for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
@@ -3531,58 +3495,24 @@ static inline bool is_se_active(struct driver_data *dd)
return false;
}
-/*
- * Block layer make request function.
- *
- * This function is called by the kernel to process a BIO for
- * the P320 device.
- *
- * @queue Pointer to the request queue. Unused other than to obtain
- * the driver data structure.
- * @rq Pointer to the request.
- *
- */
-static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static inline bool is_stopped(struct driver_data *dd, struct request *rq)
{
- struct driver_data *dd = hctx->queue->queuedata;
- struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
- unsigned int nents;
-
- if (is_se_active(dd))
- return -ENODATA;
-
- if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
- if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
- &dd->dd_flag))) {
- return -ENXIO;
- }
- if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) {
- return -ENODATA;
- }
- if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT,
- &dd->dd_flag) &&
- rq_data_dir(rq))) {
- return -ENODATA;
- }
- if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag) ||
- test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)))
- return -ENODATA;
- }
-
- if (req_op(rq) == REQ_OP_DISCARD) {
- int err;
-
- err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
- blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
- return 0;
- }
+ if (likely(!(dd->dd_flag & MTIP_DDF_STOP_IO)))
+ return false;
- /* Create the scatter list for this request. */
- nents = blk_rq_map_sg(hctx->queue, rq, cmd->sg);
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
+ return true;
+ if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
+ return true;
+ if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag) &&
+ rq_data_dir(rq))
+ return true;
+ if (test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
+ return true;
+ if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
+ return true;
- /* Issue the read/write. */
- mtip_hw_submit_io(dd, rq, cmd, nents, hctx);
- return 0;
+ return false;
}
static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
@@ -3603,7 +3533,7 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
cmd->unaligned = 1;
}
- if (cmd->unaligned && down_trylock(&dd->port->cmd_slot_unal))
+ if (cmd->unaligned && atomic_dec_if_positive(&dd->port->cmd_slot_unal) >= 0)
return true;
return false;
@@ -3613,32 +3543,33 @@ static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
struct driver_data *dd = hctx->queue->queuedata;
- struct mtip_int_cmd *icmd = rq->special;
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+ struct mtip_int_cmd *icmd = cmd->icmd;
+ struct mtip_cmd_hdr *hdr =
+ dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag;
struct mtip_cmd_sg *command_sg;
if (mtip_commands_active(dd->port))
- return BLK_STS_RESOURCE;
+ return BLK_STS_DEV_RESOURCE;
+ hdr->ctba = cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
+ if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
+ hdr->ctbau = cpu_to_le32((cmd->command_dma >> 16) >> 16);
/* Populate the SG list */
- cmd->command_header->opts =
- __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len);
+ hdr->opts = cpu_to_le32(icmd->opts | icmd->fis_len);
if (icmd->buf_len) {
command_sg = cmd->command + AHCI_CMD_TBL_HDR_SZ;
- command_sg->info =
- __force_bit2int cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF);
- command_sg->dba =
- __force_bit2int cpu_to_le32(icmd->buffer & 0xFFFFFFFF);
+ command_sg->info = cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF);
+ command_sg->dba = cpu_to_le32(icmd->buffer & 0xFFFFFFFF);
command_sg->dba_upper =
- __force_bit2int cpu_to_le32((icmd->buffer >> 16) >> 16);
+ cpu_to_le32((icmd->buffer >> 16) >> 16);
- cmd->command_header->opts |=
- __force_bit2int cpu_to_le32((1 << 16));
+ hdr->opts |= cpu_to_le32((1 << 16));
}
/* Populate the command header */
- cmd->command_header->byte_count = 0;
+ hdr->byte_count = 0;
blk_mq_start_request(rq);
mtip_issue_non_ncq_command(dd->port, rq->tag);
@@ -3648,23 +3579,25 @@ static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
+ struct driver_data *dd = hctx->queue->queuedata;
struct request *rq = bd->rq;
- int ret;
-
- mtip_init_cmd_header(rq);
+ struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
if (blk_rq_is_passthrough(rq))
return mtip_issue_reserved_cmd(hctx, rq);
if (unlikely(mtip_check_unal_depth(hctx, rq)))
- return BLK_STS_RESOURCE;
+ return BLK_STS_DEV_RESOURCE;
+
+ if (is_se_active(dd) || is_stopped(dd, rq))
+ return BLK_STS_IOERR;
blk_mq_start_request(rq);
- ret = mtip_submit_request(hctx, rq);
- if (likely(!ret))
- return BLK_STS_OK;
- return BLK_STS_IOERR;
+ if (req_op(rq) == REQ_OP_DISCARD)
+ return mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
+ mtip_hw_submit_io(dd, rq, cmd, hctx);
+ return BLK_STS_OK;
}
static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
@@ -3920,12 +3853,13 @@ protocol_init_error:
return rv;
}
-static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
+static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(rq);
+ return true;
}
/*
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index e20e55dab443..abce25f27f57 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -126,8 +126,6 @@
#define MTIP_DFS_MAX_BUF_SIZE 1024
-#define __force_bit2int (unsigned int __force)
-
enum {
/* below are bit numbers in 'flags' defined in mtip_port */
MTIP_PF_IC_ACTIVE_BIT = 0, /* pio/ioctl */
@@ -174,10 +172,10 @@ enum {
struct smart_attr {
u8 attr_id;
- u16 flags;
+ __le16 flags;
u8 cur;
u8 worst;
- u32 data;
+ __le32 data;
u8 res[3];
} __packed;
@@ -200,9 +198,9 @@ struct mtip_work {
#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8
struct mtip_trim_entry {
- u32 lba; /* starting lba of region */
- u16 rsvd; /* unused */
- u16 range; /* # of 512b blocks to trim */
+ __le32 lba; /* starting lba of region */
+ __le16 rsvd; /* unused */
+ __le16 range; /* # of 512b blocks to trim */
} __packed;
struct mtip_trim {
@@ -278,24 +276,24 @@ struct mtip_cmd_hdr {
* - Bit 5 Unused in this implementation.
* - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes).
*/
- unsigned int opts;
+ __le32 opts;
/* This field is unsed when using NCQ. */
union {
- unsigned int byte_count;
- unsigned int status;
+ __le32 byte_count;
+ __le32 status;
};
/*
* Lower 32 bits of the command table address associated with this
* header. The command table addresses must be 128 byte aligned.
*/
- unsigned int ctba;
+ __le32 ctba;
/*
* If 64 bit addressing is used this field is the upper 32 bits
* of the command table address associated with this command.
*/
- unsigned int ctbau;
+ __le32 ctbau;
/* Reserved and unused. */
- unsigned int res[4];
+ u32 res[4];
};
/* Command scatter gather structure (PRD). */
@@ -305,31 +303,28 @@ struct mtip_cmd_sg {
* address must be 8 byte aligned signified by bits 2:0 being
* set to 0.
*/
- unsigned int dba;
+ __le32 dba;
/*
* When 64 bit addressing is used this field is the upper
* 32 bits of the data buffer address.
*/
- unsigned int dba_upper;
+ __le32 dba_upper;
/* Unused. */
- unsigned int reserved;
+ __le32 reserved;
/*
* Bit 31: interrupt when this data block has been transferred.
* Bits 30..22: reserved
* Bits 21..0: byte count (minus 1). For P320 the byte count must be
* 8 byte aligned signified by bits 2:0 being set to 1.
*/
- unsigned int info;
+ __le32 info;
};
struct mtip_port;
+struct mtip_int_cmd;
+
/* Structure used to describe a command. */
struct mtip_cmd {
-
- struct mtip_cmd_hdr *command_header; /* ptr to command header entry */
-
- dma_addr_t command_header_dma; /* corresponding physical address */
-
void *command; /* ptr to command table entry */
dma_addr_t command_dma; /* corresponding physical address */
@@ -338,7 +333,10 @@ struct mtip_cmd {
int unaligned; /* command is unaligned on 4k boundary */
- struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */
+ union {
+ struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */
+ struct mtip_int_cmd *icmd;
+ };
int retries; /* The number of retries left for this command. */
@@ -435,8 +433,8 @@ struct mtip_port {
*/
unsigned long ic_pause_timer;
- /* Semaphore to control queue depth of unaligned IOs */
- struct semaphore cmd_slot_unal;
+ /* Counter to control queue depth of unaligned IOs */
+ atomic_t cmd_slot_unal;
/* Spinlock for working around command-issue bug. */
spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS];
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4d4d6129ff66..08696f5f00bb 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -734,12 +734,13 @@ static void recv_work(struct work_struct *work)
kfree(args);
}
-static void nbd_clear_req(struct request *req, void *data, bool reserved)
+static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
+ return true;
}
static void nbd_clear_que(struct nbd_device *nbd)
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 7685df43f1ef..b3df2793e7cd 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -49,6 +49,7 @@ struct nullb_device {
unsigned long completion_nsec; /* time in ns to complete a request */
unsigned long cache_size; /* disk cache size in MB */
unsigned long zone_size; /* zone size in MB if device is zoned */
+ unsigned int zone_nr_conv; /* number of conventional zones */
unsigned int submit_queues; /* number of submission queues */
unsigned int home_node; /* home node for the device */
unsigned int queue_mode; /* block interface */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 09339203dfba..62c9654b9ce8 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -188,6 +188,10 @@ static unsigned long g_zone_size = 256;
module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
+static unsigned int g_zone_nr_conv;
+module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
+MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
+
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
@@ -293,6 +297,7 @@ NULLB_DEVICE_ATTR(mbps, uint);
NULLB_DEVICE_ATTR(cache_size, ulong);
NULLB_DEVICE_ATTR(zoned, bool);
NULLB_DEVICE_ATTR(zone_size, ulong);
+NULLB_DEVICE_ATTR(zone_nr_conv, uint);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -407,6 +412,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_badblocks,
&nullb_device_attr_zoned,
&nullb_device_attr_zone_size,
+ &nullb_device_attr_zone_nr_conv,
NULL,
};
@@ -520,6 +526,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->use_per_node_hctx = g_use_per_node_hctx;
dev->zoned = g_zoned;
dev->zone_size = g_zone_size;
+ dev->zone_nr_conv = g_zone_nr_conv;
return dev;
}
@@ -635,14 +642,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}
-static void null_softirq_done_fn(struct request *rq)
+static void null_complete_rq(struct request *rq)
{
- struct nullb *nullb = rq->q->queuedata;
-
- if (nullb->dev->queue_mode == NULL_Q_MQ)
- end_cmd(blk_mq_rq_to_pdu(rq));
- else
- end_cmd(rq->special);
+ end_cmd(blk_mq_rq_to_pdu(rq));
}
static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
@@ -1350,7 +1352,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
- .complete = null_softirq_done_fn,
+ .complete = null_complete_rq,
.timeout = null_timeout_rq,
};
@@ -1657,8 +1659,7 @@ static int null_add_dev(struct nullb_device *dev)
}
null_init_queues(nullb);
} else if (dev->queue_mode == NULL_Q_BIO) {
- nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node,
- NULL);
+ nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index c0b0e4a3fa8f..5d1c261a2cfd 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -29,7 +29,25 @@ int null_zone_init(struct nullb_device *dev)
if (!dev->zones)
return -ENOMEM;
- for (i = 0; i < dev->nr_zones; i++) {
+ if (dev->zone_nr_conv >= dev->nr_zones) {
+ dev->zone_nr_conv = dev->nr_zones - 1;
+ pr_info("null_blk: changed the number of conventional zones to %u",
+ dev->zone_nr_conv);
+ }
+
+ for (i = 0; i < dev->zone_nr_conv; i++) {
+ struct blk_zone *zone = &dev->zones[i];
+
+ zone->start = sector;
+ zone->len = dev->zone_size_sects;
+ zone->wp = zone->start + zone->len;
+ zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
+ zone->cond = BLK_ZONE_COND_NOT_WP;
+
+ sector += dev->zone_size_sects;
+ }
+
+ for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
struct blk_zone *zone = &dev->zones[i];
zone->start = zone->wp = sector;
@@ -98,6 +116,8 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
if (zone->wp == zone->start + zone->len)
zone->cond = BLK_ZONE_COND_FULL;
break;
+ case BLK_ZONE_COND_NOT_WP:
+ break;
default:
/* Invalid zone condition */
cmd->error = BLK_STS_IOERR;
@@ -111,6 +131,11 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
unsigned int zno = null_zone_no(dev, sector);
struct blk_zone *zone = &dev->zones[zno];
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
+ cmd->error = BLK_STS_IOERR;
+ return;
+ }
+
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
}
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index ae4971e5d9a8..0ff9b12d0e35 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -242,6 +242,11 @@ struct pd_unit {
static struct pd_unit pd[PD_UNITS];
+struct pd_req {
+ /* for REQ_OP_DRV_IN: */
+ enum action (*func)(struct pd_unit *disk);
+};
+
static char pd_scratch[512]; /* scratch block buffer */
static char *pd_errs[17] = { "ERR", "INDEX", "ECC", "DRQ", "SEEK", "WRERR",
@@ -502,8 +507,9 @@ static enum action do_pd_io_start(void)
static enum action pd_special(void)
{
- enum action (*func)(struct pd_unit *) = pd_req->special;
- return func(pd_current);
+ struct pd_req *req = blk_mq_rq_to_pdu(pd_req);
+
+ return req->func(pd_current);
}
static int pd_next_buf(void)
@@ -767,12 +773,14 @@ static int pd_special_command(struct pd_unit *disk,
enum action (*func)(struct pd_unit *disk))
{
struct request *rq;
+ struct pd_req *req;
rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0);
if (IS_ERR(rq))
return PTR_ERR(rq);
+ req = blk_mq_rq_to_pdu(rq);
- rq->special = func;
+ req->func = func;
blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
blk_put_request(rq);
return 0;
@@ -892,9 +900,21 @@ static void pd_probe_drive(struct pd_unit *disk)
disk->gd = p;
p->private_data = disk;
- p->queue = blk_mq_init_sq_queue(&disk->tag_set, &pd_mq_ops, 2,
- BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
+ memset(&disk->tag_set, 0, sizeof(disk->tag_set));
+ disk->tag_set.ops = &pd_mq_ops;
+ disk->tag_set.cmd_size = sizeof(struct pd_req);
+ disk->tag_set.nr_hw_queues = 1;
+ disk->tag_set.nr_maps = 1;
+ disk->tag_set.queue_depth = 2;
+ disk->tag_set.numa_node = NUMA_NO_NODE;
+ disk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
+
+ if (blk_mq_alloc_tag_set(&disk->tag_set))
+ return;
+
+ p->queue = blk_mq_init_queue(&disk->tag_set);
if (IS_ERR(p->queue)) {
+ blk_mq_free_tag_set(&disk->tag_set);
p->queue = NULL;
return;
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 9381f4e3b221..f5a71023f76c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2203,9 +2203,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
* Some CDRW drives can not handle writes larger than one packet,
* even if the size is a multiple of the packet size.
*/
- spin_lock_irq(q->queue_lock);
blk_queue_max_hw_sectors(q, pd->settings.size);
- spin_unlock_irq(q->queue_lock);
set_bit(PACKET_WRITABLE, &pd->flags);
} else {
pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 2459dcc04b1c..a10d5736d8f7 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -181,6 +181,7 @@ struct skd_request_context {
struct fit_completion_entry_v1 completion;
struct fit_comp_error_info err_info;
+ int retries;
blk_status_t status;
};
@@ -382,11 +383,12 @@ static void skd_log_skreq(struct skd_device *skdev,
* READ/WRITE REQUESTS
*****************************************************************************
*/
-static void skd_inc_in_flight(struct request *rq, void *data, bool reserved)
+static bool skd_inc_in_flight(struct request *rq, void *data, bool reserved)
{
int *count = data;
count++;
+ return true;
}
static int skd_in_flight(struct skd_device *skdev)
@@ -494,6 +496,11 @@ static blk_status_t skd_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
if (unlikely(skdev->state != SKD_DRVR_STATE_ONLINE))
return skd_fail_all(q) ? BLK_STS_IOERR : BLK_STS_RESOURCE;
+ if (!(req->rq_flags & RQF_DONTPREP)) {
+ skreq->retries = 0;
+ req->rq_flags |= RQF_DONTPREP;
+ }
+
blk_mq_start_request(req);
WARN_ONCE(tag >= skd_max_queue_depth, "%#x > %#x (nr_requests = %lu)\n",
@@ -1425,7 +1432,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
break;
case SKD_CHECK_STATUS_REQUEUE_REQUEST:
- if ((unsigned long) ++req->special < SKD_MAX_RETRIES) {
+ if (++skreq->retries < SKD_MAX_RETRIES) {
skd_log_skreq(skdev, skreq, "retry");
blk_mq_requeue_request(req, true);
break;
@@ -1887,13 +1894,13 @@ static void skd_isr_fwstate(struct skd_device *skdev)
skd_skdev_state_to_str(skdev->state), skdev->state);
}
-static void skd_recover_request(struct request *req, void *data, bool reserved)
+static bool skd_recover_request(struct request *req, void *data, bool reserved)
{
struct skd_device *const skdev = data;
struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
if (skreq->state != SKD_REQ_STATE_BUSY)
- return;
+ return true;
skd_log_skreq(skdev, skreq, "recover");
@@ -1904,6 +1911,7 @@ static void skd_recover_request(struct request *req, void *data, bool reserved)
skreq->state = SKD_REQ_STATE_IDLE;
skreq->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
+ return true;
}
static void skd_recover_requests(struct skd_device *skdev)
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index b54fa6726303..9c0553dd13e7 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -6,7 +6,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/hdreg.h>
#include <linux/genhd.h>
#include <linux/cdrom.h>
@@ -45,6 +45,8 @@ MODULE_VERSION(DRV_MODULE_VERSION);
#define WAITING_FOR_GEN_CMD 0x04
#define WAITING_FOR_ANY -1
+#define VDC_MAX_RETRIES 10
+
static struct workqueue_struct *sunvdc_wq;
struct vdc_req_entry {
@@ -66,9 +68,10 @@ struct vdc_port {
u64 max_xfer_size;
u32 vdisk_block_size;
+ u32 drain;
u64 ldc_timeout;
- struct timer_list ldc_reset_timer;
+ struct delayed_work ldc_reset_timer_work;
struct work_struct ldc_reset_work;
/* The server fills these in for us in the disk attribute
@@ -80,12 +83,14 @@ struct vdc_port {
u8 vdisk_mtype;
u32 vdisk_phys_blksz;
+ struct blk_mq_tag_set tag_set;
+
char disk_name[32];
};
static void vdc_ldc_reset(struct vdc_port *port);
static void vdc_ldc_reset_work(struct work_struct *work);
-static void vdc_ldc_reset_timer(struct timer_list *t);
+static void vdc_ldc_reset_timer_work(struct work_struct *work);
static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
{
@@ -175,11 +180,8 @@ static void vdc_blk_queue_start(struct vdc_port *port)
* handshake completes, so check for initial handshake before we've
* allocated a disk.
*/
- if (port->disk && blk_queue_stopped(port->disk->queue) &&
- vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) {
- blk_start_queue(port->disk->queue);
- }
-
+ if (port->disk && vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
+ blk_mq_start_hw_queues(port->disk->queue);
}
static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
@@ -197,7 +199,7 @@ static void vdc_handshake_complete(struct vio_driver_state *vio)
{
struct vdc_port *port = to_vdc_port(vio);
- del_timer(&port->ldc_reset_timer);
+ cancel_delayed_work(&port->ldc_reset_timer_work);
vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
vdc_blk_queue_start(port);
}
@@ -320,7 +322,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
rqe->req = NULL;
- __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
+ blk_mq_end_request(req, desc->status ? BLK_STS_IOERR : 0);
vdc_blk_queue_start(port);
}
@@ -431,6 +433,7 @@ static int __vdc_tx_trigger(struct vdc_port *port)
.end_idx = dr->prod,
};
int err, delay;
+ int retries = 0;
hdr.seq = dr->snd_nxt;
delay = 1;
@@ -443,6 +446,8 @@ static int __vdc_tx_trigger(struct vdc_port *port)
udelay(delay);
if ((delay <<= 1) > 128)
delay = 128;
+ if (retries++ > VDC_MAX_RETRIES)
+ break;
} while (err == -EAGAIN);
if (err == -ENOTCONN)
@@ -525,29 +530,40 @@ static int __send_request(struct request *req)
return err;
}
-static void do_vdc_request(struct request_queue *rq)
+static blk_status_t vdc_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- struct request *req;
+ struct vdc_port *port = hctx->queue->queuedata;
+ struct vio_dring_state *dr;
+ unsigned long flags;
- while ((req = blk_peek_request(rq)) != NULL) {
- struct vdc_port *port;
- struct vio_dring_state *dr;
+ dr = &port->vio.drings[VIO_DRIVER_TX_RING];
- port = req->rq_disk->private_data;
- dr = &port->vio.drings[VIO_DRIVER_TX_RING];
- if (unlikely(vdc_tx_dring_avail(dr) < 1))
- goto wait;
+ blk_mq_start_request(bd->rq);
- blk_start_request(req);
+ spin_lock_irqsave(&port->vio.lock, flags);
- if (__send_request(req) < 0) {
- blk_requeue_request(rq, req);
-wait:
- /* Avoid pointless unplugs. */
- blk_stop_queue(rq);
- break;
- }
+ /*
+ * Doing drain, just end the request in error
+ */
+ if (unlikely(port->drain)) {
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ return BLK_STS_IOERR;
+ }
+
+ if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ blk_mq_stop_hw_queue(hctx);
+ return BLK_STS_DEV_RESOURCE;
+ }
+
+ if (__send_request(bd->rq) < 0) {
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ return BLK_STS_IOERR;
}
+
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ return BLK_STS_OK;
}
static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
@@ -759,6 +775,31 @@ static void vdc_port_down(struct vdc_port *port)
vio_ldc_free(&port->vio);
}
+static const struct blk_mq_ops vdc_mq_ops = {
+ .queue_rq = vdc_queue_rq,
+};
+
+static void cleanup_queue(struct request_queue *q)
+{
+ struct vdc_port *port = q->queuedata;
+
+ blk_cleanup_queue(q);
+ blk_mq_free_tag_set(&port->tag_set);
+}
+
+static struct request_queue *init_queue(struct vdc_port *port)
+{
+ struct request_queue *q;
+
+ q = blk_mq_init_sq_queue(&port->tag_set, &vdc_mq_ops, VDC_TX_RING_SIZE,
+ BLK_MQ_F_SHOULD_MERGE);
+ if (IS_ERR(q))
+ return q;
+
+ q->queuedata = port;
+ return q;
+}
+
static int probe_disk(struct vdc_port *port)
{
struct request_queue *q;
@@ -796,17 +837,17 @@ static int probe_disk(struct vdc_port *port)
(u64)geom.num_sec);
}
- q = blk_init_queue(do_vdc_request, &port->vio.lock);
- if (!q) {
+ q = init_queue(port);
+ if (IS_ERR(q)) {
printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
port->vio.name);
- return -ENOMEM;
+ return PTR_ERR(q);
}
g = alloc_disk(1 << PARTITION_SHIFT);
if (!g) {
printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
port->vio.name);
- blk_cleanup_queue(q);
+ cleanup_queue(q);
return -ENOMEM;
}
@@ -981,7 +1022,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
*/
ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL);
port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0;
- timer_setup(&port->ldc_reset_timer, vdc_ldc_reset_timer, 0);
+ INIT_DELAYED_WORK(&port->ldc_reset_timer_work, vdc_ldc_reset_timer_work);
INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work);
err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
@@ -1034,18 +1075,14 @@ static int vdc_port_remove(struct vio_dev *vdev)
struct vdc_port *port = dev_get_drvdata(&vdev->dev);
if (port) {
- unsigned long flags;
-
- spin_lock_irqsave(&port->vio.lock, flags);
- blk_stop_queue(port->disk->queue);
- spin_unlock_irqrestore(&port->vio.lock, flags);
+ blk_mq_stop_hw_queues(port->disk->queue);
flush_work(&port->ldc_reset_work);
- del_timer_sync(&port->ldc_reset_timer);
+ cancel_delayed_work_sync(&port->ldc_reset_timer_work);
del_timer_sync(&port->vio.timer);
del_gendisk(port->disk);
- blk_cleanup_queue(port->disk->queue);
+ cleanup_queue(port->disk->queue);
put_disk(port->disk);
port->disk = NULL;
@@ -1080,32 +1117,46 @@ static void vdc_requeue_inflight(struct vdc_port *port)
}
rqe->req = NULL;
- blk_requeue_request(port->disk->queue, req);
+ blk_mq_requeue_request(req, false);
}
}
static void vdc_queue_drain(struct vdc_port *port)
{
- struct request *req;
+ struct request_queue *q = port->disk->queue;
+
+ /*
+ * Mark the queue as draining, then freeze/quiesce to ensure
+ * that all existing requests are seen in ->queue_rq() and killed
+ */
+ port->drain = 1;
+ spin_unlock_irq(&port->vio.lock);
- while ((req = blk_fetch_request(port->disk->queue)) != NULL)
- __blk_end_request_all(req, BLK_STS_IOERR);
+ blk_mq_freeze_queue(q);
+ blk_mq_quiesce_queue(q);
+
+ spin_lock_irq(&port->vio.lock);
+ port->drain = 0;
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q);
}
-static void vdc_ldc_reset_timer(struct timer_list *t)
+static void vdc_ldc_reset_timer_work(struct work_struct *work)
{
- struct vdc_port *port = from_timer(port, t, ldc_reset_timer);
- struct vio_driver_state *vio = &port->vio;
- unsigned long flags;
+ struct vdc_port *port;
+ struct vio_driver_state *vio;
- spin_lock_irqsave(&vio->lock, flags);
+ port = container_of(work, struct vdc_port, ldc_reset_timer_work.work);
+ vio = &port->vio;
+
+ spin_lock_irq(&vio->lock);
if (!(port->vio.hs_state & VIO_HS_COMPLETE)) {
pr_warn(PFX "%s ldc down %llu seconds, draining queue\n",
port->disk_name, port->ldc_timeout);
vdc_queue_drain(port);
vdc_blk_queue_start(port);
}
- spin_unlock_irqrestore(&vio->lock, flags);
+ spin_unlock_irq(&vio->lock);
}
static void vdc_ldc_reset_work(struct work_struct *work)
@@ -1129,7 +1180,7 @@ static void vdc_ldc_reset(struct vdc_port *port)
assert_spin_locked(&port->vio.lock);
pr_warn(PFX "%s ldc link reset\n", port->disk_name);
- blk_stop_queue(port->disk->queue);
+ blk_mq_stop_hw_queues(port->disk->queue);
vdc_requeue_inflight(port);
vdc_port_down(port);
@@ -1146,7 +1197,7 @@ static void vdc_ldc_reset(struct vdc_port *port)
}
if (port->ldc_timeout)
- mod_timer(&port->ldc_reset_timer,
+ mod_delayed_work(system_wq, &port->ldc_reset_timer_work,
round_jiffies(jiffies + HZ * port->ldc_timeout));
mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
return;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 064b8c5c7a32..4478eb7efee0 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -243,7 +243,6 @@ struct carm_port {
unsigned int port_no;
struct gendisk *disk;
struct carm_host *host;
- struct blk_mq_tag_set tag_set;
/* attached device characteristics */
u64 capacity;
@@ -254,13 +253,10 @@ struct carm_port {
};
struct carm_request {
- unsigned int tag;
int n_elem;
unsigned int msg_type;
unsigned int msg_subtype;
unsigned int msg_bucket;
- struct request *rq;
- struct carm_port *port;
struct scatterlist sg[CARM_MAX_REQ_SG];
};
@@ -291,9 +287,6 @@ struct carm_host {
unsigned int wait_q_cons;
struct request_queue *wait_q[CARM_MAX_WAIT_Q];
- unsigned int n_msgs;
- u64 msg_alloc;
- struct carm_request req[CARM_MAX_REQ];
void *msg_base;
dma_addr_t msg_dma;
@@ -478,10 +471,10 @@ static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host,
}
static int carm_send_msg(struct carm_host *host,
- struct carm_request *crq)
+ struct carm_request *crq, unsigned tag)
{
void __iomem *mmio = host->mmio;
- u32 msg = (u32) carm_ref_msg_dma(host, crq->tag);
+ u32 msg = (u32) carm_ref_msg_dma(host, tag);
u32 cm_bucket = crq->msg_bucket;
u32 tmp;
int rc = 0;
@@ -506,99 +499,24 @@ static int carm_send_msg(struct carm_host *host,
return rc;
}
-static struct carm_request *carm_get_request(struct carm_host *host)
-{
- unsigned int i;
-
- /* obey global hardware limit on S/G entries */
- if (host->hw_sg_used >= (CARM_MAX_HOST_SG - CARM_MAX_REQ_SG))
- return NULL;
-
- for (i = 0; i < max_queue; i++)
- if ((host->msg_alloc & (1ULL << i)) == 0) {
- struct carm_request *crq = &host->req[i];
- crq->port = NULL;
- crq->n_elem = 0;
-
- host->msg_alloc |= (1ULL << i);
- host->n_msgs++;
-
- assert(host->n_msgs <= CARM_MAX_REQ);
- sg_init_table(crq->sg, CARM_MAX_REQ_SG);
- return crq;
- }
-
- DPRINTK("no request available, returning NULL\n");
- return NULL;
-}
-
-static int carm_put_request(struct carm_host *host, struct carm_request *crq)
-{
- assert(crq->tag < max_queue);
-
- if (unlikely((host->msg_alloc & (1ULL << crq->tag)) == 0))
- return -EINVAL; /* tried to clear a tag that was not active */
-
- assert(host->hw_sg_used >= crq->n_elem);
-
- host->msg_alloc &= ~(1ULL << crq->tag);
- host->hw_sg_used -= crq->n_elem;
- host->n_msgs--;
-
- return 0;
-}
-
-static struct carm_request *carm_get_special(struct carm_host *host)
-{
- unsigned long flags;
- struct carm_request *crq = NULL;
- struct request *rq;
- int tries = 5000;
-
- while (tries-- > 0) {
- spin_lock_irqsave(&host->lock, flags);
- crq = carm_get_request(host);
- spin_unlock_irqrestore(&host->lock, flags);
-
- if (crq)
- break;
- msleep(10);
- }
-
- if (!crq)
- return NULL;
-
- rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, 0);
- if (IS_ERR(rq)) {
- spin_lock_irqsave(&host->lock, flags);
- carm_put_request(host, crq);
- spin_unlock_irqrestore(&host->lock, flags);
- return NULL;
- }
-
- crq->rq = rq;
- return crq;
-}
-
static int carm_array_info (struct carm_host *host, unsigned int array_idx)
{
struct carm_msg_ioctl *ioc;
- unsigned int idx;
u32 msg_data;
dma_addr_t msg_dma;
struct carm_request *crq;
+ struct request *rq;
int rc;
- crq = carm_get_special(host);
- if (!crq) {
+ rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
+ if (IS_ERR(rq)) {
rc = -ENOMEM;
goto err_out;
}
+ crq = blk_mq_rq_to_pdu(rq);
- idx = crq->tag;
-
- ioc = carm_ref_msg(host, idx);
- msg_dma = carm_ref_msg_dma(host, idx);
+ ioc = carm_ref_msg(host, rq->tag);
+ msg_dma = carm_ref_msg_dma(host, rq->tag);
msg_data = (u32) (msg_dma + sizeof(struct carm_array_info));
crq->msg_type = CARM_MSG_ARRAY;
@@ -612,7 +530,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
ioc->type = CARM_MSG_ARRAY;
ioc->subtype = CARM_ARRAY_INFO;
ioc->array_id = (u8) array_idx;
- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
+ ioc->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
ioc->data_addr = cpu_to_le32(msg_data);
spin_lock_irq(&host->lock);
@@ -620,9 +538,8 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
host->state == HST_DEV_SCAN);
spin_unlock_irq(&host->lock);
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
- crq->rq->special = crq;
- blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
+ DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
+ blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL);
return 0;
@@ -637,21 +554,21 @@ typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *);
static int carm_send_special (struct carm_host *host, carm_sspc_t func)
{
+ struct request *rq;
struct carm_request *crq;
struct carm_msg_ioctl *ioc;
void *mem;
- unsigned int idx, msg_size;
+ unsigned int msg_size;
int rc;
- crq = carm_get_special(host);
- if (!crq)
+ rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
+ if (IS_ERR(rq))
return -ENOMEM;
+ crq = blk_mq_rq_to_pdu(rq);
- idx = crq->tag;
+ mem = carm_ref_msg(host, rq->tag);
- mem = carm_ref_msg(host, idx);
-
- msg_size = func(host, idx, mem);
+ msg_size = func(host, rq->tag, mem);
ioc = mem;
crq->msg_type = ioc->type;
@@ -660,9 +577,8 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
BUG_ON(rc < 0);
crq->msg_bucket = (u32) rc;
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
- crq->rq->special = crq;
- blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
+ DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
+ blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL);
return 0;
}
@@ -744,19 +660,6 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
sizeof(struct carm_fw_ver);
}
-static inline void carm_end_request_queued(struct carm_host *host,
- struct carm_request *crq,
- blk_status_t error)
-{
- struct request *req = crq->rq;
- int rc;
-
- blk_mq_end_request(req, error);
-
- rc = carm_put_request(host, crq);
- assert(rc == 0);
-}
-
static inline void carm_push_q (struct carm_host *host, struct request_queue *q)
{
unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q;
@@ -791,101 +694,50 @@ static inline void carm_round_robin(struct carm_host *host)
}
}
-static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
- blk_status_t error)
-{
- carm_end_request_queued(host, crq, error);
- if (max_queue == 1)
- carm_round_robin(host);
- else if ((host->n_msgs <= CARM_MSG_LOW_WATER) &&
- (host->hw_sg_used <= CARM_SG_LOW_WATER)) {
- carm_round_robin(host);
- }
-}
-
-static blk_status_t carm_oob_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
+static inline enum dma_data_direction carm_rq_dir(struct request *rq)
{
- struct request_queue *q = hctx->queue;
- struct carm_host *host = q->queuedata;
- struct carm_request *crq;
- int rc;
-
- blk_mq_start_request(bd->rq);
-
- spin_lock_irq(&host->lock);
-
- crq = bd->rq->special;
- assert(crq != NULL);
- assert(crq->rq == bd->rq);
-
- crq->n_elem = 0;
-
- DPRINTK("send req\n");
- rc = carm_send_msg(host, crq);
- if (rc) {
- carm_push_q(host, q);
- spin_unlock_irq(&host->lock);
- return BLK_STS_DEV_RESOURCE;
- }
-
- spin_unlock_irq(&host->lock);
- return BLK_STS_OK;
+ return op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
}
static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request_queue *q = hctx->queue;
+ struct request *rq = bd->rq;
struct carm_port *port = q->queuedata;
struct carm_host *host = port->host;
+ struct carm_request *crq = blk_mq_rq_to_pdu(rq);
struct carm_msg_rw *msg;
- struct carm_request *crq;
- struct request *rq = bd->rq;
struct scatterlist *sg;
- int writing = 0, pci_dir, i, n_elem, rc;
- u32 tmp;
+ int i, n_elem = 0, rc;
unsigned int msg_size;
+ u32 tmp;
+
+ crq->n_elem = 0;
+ sg_init_table(crq->sg, CARM_MAX_REQ_SG);
blk_mq_start_request(rq);
spin_lock_irq(&host->lock);
-
- crq = carm_get_request(host);
- if (!crq) {
- carm_push_q(host, q);
- spin_unlock_irq(&host->lock);
- return BLK_STS_DEV_RESOURCE;
- }
- crq->rq = rq;
-
- if (rq_data_dir(rq) == WRITE) {
- writing = 1;
- pci_dir = DMA_TO_DEVICE;
- } else {
- pci_dir = DMA_FROM_DEVICE;
- }
+ if (req_op(rq) == REQ_OP_DRV_OUT)
+ goto send_msg;
/* get scatterlist from block layer */
sg = &crq->sg[0];
n_elem = blk_rq_map_sg(q, rq, sg);
- if (n_elem <= 0) {
- /* request with no s/g entries? */
- carm_end_rq(host, crq, BLK_STS_IOERR);
- spin_unlock_irq(&host->lock);
- return BLK_STS_IOERR;
- }
+ if (n_elem <= 0)
+ goto out_ioerr;
/* map scatterlist to PCI bus addresses */
- n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, pci_dir);
- if (n_elem <= 0) {
- /* request with no s/g entries? */
- carm_end_rq(host, crq, BLK_STS_IOERR);
- spin_unlock_irq(&host->lock);
- return BLK_STS_IOERR;
- }
+ n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, carm_rq_dir(rq));
+ if (n_elem <= 0)
+ goto out_ioerr;
+
+ /* obey global hardware limit on S/G entries */
+ if (host->hw_sg_used >= CARM_MAX_HOST_SG - n_elem)
+ goto out_resource;
+
crq->n_elem = n_elem;
- crq->port = port;
host->hw_sg_used += n_elem;
/*
@@ -893,9 +745,9 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
*/
VPRINTK("build msg\n");
- msg = (struct carm_msg_rw *) carm_ref_msg(host, crq->tag);
+ msg = (struct carm_msg_rw *) carm_ref_msg(host, rq->tag);
- if (writing) {
+ if (rq_data_dir(rq) == WRITE) {
msg->type = CARM_MSG_WRITE;
crq->msg_type = CARM_MSG_WRITE;
} else {
@@ -906,7 +758,7 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
msg->id = port->port_no;
msg->sg_count = n_elem;
msg->sg_type = SGT_32BIT;
- msg->handle = cpu_to_le32(TAG_ENCODE(crq->tag));
+ msg->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
tmp = (blk_rq_pos(rq) >> 16) >> 16;
msg->lba_high = cpu_to_le16( (u16) tmp );
@@ -923,22 +775,28 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
rc = carm_lookup_bucket(msg_size);
BUG_ON(rc < 0);
crq->msg_bucket = (u32) rc;
-
+send_msg:
/*
* queue read/write message to hardware
*/
-
- VPRINTK("send msg, tag == %u\n", crq->tag);
- rc = carm_send_msg(host, crq);
+ VPRINTK("send msg, tag == %u\n", rq->tag);
+ rc = carm_send_msg(host, crq, rq->tag);
if (rc) {
- carm_put_request(host, crq);
- carm_push_q(host, q);
- spin_unlock_irq(&host->lock);
- return BLK_STS_DEV_RESOURCE;
+ host->hw_sg_used -= n_elem;
+ goto out_resource;
}
spin_unlock_irq(&host->lock);
return BLK_STS_OK;
+out_resource:
+ dma_unmap_sg(&host->pdev->dev, &crq->sg[0], n_elem, carm_rq_dir(rq));
+ carm_push_q(host, q);
+ spin_unlock_irq(&host->lock);
+ return BLK_STS_DEV_RESOURCE;
+out_ioerr:
+ carm_round_robin(host);
+ spin_unlock_irq(&host->lock);
+ return BLK_STS_IOERR;
}
static void carm_handle_array_info(struct carm_host *host,
@@ -954,8 +812,6 @@ static void carm_handle_array_info(struct carm_host *host,
DPRINTK("ENTER\n");
- carm_end_rq(host, crq, error);
-
if (error)
goto out;
if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST)
@@ -1011,8 +867,6 @@ static void carm_handle_scan_chan(struct carm_host *host,
DPRINTK("ENTER\n");
- carm_end_rq(host, crq, error);
-
if (error) {
new_state = HST_ERROR;
goto out;
@@ -1040,8 +894,6 @@ static void carm_handle_generic(struct carm_host *host,
{
DPRINTK("ENTER\n");
- carm_end_rq(host, crq, error);
-
assert(host->state == cur_state);
if (error)
host->state = HST_ERROR;
@@ -1050,28 +902,12 @@ static void carm_handle_generic(struct carm_host *host,
schedule_work(&host->fsm_task);
}
-static inline void carm_handle_rw(struct carm_host *host,
- struct carm_request *crq, blk_status_t error)
-{
- int pci_dir;
-
- VPRINTK("ENTER\n");
-
- if (rq_data_dir(crq->rq) == WRITE)
- pci_dir = DMA_TO_DEVICE;
- else
- pci_dir = DMA_FROM_DEVICE;
-
- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem, pci_dir);
-
- carm_end_rq(host, crq, error);
-}
-
static inline void carm_handle_resp(struct carm_host *host,
__le32 ret_handle_le, u32 status)
{
u32 handle = le32_to_cpu(ret_handle_le);
unsigned int msg_idx;
+ struct request *rq;
struct carm_request *crq;
blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
u8 *mem;
@@ -1087,13 +923,15 @@ static inline void carm_handle_resp(struct carm_host *host,
msg_idx = TAG_DECODE(handle);
VPRINTK("tag == %u\n", msg_idx);
- crq = &host->req[msg_idx];
+ rq = blk_mq_tag_to_rq(host->tag_set.tags[0], msg_idx);
+ crq = blk_mq_rq_to_pdu(rq);
/* fast path */
if (likely(crq->msg_type == CARM_MSG_READ ||
crq->msg_type == CARM_MSG_WRITE)) {
- carm_handle_rw(host, crq, error);
- return;
+ dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem,
+ carm_rq_dir(rq));
+ goto done;
}
mem = carm_ref_msg(host, msg_idx);
@@ -1103,7 +941,7 @@ static inline void carm_handle_resp(struct carm_host *host,
switch (crq->msg_subtype) {
case CARM_IOC_SCAN_CHAN:
carm_handle_scan_chan(host, crq, mem, error);
- break;
+ goto done;
default:
/* unknown / invalid response */
goto err_out;
@@ -1116,11 +954,11 @@ static inline void carm_handle_resp(struct carm_host *host,
case MISC_ALLOC_MEM:
carm_handle_generic(host, crq, error,
HST_ALLOC_BUF, HST_SYNC_TIME);
- break;
+ goto done;
case MISC_SET_TIME:
carm_handle_generic(host, crq, error,
HST_SYNC_TIME, HST_GET_FW_VER);
- break;
+ goto done;
case MISC_GET_FW_VER: {
struct carm_fw_ver *ver = (struct carm_fw_ver *)
(mem + sizeof(struct carm_msg_get_fw_ver));
@@ -1130,7 +968,7 @@ static inline void carm_handle_resp(struct carm_host *host,
}
carm_handle_generic(host, crq, error,
HST_GET_FW_VER, HST_PORT_SCAN);
- break;
+ goto done;
}
default:
/* unknown / invalid response */
@@ -1161,7 +999,13 @@ static inline void carm_handle_resp(struct carm_host *host,
err_out:
printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
- carm_end_rq(host, crq, BLK_STS_IOERR);
+ error = BLK_STS_IOERR;
+done:
+ host->hw_sg_used -= crq->n_elem;
+ blk_mq_end_request(blk_mq_rq_from_pdu(crq), error);
+
+ if (host->hw_sg_used <= CARM_SG_LOW_WATER)
+ carm_round_robin(host);
}
static inline void carm_handle_responses(struct carm_host *host)
@@ -1491,78 +1335,56 @@ static int carm_init_host(struct carm_host *host)
return 0;
}
-static const struct blk_mq_ops carm_oob_mq_ops = {
- .queue_rq = carm_oob_queue_rq,
-};
-
static const struct blk_mq_ops carm_mq_ops = {
.queue_rq = carm_queue_rq,
};
-static int carm_init_disks(struct carm_host *host)
+static int carm_init_disk(struct carm_host *host, unsigned int port_no)
{
- unsigned int i;
- int rc = 0;
+ struct carm_port *port = &host->port[port_no];
+ struct gendisk *disk;
+ struct request_queue *q;
- for (i = 0; i < CARM_MAX_PORTS; i++) {
- struct gendisk *disk;
- struct request_queue *q;
- struct carm_port *port;
+ port->host = host;
+ port->port_no = port_no;
- port = &host->port[i];
- port->host = host;
- port->port_no = i;
+ disk = alloc_disk(CARM_MINORS_PER_MAJOR);
+ if (!disk)
+ return -ENOMEM;
- disk = alloc_disk(CARM_MINORS_PER_MAJOR);
- if (!disk) {
- rc = -ENOMEM;
- break;
- }
+ port->disk = disk;
+ sprintf(disk->disk_name, DRV_NAME "/%u",
+ (unsigned int)host->id * CARM_MAX_PORTS + port_no);
+ disk->major = host->major;
+ disk->first_minor = port_no * CARM_MINORS_PER_MAJOR;
+ disk->fops = &carm_bd_ops;
+ disk->private_data = port;
- port->disk = disk;
- sprintf(disk->disk_name, DRV_NAME "/%u",
- (unsigned int) (host->id * CARM_MAX_PORTS) + i);
- disk->major = host->major;
- disk->first_minor = i * CARM_MINORS_PER_MAJOR;
- disk->fops = &carm_bd_ops;
- disk->private_data = port;
-
- q = blk_mq_init_sq_queue(&port->tag_set, &carm_mq_ops,
- max_queue, BLK_MQ_F_SHOULD_MERGE);
- if (IS_ERR(q)) {
- rc = PTR_ERR(q);
- break;
- }
- disk->queue = q;
- blk_queue_max_segments(q, CARM_MAX_REQ_SG);
- blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
+ q = blk_mq_init_queue(&host->tag_set);
+ if (IS_ERR(q))
+ return PTR_ERR(q);
- q->queuedata = port;
- }
+ blk_queue_max_segments(q, CARM_MAX_REQ_SG);
+ blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
- return rc;
+ q->queuedata = port;
+ disk->queue = q;
+ return 0;
}
-static void carm_free_disks(struct carm_host *host)
+static void carm_free_disk(struct carm_host *host, unsigned int port_no)
{
- unsigned int i;
-
- for (i = 0; i < CARM_MAX_PORTS; i++) {
- struct carm_port *port = &host->port[i];
- struct gendisk *disk = port->disk;
+ struct carm_port *port = &host->port[port_no];
+ struct gendisk *disk = port->disk;
- if (disk) {
- struct request_queue *q = disk->queue;
+ if (!disk)
+ return;
- if (disk->flags & GENHD_FL_UP)
- del_gendisk(disk);
- if (q) {
- blk_mq_free_tag_set(&port->tag_set);
- blk_cleanup_queue(q);
- }
- put_disk(disk);
- }
- }
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+ put_disk(disk);
}
static int carm_init_shm(struct carm_host *host)
@@ -1618,9 +1440,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&host->fsm_task, carm_fsm_task);
init_completion(&host->probe_comp);
- for (i = 0; i < ARRAY_SIZE(host->req); i++)
- host->req[i].tag = i;
-
host->mmio = ioremap(pci_resource_start(pdev, 0),
pci_resource_len(pdev, 0));
if (!host->mmio) {
@@ -1637,14 +1456,26 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_iounmap;
}
- q = blk_mq_init_sq_queue(&host->tag_set, &carm_oob_mq_ops, 1,
- BLK_MQ_F_NO_SCHED);
+ memset(&host->tag_set, 0, sizeof(host->tag_set));
+ host->tag_set.ops = &carm_mq_ops;
+ host->tag_set.cmd_size = sizeof(struct carm_request);
+ host->tag_set.nr_hw_queues = 1;
+ host->tag_set.nr_maps = 1;
+ host->tag_set.queue_depth = max_queue;
+ host->tag_set.numa_node = NUMA_NO_NODE;
+ host->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+
+ rc = blk_mq_alloc_tag_set(&host->tag_set);
+ if (rc)
+ goto err_out_dma_free;
+
+ q = blk_mq_init_queue(&host->tag_set);
if (IS_ERR(q)) {
- printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n",
- pci_name(pdev));
rc = PTR_ERR(q);
+ blk_mq_free_tag_set(&host->tag_set);
goto err_out_dma_free;
}
+
host->oob_q = q;
q->queuedata = host;
@@ -1667,9 +1498,11 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
if (host->flags & FL_DYN_MAJOR)
host->major = rc;
- rc = carm_init_disks(host);
- if (rc)
- goto err_out_blkdev_disks;
+ for (i = 0; i < CARM_MAX_PORTS; i++) {
+ rc = carm_init_disk(host, i);
+ if (rc)
+ goto err_out_blkdev_disks;
+ }
pci_set_master(pdev);
@@ -1699,7 +1532,8 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
err_out_free_irq:
free_irq(pdev->irq, host);
err_out_blkdev_disks:
- carm_free_disks(host);
+ for (i = 0; i < CARM_MAX_PORTS; i++)
+ carm_free_disk(host, i);
unregister_blkdev(host->major, host->name);
err_out_free_majors:
if (host->major == 160)
@@ -1724,6 +1558,7 @@ err_out:
static void carm_remove_one (struct pci_dev *pdev)
{
struct carm_host *host = pci_get_drvdata(pdev);
+ unsigned int i;
if (!host) {
printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n",
@@ -1732,7 +1567,8 @@ static void carm_remove_one (struct pci_dev *pdev)
}
free_irq(pdev->irq, host);
- carm_free_disks(host);
+ for (i = 0; i < CARM_MAX_PORTS; i++)
+ carm_free_disk(host, i);
unregister_blkdev(host->major, host->name);
if (host->major == 160)
clear_bit(0, &carm_major_alloc);
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index be3e3ab79950..aa035cf8a51d 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -888,8 +888,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
card->biotail = &card->bio;
spin_lock_init(&card->lock);
- card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE,
- &card->lock);
+ card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
if (!card->queue)
goto failed_alloc;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 086c6bb12baa..912c4265e592 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -214,6 +214,20 @@ static void virtblk_done(struct virtqueue *vq)
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
+static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+ struct virtio_blk *vblk = hctx->queue->queuedata;
+ struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
+ bool kick;
+
+ spin_lock_irq(&vq->lock);
+ kick = virtqueue_kick_prepare(vq->vq);
+ spin_unlock_irq(&vq->lock);
+
+ if (kick)
+ virtqueue_notify(vq->vq);
+}
+
static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -624,7 +638,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set)
{
struct virtio_blk *vblk = set->driver_data;
- return blk_mq_virtio_map_queues(set, vblk->vdev, 0);
+ return blk_mq_virtio_map_queues(&set->map[0], vblk->vdev, 0);
}
#ifdef CONFIG_VIRTIO_BLK_SCSI
@@ -638,6 +652,7 @@ static void virtblk_initialize_rq(struct request *req)
static const struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
+ .commit_rqs = virtio_commit_rqs,
.complete = virtblk_request_done,
.init_request = virtblk_init_request,
#ifdef CONFIG_VIRTIO_BLK_SCSI
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 6bac03999fd4..09b845e90114 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1610,7 +1610,6 @@ static struct scsi_host_template scsi_driver_template = {
.eh_abort_handler = sbp2_scsi_abort,
.this_id = -1,
.sg_tablesize = SG_ALL,
- .use_clustering = ENABLE_CLUSTERING,
.can_queue = 1,
.sdev_attrs = sbp2_scsi_sysfs_attrs,
};
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 023a32cfac42..540166443c34 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -449,7 +449,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios)
gc->base = chip->gpio_start;
gc->ngpio = gpios;
- gc->label = chip->client->name;
+ gc->label = dev_name(&chip->client->dev);
gc->parent = &chip->client->dev;
gc->owner = THIS_MODULE;
gc->names = chip->names;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index f05a29ff586e..9fd5fbe8bebf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -583,7 +583,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
dev_priv->map_mode = vmw_dma_map_populate;
- if (dma_ops->sync_single_for_cpu)
+ if (dma_ops && dma_ops->sync_single_for_cpu)
dev_priv->map_mode = vmw_dma_alloc_coherent;
#ifdef CONFIG_SWIOTLB
if (swiotlb_nr_tbl() == 0)
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 8b2b72b93885..da58020a144e 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -94,7 +94,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_MISC;
- rq->special = (char *)pc;
+ ide_req(rq)->special = pc;
if (buf && bufflen) {
error = blk_rq_map_kern(drive->queue, rq, buf, bufflen,
@@ -172,8 +172,8 @@ EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
void ide_prep_sense(ide_drive_t *drive, struct request *rq)
{
struct request_sense *sense = &drive->sense_data;
- struct request *sense_rq = drive->sense_rq;
- struct scsi_request *req = scsi_req(sense_rq);
+ struct request *sense_rq;
+ struct scsi_request *req;
unsigned int cmd_len, sense_len;
int err;
@@ -196,9 +196,16 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
if (ata_sense_request(rq) || drive->sense_rq_armed)
return;
+ sense_rq = drive->sense_rq;
+ if (!sense_rq) {
+ sense_rq = blk_mq_alloc_request(drive->queue, REQ_OP_DRV_IN,
+ BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+ drive->sense_rq = sense_rq;
+ }
+ req = scsi_req(sense_rq);
+
memset(sense, 0, sizeof(*sense));
- blk_rq_init(rq->q, sense_rq);
scsi_req_init(req);
err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
@@ -207,6 +214,8 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
if (printk_ratelimit())
printk(KERN_WARNING PFX "%s: failed to map sense "
"buffer\n", drive->name);
+ blk_mq_free_request(sense_rq);
+ drive->sense_rq = NULL;
return;
}
@@ -226,6 +235,8 @@ EXPORT_SYMBOL_GPL(ide_prep_sense);
int ide_queue_sense_rq(ide_drive_t *drive, void *special)
{
+ struct request *sense_rq = drive->sense_rq;
+
/* deferred failure from ide_prep_sense() */
if (!drive->sense_rq_armed) {
printk(KERN_WARNING PFX "%s: error queuing a sense request\n",
@@ -233,12 +244,12 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special)
return -ENOMEM;
}
- drive->sense_rq->special = special;
+ ide_req(sense_rq)->special = special;
drive->sense_rq_armed = false;
drive->hwif->rq = NULL;
- elv_add_request(drive->queue, drive->sense_rq, ELEVATOR_INSERT_FRONT);
+ ide_insert_request_head(drive, sense_rq);
return 0;
}
EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
@@ -270,10 +281,8 @@ void ide_retry_pc(ide_drive_t *drive)
*/
drive->hwif->rq = NULL;
ide_requeue_and_plug(drive, failed_rq);
- if (ide_queue_sense_rq(drive, pc)) {
- blk_start_request(failed_rq);
+ if (ide_queue_sense_rq(drive, pc))
ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
- }
}
EXPORT_SYMBOL_GPL(ide_retry_pc);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index f9b59d41813f..1f03884a6808 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -211,12 +211,12 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
{
/*
- * For ATA_PRIV_SENSE, "rq->special" points to the original
+ * For ATA_PRIV_SENSE, "ide_req(rq)->special" points to the original
* failed request. Also, the sense data should be read
* directly from rq which might be different from the original
* sense buffer if it got copied during mapping.
*/
- struct request *failed = (struct request *)rq->special;
+ struct request *failed = ide_req(rq)->special;
void *sense = bio_data(rq->bio);
if (failed) {
@@ -258,11 +258,22 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq)
/*
* take a breather
*/
- blk_delay_queue(drive->queue, 1);
+ blk_mq_requeue_request(rq, false);
+ blk_mq_delay_kick_requeue_list(drive->queue, 1);
return 1;
}
}
+static void ide_cd_free_sense(ide_drive_t *drive)
+{
+ if (!drive->sense_rq)
+ return;
+
+ blk_mq_free_request(drive->sense_rq);
+ drive->sense_rq = NULL;
+ drive->sense_rq_armed = false;
+}
+
/**
* Returns:
* 0: if the request should be continued.
@@ -516,6 +527,82 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
return false;
}
+/* standard prep_rq that builds 10 byte cmds */
+static bool ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
+{
+ int hard_sect = queue_logical_block_size(q);
+ long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
+ unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
+ struct scsi_request *req = scsi_req(rq);
+
+ if (rq_data_dir(rq) == READ)
+ req->cmd[0] = GPCMD_READ_10;
+ else
+ req->cmd[0] = GPCMD_WRITE_10;
+
+ /*
+ * fill in lba
+ */
+ req->cmd[2] = (block >> 24) & 0xff;
+ req->cmd[3] = (block >> 16) & 0xff;
+ req->cmd[4] = (block >> 8) & 0xff;
+ req->cmd[5] = block & 0xff;
+
+ /*
+ * and transfer length
+ */
+ req->cmd[7] = (blocks >> 8) & 0xff;
+ req->cmd[8] = blocks & 0xff;
+ req->cmd_len = 10;
+ return true;
+}
+
+/*
+ * Most of the SCSI commands are supported directly by ATAPI devices.
+ * This transform handles the few exceptions.
+ */
+static bool ide_cdrom_prep_pc(struct request *rq)
+{
+ u8 *c = scsi_req(rq)->cmd;
+
+ /* transform 6-byte read/write commands to the 10-byte version */
+ if (c[0] == READ_6 || c[0] == WRITE_6) {
+ c[8] = c[4];
+ c[5] = c[3];
+ c[4] = c[2];
+ c[3] = c[1] & 0x1f;
+ c[2] = 0;
+ c[1] &= 0xe0;
+ c[0] += (READ_10 - READ_6);
+ scsi_req(rq)->cmd_len = 10;
+ return true;
+ }
+
+ /*
+ * it's silly to pretend we understand 6-byte sense commands, just
+ * reject with ILLEGAL_REQUEST and the caller should take the
+ * appropriate action
+ */
+ if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) {
+ scsi_req(rq)->result = ILLEGAL_REQUEST;
+ return false;
+ }
+
+ return true;
+}
+
+static bool ide_cdrom_prep_rq(ide_drive_t *drive, struct request *rq)
+{
+ if (!blk_rq_is_passthrough(rq)) {
+ scsi_req_init(scsi_req(rq));
+
+ return ide_cdrom_prep_fs(drive->queue, rq);
+ } else if (blk_rq_is_scsi(rq))
+ return ide_cdrom_prep_pc(rq);
+
+ return true;
+}
+
static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
{
ide_hwif_t *hwif = drive->hwif;
@@ -675,7 +762,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
out_end:
if (blk_rq_is_scsi(rq) && rc == 0) {
scsi_req(rq)->resid_len = 0;
- blk_end_request_all(rq, BLK_STS_OK);
+ blk_mq_end_request(rq, BLK_STS_OK);
hwif->rq = NULL;
} else {
if (sense && uptodate)
@@ -705,6 +792,8 @@ out_end:
if (sense && rc == 2)
ide_error(drive, "request sense failure", stat);
}
+
+ ide_cd_free_sense(drive);
return ide_stopped;
}
@@ -729,7 +818,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
* We may be retrying this request after an error. Fix up any
* weirdness which might be present in the request packet.
*/
- q->prep_rq_fn(q, rq);
+ ide_cdrom_prep_rq(drive, rq);
}
/* fs requests *must* be hardware frame aligned */
@@ -1323,82 +1412,6 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
return nslots;
}
-/* standard prep_rq_fn that builds 10 byte cmds */
-static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
-{
- int hard_sect = queue_logical_block_size(q);
- long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
- unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
- struct scsi_request *req = scsi_req(rq);
-
- q->initialize_rq_fn(rq);
-
- if (rq_data_dir(rq) == READ)
- req->cmd[0] = GPCMD_READ_10;
- else
- req->cmd[0] = GPCMD_WRITE_10;
-
- /*
- * fill in lba
- */
- req->cmd[2] = (block >> 24) & 0xff;
- req->cmd[3] = (block >> 16) & 0xff;
- req->cmd[4] = (block >> 8) & 0xff;
- req->cmd[5] = block & 0xff;
-
- /*
- * and transfer length
- */
- req->cmd[7] = (blocks >> 8) & 0xff;
- req->cmd[8] = blocks & 0xff;
- req->cmd_len = 10;
- return BLKPREP_OK;
-}
-
-/*
- * Most of the SCSI commands are supported directly by ATAPI devices.
- * This transform handles the few exceptions.
- */
-static int ide_cdrom_prep_pc(struct request *rq)
-{
- u8 *c = scsi_req(rq)->cmd;
-
- /* transform 6-byte read/write commands to the 10-byte version */
- if (c[0] == READ_6 || c[0] == WRITE_6) {
- c[8] = c[4];
- c[5] = c[3];
- c[4] = c[2];
- c[3] = c[1] & 0x1f;
- c[2] = 0;
- c[1] &= 0xe0;
- c[0] += (READ_10 - READ_6);
- scsi_req(rq)->cmd_len = 10;
- return BLKPREP_OK;
- }
-
- /*
- * it's silly to pretend we understand 6-byte sense commands, just
- * reject with ILLEGAL_REQUEST and the caller should take the
- * appropriate action
- */
- if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) {
- scsi_req(rq)->result = ILLEGAL_REQUEST;
- return BLKPREP_KILL;
- }
-
- return BLKPREP_OK;
-}
-
-static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq)
-{
- if (!blk_rq_is_passthrough(rq))
- return ide_cdrom_prep_fs(q, rq);
- else if (blk_rq_is_scsi(rq))
- return ide_cdrom_prep_pc(rq);
-
- return 0;
-}
-
struct cd_list_entry {
const char *id_model;
const char *id_firmware;
@@ -1508,7 +1521,7 @@ static int ide_cdrom_setup(ide_drive_t *drive)
ide_debug_log(IDE_DBG_PROBE, "enter");
- blk_queue_prep_rq(q, ide_cdrom_prep_fn);
+ drive->prep_rq = ide_cdrom_prep_rq;
blk_queue_dma_alignment(q, 31);
blk_queue_update_dma_pad(q, 15);
@@ -1569,7 +1582,7 @@ static void ide_cd_release(struct device *dev)
if (devinfo->handle == drive)
unregister_cdrom(devinfo);
drive->driver_data = NULL;
- blk_queue_prep_rq(drive->queue, NULL);
+ drive->prep_rq = NULL;
g->private_data = NULL;
put_disk(g);
kfree(info);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index f4f8afdf8bbe..f2f93ed40356 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -171,7 +171,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
scsi_req(rq)->cmd_len = 5;
scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
*(int *)&scsi_req(rq)->cmd[1] = arg;
- rq->special = setting->set;
+ ide_req(rq)->special = setting->set;
blk_execute_rq(q, NULL, rq, 0);
ret = scsi_req(rq)->result;
@@ -182,7 +182,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq)
{
- int err, (*setfunc)(ide_drive_t *, int) = rq->special;
+ int err, (*setfunc)(ide_drive_t *, int) = ide_req(rq)->special;
err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]);
if (err)
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index e3b4e659082d..197912af5c2f 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -427,16 +427,15 @@ static void ide_disk_unlock_native_capacity(ide_drive_t *drive)
drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */
}
-static int idedisk_prep_fn(struct request_queue *q, struct request *rq)
+static bool idedisk_prep_rq(ide_drive_t *drive, struct request *rq)
{
- ide_drive_t *drive = q->queuedata;
struct ide_cmd *cmd;
if (req_op(rq) != REQ_OP_FLUSH)
- return BLKPREP_OK;
+ return true;
- if (rq->special) {
- cmd = rq->special;
+ if (ide_req(rq)->special) {
+ cmd = ide_req(rq)->special;
memset(cmd, 0, sizeof(*cmd));
} else {
cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
@@ -456,10 +455,10 @@ static int idedisk_prep_fn(struct request_queue *q, struct request *rq)
rq->cmd_flags &= ~REQ_OP_MASK;
rq->cmd_flags |= REQ_OP_DRV_OUT;
ide_req(rq)->type = ATA_PRIV_TASKFILE;
- rq->special = cmd;
+ ide_req(rq)->special = cmd;
cmd->rq = rq;
- return BLKPREP_OK;
+ return true;
}
ide_devset_get(multcount, mult_count);
@@ -548,7 +547,7 @@ static void update_flush(ide_drive_t *drive)
if (barrier) {
wc = true;
- blk_queue_prep_rq(drive->queue, idedisk_prep_fn);
+ drive->prep_rq = idedisk_prep_rq;
}
}
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 47d5f3379748..e1323e058454 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -125,7 +125,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
/* retry only "normal" I/O: */
if (blk_rq_is_passthrough(rq)) {
if (ata_taskfile_request(rq)) {
- struct ide_cmd *cmd = rq->special;
+ struct ide_cmd *cmd = ide_req(rq)->special;
if (cmd)
ide_complete_cmd(drive, cmd, stat, err);
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index a8df300f949c..780d33ccc5d8 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -276,7 +276,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
switch (ide_req(rq)->type) {
case ATA_PRIV_MISC:
case ATA_PRIV_SENSE:
- pc = (struct ide_atapi_pc *)rq->special;
+ pc = (struct ide_atapi_pc *)ide_req(rq)->special;
break;
default:
BUG();
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 0d93e0cfbeaf..8445b484ae69 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -67,7 +67,15 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
ide_dma_on(drive);
}
- return blk_end_request(rq, error, nr_bytes);
+ if (!blk_update_request(rq, error, nr_bytes)) {
+ if (rq == drive->sense_rq)
+ drive->sense_rq = NULL;
+
+ __blk_mq_end_request(rq, error);
+ return 0;
+ }
+
+ return 1;
}
EXPORT_SYMBOL_GPL(ide_end_rq);
@@ -103,7 +111,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
}
if (rq && ata_taskfile_request(rq)) {
- struct ide_cmd *orig_cmd = rq->special;
+ struct ide_cmd *orig_cmd = ide_req(rq)->special;
if (cmd->tf_flags & IDE_TFLAG_DYN)
kfree(orig_cmd);
@@ -253,7 +261,7 @@ EXPORT_SYMBOL_GPL(ide_init_sg_cmd);
static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
struct request *rq)
{
- struct ide_cmd *cmd = rq->special;
+ struct ide_cmd *cmd = ide_req(rq)->special;
if (cmd) {
if (cmd->protocol == ATA_PROT_PIO) {
@@ -307,8 +315,6 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
{
ide_startstop_t startstop;
- BUG_ON(!(rq->rq_flags & RQF_STARTED));
-
#ifdef DEBUG
printk("%s: start_request: current=0x%08lx\n",
drive->hwif->name, (unsigned long) rq);
@@ -320,6 +326,9 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
goto kill_rq;
}
+ if (drive->prep_rq && !drive->prep_rq(drive, rq))
+ return ide_stopped;
+
if (ata_pm_request(rq))
ide_check_pm_state(drive, rq);
@@ -343,7 +352,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
if (ata_taskfile_request(rq))
return execute_drive_cmd(drive, rq);
else if (ata_pm_request(rq)) {
- struct ide_pm_state *pm = rq->special;
+ struct ide_pm_state *pm = ide_req(rq)->special;
#ifdef DEBUG_PM
printk("%s: start_power_step(step: %d)\n",
drive->name, pm->pm_step);
@@ -430,44 +439,42 @@ static inline void ide_unlock_host(struct ide_host *host)
}
}
-static void __ide_requeue_and_plug(struct request_queue *q, struct request *rq)
-{
- if (rq)
- blk_requeue_request(q, rq);
- if (rq || blk_peek_request(q)) {
- /* Use 3ms as that was the old plug delay */
- blk_delay_queue(q, 3);
- }
-}
-
void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
{
struct request_queue *q = drive->queue;
- unsigned long flags;
- spin_lock_irqsave(q->queue_lock, flags);
- __ide_requeue_and_plug(q, rq);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ /* Use 3ms as that was the old plug delay */
+ if (rq) {
+ blk_mq_requeue_request(rq, false);
+ blk_mq_delay_kick_requeue_list(q, 3);
+ } else
+ blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3);
}
/*
* Issue a new request to a device.
*/
-void do_ide_request(struct request_queue *q)
+blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- ide_drive_t *drive = q->queuedata;
+ ide_drive_t *drive = hctx->queue->queuedata;
ide_hwif_t *hwif = drive->hwif;
struct ide_host *host = hwif->host;
- struct request *rq = NULL;
+ struct request *rq = bd->rq;
ide_startstop_t startstop;
- spin_unlock_irq(q->queue_lock);
+ if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) {
+ rq->rq_flags |= RQF_DONTPREP;
+ ide_req(rq)->special = NULL;
+ }
/* HLD do_request() callback might sleep, make sure it's okay */
might_sleep();
if (ide_lock_host(host, hwif))
- goto plug_device_2;
+ return BLK_STS_DEV_RESOURCE;
+
+ blk_mq_start_request(rq);
spin_lock_irq(&hwif->lock);
@@ -503,21 +510,16 @@ repeat:
hwif->cur_dev = drive;
drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
- spin_unlock_irq(&hwif->lock);
- spin_lock_irq(q->queue_lock);
/*
* we know that the queue isn't empty, but this can happen
- * if the q->prep_rq_fn() decides to kill a request
+ * if ->prep_rq() decides to kill a request
*/
- if (!rq)
- rq = blk_fetch_request(drive->queue);
-
- spin_unlock_irq(q->queue_lock);
- spin_lock_irq(&hwif->lock);
-
if (!rq) {
- ide_unlock_port(hwif);
- goto out;
+ rq = bd->rq;
+ if (!rq) {
+ ide_unlock_port(hwif);
+ goto out;
+ }
}
/*
@@ -551,23 +553,24 @@ repeat:
if (startstop == ide_stopped) {
rq = hwif->rq;
hwif->rq = NULL;
- goto repeat;
+ if (rq)
+ goto repeat;
+ ide_unlock_port(hwif);
+ goto out;
}
- } else
- goto plug_device;
+ } else {
+plug_device:
+ spin_unlock_irq(&hwif->lock);
+ ide_unlock_host(host);
+ ide_requeue_and_plug(drive, rq);
+ return BLK_STS_OK;
+ }
+
out:
spin_unlock_irq(&hwif->lock);
if (rq == NULL)
ide_unlock_host(host);
- spin_lock_irq(q->queue_lock);
- return;
-
-plug_device:
- spin_unlock_irq(&hwif->lock);
- ide_unlock_host(host);
-plug_device_2:
- spin_lock_irq(q->queue_lock);
- __ide_requeue_and_plug(q, rq);
+ return BLK_STS_OK;
}
static int drive_is_ready(ide_drive_t *drive)
@@ -887,3 +890,16 @@ void ide_pad_transfer(ide_drive_t *drive, int write, int len)
}
}
EXPORT_SYMBOL_GPL(ide_pad_transfer);
+
+void ide_insert_request_head(ide_drive_t *drive, struct request *rq)
+{
+ ide_hwif_t *hwif = drive->hwif;
+ unsigned long flags;
+
+ spin_lock_irqsave(&hwif->lock, flags);
+ list_add_tail(&rq->queuelist, &drive->rq_list);
+ spin_unlock_irqrestore(&hwif->lock, flags);
+
+ kblockd_schedule_work(&drive->rq_work);
+}
+EXPORT_SYMBOL_GPL(ide_insert_request_head);
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 622f0edb3945..102aa3bc3e7f 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -27,7 +27,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
spin_unlock_irq(&hwif->lock);
if (start_queue)
- blk_run_queue(q);
+ blk_mq_run_hw_queues(q, true);
return;
}
spin_unlock_irq(&hwif->lock);
@@ -36,7 +36,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
scsi_req(rq)->cmd_len = 1;
ide_req(rq)->type = ATA_PRIV_MISC;
- rq->special = &timeout;
+ ide_req(rq)->special = &timeout;
blk_execute_rq(q, NULL, rq, 1);
rc = scsi_req(rq)->result ? -EIO : 0;
blk_put_request(rq);
@@ -54,7 +54,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS;
scsi_req(rq)->cmd_len = 1;
ide_req(rq)->type = ATA_PRIV_MISC;
- elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
+ ide_insert_request_head(drive, rq);
out:
return;
@@ -67,7 +67,7 @@ ide_startstop_t ide_do_park_unpark(ide_drive_t *drive, struct request *rq)
memset(&cmd, 0, sizeof(cmd));
if (scsi_req(rq)->cmd[0] == REQ_PARK_HEADS) {
- drive->sleep = *(unsigned long *)rq->special;
+ drive->sleep = *(unsigned long *)ide_req(rq)->special;
drive->dev_flags |= IDE_DFLAG_SLEEPING;
tf->command = ATA_CMD_IDLEIMMEDIATE;
tf->feature = 0x44;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 59217aa1d1fb..192e6c65d34e 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -21,7 +21,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
memset(&rqpm, 0, sizeof(rqpm));
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
- rq->special = &rqpm;
+ ide_req(rq)->special = &rqpm;
rqpm.pm_step = IDE_PM_START_SUSPEND;
if (mesg.event == PM_EVENT_PRETHAW)
mesg.event = PM_EVENT_FREEZE;
@@ -40,32 +40,17 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
return ret;
}
-static void ide_end_sync_rq(struct request *rq, blk_status_t error)
-{
- complete(rq->end_io_data);
-}
-
static int ide_pm_execute_rq(struct request *rq)
{
struct request_queue *q = rq->q;
- DECLARE_COMPLETION_ONSTACK(wait);
- rq->end_io_data = &wait;
- rq->end_io = ide_end_sync_rq;
-
- spin_lock_irq(q->queue_lock);
if (unlikely(blk_queue_dying(q))) {
rq->rq_flags |= RQF_QUIET;
scsi_req(rq)->result = -ENXIO;
- __blk_end_request_all(rq, BLK_STS_OK);
- spin_unlock_irq(q->queue_lock);
+ blk_mq_end_request(rq, BLK_STS_OK);
return -ENXIO;
}
- __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
- __blk_run_queue_uncond(q);
- spin_unlock_irq(q->queue_lock);
-
- wait_for_completion_io(&wait);
+ blk_execute_rq(q, NULL, rq, true);
return scsi_req(rq)->result ? -EIO : 0;
}
@@ -79,6 +64,8 @@ int generic_ide_resume(struct device *dev)
struct ide_pm_state rqpm;
int err;
+ blk_mq_start_stopped_hw_queues(drive->queue, true);
+
if (ide_port_acpi(hwif)) {
/* call ACPI _PS0 / _STM only once */
if ((drive->dn & 1) == 0 || pair == NULL) {
@@ -92,7 +79,7 @@ int generic_ide_resume(struct device *dev)
memset(&rqpm, 0, sizeof(rqpm));
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT);
ide_req(rq)->type = ATA_PRIV_PM_RESUME;
- rq->special = &rqpm;
+ ide_req(rq)->special = &rqpm;
rqpm.pm_step = IDE_PM_START_RESUME;
rqpm.pm_state = PM_EVENT_ON;
@@ -111,7 +98,7 @@ int generic_ide_resume(struct device *dev)
void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
{
- struct ide_pm_state *pm = rq->special;
+ struct ide_pm_state *pm = ide_req(rq)->special;
#ifdef DEBUG_PM
printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
@@ -141,7 +128,7 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
{
- struct ide_pm_state *pm = rq->special;
+ struct ide_pm_state *pm = ide_req(rq)->special;
struct ide_cmd cmd = { };
switch (pm->pm_step) {
@@ -213,8 +200,7 @@ out_do_tf:
void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
{
struct request_queue *q = drive->queue;
- struct ide_pm_state *pm = rq->special;
- unsigned long flags;
+ struct ide_pm_state *pm = ide_req(rq)->special;
ide_complete_power_step(drive, rq);
if (pm->pm_step != IDE_PM_COMPLETED)
@@ -224,22 +210,19 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
printk("%s: completing PM request, %s\n", drive->name,
(ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) ? "suspend" : "resume");
#endif
- spin_lock_irqsave(q->queue_lock, flags);
if (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND)
- blk_stop_queue(q);
+ blk_mq_stop_hw_queues(q);
else
drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
- spin_unlock_irqrestore(q->queue_lock, flags);
drive->hwif->rq = NULL;
- if (blk_end_request(rq, BLK_STS_OK, 0))
- BUG();
+ blk_mq_end_request(rq, BLK_STS_OK);
}
void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
{
- struct ide_pm_state *pm = rq->special;
+ struct ide_pm_state *pm = ide_req(rq)->special;
if (blk_rq_is_private(rq) &&
ide_req(rq)->type == ATA_PRIV_PM_SUSPEND &&
@@ -260,7 +243,6 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
ide_hwif_t *hwif = drive->hwif;
const struct ide_tp_ops *tp_ops = hwif->tp_ops;
struct request_queue *q = drive->queue;
- unsigned long flags;
int rc;
#ifdef DEBUG_PM
printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name);
@@ -274,8 +256,6 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
if (rc)
printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name);
- spin_lock_irqsave(q->queue_lock, flags);
- blk_start_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ blk_mq_start_hw_queues(q);
}
}
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 3b75a7b7a284..63627be0811a 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -746,10 +746,16 @@ static void ide_initialize_rq(struct request *rq)
{
struct ide_request *req = blk_mq_rq_to_pdu(rq);
+ req->special = NULL;
scsi_req_init(&req->sreq);
req->sreq.sense = req->sense;
}
+static const struct blk_mq_ops ide_mq_ops = {
+ .queue_rq = ide_queue_rq,
+ .initialize_rq_fn = ide_initialize_rq,
+};
+
/*
* init request queue
*/
@@ -759,6 +765,7 @@ static int ide_init_queue(ide_drive_t *drive)
ide_hwif_t *hwif = drive->hwif;
int max_sectors = 256;
int max_sg_entries = PRD_ENTRIES;
+ struct blk_mq_tag_set *set;
/*
* Our default set up assumes the normal IDE case,
@@ -767,19 +774,26 @@ static int ide_init_queue(ide_drive_t *drive)
* limits and LBA48 we could raise it but as yet
* do not.
*/
- q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL);
- if (!q)
+
+ set = &drive->tag_set;
+ set->ops = &ide_mq_ops;
+ set->nr_hw_queues = 1;
+ set->queue_depth = 32;
+ set->reserved_tags = 1;
+ set->cmd_size = sizeof(struct ide_request);
+ set->numa_node = hwif_to_node(hwif);
+ set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
+ if (blk_mq_alloc_tag_set(set))
return 1;
- q->request_fn = do_ide_request;
- q->initialize_rq_fn = ide_initialize_rq;
- q->cmd_size = sizeof(struct ide_request);
- blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
- if (blk_init_allocated_queue(q) < 0) {
- blk_cleanup_queue(q);
+ q = blk_mq_init_queue(set);
+ if (IS_ERR(q)) {
+ blk_mq_free_tag_set(set);
return 1;
}
+ blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+
q->queuedata = drive;
blk_queue_segment_boundary(q, 0xffff);
@@ -965,8 +979,12 @@ static void drive_release_dev (struct device *dev)
ide_proc_unregister_device(drive);
+ if (drive->sense_rq)
+ blk_mq_free_request(drive->sense_rq);
+
blk_cleanup_queue(drive->queue);
drive->queue = NULL;
+ blk_mq_free_tag_set(&drive->tag_set);
drive->dev_flags &= ~IDE_DFLAG_PRESENT;
@@ -1133,6 +1151,28 @@ static void ide_port_cable_detect(ide_hwif_t *hwif)
}
}
+/*
+ * Deferred request list insertion handler
+ */
+static void drive_rq_insert_work(struct work_struct *work)
+{
+ ide_drive_t *drive = container_of(work, ide_drive_t, rq_work);
+ ide_hwif_t *hwif = drive->hwif;
+ struct request *rq;
+ LIST_HEAD(list);
+
+ spin_lock_irq(&hwif->lock);
+ if (!list_empty(&drive->rq_list))
+ list_splice_init(&drive->rq_list, &list);
+ spin_unlock_irq(&hwif->lock);
+
+ while (!list_empty(&list)) {
+ rq = list_first_entry(&list, struct request, queuelist);
+ list_del_init(&rq->queuelist);
+ blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL);
+ }
+}
+
static const u8 ide_hwif_to_major[] =
{ IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR,
IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR };
@@ -1145,12 +1185,10 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
ide_port_for_each_dev(i, drive, hwif) {
u8 j = (hwif->index * MAX_DRIVES) + i;
u16 *saved_id = drive->id;
- struct request *saved_sense_rq = drive->sense_rq;
memset(drive, 0, sizeof(*drive));
memset(saved_id, 0, SECTOR_SIZE);
drive->id = saved_id;
- drive->sense_rq = saved_sense_rq;
drive->media = ide_disk;
drive->select = (i << 4) | ATA_DEVICE_OBS;
@@ -1166,6 +1204,9 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
INIT_LIST_HEAD(&drive->list);
init_completion(&drive->gendev_rel_comp);
+
+ INIT_WORK(&drive->rq_work, drive_rq_insert_work);
+ INIT_LIST_HEAD(&drive->rq_list);
}
}
@@ -1255,7 +1296,6 @@ static void ide_port_free_devices(ide_hwif_t *hwif)
int i;
ide_port_for_each_dev(i, drive, hwif) {
- kfree(drive->sense_rq);
kfree(drive->id);
kfree(drive);
}
@@ -1283,17 +1323,10 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
if (drive->id == NULL)
goto out_free_drive;
- drive->sense_rq = kmalloc(sizeof(struct request) +
- sizeof(struct ide_request), GFP_KERNEL);
- if (!drive->sense_rq)
- goto out_free_id;
-
hwif->devices[i] = drive;
}
return 0;
-out_free_id:
- kfree(drive->id);
out_free_drive:
kfree(drive);
out_nomem:
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 34c1165226a4..db1a65f4b490 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -639,7 +639,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
goto out;
}
if (req->cmd[13] & REQ_IDETAPE_PC1) {
- pc = (struct ide_atapi_pc *)rq->special;
+ pc = (struct ide_atapi_pc *)ide_req(rq)->special;
req->cmd[13] &= ~(REQ_IDETAPE_PC1);
req->cmd[13] |= REQ_IDETAPE_PC2;
goto out;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index c21d5c50ae3a..17b2e379e872 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -440,7 +440,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
goto put_req;
}
- rq->special = cmd;
+ ide_req(rq)->special = cmd;
cmd->rq = rq;
blk_execute_rq(drive->queue, NULL, rq, 0);
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 867cee5e27b2..69dee36e0e89 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -38,4 +38,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_cq.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
- uverbs_uapi.o
+ uverbs_uapi.o uverbs_std_types_device.o
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 324ef85a13b6..f82b4260de42 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -137,13 +137,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
err2:
ib_free_send_mad(send_buf);
err1:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- rdma_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_send_wc->send_buf);
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 5b2fce4a7091..7b04590f307f 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -215,10 +215,6 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
port_num, entry->attr.index, entry->attr.gid.raw);
- if (rdma_cap_roce_gid_table(device, port_num) &&
- entry->state != GID_TABLE_ENTRY_INVALID)
- device->del_gid(&entry->attr, &entry->context);
-
write_lock_irq(&table->rwlock);
/*
@@ -324,7 +320,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
return -EINVAL;
}
if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
- ret = attr->device->add_gid(attr, &entry->context);
+ ret = attr->device->ops.add_gid(attr, &entry->context);
if (ret) {
dev_err(&attr->device->dev,
"%s GID add failed port=%d index=%d\n",
@@ -364,6 +360,9 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
table->data_vec[ix] = NULL;
write_unlock_irq(&table->rwlock);
+ if (rdma_cap_roce_gid_table(ib_dev, port))
+ ib_dev->ops.del_gid(&entry->attr, &entry->context);
+
put_gid_entry_locked(entry);
}
@@ -548,8 +547,8 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
unsigned long mask;
int ret;
- if (ib_dev->get_netdev) {
- idev = ib_dev->get_netdev(ib_dev, port);
+ if (ib_dev->ops.get_netdev) {
+ idev = ib_dev->ops.get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
union ib_gid default_gid;
@@ -1296,9 +1295,9 @@ static int config_non_roce_gid_cache(struct ib_device *device,
mutex_lock(&table->lock);
for (i = 0; i < gid_tbl_len; ++i) {
- if (!device->query_gid)
+ if (!device->ops.query_gid)
continue;
- ret = device->query_gid(device, port, i, &gid_attr.gid);
+ ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
if (ret) {
dev_warn(&device->dev,
"query_gid failed (%d) for index %d\n", ret,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index edb2cb758be7..37980c7564c0 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -343,7 +343,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
ret = -ENODEV;
goto out;
}
- ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr);
+ ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto out;
@@ -355,7 +355,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, 0);
ret = PTR_ERR(m);
goto out;
}
@@ -400,7 +400,7 @@ static int cm_create_response_msg_ah(struct cm_port *port,
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
if (msg->ah)
- rdma_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah, 0);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 15d5bb7bf6bb..63a7cc00bae0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -494,7 +494,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
- rdma_restrack_add(&id_priv->res);
+ rdma_restrack_kadd(&id_priv->res);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 8c2dfb3e294e..3ec2c415bb70 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -33,7 +33,10 @@
#include <linux/module.h>
#include <linux/configfs.h>
#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+
#include "core_priv.h"
+#include "cma_priv.h"
struct cma_device;
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 194cfe78c447..cf47c69436a7 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -94,4 +94,32 @@ struct rdma_id_private {
*/
struct rdma_restrack_entry res;
};
+
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+ return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+
+void cma_ref_dev(struct cma_device *dev);
+void cma_deref_dev(struct cma_device *dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie);
+int cma_get_default_gid_type(struct cma_device *dev, unsigned int port);
+int cma_set_default_gid_type(struct cma_device *dev, unsigned int port,
+ enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *dev, unsigned int port);
+int cma_set_default_roce_tos(struct cma_device *dev, unsigned int port,
+ u8 default_roce_tos);
+struct ib_device *cma_get_ib_dev(struct cma_device *dev);
+
#endif /* _CMA_PRIV_H */
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index bb9007a0cca7..3cd830d52967 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -54,35 +54,6 @@ struct pkey_index_qp_list {
struct list_head qp_list;
};
-#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
-int cma_configfs_init(void);
-void cma_configfs_exit(void);
-#else
-static inline int cma_configfs_init(void)
-{
- return 0;
-}
-
-static inline void cma_configfs_exit(void)
-{
-}
-#endif
-struct cma_device;
-void cma_ref_dev(struct cma_device *cma_dev);
-void cma_deref_dev(struct cma_device *cma_dev);
-typedef bool (*cma_device_filter)(struct ib_device *, void *);
-struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
- void *cookie);
-int cma_get_default_gid_type(struct cma_device *cma_dev,
- unsigned int port);
-int cma_set_default_gid_type(struct cma_device *cma_dev,
- unsigned int port,
- enum ib_gid_type default_gid_type);
-int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
-int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port,
- u8 default_roce_tos);
-struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
-
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
@@ -244,10 +215,10 @@ static inline int ib_security_modify_qp(struct ib_qp *qp,
int qp_attr_mask,
struct ib_udata *udata)
{
- return qp->device->modify_qp(qp->real_qp,
- qp_attr,
- qp_attr_mask,
- udata);
+ return qp->device->ops.modify_qp(qp->real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
}
static inline int ib_create_qp_security(struct ib_qp *qp,
@@ -296,6 +267,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
#endif
struct ib_device *ib_device_get_by_index(u32 ifindex);
+void ib_device_put(struct ib_device *device);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
@@ -308,10 +280,10 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
{
struct ib_qp *qp;
- if (!dev->create_qp)
+ if (!dev->ops.create_qp)
return ERR_PTR(-EOPNOTSUPP);
- qp = dev->create_qp(pd, attr, udata);
+ qp = dev->ops.create_qp(pd, attr, udata);
if (IS_ERR(qp))
return qp;
@@ -325,7 +297,10 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
*/
if (attr->qp_type < IB_QPT_XRC_INI) {
qp->res.type = RDMA_RESTRACK_QP;
- rdma_restrack_add(&qp->res);
+ if (uobj)
+ rdma_restrack_uadd(&qp->res);
+ else
+ rdma_restrack_kadd(&qp->res);
} else
qp->res.valid = false;
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index b1e5365ddafa..d61e5e1427c2 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -145,7 +145,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
struct ib_cq *cq;
int ret = -ENOMEM;
- cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
+ cq = dev->ops.create_cq(dev, &cq_attr, NULL, NULL);
if (IS_ERR(cq))
return cq;
@@ -162,7 +162,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
cq->res.type = RDMA_RESTRACK_CQ;
rdma_restrack_set_task(&cq->res, caller);
- rdma_restrack_add(&cq->res);
+ rdma_restrack_kadd(&cq->res);
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
@@ -193,7 +193,7 @@ out_free_wc:
kfree(cq->wc);
rdma_restrack_del(&cq->res);
out_destroy_cq:
- cq->device->destroy_cq(cq);
+ cq->device->ops.destroy_cq(cq);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(__ib_alloc_cq);
@@ -225,7 +225,7 @@ void ib_free_cq(struct ib_cq *cq)
kfree(cq->wc);
rdma_restrack_del(&cq->res);
- ret = cq->device->destroy_cq(cq);
+ ret = cq->device->ops.destroy_cq(cq);
WARN_ON_ONCE(ret);
}
EXPORT_SYMBOL(ib_free_cq);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 87eb4f2cdd7d..47ab34ee1a9d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -96,7 +96,7 @@ static struct notifier_block ibdev_lsm_nb = {
static int ib_device_check_mandatory(struct ib_device *device)
{
-#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
+#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
static const struct {
size_t offset;
char *name;
@@ -122,7 +122,8 @@ static int ib_device_check_mandatory(struct ib_device *device)
int i;
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
- if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
+ if (!*(void **) ((void *) &device->ops +
+ mandatory_table[i].offset)) {
dev_warn(&device->dev,
"Device is missing mandatory function %s\n",
mandatory_table[i].name);
@@ -145,7 +146,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index)
}
/*
- * Caller is responsible to return refrerence count by calling put_device()
+ * Caller must perform ib_device_put() to return the device reference count
+ * when ib_device_get_by_index() returns valid device pointer.
*/
struct ib_device *ib_device_get_by_index(u32 index)
{
@@ -153,13 +155,21 @@ struct ib_device *ib_device_get_by_index(u32 index)
down_read(&lists_rwsem);
device = __ib_device_get_by_index(index);
- if (device)
- get_device(&device->dev);
-
+ if (device) {
+ /* Do not return a device if unregistration has started. */
+ if (!refcount_inc_not_zero(&device->refcount))
+ device = NULL;
+ }
up_read(&lists_rwsem);
return device;
}
+void ib_device_put(struct ib_device *device)
+{
+ if (refcount_dec_and_test(&device->refcount))
+ complete(&device->unreg_completion);
+}
+
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
@@ -293,6 +303,8 @@ struct ib_device *ib_alloc_device(size_t size)
rwlock_init(&device->client_data_lock);
INIT_LIST_HEAD(&device->client_data_list);
INIT_LIST_HEAD(&device->port_list);
+ refcount_set(&device->refcount, 1);
+ init_completion(&device->unreg_completion);
return device;
}
@@ -362,8 +374,8 @@ static int read_port_immutable(struct ib_device *device)
return -ENOMEM;
for (port = start_port; port <= end_port; ++port) {
- ret = device->get_port_immutable(device, port,
- &device->port_immutable[port]);
+ ret = device->ops.get_port_immutable(
+ device, port, &device->port_immutable[port]);
if (ret)
return ret;
@@ -375,8 +387,8 @@ static int read_port_immutable(struct ib_device *device)
void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
- if (dev->get_dev_fw_str)
- dev->get_dev_fw_str(dev, str);
+ if (dev->ops.get_dev_fw_str)
+ dev->ops.get_dev_fw_str(dev, str);
else
str[0] = '\0';
}
@@ -525,7 +537,7 @@ static int setup_device(struct ib_device *device)
}
memset(&device->attrs, 0, sizeof(device->attrs));
- ret = device->query_device(device, &device->attrs, &uhw);
+ ret = device->ops.query_device(device, &device->attrs, &uhw);
if (ret) {
dev_warn(&device->dev,
"Couldn't query the device attributes\n");
@@ -641,6 +653,13 @@ void ib_unregister_device(struct ib_device *device)
struct ib_client_data *context, *tmp;
unsigned long flags;
+ /*
+ * Wait for all netlink command callers to finish working on the
+ * device.
+ */
+ ib_device_put(device);
+ wait_for_completion(&device->unreg_completion);
+
mutex_lock(&device_mutex);
down_write(&lists_rwsem);
@@ -905,14 +924,14 @@ int ib_query_port(struct ib_device *device,
return -EINVAL;
memset(port_attr, 0, sizeof(*port_attr));
- err = device->query_port(device, port_num, port_attr);
+ err = device->ops.query_port(device, port_num, port_attr);
if (err || port_attr->subnet_prefix)
return err;
if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
return 0;
- err = device->query_gid(device, port_num, 0, &gid);
+ err = device->ops.query_gid(device, port_num, 0, &gid);
if (err)
return err;
@@ -946,8 +965,8 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
if (rdma_protocol_roce(ib_dev, port)) {
struct net_device *idev = NULL;
- if (ib_dev->get_netdev)
- idev = ib_dev->get_netdev(ib_dev, port);
+ if (ib_dev->ops.get_netdev)
+ idev = ib_dev->ops.get_netdev(ib_dev, port);
if (idev &&
idev->reg_state >= NETREG_UNREGISTERED) {
@@ -1024,7 +1043,10 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey)
{
- return device->query_pkey(device, port_num, index, pkey);
+ if (!rdma_is_port_valid(device, port_num))
+ return -EINVAL;
+
+ return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
@@ -1041,11 +1063,11 @@ int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
- if (!device->modify_device)
+ if (!device->ops.modify_device)
return -ENOSYS;
- return device->modify_device(device, device_modify_mask,
- device_modify);
+ return device->ops.modify_device(device, device_modify_mask,
+ device_modify);
}
EXPORT_SYMBOL(ib_modify_device);
@@ -1069,9 +1091,10 @@ int ib_modify_port(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- if (device->modify_port)
- rc = device->modify_port(device, port_num, port_modify_mask,
- port_modify);
+ if (device->ops.modify_port)
+ rc = device->ops.modify_port(device, port_num,
+ port_modify_mask,
+ port_modify);
else
rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
return rc;
@@ -1198,6 +1221,105 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);
+void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
+{
+ struct ib_device_ops *dev_ops = &dev->ops;
+#define SET_DEVICE_OP(ptr, name) \
+ do { \
+ if (ops->name) \
+ if (!((ptr)->name)) \
+ (ptr)->name = ops->name; \
+ } while (0)
+
+ SET_DEVICE_OP(dev_ops, add_gid);
+ SET_DEVICE_OP(dev_ops, alloc_dm);
+ SET_DEVICE_OP(dev_ops, alloc_fmr);
+ SET_DEVICE_OP(dev_ops, alloc_hw_stats);
+ SET_DEVICE_OP(dev_ops, alloc_mr);
+ SET_DEVICE_OP(dev_ops, alloc_mw);
+ SET_DEVICE_OP(dev_ops, alloc_pd);
+ SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
+ SET_DEVICE_OP(dev_ops, alloc_ucontext);
+ SET_DEVICE_OP(dev_ops, alloc_xrcd);
+ SET_DEVICE_OP(dev_ops, attach_mcast);
+ SET_DEVICE_OP(dev_ops, check_mr_status);
+ SET_DEVICE_OP(dev_ops, create_ah);
+ SET_DEVICE_OP(dev_ops, create_counters);
+ SET_DEVICE_OP(dev_ops, create_cq);
+ SET_DEVICE_OP(dev_ops, create_flow);
+ SET_DEVICE_OP(dev_ops, create_flow_action_esp);
+ SET_DEVICE_OP(dev_ops, create_qp);
+ SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
+ SET_DEVICE_OP(dev_ops, create_srq);
+ SET_DEVICE_OP(dev_ops, create_wq);
+ SET_DEVICE_OP(dev_ops, dealloc_dm);
+ SET_DEVICE_OP(dev_ops, dealloc_fmr);
+ SET_DEVICE_OP(dev_ops, dealloc_mw);
+ SET_DEVICE_OP(dev_ops, dealloc_pd);
+ SET_DEVICE_OP(dev_ops, dealloc_ucontext);
+ SET_DEVICE_OP(dev_ops, dealloc_xrcd);
+ SET_DEVICE_OP(dev_ops, del_gid);
+ SET_DEVICE_OP(dev_ops, dereg_mr);
+ SET_DEVICE_OP(dev_ops, destroy_ah);
+ SET_DEVICE_OP(dev_ops, destroy_counters);
+ SET_DEVICE_OP(dev_ops, destroy_cq);
+ SET_DEVICE_OP(dev_ops, destroy_flow);
+ SET_DEVICE_OP(dev_ops, destroy_flow_action);
+ SET_DEVICE_OP(dev_ops, destroy_qp);
+ SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
+ SET_DEVICE_OP(dev_ops, destroy_srq);
+ SET_DEVICE_OP(dev_ops, destroy_wq);
+ SET_DEVICE_OP(dev_ops, detach_mcast);
+ SET_DEVICE_OP(dev_ops, disassociate_ucontext);
+ SET_DEVICE_OP(dev_ops, drain_rq);
+ SET_DEVICE_OP(dev_ops, drain_sq);
+ SET_DEVICE_OP(dev_ops, get_dev_fw_str);
+ SET_DEVICE_OP(dev_ops, get_dma_mr);
+ SET_DEVICE_OP(dev_ops, get_hw_stats);
+ SET_DEVICE_OP(dev_ops, get_link_layer);
+ SET_DEVICE_OP(dev_ops, get_netdev);
+ SET_DEVICE_OP(dev_ops, get_port_immutable);
+ SET_DEVICE_OP(dev_ops, get_vector_affinity);
+ SET_DEVICE_OP(dev_ops, get_vf_config);
+ SET_DEVICE_OP(dev_ops, get_vf_stats);
+ SET_DEVICE_OP(dev_ops, map_mr_sg);
+ SET_DEVICE_OP(dev_ops, map_phys_fmr);
+ SET_DEVICE_OP(dev_ops, mmap);
+ SET_DEVICE_OP(dev_ops, modify_ah);
+ SET_DEVICE_OP(dev_ops, modify_cq);
+ SET_DEVICE_OP(dev_ops, modify_device);
+ SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
+ SET_DEVICE_OP(dev_ops, modify_port);
+ SET_DEVICE_OP(dev_ops, modify_qp);
+ SET_DEVICE_OP(dev_ops, modify_srq);
+ SET_DEVICE_OP(dev_ops, modify_wq);
+ SET_DEVICE_OP(dev_ops, peek_cq);
+ SET_DEVICE_OP(dev_ops, poll_cq);
+ SET_DEVICE_OP(dev_ops, post_recv);
+ SET_DEVICE_OP(dev_ops, post_send);
+ SET_DEVICE_OP(dev_ops, post_srq_recv);
+ SET_DEVICE_OP(dev_ops, process_mad);
+ SET_DEVICE_OP(dev_ops, query_ah);
+ SET_DEVICE_OP(dev_ops, query_device);
+ SET_DEVICE_OP(dev_ops, query_gid);
+ SET_DEVICE_OP(dev_ops, query_pkey);
+ SET_DEVICE_OP(dev_ops, query_port);
+ SET_DEVICE_OP(dev_ops, query_qp);
+ SET_DEVICE_OP(dev_ops, query_srq);
+ SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
+ SET_DEVICE_OP(dev_ops, read_counters);
+ SET_DEVICE_OP(dev_ops, reg_dm_mr);
+ SET_DEVICE_OP(dev_ops, reg_user_mr);
+ SET_DEVICE_OP(dev_ops, req_ncomp_notif);
+ SET_DEVICE_OP(dev_ops, req_notify_cq);
+ SET_DEVICE_OP(dev_ops, rereg_user_mr);
+ SET_DEVICE_OP(dev_ops, resize_cq);
+ SET_DEVICE_OP(dev_ops, set_vf_guid);
+ SET_DEVICE_OP(dev_ops, set_vf_link_state);
+ SET_DEVICE_OP(dev_ops, unmap_fmr);
+}
+EXPORT_SYMBOL(ib_set_device_ops);
+
static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.doit = ib_nl_handle_resolve_resp,
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 83ba0068e8bb..7d841b689a1e 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -211,8 +211,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
device = pd->device;
- if (!device->alloc_fmr || !device->dealloc_fmr ||
- !device->map_phys_fmr || !device->unmap_fmr) {
+ if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr ||
+ !device->ops.map_phys_fmr || !device->ops.unmap_fmr) {
dev_info(&device->dev, "Device does not support FMRs\n");
return ERR_PTR(-ENOSYS);
}
@@ -474,7 +474,7 @@ EXPORT_SYMBOL(ib_fmr_pool_map_phys);
* Unmap an FMR. The FMR mapping may remain valid until the FMR is
* reused (or until ib_flush_fmr_pool() is called).
*/
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
+void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
{
struct ib_fmr_pool *pool;
unsigned long flags;
@@ -503,7 +503,5 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
#endif
spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return 0;
}
EXPORT_SYMBOL(ib_fmr_pool_unmap);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index ba668d49c751..476abc74178e 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -502,17 +502,21 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
*/
static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
{
+ const char *devname = dev_name(&cm_id->device->dev);
+ const char *ifname = cm_id->device->iwcm->ifname;
struct iwpm_dev_data pm_reg_msg;
struct iwpm_sa_data pm_msg;
int status;
+ if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) ||
+ strlen(ifname) >= sizeof(pm_reg_msg.if_name))
+ return -EINVAL;
+
cm_id->m_local_addr = cm_id->local_addr;
cm_id->m_remote_addr = cm_id->remote_addr;
- memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev),
- sizeof(pm_reg_msg.dev_name));
- memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
- sizeof(pm_reg_msg.if_name));
+ strncpy(pm_reg_msg.dev_name, devname, sizeof(pm_reg_msg.dev_name));
+ strncpy(pm_reg_msg.if_name, ifname, sizeof(pm_reg_msg.if_name));
if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) ||
!iwpm_valid_pid())
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index d7025cd5be28..7870823bac47 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -888,10 +888,10 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
}
/* No GRH for DR SMP */
- ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
- (const struct ib_mad_hdr *)smp, mad_size,
- (struct ib_mad_hdr *)mad_priv->mad,
- &mad_size, &out_mad_pkey_index);
+ ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
+ (const struct ib_mad_hdr *)smp, mad_size,
+ (struct ib_mad_hdr *)mad_priv->mad,
+ &mad_size, &out_mad_pkey_index);
switch (ret)
{
case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
@@ -2305,14 +2305,12 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
/* Give driver "right of first refusal" on incoming MAD */
- if (port_priv->device->process_mad) {
- ret = port_priv->device->process_mad(port_priv->device, 0,
- port_priv->port_num,
- wc, &recv->grh,
- (const struct ib_mad_hdr *)recv->mad,
- recv->mad_size,
- (struct ib_mad_hdr *)response->mad,
- &mad_size, &resp_mad_pkey_index);
+ if (port_priv->device->ops.process_mad) {
+ ret = port_priv->device->ops.process_mad(
+ port_priv->device, 0, port_priv->port_num, wc,
+ &recv->grh, (const struct ib_mad_hdr *)recv->mad,
+ recv->mad_size, (struct ib_mad_hdr *)response->mad,
+ &mad_size, &resp_mad_pkey_index);
if (opa)
wc->pkey_index = resp_mad_pkey_index;
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index e5cf09c66fe6..5ec57abc0849 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -81,7 +81,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
- rdma_destroy_ah(rmpp_recv->ah);
+ rdma_destroy_ah(rmpp_recv->ah, RDMA_DESTROY_AH_SLEEPABLE);
kfree(rmpp_recv);
}
@@ -171,7 +171,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
hdr_len, 0, GFP_KERNEL,
IB_MGMT_BASE_VERSION);
if (IS_ERR(msg))
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
else {
msg->ah = ah;
msg->context[0] = ah;
@@ -201,7 +201,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- rdma_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(msg);
}
}
@@ -209,7 +209,8 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
- rdma_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah,
+ RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -237,7 +238,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- rdma_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(msg);
}
}
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 573399e3ccc1..e600fc23ae62 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -227,6 +227,7 @@ static int fill_port_info(struct sk_buff *msg,
struct net_device *netdev = NULL;
struct ib_port_attr attr;
int ret;
+ u64 cap_flags = 0;
if (fill_nldev_handle(msg, device))
return -EMSGSIZE;
@@ -239,10 +240,12 @@ static int fill_port_info(struct sk_buff *msg,
return ret;
if (rdma_protocol_ib(device, port)) {
- BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
+ BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
+ sizeof(attr.port_cap_flags2)) > sizeof(u64));
+ cap_flags = attr.port_cap_flags |
+ ((u64)attr.port_cap_flags2 << 32);
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
- (u64)attr.port_cap_flags,
- RDMA_NLDEV_ATTR_PAD))
+ cap_flags, RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
@@ -259,8 +262,8 @@ static int fill_port_info(struct sk_buff *msg,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
return -EMSGSIZE;
- if (device->get_netdev)
- netdev = device->get_netdev(device, port);
+ if (device->ops.get_netdev)
+ netdev = device->ops.get_netdev(device, port);
if (netdev && net_eq(dev_net(netdev), net)) {
ret = nla_put_u32(msg,
@@ -308,6 +311,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_QP] = "qp",
[RDMA_RESTRACK_CM_ID] = "cm_id",
[RDMA_RESTRACK_MR] = "mr",
+ [RDMA_RESTRACK_CTX] = "ctx",
};
struct rdma_restrack_root *res = &device->res;
@@ -636,13 +640,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
- put_device(&device->dev);
+ ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err:
- put_device(&device->dev);
+ ib_device_put(device);
return err;
}
@@ -672,7 +676,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
err = ib_device_rename(device, name);
}
- put_device(&device->dev);
+ ib_device_put(device);
return err;
}
@@ -756,14 +760,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_free;
nlmsg_end(msg, nlh);
- put_device(&device->dev);
+ ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err:
- put_device(&device->dev);
+ ib_device_put(device);
return err;
}
@@ -820,7 +824,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
}
out:
- put_device(&device->dev);
+ ib_device_put(device);
cb->args[0] = idx;
return skb->len;
}
@@ -859,13 +863,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_free;
nlmsg_end(msg, nlh);
- put_device(&device->dev);
+ ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err:
- put_device(&device->dev);
+ ib_device_put(device);
return ret;
}
@@ -1058,7 +1062,7 @@ next: idx++;
if (!filled)
goto err;
- put_device(&device->dev);
+ ib_device_put(device);
return skb->len;
res_err:
@@ -1069,7 +1073,7 @@ err:
nlmsg_cancel(skb, nlh);
err_index:
- put_device(&device->dev);
+ ib_device_put(device);
return ret;
}
diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h
index 3bfab3505a29..af4879bdf3d6 100644
--- a/drivers/infiniband/core/opa_smi.h
+++ b/drivers/infiniband/core/opa_smi.h
@@ -55,7 +55,7 @@ static inline enum smi_action opa_smi_check_local_smp(struct opa_smp *smp,
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
- return (device->process_mad &&
+ return (device->ops.process_mad &&
!opa_get_smp_direction(smp) &&
(smp->hop_ptr == smp->hop_cnt + 1)) ?
IB_SMI_HANDLE : IB_SMI_DISCARD;
@@ -70,7 +70,7 @@ static inline enum smi_action opa_smi_check_local_returning_smp(struct opa_smp *
{
/* C14-13:3 -- We're at the end of the DR segment of path */
/* C14-13:4 -- Hop Pointer == 0 -> give to SM */
- return (device->process_mad &&
+ return (device->ops.process_mad &&
opa_get_smp_direction(smp) &&
!smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD;
}
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 752a55c6bdce..6c4747e61d2b 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -224,12 +224,14 @@ out_unlock:
* uverbs_put_destroy.
*/
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
- u32 id, struct ib_uverbs_file *ufile)
+ u32 id,
+ const struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
int ret;
- uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
+ uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_DESTROY);
if (IS_ERR(uobj))
return uobj;
@@ -243,21 +245,20 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
}
/*
- * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res
- * on success (negative errno on failure). For use by callers that do not need
- * the uobj.
+ * Does both uobj_get_destroy() and uobj_put_destroy(). Returns 0 on success
+ * (negative errno on failure). For use by callers that do not need the uobj.
*/
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
- struct ib_uverbs_file *ufile, int success_res)
+ const struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
- uobj = __uobj_get_destroy(obj, id, ufile);
+ uobj = __uobj_get_destroy(obj, id, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
- return success_res;
+ return 0;
}
/* alloc_uobj must be undone by uverbs_destroy_uobject() */
@@ -267,7 +268,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
struct ib_uobject *uobj;
struct ib_ucontext *ucontext;
- ucontext = ib_uverbs_get_ucontext(ufile);
+ ucontext = ib_uverbs_get_ucontext_file(ufile);
if (IS_ERR(ucontext))
return ERR_CAST(ucontext);
@@ -397,16 +398,23 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
struct ib_uobject *uobj;
int ret;
- if (!obj)
- return ERR_PTR(-EINVAL);
+ if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) {
+ /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
+ uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
+ } else {
+ if (IS_ERR(obj))
+ return ERR_PTR(-EINVAL);
- uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
- if (IS_ERR(uobj))
- return uobj;
+ uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
- if (uobj->uapi_object != obj) {
- ret = -EINVAL;
- goto free;
+ if (uobj->uapi_object != obj) {
+ ret = -EINVAL;
+ goto free;
+ }
}
/*
@@ -426,7 +434,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
return uobj;
free:
- obj->type_class->lookup_put(uobj, mode);
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
@@ -490,7 +498,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
{
struct ib_uobject *ret;
- if (!obj)
+ if (IS_ERR(obj))
return ERR_PTR(-EINVAL);
/*
@@ -812,18 +820,20 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
*/
if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
uverbs_user_mmap_disassociate(ufile);
- if (ib_dev->disassociate_ucontext)
- ib_dev->disassociate_ucontext(ucontext);
+ if (ib_dev->ops.disassociate_ucontext)
+ ib_dev->ops.disassociate_ucontext(ucontext);
}
ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
RDMACG_RESOURCE_HCA_HANDLE);
+ rdma_restrack_del(&ucontext->res);
+
/*
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
* the error return.
*/
- ret = ib_dev->dealloc_ucontext(ucontext);
+ ret = ib_dev->ops.dealloc_ucontext(ucontext);
WARN_ON(ret);
ufile->ucontext = NULL;
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 4886d2bba7c7..be6b8e1257d0 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -118,43 +118,67 @@ void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
* Depending on ID the slot pointer in the radix tree points at one of these
* structs.
*/
-struct uverbs_api_object {
- const struct uverbs_obj_type *type_attrs;
- const struct uverbs_obj_type_class *type_class;
-};
struct uverbs_api_ioctl_method {
- int (__rcu *handler)(struct ib_uverbs_file *ufile,
- struct uverbs_attr_bundle *ctx);
+ int(__rcu *handler)(struct uverbs_attr_bundle *attrs);
DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
u16 bundle_size;
u8 use_stack:1;
u8 driver_method:1;
+ u8 disabled:1;
+ u8 has_udata:1;
u8 key_bitmap_len;
u8 destroy_bkey;
};
+struct uverbs_api_write_method {
+ int (*handler)(struct uverbs_attr_bundle *attrs);
+ u8 disabled:1;
+ u8 is_ex:1;
+ u8 has_udata:1;
+ u8 has_resp:1;
+ u8 req_size;
+ u8 resp_size;
+};
+
struct uverbs_api_attr {
struct uverbs_attr_spec spec;
};
-struct uverbs_api_object;
struct uverbs_api {
/* radix tree contains struct uverbs_api_* pointers */
struct radix_tree_root radix;
enum rdma_driver_id driver_id;
+
+ unsigned int num_write;
+ unsigned int num_write_ex;
+ struct uverbs_api_write_method notsupp_method;
+ const struct uverbs_api_write_method **write_methods;
+ const struct uverbs_api_write_method **write_ex_methods;
};
+/*
+ * Get an uverbs_api_object that corresponds to the given object_id.
+ * Note:
+ * -ENOMSG means that any object is allowed to match during lookup.
+ */
static inline const struct uverbs_api_object *
uapi_get_object(struct uverbs_api *uapi, u16 object_id)
{
- return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+ const struct uverbs_api_object *res;
+
+ if (object_id == UVERBS_IDR_ANY_OBJECT)
+ return ERR_PTR(-ENOMSG);
+
+ res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+ if (!res)
+ return ERR_PTR(-ENOENT);
+
+ return res;
}
char *uapi_key_format(char *S, unsigned int key);
-struct uverbs_api *uverbs_alloc_api(
- const struct uverbs_object_tree_def *const *driver_specs,
- enum rdma_driver_id driver_id);
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev);
void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
void uverbs_disassociate_api(struct uverbs_api *uapi);
void uverbs_destroy_api(struct uverbs_api *uapi);
@@ -162,4 +186,37 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
unsigned int num_attrs);
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
+extern const struct uapi_definition uverbs_def_obj_counters[];
+extern const struct uapi_definition uverbs_def_obj_cq[];
+extern const struct uapi_definition uverbs_def_obj_device[];
+extern const struct uapi_definition uverbs_def_obj_dm[];
+extern const struct uapi_definition uverbs_def_obj_flow_action[];
+extern const struct uapi_definition uverbs_def_obj_intf[];
+extern const struct uapi_definition uverbs_def_obj_mr[];
+extern const struct uapi_definition uverbs_def_write_intf[];
+
+static inline const struct uverbs_api_write_method *
+uapi_get_method(const struct uverbs_api *uapi, u32 command)
+{
+ u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return ERR_PTR(-EINVAL);
+
+ if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ if (cmd_idx >= uapi->num_write_ex)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_ex_methods[cmd_idx];
+ }
+
+ if (cmd_idx >= uapi->num_write)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_methods[cmd_idx];
+}
+
+void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
+ struct ib_udata *udata, unsigned int attr_in,
+ unsigned int attr_out);
+
#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 06d8657ce583..46a5c553c624 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -32,6 +32,7 @@ static const char *type2str(enum rdma_restrack_type type)
[RDMA_RESTRACK_QP] = "QP",
[RDMA_RESTRACK_CM_ID] = "CM_ID",
[RDMA_RESTRACK_MR] = "MR",
+ [RDMA_RESTRACK_CTX] = "CTX",
};
return names[type];
@@ -130,31 +131,14 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
res)->id.device;
case RDMA_RESTRACK_MR:
return container_of(res, struct ib_mr, res)->device;
+ case RDMA_RESTRACK_CTX:
+ return container_of(res, struct ib_ucontext, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
}
}
-static bool res_is_user(struct rdma_restrack_entry *res)
-{
- switch (res->type) {
- case RDMA_RESTRACK_PD:
- return container_of(res, struct ib_pd, res)->uobject;
- case RDMA_RESTRACK_CQ:
- return container_of(res, struct ib_cq, res)->uobject;
- case RDMA_RESTRACK_QP:
- return container_of(res, struct ib_qp, res)->uobject;
- case RDMA_RESTRACK_CM_ID:
- return !res->kern_name;
- case RDMA_RESTRACK_MR:
- return container_of(res, struct ib_mr, res)->pd->uobject;
- default:
- WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
- return false;
- }
-}
-
void rdma_restrack_set_task(struct rdma_restrack_entry *res,
const char *caller)
{
@@ -170,17 +154,17 @@ void rdma_restrack_set_task(struct rdma_restrack_entry *res,
}
EXPORT_SYMBOL(rdma_restrack_set_task);
-void rdma_restrack_add(struct rdma_restrack_entry *res)
+static void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
if (!dev)
return;
- if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res))
+ if (res->type != RDMA_RESTRACK_CM_ID || rdma_is_kernel_res(res))
res->task = NULL;
- if (res_is_user(res)) {
+ if (!rdma_is_kernel_res(res)) {
if (!res->task)
rdma_restrack_set_task(res, NULL);
res->kern_name = NULL;
@@ -196,7 +180,28 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
hash_add(dev->res.hash, &res->node, res->type);
up_write(&dev->res.rwsem);
}
-EXPORT_SYMBOL(rdma_restrack_add);
+
+/**
+ * rdma_restrack_kadd() - add kernel object to the reource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_kadd(struct rdma_restrack_entry *res)
+{
+ res->user = false;
+ rdma_restrack_add(res);
+}
+EXPORT_SYMBOL(rdma_restrack_kadd);
+
+/**
+ * rdma_restrack_uadd() - add user object to the reource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_uadd(struct rdma_restrack_entry *res)
+{
+ res->user = true;
+ rdma_restrack_add(res);
+}
+EXPORT_SYMBOL(rdma_restrack_uadd);
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
{
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index be5ba5e15496..97e6d7b69abf 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1147,7 +1147,7 @@ static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
- rdma_destroy_ah(sm_ah->ah);
+ rdma_destroy_ah(sm_ah->ah, 0);
kfree(sm_ah);
}
@@ -2276,7 +2276,8 @@ static void update_sm_ah(struct work_struct *work)
cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
}
- new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
+ new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
+ RDMA_CREATE_AH_SLEEPABLE);
if (IS_ERR(new_ah->ah)) {
pr_warn("Couldn't create new SM AH\n");
kfree(new_ah);
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 1143c0448666..1efadbccf394 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -626,10 +626,10 @@ int ib_security_modify_qp(struct ib_qp *qp,
}
if (!ret)
- ret = real_qp->device->modify_qp(real_qp,
- qp_attr,
- qp_attr_mask,
- udata);
+ ret = real_qp->device->ops.modify_qp(real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
if (new_pps) {
/* Clean up the lists and free the appropriate
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 33c91c8a16e9..91d9b353ab85 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -67,7 +67,7 @@ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
- return ((device->process_mad &&
+ return ((device->ops.process_mad &&
!ib_get_smp_direction(smp) &&
(smp->hop_ptr == smp->hop_cnt + 1)) ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
@@ -82,7 +82,7 @@ static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp,
{
/* C14-13:3 -- We're at the end of the DR segment of path */
/* C14-13:4 -- Hop Pointer == 0 -> give to SM */
- return ((device->process_mad &&
+ return ((device->ops.process_mad &&
ib_get_smp_direction(smp) &&
!smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 6fcce2c206c6..80f68eb0ba5c 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -462,7 +462,7 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
u16 out_mad_pkey_index = 0;
ssize_t ret;
- if (!dev->process_mad)
+ if (!dev->ops.process_mad)
return -ENOSYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -481,11 +481,11 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
if (attr != IB_PMA_CLASS_PORT_INFO)
in_mad->data[41] = port_num; /* PortSelect field */
- if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
- port_num, NULL, NULL,
- (const struct ib_mad_hdr *)in_mad, mad_size,
- (struct ib_mad_hdr *)out_mad, &mad_size,
- &out_mad_pkey_index) &
+ if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY,
+ port_num, NULL, NULL,
+ (const struct ib_mad_hdr *)in_mad, mad_size,
+ (struct ib_mad_hdr *)out_mad, &mad_size,
+ &out_mad_pkey_index) &
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
ret = -EINVAL;
@@ -786,7 +786,7 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan))
return 0;
- ret = dev->get_hw_stats(dev, stats, port_num, index);
+ ret = dev->ops.get_hw_stats(dev, stats, port_num, index);
if (ret < 0)
return ret;
if (ret == stats->num_counters)
@@ -946,7 +946,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
struct rdma_hw_stats *stats;
int i, ret;
- stats = device->alloc_hw_stats(device, port_num);
+ stats = device->ops.alloc_hw_stats(device, port_num);
if (!stats)
return;
@@ -964,8 +964,8 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
if (!hsag)
goto err_free_stats;
- ret = device->get_hw_stats(device, stats, port_num,
- stats->num_counters);
+ ret = device->ops.get_hw_stats(device, stats, port_num,
+ stats->num_counters);
if (ret != stats->num_counters)
goto err_free_hsag;
@@ -1057,7 +1057,7 @@ static int add_port(struct ib_device *device, int port_num,
goto err_put;
}
- if (device->process_mad) {
+ if (device->ops.process_mad) {
p->pma_table = get_counter_table(device, port_num);
ret = sysfs_create_group(&p->kobj, p->pma_table);
if (ret)
@@ -1124,7 +1124,7 @@ static int add_port(struct ib_device *device, int port_num,
* port, so holder should be device. Therefore skip per port conunter
* initialization.
*/
- if (device->alloc_hw_stats && port_num)
+ if (device->ops.alloc_hw_stats && port_num)
setup_hw_stats(device, p, port_num);
list_add_tail(&p->kobj.entry, &device->port_list);
@@ -1245,7 +1245,7 @@ static ssize_t node_desc_store(struct device *device,
struct ib_device_modify desc = {};
int ret;
- if (!dev->modify_device)
+ if (!dev->ops.modify_device)
return -EIO;
memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX));
@@ -1341,7 +1341,7 @@ int ib_device_register_sysfs(struct ib_device *device,
}
}
- if (device->alloc_hw_stats)
+ if (device->ops.alloc_hw_stats)
setup_hw_stats(device, NULL, 0);
return 0;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 73332b9a25b5..7541fbaf58a3 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1242,7 +1242,7 @@ static void ib_ucm_add_one(struct ib_device *device)
dev_t base;
struct ib_ucm_device *ucm_dev;
- if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1))
+ if (!device->ops.alloc_ucontext || !rdma_cap_ib_cm(device, 1))
return;
ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index f55f48f6b272..de8d31ab8945 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -88,10 +88,9 @@ enum {
struct ib_umad_port {
struct cdev cdev;
- struct device *dev;
-
+ struct device dev;
struct cdev sm_cdev;
- struct device *sm_dev;
+ struct device sm_dev;
struct semaphore sm_sem;
struct mutex file_mutex;
@@ -104,8 +103,8 @@ struct ib_umad_port {
};
struct ib_umad_device {
- struct kobject kobj;
- struct ib_umad_port port[0];
+ struct kref kref;
+ struct ib_umad_port ports[];
};
struct ib_umad_file {
@@ -130,8 +129,6 @@ struct ib_umad_packet {
struct ib_user_mad mad;
};
-static struct class *umad_class;
-
static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
IB_UMAD_NUM_FIXED_MINOR;
@@ -143,17 +140,23 @@ static DEFINE_IDA(umad_ida);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
-static void ib_umad_release_dev(struct kobject *kobj)
+static void ib_umad_dev_free(struct kref *kref)
{
struct ib_umad_device *dev =
- container_of(kobj, struct ib_umad_device, kobj);
+ container_of(kref, struct ib_umad_device, kref);
kfree(dev);
}
-static struct kobj_type ib_umad_dev_ktype = {
- .release = ib_umad_release_dev,
-};
+static void ib_umad_dev_get(struct ib_umad_device *dev)
+{
+ kref_get(&dev->kref);
+}
+
+static void ib_umad_dev_put(struct ib_umad_device *dev)
+{
+ kref_put(&dev->kref, ib_umad_dev_free);
+}
static int hdr_size(struct ib_umad_file *file)
{
@@ -205,7 +208,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_umad_packet *packet = send_wc->send_buf->context[0];
dequeue_send(file, packet);
- rdma_destroy_ah(packet->msg->ah);
+ rdma_destroy_ah(packet->msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
@@ -621,7 +624,7 @@ err_send:
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
err_up:
mutex_unlock(&file->mutex);
err:
@@ -657,7 +660,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent: invalid device\n");
ret = -EPIPE;
goto out;
@@ -669,7 +672,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent: invalid QPN %d specified\n",
ureq.qpn);
ret = -EINVAL;
@@ -680,7 +683,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent: Max Agents (%u) reached\n",
IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
@@ -725,10 +728,10 @@ found:
if (!file->already_used) {
file->already_used = 1;
if (!file->use_pkey_index) {
- dev_warn(file->port->dev,
+ dev_warn(&file->port->dev,
"process %s did not enable P_Key index support.\n",
current->comm);
- dev_warn(file->port->dev,
+ dev_warn(&file->port->dev,
" Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
}
}
@@ -759,7 +762,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2: invalid device\n");
ret = -EPIPE;
goto out;
@@ -771,7 +774,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2: invalid QPN %d specified\n",
ureq.qpn);
ret = -EINVAL;
@@ -779,7 +782,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
}
if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
ret = -EINVAL;
@@ -796,7 +799,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2: Max Agents (%u) reached\n",
IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
@@ -808,7 +811,7 @@ found:
req.mgmt_class = ureq.mgmt_class;
req.mgmt_class_version = ureq.mgmt_class_version;
if (ureq.oui & 0xff000000) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
ureq.oui);
ret = -EINVAL;
@@ -986,8 +989,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
goto out;
}
- kobject_get(&port->umad_dev->kobj);
-
+ ib_umad_dev_get(port->umad_dev);
out:
mutex_unlock(&port->file_mutex);
return ret;
@@ -1025,8 +1027,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->port->file_mutex);
kfree(file);
- kobject_put(&dev->kobj);
-
+ ib_umad_dev_put(dev);
return 0;
}
@@ -1076,8 +1077,7 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
if (ret)
goto err_clr_sm_cap;
- kobject_get(&port->umad_dev->kobj);
-
+ ib_umad_dev_get(port->umad_dev);
return 0;
err_clr_sm_cap:
@@ -1106,8 +1106,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
up(&port->sm_sem);
- kobject_put(&port->umad_dev->kobj);
-
+ ib_umad_dev_put(port->umad_dev);
return ret;
}
@@ -1124,7 +1123,7 @@ static struct ib_client umad_client = {
.remove = ib_umad_remove_one
};
-static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
+static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct ib_umad_port *port = dev_get_drvdata(dev);
@@ -1134,9 +1133,9 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
}
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static DEVICE_ATTR_RO(ibdev);
-static ssize_t show_port(struct device *dev, struct device_attribute *attr,
+static ssize_t port_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct ib_umad_port *port = dev_get_drvdata(dev);
@@ -1146,10 +1145,59 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n", port->port_num);
}
-static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
+static DEVICE_ATTR_RO(port);
-static CLASS_ATTR_STRING(abi_version, S_IRUGO,
- __stringify(IB_USER_MAD_ABI_VERSION));
+static struct attribute *umad_class_dev_attrs[] = {
+ &dev_attr_ibdev.attr,
+ &dev_attr_port.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(umad_class_dev);
+
+static char *umad_devnode(struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
+static ssize_t abi_version_show(struct class *class,
+ struct class_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+}
+static CLASS_ATTR_RO(abi_version);
+
+static struct attribute *umad_class_attrs[] = {
+ &class_attr_abi_version.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(umad_class);
+
+static struct class umad_class = {
+ .name = "infiniband_mad",
+ .devnode = umad_devnode,
+ .class_groups = umad_class_groups,
+ .dev_groups = umad_class_dev_groups,
+};
+
+static void ib_umad_release_port(struct device *device)
+{
+ struct ib_umad_port *port = dev_get_drvdata(device);
+ struct ib_umad_device *umad_dev = port->umad_dev;
+
+ ib_umad_dev_put(umad_dev);
+}
+
+static void ib_umad_init_port_dev(struct device *dev,
+ struct ib_umad_port *port,
+ const struct ib_device *device)
+{
+ device_initialize(dev);
+ ib_umad_dev_get(port->umad_dev);
+ dev->class = &umad_class;
+ dev->parent = device->dev.parent;
+ dev_set_drvdata(dev, port);
+ dev->release = ib_umad_release_port;
+}
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_device *umad_dev,
@@ -1158,6 +1206,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
int devnum;
dev_t base_umad;
dev_t base_issm;
+ int ret;
devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
if (devnum < 0)
@@ -1172,63 +1221,41 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
}
port->ib_dev = device;
+ port->umad_dev = umad_dev;
port->port_num = port_num;
sema_init(&port->sm_sem, 1);
mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
+ ib_umad_init_port_dev(&port->dev, port, device);
+ port->dev.devt = base_umad;
+ dev_set_name(&port->dev, "umad%d", port->dev_num);
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
- cdev_set_parent(&port->cdev, &umad_dev->kobj);
- kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
- if (cdev_add(&port->cdev, base_umad, 1))
- goto err_cdev;
- port->dev = device_create(umad_class, device->dev.parent,
- port->cdev.dev, port,
- "umad%d", port->dev_num);
- if (IS_ERR(port->dev))
+ ret = cdev_device_add(&port->cdev, &port->dev);
+ if (ret)
goto err_cdev;
- if (device_create_file(port->dev, &dev_attr_ibdev))
- goto err_dev;
- if (device_create_file(port->dev, &dev_attr_port))
- goto err_dev;
-
+ ib_umad_init_port_dev(&port->sm_dev, port, device);
+ port->sm_dev.devt = base_issm;
+ dev_set_name(&port->sm_dev, "issm%d", port->dev_num);
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
- cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
- kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
- if (cdev_add(&port->sm_cdev, base_issm, 1))
- goto err_sm_cdev;
-
- port->sm_dev = device_create(umad_class, device->dev.parent,
- port->sm_cdev.dev, port,
- "issm%d", port->dev_num);
- if (IS_ERR(port->sm_dev))
- goto err_sm_cdev;
-
- if (device_create_file(port->sm_dev, &dev_attr_ibdev))
- goto err_sm_dev;
- if (device_create_file(port->sm_dev, &dev_attr_port))
- goto err_sm_dev;
-
- return 0;
-err_sm_dev:
- device_destroy(umad_class, port->sm_cdev.dev);
+ ret = cdev_device_add(&port->sm_cdev, &port->sm_dev);
+ if (ret)
+ goto err_dev;
-err_sm_cdev:
- cdev_del(&port->sm_cdev);
+ return 0;
err_dev:
- device_destroy(umad_class, port->cdev.dev);
-
+ put_device(&port->sm_dev);
+ cdev_device_del(&port->cdev, &port->dev);
err_cdev:
- cdev_del(&port->cdev);
+ put_device(&port->dev);
ida_free(&umad_ida, devnum);
-
- return -1;
+ return ret;
}
static void ib_umad_kill_port(struct ib_umad_port *port)
@@ -1236,17 +1263,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
struct ib_umad_file *file;
int id;
- dev_set_drvdata(port->dev, NULL);
- dev_set_drvdata(port->sm_dev, NULL);
-
- device_destroy(umad_class, port->cdev.dev);
- device_destroy(umad_class, port->sm_cdev.dev);
-
- cdev_del(&port->cdev);
- cdev_del(&port->sm_cdev);
-
mutex_lock(&port->file_mutex);
+ /* Mark ib_dev NULL and block ioctl or other file ops to progress
+ * further.
+ */
port->ib_dev = NULL;
list_for_each_entry(file, &port->file_list, port_list) {
@@ -1260,6 +1281,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
}
mutex_unlock(&port->file_mutex);
+
+ cdev_device_del(&port->sm_cdev, &port->sm_dev);
+ put_device(&port->sm_dev);
+ cdev_device_del(&port->cdev, &port->dev);
+ put_device(&port->dev);
ida_free(&umad_ida, port->dev_num);
}
@@ -1272,22 +1298,17 @@ static void ib_umad_add_one(struct ib_device *device)
s = rdma_start_port(device);
e = rdma_end_port(device);
- umad_dev = kzalloc(sizeof *umad_dev +
- (e - s + 1) * sizeof (struct ib_umad_port),
- GFP_KERNEL);
+ umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL);
if (!umad_dev)
return;
- kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);
-
+ kref_init(&umad_dev->kref);
for (i = s; i <= e; ++i) {
if (!rdma_cap_ib_mad(device, i))
continue;
- umad_dev->port[i - s].umad_dev = umad_dev;
-
if (ib_umad_init_port(device, i, umad_dev,
- &umad_dev->port[i - s]))
+ &umad_dev->ports[i - s]))
goto err;
count++;
@@ -1305,10 +1326,10 @@ err:
if (!rdma_cap_ib_mad(device, i))
continue;
- ib_umad_kill_port(&umad_dev->port[i - s]);
+ ib_umad_kill_port(&umad_dev->ports[i - s]);
}
free:
- kobject_put(&umad_dev->kobj);
+ ib_umad_dev_put(umad_dev);
}
static void ib_umad_remove_one(struct ib_device *device, void *client_data)
@@ -1321,15 +1342,9 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data)
for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) {
if (rdma_cap_ib_mad(device, i + rdma_start_port(device)))
- ib_umad_kill_port(&umad_dev->port[i]);
+ ib_umad_kill_port(&umad_dev->ports[i]);
}
-
- kobject_put(&umad_dev->kobj);
-}
-
-static char *umad_devnode(struct device *dev, umode_t *mode)
-{
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+ ib_umad_dev_put(umad_dev);
}
static int __init ib_umad_init(void)
@@ -1338,7 +1353,7 @@ static int __init ib_umad_init(void)
ret = register_chrdev_region(base_umad_dev,
IB_UMAD_NUM_FIXED_MINOR * 2,
- "infiniband_mad");
+ umad_class.name);
if (ret) {
pr_err("couldn't register device number\n");
goto out;
@@ -1346,28 +1361,19 @@ static int __init ib_umad_init(void)
ret = alloc_chrdev_region(&dynamic_umad_dev, 0,
IB_UMAD_NUM_DYNAMIC_MINOR * 2,
- "infiniband_mad");
+ umad_class.name);
if (ret) {
pr_err("couldn't register dynamic device number\n");
goto out_alloc;
}
dynamic_issm_dev = dynamic_umad_dev + IB_UMAD_NUM_DYNAMIC_MINOR;
- umad_class = class_create(THIS_MODULE, "infiniband_mad");
- if (IS_ERR(umad_class)) {
- ret = PTR_ERR(umad_class);
+ ret = class_register(&umad_class);
+ if (ret) {
pr_err("couldn't create class infiniband_mad\n");
goto out_chrdev;
}
- umad_class->devnode = umad_devnode;
-
- ret = class_create_file(umad_class, &class_attr_abi_version.attr);
- if (ret) {
- pr_err("couldn't create abi_version attribute\n");
- goto out_class;
- }
-
ret = ib_register_client(&umad_client);
if (ret) {
pr_err("couldn't register ib_umad client\n");
@@ -1377,7 +1383,7 @@ static int __init ib_umad_init(void)
return 0;
out_class:
- class_destroy(umad_class);
+ class_unregister(&umad_class);
out_chrdev:
unregister_chrdev_region(dynamic_umad_dev,
@@ -1394,7 +1400,7 @@ out:
static void __exit ib_umad_cleanup(void)
{
ib_unregister_client(&umad_client);
- class_destroy(umad_class);
+ class_unregister(&umad_class);
unregister_chrdev_region(base_umad_dev,
IB_UMAD_NUM_FIXED_MINOR * 2);
unregister_chrdev_region(dynamic_umad_dev,
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c97935a0c7c6..ea0bc6885517 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -161,9 +161,6 @@ struct ib_uverbs_file {
struct mutex umap_lock;
struct list_head umaps;
- u64 uverbs_cmd_mask;
- u64 uverbs_ex_cmd_mask;
-
struct idr idr;
/* spinlock protects write access to idr */
spinlock_t idr_lock;
@@ -249,7 +246,6 @@ int uverbs_dealloc_mw(struct ib_mw *mw);
void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
-void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
struct ib_uverbs_flow_spec {
@@ -297,63 +293,29 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
-#define IB_UVERBS_DECLARE_CMD(name) \
- ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
- const char __user *buf, int in_len, \
- int out_len)
-
-IB_UVERBS_DECLARE_CMD(get_context);
-IB_UVERBS_DECLARE_CMD(query_device);
-IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(alloc_pd);
-IB_UVERBS_DECLARE_CMD(dealloc_pd);
-IB_UVERBS_DECLARE_CMD(reg_mr);
-IB_UVERBS_DECLARE_CMD(rereg_mr);
-IB_UVERBS_DECLARE_CMD(dereg_mr);
-IB_UVERBS_DECLARE_CMD(alloc_mw);
-IB_UVERBS_DECLARE_CMD(dealloc_mw);
-IB_UVERBS_DECLARE_CMD(create_comp_channel);
-IB_UVERBS_DECLARE_CMD(create_cq);
-IB_UVERBS_DECLARE_CMD(resize_cq);
-IB_UVERBS_DECLARE_CMD(poll_cq);
-IB_UVERBS_DECLARE_CMD(req_notify_cq);
-IB_UVERBS_DECLARE_CMD(destroy_cq);
-IB_UVERBS_DECLARE_CMD(create_qp);
-IB_UVERBS_DECLARE_CMD(open_qp);
-IB_UVERBS_DECLARE_CMD(query_qp);
-IB_UVERBS_DECLARE_CMD(modify_qp);
-IB_UVERBS_DECLARE_CMD(destroy_qp);
-IB_UVERBS_DECLARE_CMD(post_send);
-IB_UVERBS_DECLARE_CMD(post_recv);
-IB_UVERBS_DECLARE_CMD(post_srq_recv);
-IB_UVERBS_DECLARE_CMD(create_ah);
-IB_UVERBS_DECLARE_CMD(destroy_ah);
-IB_UVERBS_DECLARE_CMD(attach_mcast);
-IB_UVERBS_DECLARE_CMD(detach_mcast);
-IB_UVERBS_DECLARE_CMD(create_srq);
-IB_UVERBS_DECLARE_CMD(modify_srq);
-IB_UVERBS_DECLARE_CMD(query_srq);
-IB_UVERBS_DECLARE_CMD(destroy_srq);
-IB_UVERBS_DECLARE_CMD(create_xsrq);
-IB_UVERBS_DECLARE_CMD(open_xrcd);
-IB_UVERBS_DECLARE_CMD(close_xrcd);
-
-#define IB_UVERBS_DECLARE_EX_CMD(name) \
- int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
- struct ib_udata *ucore, \
- struct ib_udata *uhw)
-
-IB_UVERBS_DECLARE_EX_CMD(create_flow);
-IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
-IB_UVERBS_DECLARE_EX_CMD(query_device);
-IB_UVERBS_DECLARE_EX_CMD(create_cq);
-IB_UVERBS_DECLARE_EX_CMD(create_qp);
-IB_UVERBS_DECLARE_EX_CMD(create_wq);
-IB_UVERBS_DECLARE_EX_CMD(modify_wq);
-IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
-IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(modify_qp);
-IB_UVERBS_DECLARE_EX_CMD(modify_cq);
+/*
+ * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
+ * PortInfo CapabilityMask, but was extended with unique bits.
+ */
+static inline u32 make_port_cap_flags(const struct ib_port_attr *attr)
+{
+ u32 res;
+
+ /* All IBA CapabilityMask bits are passed through here, except bit 26,
+ * which is overridden with IP_BASED_GIDS. This is due to a historical
+ * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
+ * bits match the IBA definition across all kernel versions.
+ */
+ res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ if (attr->ip_gids)
+ res |= IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ return res;
+}
+
+void copy_port_attr_to_resp(struct ib_port_attr *attr,
+ struct ib_uverbs_query_port_resp *resp,
+ struct ib_device *ib_dev, u8 port_num);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a93853770e3c..6b12cc5f97b2 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -47,11 +47,134 @@
#include "uverbs.h"
#include "core_priv.h"
+/*
+ * Copy a response to userspace. If the provided 'resp' is larger than the
+ * user buffer it is silently truncated. If the user provided a larger buffer
+ * then the trailing portion is zero filled.
+ *
+ * These semantics are intended to support future extension of the output
+ * structures.
+ */
+static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp,
+ size_t resp_len)
+{
+ int ret;
+
+ if (copy_to_user(attrs->ucore.outbuf, resp,
+ min(attrs->ucore.outlen, resp_len)))
+ return -EFAULT;
+
+ if (resp_len < attrs->ucore.outlen) {
+ /*
+ * Zero fill any extra memory that user
+ * space might have provided.
+ */
+ ret = clear_user(attrs->ucore.outbuf + resp_len,
+ attrs->ucore.outlen - resp_len);
+ if (ret)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy a request from userspace. If the provided 'req' is larger than the
+ * user buffer then the user buffer is zero extended into the 'req'. If 'req'
+ * is smaller than the user buffer then the uncopied bytes in the user buffer
+ * must be zero.
+ */
+static int uverbs_request(struct uverbs_attr_bundle *attrs, void *req,
+ size_t req_len)
+{
+ if (copy_from_user(req, attrs->ucore.inbuf,
+ min(attrs->ucore.inlen, req_len)))
+ return -EFAULT;
+
+ if (attrs->ucore.inlen < req_len) {
+ memset(req + attrs->ucore.inlen, 0,
+ req_len - attrs->ucore.inlen);
+ } else if (attrs->ucore.inlen > req_len) {
+ if (!ib_is_buffer_cleared(attrs->ucore.inbuf + req_len,
+ attrs->ucore.inlen - req_len))
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+/*
+ * Generate the value for the 'response_length' protocol used by write_ex.
+ * This is the number of bytes the kernel actually wrote. Userspace can use
+ * this to detect what structure members in the response the kernel
+ * understood.
+ */
+static u32 uverbs_response_length(struct uverbs_attr_bundle *attrs,
+ size_t resp_len)
+{
+ return min_t(size_t, attrs->ucore.outlen, resp_len);
+}
+
+/*
+ * The iterator version of the request interface is for handlers that need to
+ * step over a flex array at the end of a command header.
+ */
+struct uverbs_req_iter {
+ const void __user *cur;
+ const void __user *end;
+};
+
+static int uverbs_request_start(struct uverbs_attr_bundle *attrs,
+ struct uverbs_req_iter *iter,
+ void *req,
+ size_t req_len)
+{
+ if (attrs->ucore.inlen < req_len)
+ return -ENOSPC;
+
+ if (copy_from_user(req, attrs->ucore.inbuf, req_len))
+ return -EFAULT;
+
+ iter->cur = attrs->ucore.inbuf + req_len;
+ iter->end = attrs->ucore.inbuf + attrs->ucore.inlen;
+ return 0;
+}
+
+static int uverbs_request_next(struct uverbs_req_iter *iter, void *val,
+ size_t len)
+{
+ if (iter->cur + len > iter->end)
+ return -ENOSPC;
+
+ if (copy_from_user(val, iter->cur, len))
+ return -EFAULT;
+
+ iter->cur += len;
+ return 0;
+}
+
+static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter,
+ size_t len)
+{
+ const void __user *res = iter->cur;
+
+ if (iter->cur + len > iter->end)
+ return ERR_PTR(-ENOSPC);
+ iter->cur += len;
+ return res;
+}
+
+static int uverbs_request_finish(struct uverbs_req_iter *iter)
+{
+ if (!ib_is_buffer_cleared(iter->cur, iter->end - iter->cur))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
static struct ib_uverbs_completion_event_file *
-_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
+_ib_uverbs_lookup_comp_file(s32 fd, const struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
- fd, ufile);
+ fd, attrs);
if (IS_ERR(uobj))
return (void *)uobj;
@@ -65,24 +188,20 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
#define ib_uverbs_lookup_comp_file(_fd, _ufile) \
_ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
-ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_file *file = attrs->ufile;
struct ib_uverbs_get_context cmd;
struct ib_uverbs_get_context_resp resp;
- struct ib_udata udata;
struct ib_ucontext *ucontext;
struct file *filp;
struct ib_rdmacg_object cg_obj;
struct ib_device *ib_dev;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
mutex_lock(&file->ucontext_lock);
ib_dev = srcu_dereference(file->device->ib_dev,
@@ -97,16 +216,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err;
}
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
if (ret)
goto err;
- ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
+ ucontext = ib_dev->ops.alloc_ucontext(ib_dev, &attrs->driver_udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
goto err_alloc;
@@ -141,13 +255,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_fd;
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_file;
- }
fd_install(resp.async_fd, filp);
+ ucontext->res.type = RDMA_RESTRACK_CTX;
+ rdma_restrack_uadd(&ucontext->res);
+
/*
* Make sure that ib_uverbs_get_ucontext() sees the pointer update
* only after all writes to setup the ucontext have completed
@@ -156,7 +272,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
mutex_unlock(&file->ucontext_lock);
- return in_len;
+ return 0;
err_file:
ib_uverbs_free_async_event_file(file);
@@ -166,7 +282,7 @@ err_fd:
put_unused_fd(resp.async_fd);
err_free:
- ib_dev->dealloc_ucontext(ucontext);
+ ib_dev->ops.dealloc_ucontext(ucontext);
err_alloc:
ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
@@ -224,57 +340,28 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->phys_port_cnt = ib_dev->phys_port_cnt;
}
-ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
struct ib_ucontext *ucontext;
+ int ret;
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&resp, 0, sizeof resp);
copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
-}
-
-/*
- * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
- * PortInfo CapabilityMask, but was extended with unique bits.
- */
-static u32 make_port_cap_flags(const struct ib_port_attr *attr)
-{
- u32 res;
-
- /* All IBA CapabilityMask bits are passed through here, except bit 26,
- * which is overridden with IP_BASED_GIDS. This is due to a historical
- * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
- * bits match the IBA definition across all kernel versions.
- */
- res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
-
- if (attr->ip_gids)
- res |= IB_UVERBS_PCF_IP_BASED_GIDS;
-
- return res;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_port cmd;
struct ib_uverbs_query_port_resp resp;
@@ -283,88 +370,43 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
struct ib_ucontext *ucontext;
struct ib_device *ib_dev;
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
ib_dev = ucontext->device;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
ret = ib_query_port(ib_dev, cmd.port_num, &attr);
if (ret)
return ret;
memset(&resp, 0, sizeof resp);
+ copy_port_attr_to_resp(&attr, &resp, ib_dev, cmd.port_num);
- resp.state = attr.state;
- resp.max_mtu = attr.max_mtu;
- resp.active_mtu = attr.active_mtu;
- resp.gid_tbl_len = attr.gid_tbl_len;
- resp.port_cap_flags = make_port_cap_flags(&attr);
- resp.max_msg_sz = attr.max_msg_sz;
- resp.bad_pkey_cntr = attr.bad_pkey_cntr;
- resp.qkey_viol_cntr = attr.qkey_viol_cntr;
- resp.pkey_tbl_len = attr.pkey_tbl_len;
-
- if (rdma_is_grh_required(ib_dev, cmd.port_num))
- resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED;
-
- if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
- resp.lid = OPA_TO_IB_UCAST_LID(attr.lid);
- resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid);
- } else {
- resp.lid = ib_lid_cpu16(attr.lid);
- resp.sm_lid = ib_lid_cpu16(attr.sm_lid);
- }
- resp.lmc = attr.lmc;
- resp.max_vl_num = attr.max_vl_num;
- resp.sm_sl = attr.sm_sl;
- resp.subnet_timeout = attr.subnet_timeout;
- resp.init_type_reply = attr.init_type_reply;
- resp.active_width = attr.active_width;
- resp.active_speed = attr.active_speed;
- resp.phys_state = attr.phys_state;
- resp.link_layer = rdma_port_get_link_layer(ib_dev,
- cmd.port_num);
-
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_pd cmd;
struct ib_uverbs_alloc_pd_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata);
+ pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
goto err;
@@ -379,14 +421,13 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
pd->res.type = RDMA_RESTRACK_PD;
- rdma_restrack_add(&pd->res);
+ rdma_restrack_uadd(&pd->res);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
ib_dealloc_pd(pd);
@@ -396,17 +437,16 @@ err:
return ret;
}
-ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_dealloc_pd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_pd cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
}
struct xrcd_table_entry {
@@ -494,13 +534,11 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
}
}
-ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_device *ibudev = attrs->ufile->device;
struct ib_uverbs_open_xrcd cmd;
struct ib_uverbs_open_xrcd_resp resp;
- struct ib_udata udata;
struct ib_uxrcd_object *obj;
struct ib_xrcd *xrcd = NULL;
struct fd f = {NULL, 0};
@@ -509,18 +547,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
int new_xrcd = 0;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- mutex_lock(&file->device->xrcd_tree_mutex);
+ mutex_lock(&ibudev->xrcd_tree_mutex);
if (cmd.fd != -1) {
/* search for file descriptor */
@@ -531,7 +562,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
inode = file_inode(f.file);
- xrcd = find_xrcd(file->device, inode);
+ xrcd = find_xrcd(ibudev, inode);
if (!xrcd && !(cmd.oflags & O_CREAT)) {
/* no file descriptor. Need CREATE flag */
ret = -EAGAIN;
@@ -544,7 +575,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file,
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, attrs,
&ib_dev);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
@@ -552,7 +583,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
if (!xrcd) {
- xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata);
+ xrcd = ib_dev->ops.alloc_xrcd(ib_dev, obj->uobject.context,
+ &attrs->driver_udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
@@ -574,29 +606,28 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (inode) {
if (new_xrcd) {
/* create new inode/xrcd table entry */
- ret = xrcd_table_insert(file->device, inode, xrcd);
+ ret = xrcd_table_insert(ibudev, inode, xrcd);
if (ret)
goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
if (f.file)
fdput(f);
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
- return uobj_alloc_commit(&obj->uobject, in_len);
+ return uobj_alloc_commit(&obj->uobject);
err_copy:
if (inode) {
if (new_xrcd)
- xrcd_table_delete(file->device, inode);
+ xrcd_table_delete(ibudev, inode);
atomic_dec(&xrcd->usecnt);
}
@@ -610,22 +641,21 @@ err_tree_mutex_unlock:
if (f.file)
fdput(f);
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
return ret;
}
-ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_close_xrcd cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs);
}
int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
@@ -653,29 +683,19 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
return ret;
}
-ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
@@ -684,11 +704,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -703,8 +723,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
}
}
- mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags, &udata);
+ mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+ cmd.access_flags,
+ &attrs->driver_udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -716,7 +737,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_add(&mr->res);
+ rdma_restrack_uadd(&mr->res);
uobj->object = mr;
@@ -725,14 +746,13 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
resp.rkey = mr->rkey;
resp.mr_handle = uobj->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
ib_dereg_mr(mr);
@@ -745,29 +765,19 @@ err_free:
return ret;
}
-ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_rereg_mr cmd;
struct ib_uverbs_rereg_mr_resp resp;
- struct ib_udata udata;
struct ib_pd *pd = NULL;
struct ib_mr *mr;
struct ib_pd *old_pd;
int ret;
struct ib_uobject *uobj;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
return -EINVAL;
@@ -777,7 +787,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -796,7 +806,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
if (cmd.flags & IB_MR_REREG_PD) {
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
- file);
+ attrs);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -804,9 +814,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
old_pd = mr->pd;
- ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
- cmd.length, cmd.hca_va,
- cmd.access_flags, pd, &udata);
+ ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start,
+ cmd.length, cmd.hca_va,
+ cmd.access_flags, pd,
+ &attrs->driver_udata);
if (!ret) {
if (cmd.flags & IB_MR_REREG_PD) {
atomic_inc(&pd->usecnt);
@@ -821,10 +832,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
- ret = -EFAULT;
- else
- ret = in_len;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
@@ -836,54 +844,43 @@ put_uobjs:
return ret;
}
-ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dereg_mr cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
}
-ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_mw cmd;
struct ib_uverbs_alloc_mw_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mw *mw;
- struct ib_udata udata;
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
}
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
- mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
+ mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
goto err_put;
@@ -900,13 +897,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
resp.rkey = mw->rkey;
resp.mw_handle = uobj->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
uverbs_dealloc_mw(mw);
@@ -917,36 +913,32 @@ err_free:
return ret;
}
-ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dealloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_mw cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, attrs);
}
-ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
struct ib_uobject *uobj;
struct ib_uverbs_completion_event_file *ev_file;
struct ib_device *ib_dev;
+ int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -956,25 +948,17 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
uobj);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret) {
uobj_alloc_abort(uobj);
- return -EFAULT;
+ return ret;
}
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
}
-static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_cq *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *udata,
- void *context),
- void *context)
+static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_cq *cmd)
{
struct ib_ucq_object *obj;
struct ib_uverbs_completion_event_file *ev_file = NULL;
@@ -984,21 +968,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
struct ib_cq_init_attr attr = {};
struct ib_device *ib_dev;
- if (cmd->comp_vector >= file->device->num_comp_vectors)
+ if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file,
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs,
&ib_dev);
if (IS_ERR(obj))
return obj;
- if (!ib_dev->create_cq) {
- ret = -EOPNOTSUPP;
- goto err;
- }
-
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs);
if (IS_ERR(ev_file)) {
ret = PTR_ERR(ev_file);
goto err;
@@ -1013,11 +992,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
attr.cqe = cmd->cqe;
attr.comp_vector = cmd->comp_vector;
+ attr.flags = cmd->flags;
- if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
- attr.flags = cmd->flags;
-
- cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw);
+ cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context,
+ &attrs->driver_udata);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1034,18 +1012,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uobject.id;
resp.base.cqe = cq->cqe;
-
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_add(&cq->res);
+ rdma_restrack_uadd(&cq->res);
- ret = cb(file, obj, &resp, ucore, context);
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
goto err_cb;
- ret = uobj_alloc_commit(&obj->uobject, 0);
+ ret = uobj_alloc_commit(&obj->uobject);
if (ret)
return ERR_PTR(ret);
return obj;
@@ -1055,7 +1031,7 @@ err_cb:
err_file:
if (ev_file)
- ib_uverbs_release_ucq(file, ev_file, obj);
+ ib_uverbs_release_ucq(attrs->ufile, ev_file, obj);
err:
uobj_alloc_abort(&obj->uobject);
@@ -1063,41 +1039,16 @@ err:
return ERR_PTR(ret);
}
-static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
-}
-
-ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_ex_create_cq cmd_ex;
- struct ib_uverbs_create_cq_resp resp;
- struct ib_udata ucore;
- struct ib_udata uhw;
struct ib_ucq_object *obj;
+ int ret;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
- sizeof(cmd), sizeof(resp));
-
- ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -1105,43 +1056,19 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
- obj = create_cq(file, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), comp_channel) +
- sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
- NULL);
-
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return in_len;
-}
-
-static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
-{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
+ obj = create_cq(attrs, &cmd_ex);
+ return PTR_ERR_OR_ZERO(obj);
}
-int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_cq_resp resp;
struct ib_uverbs_ex_create_cq cmd;
struct ib_ucq_object *obj;
- int err;
-
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask)
return -EINVAL;
@@ -1149,52 +1076,36 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- obj = create_cq(file, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_cq_cb, NULL);
-
+ obj = create_cq(attrs, &cmd);
return PTR_ERR_OR_ZERO(obj);
}
-ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_resize_cq cmd;
struct ib_uverbs_resize_cq_resp resp = {};
- struct ib_udata udata;
struct ib_cq *cq;
int ret = -EINVAL;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
- ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
+ ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata);
if (ret)
goto out;
resp.cqe = cq->cqe;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp.cqe))
- ret = -EFAULT;
-
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
uobj_put_obj_read(cq);
- return ret ? ret : in_len;
+ return ret;
}
static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
@@ -1227,9 +1138,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
return 0;
}
-ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_poll_cq cmd;
struct ib_uverbs_poll_cq_resp resp;
@@ -1239,15 +1148,16 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
struct ib_wc wc;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
/* we copy a struct ib_uverbs_poll_cq_resp to user space */
- header_ptr = u64_to_user_ptr(cmd.response);
+ header_ptr = attrs->ucore.outbuf;
data_ptr = header_ptr + sizeof resp;
memset(&resp, 0, sizeof resp);
@@ -1271,24 +1181,24 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
goto out_put;
}
- ret = in_len;
+ ret = 0;
out_put:
uobj_put_obj_read(cq);
return ret;
}
-ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_req_notify_cq cmd;
struct ib_cq *cq;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
@@ -1297,22 +1207,22 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
uobj_put_obj_read(cq);
- return in_len;
+ return 0;
}
-ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_cq cmd;
struct ib_uverbs_destroy_cq_resp resp;
struct ib_uobject *uobj;
struct ib_ucq_object *obj;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -1323,21 +1233,11 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-static int create_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_qp *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *udata),
- void *context)
+static int create_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_qp *cmd)
{
struct ib_uqp_object *obj;
struct ib_device *device;
@@ -1347,7 +1247,6 @@ static int create_qp(struct ib_uverbs_file *file,
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
- char *buf;
struct ib_qp_init_attr attr = {};
struct ib_uverbs_ex_create_qp_resp resp;
int ret;
@@ -1358,7 +1257,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -1366,12 +1265,10 @@ static int create_qp(struct ib_uverbs_file *file,
obj->uevent.uobject.user_handle = cmd->user_handle;
mutex_init(&obj->mcast_lock);
- if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
- sizeof(cmd->rwq_ind_tbl_handle) &&
- (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
+ if (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE) {
ind_tbl = uobj_get_obj_read(rwq_ind_table,
UVERBS_OBJECT_RWQ_IND_TBL,
- cmd->rwq_ind_tbl_handle, file);
+ cmd->rwq_ind_tbl_handle, attrs);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1380,13 +1277,6 @@ static int create_qp(struct ib_uverbs_file *file,
attr.rwq_ind_tbl = ind_tbl;
}
- if (cmd_sz > sizeof(*cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(*cmd),
- cmd_sz - sizeof(*cmd))) {
- ret = -EOPNOTSUPP;
- goto err_put;
- }
-
if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
ret = -EINVAL;
goto err_put;
@@ -1397,7 +1287,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_XRC_TGT) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
- file);
+ attrs);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -1417,7 +1307,7 @@ static int create_qp(struct ib_uverbs_file *file,
} else {
if (cmd->is_srq) {
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
- cmd->srq_handle, file);
+ cmd->srq_handle, attrs);
if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
goto err_put;
@@ -1428,7 +1318,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
rcq = uobj_get_obj_read(
cq, UVERBS_OBJECT_CQ,
- cmd->recv_cq_handle, file);
+ cmd->recv_cq_handle, attrs);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1439,11 +1329,11 @@ static int create_qp(struct ib_uverbs_file *file,
if (has_sq)
scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
- cmd->send_cq_handle, file);
+ cmd->send_cq_handle, attrs);
if (!ind_tbl)
rcq = rcq ?: scq;
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
- file);
+ attrs);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1453,7 +1343,7 @@ static int create_qp(struct ib_uverbs_file *file,
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
+ attr.qp_context = attrs->ufile;
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
@@ -1473,10 +1363,7 @@ static int create_qp(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
- sizeof(cmd->create_flags))
- attr.create_flags = cmd->create_flags;
-
+ attr.create_flags = cmd->create_flags;
if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
@@ -1498,18 +1385,10 @@ static int create_qp(struct ib_uverbs_file *file,
attr.source_qpn = cmd->source_qpn;
}
- buf = (void *)cmd + sizeof(*cmd);
- if (cmd_sz > sizeof(*cmd))
- if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
- cmd_sz - sizeof(*cmd) - 1))) {
- ret = -EINVAL;
- goto err_put;
- }
-
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = _ib_create_qp(device, pd, &attr, uhw,
+ qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
&obj->uevent.uobject);
if (IS_ERR(qp)) {
@@ -1557,11 +1436,9 @@ static int create_qp(struct ib_uverbs_file *file,
resp.base.max_recv_wr = attr.cap.max_recv_wr;
resp.base.max_send_wr = attr.cap.max_send_wr;
resp.base.max_inline_data = attr.cap.max_inline_data;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
-
- ret = cb(file, &resp, ucore);
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
goto err_cb;
@@ -1583,7 +1460,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (ind_tbl)
uobj_put_obj_read(ind_tbl);
- return uobj_alloc_commit(&obj->uevent.uobject, 0);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_cb:
ib_destroy_qp(qp);
@@ -1605,39 +1482,15 @@ err_put:
return ret;
}
-static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
-}
-
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_qp cmd;
struct ib_uverbs_ex_create_qp cmd_ex;
- struct ib_udata ucore;
- struct ib_udata uhw;
- ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
- int err;
-
- if (out_len < resp_size)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ int ret;
- ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
- sizeof(cmd), resp_size);
- ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + resp_size,
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - resp_size);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -1654,42 +1507,17 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
cmd_ex.qp_type = cmd.qp_type;
cmd_ex.is_srq = cmd.is_srq;
- err = create_qp(file, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), is_srq) +
- sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
- NULL);
-
- if (err)
- return err;
-
- return in_len;
+ return create_qp(attrs, &cmd_ex);
}
-static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
+static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs)
{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
-}
-
-int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
-{
- struct ib_uverbs_ex_create_qp_resp resp;
- struct ib_uverbs_ex_create_qp cmd = {0};
- int err;
-
- if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
- sizeof(cmd.comp_mask)))
- return -EINVAL;
+ struct ib_uverbs_ex_create_qp cmd;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
return -EINVAL;
@@ -1697,26 +1525,13 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- err = create_qp(file, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_qp_cb, NULL);
-
- if (err)
- return err;
-
- return 0;
+ return create_qp(attrs, &cmd);
}
-ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_open_qp cmd;
struct ib_uverbs_create_qp_resp resp;
- struct ib_udata udata;
struct ib_uqp_object *obj;
struct ib_xrcd *xrcd;
struct ib_uobject *uninitialized_var(xrcd_uobj);
@@ -1725,23 +1540,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
- xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, attrs);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err_put;
@@ -1754,7 +1562,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
+ attr.qp_context = attrs->ufile;
attr.qp_num = cmd.qpn;
attr.qp_type = cmd.qp_type;
@@ -1775,17 +1583,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
resp.qpn = qp->qp_num;
resp.qp_handle = obj->uevent.uobject.id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_destroy;
- }
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
qp->uobject = &obj->uevent.uobject;
uobj_put_read(xrcd_uobj);
- return uobj_alloc_commit(&obj->uevent.uobject, in_len);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_destroy:
ib_destroy_qp(qp);
@@ -1818,9 +1625,7 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
uverb_attr->port_num = rdma_ah_get_port_num(rdma_attr);
}
-ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_qp cmd;
struct ib_uverbs_query_qp_resp resp;
@@ -1829,8 +1634,9 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_qp_init_attr *init_attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
@@ -1839,7 +1645,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1886,14 +1692,13 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.max_inline_data = init_attr->cap.max_inline_data;
resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
kfree(attr);
kfree(init_attr);
- return ret ? ret : in_len;
+ return ret;
}
/* Remove ignored fields set in the attribute mask */
@@ -1933,8 +1738,8 @@ static void copy_ah_attr_from_uverbs(struct ib_device *dev,
rdma_ah_set_make_grd(rdma_attr, false);
}
-static int modify_qp(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
+static int modify_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_modify_qp *cmd)
{
struct ib_qp_attr *attr;
struct ib_qp *qp;
@@ -1944,7 +1749,8 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle,
+ attrs);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2081,7 +1887,7 @@ static int modify_qp(struct ib_uverbs_file *file,
ret = ib_modify_qp_with_udata(qp, attr,
modify_qp_mask(qp->qp_type,
cmd->base.attr_mask),
- udata);
+ &attrs->driver_udata);
release_qp:
uobj_put_obj_read(qp);
@@ -2091,80 +1897,64 @@ out:
return ret;
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_qp cmd = {};
- struct ib_udata udata;
+ struct ib_uverbs_ex_modify_qp cmd;
int ret;
- if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd.base, sizeof(cmd.base));
+ if (ret)
+ return ret;
if (cmd.base.attr_mask &
~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
return -EOPNOTSUPP;
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd.base), NULL,
- in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len);
-
- ret = modify_qp(file, &cmd, &udata);
- if (ret)
- return ret;
-
- return in_len;
+ return modify_qp(attrs, &cmd);
}
-int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_qp cmd = {};
+ struct ib_uverbs_ex_modify_qp cmd;
+ struct ib_uverbs_ex_modify_qp_resp resp = {
+ .response_length = uverbs_response_length(attrs, sizeof(resp))
+ };
int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+
/*
* Last bit is reserved for extending the attr_mask by
* using another field.
*/
BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
- if (ucore->inlen < sizeof(cmd.base))
- return -EINVAL;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
- if (ret)
- return ret;
-
if (cmd.base.attr_mask &
~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
return -EOPNOTSUPP;
- if (ucore->inlen > sizeof(cmd)) {
- if (!ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
- }
-
- ret = modify_qp(file, &cmd, uhw);
+ ret = modify_qp(attrs, &cmd);
+ if (ret)
+ return ret;
- return ret;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
struct ib_uobject *uobj;
struct ib_uqp_object *obj;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2174,10 +1964,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
static void *alloc_wr(size_t wr_size, __u32 num_sge)
@@ -2190,9 +1977,7 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge)
num_sge * sizeof (struct ib_sge), GFP_KERNEL);
}
-ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
@@ -2202,24 +1987,31 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_qp *qp;
int i, sg_ind;
int is_ud;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
size_t next_size;
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
- cmd.sge_count * sizeof (struct ib_uverbs_sge))
- return -EINVAL;
-
- if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
- return -EINVAL;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+ wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count);
+ if (IS_ERR(wqes))
+ return PTR_ERR(wqes);
+ sgls = uverbs_request_next_ptr(
+ &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge));
+ if (IS_ERR(sgls))
+ return PTR_ERR(sgls);
+ ret = uverbs_request_finish(&iter);
+ if (ret)
+ return ret;
user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
if (!user_wr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
goto out;
@@ -2227,8 +2019,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
sg_ind = 0;
last = NULL;
for (i = 0; i < cmd.wr_count; ++i) {
- if (copy_from_user(user_wr,
- buf + sizeof cmd + i * cmd.wqe_size,
+ if (copy_from_user(user_wr, wqes + i * cmd.wqe_size,
cmd.wqe_size)) {
ret = -EFAULT;
goto out_put;
@@ -2256,7 +2047,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
- user_wr->wr.ud.ah, file);
+ user_wr->wr.ud.ah, attrs);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2336,11 +2127,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(next_size, sizeof(struct ib_sge));
- if (copy_from_user(next->sg_list,
- buf + sizeof cmd +
- cmd.wr_count * cmd.wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto out_put;
}
@@ -2350,7 +2139,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
resp.bad_wr = 0;
- ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
+ ret = qp->device->ops.post_send(qp->real_qp, wr, &bad_wr);
if (ret)
for (next = wr; next; next = next->next) {
++resp.bad_wr;
@@ -2358,8 +2147,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out_put:
uobj_put_obj_read(qp);
@@ -2375,28 +2165,35 @@ out_put:
out:
kfree(user_wr);
- return ret ? ret : in_len;
+ return ret;
}
-static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
- int in_len,
- u32 wr_count,
- u32 sge_count,
- u32 wqe_size)
+static struct ib_recv_wr *
+ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
+ u32 wqe_size, u32 sge_count)
{
struct ib_uverbs_recv_wr *user_wr;
struct ib_recv_wr *wr = NULL, *last, *next;
int sg_ind;
int i;
int ret;
-
- if (in_len < wqe_size * wr_count +
- sge_count * sizeof (struct ib_uverbs_sge))
- return ERR_PTR(-EINVAL);
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
return ERR_PTR(-EINVAL);
+ wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count);
+ if (IS_ERR(wqes))
+ return ERR_CAST(wqes);
+ sgls = uverbs_request_next_ptr(
+ iter, sge_count * sizeof(struct ib_uverbs_sge));
+ if (IS_ERR(sgls))
+ return ERR_CAST(sgls);
+ ret = uverbs_request_finish(iter);
+ if (ret)
+ return ERR_PTR(ret);
+
user_wr = kmalloc(wqe_size, GFP_KERNEL);
if (!user_wr)
return ERR_PTR(-ENOMEM);
@@ -2404,7 +2201,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
sg_ind = 0;
last = NULL;
for (i = 0; i < wr_count; ++i) {
- if (copy_from_user(user_wr, buf + i * wqe_size,
+ if (copy_from_user(user_wr, wqes + i * wqe_size,
wqe_size)) {
ret = -EFAULT;
goto err;
@@ -2443,10 +2240,9 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(sizeof *next, sizeof (struct ib_sge));
- if (copy_from_user(next->sg_list,
- buf + wr_count * wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto err;
}
@@ -2470,32 +2266,33 @@ err:
return ERR_PTR(ret);
}
-ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_recv cmd;
struct ib_uverbs_post_recv_resp resp;
struct ib_recv_wr *wr, *next;
const struct ib_recv_wr *bad_wr;
struct ib_qp *qp;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
- if (!qp)
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (!qp) {
+ ret = -EINVAL;
goto out;
+ }
resp.bad_wr = 0;
- ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
+ ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr);
uobj_put_obj_read(qp);
if (ret) {
@@ -2506,9 +2303,9 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
}
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
-
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
next = wr->next;
@@ -2516,36 +2313,36 @@ out:
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_srq_recv cmd;
struct ib_uverbs_post_srq_recv_resp resp;
struct ib_recv_wr *wr, *next;
const struct ib_recv_wr *bad_wr;
struct ib_srq *srq;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
- if (!srq)
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+ if (!srq) {
+ ret = -EINVAL;
goto out;
+ }
resp.bad_wr = 0;
- ret = srq->device->post_srq_recv ?
- srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP;
+ ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr);
uobj_put_obj_read(srq);
@@ -2556,8 +2353,9 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
@@ -2566,12 +2364,10 @@ out:
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_ah cmd;
struct ib_uverbs_create_ah_resp resp;
@@ -2580,21 +2376,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_ah *ah;
struct rdma_ah_attr attr = {};
int ret;
- struct ib_udata udata;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2603,7 +2391,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
goto err;
}
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -2627,7 +2415,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
rdma_ah_set_ah_flags(&attr, 0);
}
- ah = rdma_create_user_ah(pd, &attr, &udata);
+ ah = rdma_create_user_ah(pd, &attr, &attrs->driver_udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_put;
@@ -2639,16 +2427,15 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
resp.ah_handle = uobj->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
err_put:
uobj_put_obj_read(pd);
@@ -2658,21 +2445,19 @@ err:
return ret;
}
-ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_destroy_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_ah cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, attrs);
}
-ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_attach_mcast cmd;
struct ib_qp *qp;
@@ -2680,10 +2465,11 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
struct ib_uverbs_mcast_entry *mcast;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
return -EINVAL;
@@ -2716,12 +2502,10 @@ out_put:
mutex_unlock(&obj->mcast_lock);
uobj_put_obj_read(qp);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_detach_mcast cmd;
struct ib_uqp_object *obj;
@@ -2730,10 +2514,11 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
int ret = -EINVAL;
bool found = false;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
return -EINVAL;
@@ -2759,7 +2544,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
out_put:
mutex_unlock(&obj->mcast_lock);
uobj_put_obj_read(qp);
- return ret ? ret : in_len;
+ return ret;
}
struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
@@ -2838,7 +2623,7 @@ void flow_resources_add(struct ib_uflow_resources *uflow_res,
}
EXPORT_SYMBOL(flow_resources_add);
-static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
+static int kern_spec_to_ib_spec_action(const struct uverbs_attr_bundle *attrs,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -2867,7 +2652,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
ib_spec->action.act = uobj_get_obj_read(flow_action,
UVERBS_OBJECT_FLOW_ACTION,
kern_spec->action.handle,
- ufile);
+ attrs);
if (!ib_spec->action.act)
return -EINVAL;
ib_spec->action.size =
@@ -2885,7 +2670,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
uobj_get_obj_read(counters,
UVERBS_OBJECT_COUNTERS,
kern_spec->flow_count.handle,
- ufile);
+ attrs);
if (!ib_spec->flow_count.counters)
return -EINVAL;
ib_spec->flow_count.size =
@@ -3066,7 +2851,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
kern_filter_sz, ib_spec);
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
+static int kern_spec_to_ib_spec(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -3075,17 +2860,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
return -EINVAL;
if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
- return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
+ return kern_spec_to_ib_spec_action(attrs, kern_spec, ib_spec,
uflow_res);
else
return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
}
-int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_wq cmd = {};
+ struct ib_uverbs_ex_create_wq cmd;
struct ib_uverbs_ex_create_wq_resp resp = {};
struct ib_uwq_object *obj;
int err = 0;
@@ -3093,43 +2876,27 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_pd *pd;
struct ib_wq *wq;
struct ib_wq_init_attr wq_init_attr = {};
- size_t required_cmd_sz;
- size_t required_resp_len;
struct ib_device *ib_dev;
- required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
- required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file,
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3138,20 +2905,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.cq = cq;
wq_init_attr.max_sge = cmd.max_sge;
wq_init_attr.max_wr = cmd.max_wr;
- wq_init_attr.wq_context = file;
+ wq_init_attr.wq_context = attrs->ufile;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
- if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
- sizeof(cmd.create_flags)))
- wq_init_attr.create_flags = cmd.create_flags;
+ wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
- if (!pd->device->create_wq) {
- err = -EOPNOTSUPP;
- goto err_put_cq;
- }
- wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
+ wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
if (IS_ERR(wq)) {
err = PTR_ERR(wq);
goto err_put_cq;
@@ -3175,15 +2936,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
resp.max_sge = wq_init_attr.max_sge;
resp.max_wr = wq_init_attr.max_wr;
resp.wqn = wq->wq_num;
- resp.response_length = required_resp_len;
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
uobj_put_obj_read(pd);
uobj_put_obj_read(cq);
- return uobj_alloc_commit(&obj->uevent.uobject, 0);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_copy:
ib_destroy_wq(wq);
@@ -3197,41 +2957,23 @@ err_uobj:
return err;
}
-int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_wq cmd = {};
+ struct ib_uverbs_ex_destroy_wq cmd;
struct ib_uverbs_ex_destroy_wq_resp resp = {};
struct ib_uobject *uobj;
struct ib_uwq_object *obj;
- size_t required_cmd_sz;
- size_t required_resp_len;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
- required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- resp.response_length = required_resp_len;
- uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3240,29 +2982,17 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- return ib_copy_to_udata(ucore, &resp, resp.response_length);
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_wq cmd = {};
+ struct ib_uverbs_ex_modify_wq cmd;
struct ib_wq *wq;
struct ib_wq_attr wq_attr = {};
- size_t required_cmd_sz;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state);
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
@@ -3272,7 +3002,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
if (!wq)
return -EINVAL;
@@ -3282,24 +3012,18 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.flags = cmd.flags;
wq_attr.flags_mask = cmd.flags_mask;
}
- if (!wq->device->modify_wq) {
- ret = -EOPNOTSUPP;
- goto out;
- }
- ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
-out:
+ ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask,
+ &attrs->driver_udata);
uobj_put_obj_read(wq);
return ret;
}
-int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_rwq_ind_table cmd = {};
+ struct ib_uverbs_ex_create_rwq_ind_table cmd;
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
struct ib_uobject *uobj;
- int err = 0;
+ int err;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_wq **wqs = NULL;
@@ -3307,27 +3031,13 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_wq *wq = NULL;
int i, j, num_read_wqs;
u32 num_wq_handles;
- u32 expected_in_size;
- size_t required_cmd_sz_header;
- size_t required_resp_len;
+ struct uverbs_req_iter iter;
struct ib_device *ib_dev;
- required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
- required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
-
- if (ucore->inlen < required_cmd_sz_header)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf += required_cmd_sz_header;
- ucore->inlen -= required_cmd_sz_header;
-
if (cmd.comp_mask)
return -EOPNOTSUPP;
@@ -3335,26 +3045,17 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
return -EINVAL;
num_wq_handles = 1 << cmd.log_ind_tbl_size;
- expected_in_size = num_wq_handles * sizeof(__u32);
- if (num_wq_handles == 1)
- /* input size for wq handles is u64 aligned */
- expected_in_size += sizeof(__u32);
-
- if (ucore->inlen < expected_in_size)
- return -EINVAL;
-
- if (ucore->inlen > expected_in_size &&
- !ib_is_udata_cleared(ucore, expected_in_size,
- ucore->inlen - expected_in_size))
- return -EOPNOTSUPP;
-
wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
GFP_KERNEL);
if (!wqs_handles)
return -ENOMEM;
- err = ib_copy_from_udata(wqs_handles, ucore,
- num_wq_handles * sizeof(__u32));
+ err = uverbs_request_next(&iter, wqs_handles,
+ num_wq_handles * sizeof(__u32));
+ if (err)
+ goto err_free;
+
+ err = uverbs_request_finish(&iter);
if (err)
goto err_free;
@@ -3367,7 +3068,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
- wqs_handles[num_read_wqs], file);
+ wqs_handles[num_read_wqs], attrs);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3376,7 +3077,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto put_wqs;
@@ -3385,11 +3086,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
- if (!ib_dev->create_rwq_ind_table) {
- err = -EOPNOTSUPP;
- goto err_uobj;
- }
- rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
+ rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr,
+ &attrs->driver_udata);
if (IS_ERR(rwq_ind_tbl)) {
err = PTR_ERR(rwq_ind_tbl);
@@ -3408,10 +3106,9 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
- resp.response_length = required_resp_len;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
@@ -3420,7 +3117,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (j = 0; j < num_read_wqs; j++)
uobj_put_obj_read(wqs[j]);
- return uobj_alloc_commit(uobj, 0);
+ return uobj_alloc_commit(uobj);
err_copy:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3435,25 +3132,12 @@ err_free:
return err;
}
-int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- int ret;
- size_t required_cmd_sz;
-
- required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
+ struct ib_uverbs_ex_destroy_rwq_ind_table cmd;
+ int ret;
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
@@ -3461,12 +3145,10 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
- cmd.ind_tbl_handle, file, 0);
+ cmd.ind_tbl_handle, attrs);
}
-int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
@@ -3477,24 +3159,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_uflow_resources *uflow_res;
struct ib_uverbs_flow_spec_hdr *kern_spec;
- int err = 0;
+ struct uverbs_req_iter iter;
+ int err;
void *ib_spec;
int i;
struct ib_device *ib_dev;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- if (ucore->outlen < sizeof(resp))
- return -ENOSPC;
-
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf += sizeof(cmd);
- ucore->inlen -= sizeof(cmd);
-
if (cmd.comp_mask)
return -EINVAL;
@@ -3512,8 +3186,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
return -EINVAL;
- if (cmd.flow_attr.size > ucore->inlen ||
- cmd.flow_attr.size >
+ if (cmd.flow_attr.size >
(cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
return -EINVAL;
@@ -3528,21 +3201,25 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
return -ENOMEM;
*kern_flow_attr = cmd.flow_attr;
- err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore,
- cmd.flow_attr.size);
+ err = uverbs_request_next(&iter, &kern_flow_attr->flow_specs,
+ cmd.flow_attr.size);
if (err)
goto err_free_attr;
} else {
kern_flow_attr = &cmd.flow_attr;
}
- uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev);
+ err = uverbs_request_finish(&iter);
+ if (err)
+ goto err_free_attr;
+
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, attrs, &ib_dev);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto err_free_attr;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3553,11 +3230,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
goto err_put;
}
- if (!qp->device->create_flow) {
- err = -EOPNOTSUPP;
- goto err_put;
- }
-
flow_attr = kzalloc(struct_size(flow_attr, flows,
cmd.flow_attr.num_of_specs), GFP_KERNEL);
if (!flow_attr) {
@@ -3584,7 +3256,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
cmd.flow_attr.size >= kern_spec->size;
i++) {
err = kern_spec_to_ib_spec(
- file, (struct ib_uverbs_flow_spec *)kern_spec,
+ attrs, (struct ib_uverbs_flow_spec *)kern_spec,
ib_spec, uflow_res);
if (err)
goto err_free;
@@ -3602,8 +3274,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
goto err_free;
}
- flow_id = qp->device->create_flow(qp, flow_attr,
- IB_FLOW_DOMAIN_USER, uhw);
+ flow_id = qp->device->ops.create_flow(
+ qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
@@ -3615,8 +3287,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
- err = ib_copy_to_udata(ucore,
- &resp, sizeof(resp));
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
@@ -3624,9 +3295,9 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- return uobj_alloc_commit(uobj, 0);
+ return uobj_alloc_commit(uobj);
err_copy:
- if (!qp->device->destroy_flow(flow_id))
+ if (!qp->device->ops.destroy_flow(flow_id))
atomic_dec(&qp->usecnt);
err_free:
ib_uverbs_flow_resources_free(uflow_res);
@@ -3642,28 +3313,22 @@ err_free_attr:
return err;
}
-int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_flow cmd;
int ret;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EINVAL;
- return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
- 0);
+ return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, attrs);
}
-static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
+static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
@@ -3676,7 +3341,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
int ret;
struct ib_device *ib_dev;
- obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file,
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -3686,7 +3351,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
- file);
+ attrs);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
@@ -3704,21 +3369,21 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (ib_srq_has_cq(cmd->srq_type)) {
attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
- cmd->cq_handle, file);
+ cmd->cq_handle, attrs);
if (!attr.ext.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
}
attr.event_handler = ib_uverbs_srq_event_handler;
- attr.srq_context = file;
+ attr.srq_context = attrs->ufile;
attr.srq_type = cmd->srq_type;
attr.attr.max_wr = cmd->max_wr;
attr.attr.max_sge = cmd->max_sge;
@@ -3727,7 +3392,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
- srq = pd->device->create_srq(pd, &attr, udata);
+ srq = pd->device->ops.create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
goto err_put;
@@ -3763,11 +3428,9 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user(u64_to_user_ptr(cmd->response),
- &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
if (cmd->srq_type == IB_SRQT_XRC)
uobj_put_read(xrcd_uobj);
@@ -3776,7 +3439,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
uobj_put_obj_read(attr.ext.cq);
uobj_put_obj_read(pd);
- return uobj_alloc_commit(&obj->uevent.uobject, 0);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_copy:
ib_destroy_srq(srq);
@@ -3799,21 +3462,15 @@ err:
return ret;
}
-ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_xsrq xcmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&xcmd, 0, sizeof(xcmd));
xcmd.response = cmd.response;
@@ -3824,77 +3481,48 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
xcmd.max_sge = cmd.max_sge;
xcmd.srq_limit = cmd.srq_limit;
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
- ret = __uverbs_create_xsrq(file, &xcmd, &udata);
- if (ret)
- return ret;
-
- return in_len;
+ return __uverbs_create_xsrq(attrs, &xcmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_create_xsrq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_xsrq cmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
- ret = __uverbs_create_xsrq(file, &cmd, &udata);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- return in_len;
+ return __uverbs_create_xsrq(attrs, &cmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_modify_srq cmd;
- struct ib_udata udata;
struct ib_srq *srq;
struct ib_srq_attr attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
- out_len);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (!srq)
return -EINVAL;
attr.max_wr = cmd.max_wr;
attr.srq_limit = cmd.srq_limit;
- ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
+ ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask,
+ &attrs->driver_udata);
uobj_put_obj_read(srq);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_srq cmd;
struct ib_uverbs_query_srq_resp resp;
@@ -3902,13 +3530,11 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (!srq)
return -EINVAL;
@@ -3925,25 +3551,22 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
resp.max_sge = attr.max_sge;
resp.srq_limit = attr.srq_limit;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
struct ib_uobject *uobj;
struct ib_uevent_object *obj;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3953,35 +3576,24 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_query_device_resp resp = { {0} };
+ struct ib_uverbs_ex_query_device_resp resp = {};
struct ib_uverbs_ex_query_device cmd;
struct ib_device_attr attr = {0};
struct ib_ucontext *ucontext;
struct ib_device *ib_dev;
int err;
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
ib_dev = ucontext->device;
- if (!ib_dev->query_device)
- return -EOPNOTSUPP;
-
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
@@ -3991,20 +3603,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- resp.response_length = offsetof(typeof(resp), odp_caps);
-
- if (ucore->outlen < resp.response_length)
- return -ENOSPC;
-
- err = ib_dev->query_device(ib_dev, &attr, uhw);
+ err = ib_dev->ops.query_device(ib_dev, &attr, &attrs->driver_udata);
if (err)
return err;
copy_query_dev_fields(ucontext, &resp.base, &attr);
- if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
- goto end;
-
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
resp.odp_caps.general_caps = attr.odp_caps.general_caps;
resp.odp_caps.per_transport_caps.rc_odp_caps =
@@ -4014,99 +3618,39 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.odp_caps.per_transport_caps.ud_odp_caps =
attr.odp_caps.per_transport_caps.ud_odp_caps;
#endif
- resp.response_length += sizeof(resp.odp_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
- goto end;
resp.timestamp_mask = attr.timestamp_mask;
- resp.response_length += sizeof(resp.timestamp_mask);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
- goto end;
-
resp.hca_core_clock = attr.hca_core_clock;
- resp.response_length += sizeof(resp.hca_core_clock);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
- goto end;
-
resp.device_cap_flags_ex = attr.device_cap_flags;
- resp.response_length += sizeof(resp.device_cap_flags_ex);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps))
- goto end;
-
resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts;
resp.rss_caps.max_rwq_indirection_tables =
attr.rss_caps.max_rwq_indirection_tables;
resp.rss_caps.max_rwq_indirection_table_size =
attr.rss_caps.max_rwq_indirection_table_size;
-
- resp.response_length += sizeof(resp.rss_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq))
- goto end;
-
resp.max_wq_type_rq = attr.max_wq_type_rq;
- resp.response_length += sizeof(resp.max_wq_type_rq);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
- goto end;
-
resp.raw_packet_caps = attr.raw_packet_caps;
- resp.response_length += sizeof(resp.raw_packet_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
- goto end;
-
resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
resp.tm_caps.max_ops = attr.tm_caps.max_ops;
resp.tm_caps.max_sge = attr.tm_caps.max_sge;
resp.tm_caps.flags = attr.tm_caps.flags;
- resp.response_length += sizeof(resp.tm_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps))
- goto end;
-
resp.cq_moderation_caps.max_cq_moderation_count =
attr.cq_caps.max_cq_moderation_count;
resp.cq_moderation_caps.max_cq_moderation_period =
attr.cq_caps.max_cq_moderation_period;
- resp.response_length += sizeof(resp.cq_moderation_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size))
- goto end;
-
resp.max_dm_size = attr.max_dm_size;
- resp.response_length += sizeof(resp.max_dm_size);
-end:
- err = ib_copy_to_udata(ucore, &resp, resp.response_length);
- return err;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_cq cmd = {};
+ struct ib_uverbs_ex_modify_cq cmd;
struct ib_cq *cq;
- size_t required_cmd_sz;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), reserved) +
- sizeof(cmd.reserved);
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- /* sanity checks */
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
@@ -4116,7 +3660,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
if (cmd.attr_mask > IB_CQ_MODERATE)
return -EOPNOTSUPP;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
@@ -4126,3 +3670,381 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
return ret;
}
+
+/*
+ * Describe the input structs for write(). Some write methods have an input
+ * only struct, most have an input and output. If the struct has an output then
+ * the 'response' u64 must be the first field in the request structure.
+ *
+ * If udata is present then both the request and response structs have a
+ * trailing driver_data flex array. In this case the size of the base struct
+ * cannot be changed.
+ */
+#define offsetof_after(_struct, _member) \
+ (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member))
+
+#define UAPI_DEF_WRITE_IO(req, resp) \
+ .write.has_resp = 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \
+ BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \
+ sizeof(u64)), \
+ .write.req_size = sizeof(req), .write.resp_size = sizeof(resp)
+
+#define UAPI_DEF_WRITE_I(req) .write.req_size = sizeof(req)
+
+#define UAPI_DEF_WRITE_UDATA_IO(req, resp) \
+ UAPI_DEF_WRITE_IO(req, resp), \
+ .write.has_udata = \
+ 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req)) + \
+ BUILD_BUG_ON_ZERO(offsetof(resp, driver_data) != \
+ sizeof(resp))
+
+#define UAPI_DEF_WRITE_UDATA_I(req) \
+ UAPI_DEF_WRITE_I(req), \
+ .write.has_udata = \
+ 1 + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req))
+
+/*
+ * The _EX versions are for use with WRITE_EX and allow the last struct member
+ * to be specified. Buffers that do not include that member will be rejected.
+ */
+#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \
+ .write.has_resp = 1, \
+ .write.req_size = offsetof_after(req, req_last_member), \
+ .write.resp_size = offsetof_after(resp, resp_last_member)
+
+#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \
+ .write.req_size = offsetof_after(req, req_last_member)
+
+const struct uapi_definition uverbs_def_write_intf[] = {
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_AH,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_AH,
+ ib_uverbs_create_ah,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_ah,
+ struct ib_uverbs_create_ah_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_ah)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_AH,
+ ib_uverbs_destroy_ah,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_COMP_CHANNEL,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
+ ib_uverbs_create_comp_channel,
+ UAPI_DEF_WRITE_IO(
+ struct ib_uverbs_create_comp_channel,
+ struct ib_uverbs_create_comp_channel_resp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_CQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_CQ,
+ ib_uverbs_create_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_cq,
+ struct ib_uverbs_create_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_CQ,
+ ib_uverbs_destroy_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_cq,
+ struct ib_uverbs_destroy_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POLL_CQ,
+ ib_uverbs_poll_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_poll_cq,
+ struct ib_uverbs_poll_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(poll_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+ ib_uverbs_req_notify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_req_notify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(req_notify_cq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_RESIZE_CQ,
+ ib_uverbs_resize_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_resize_cq,
+ struct ib_uverbs_resize_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(resize_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_CQ,
+ ib_uverbs_ex_create_cq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq,
+ reserved,
+ struct ib_uverbs_ex_create_cq_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_CQ,
+ ib_uverbs_ex_modify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_DEVICE,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_GET_CONTEXT,
+ ib_uverbs_get_context,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_get_context,
+ struct ib_uverbs_get_context_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_DEVICE,
+ ib_uverbs_query_device,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_device,
+ struct ib_uverbs_query_device_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_PORT,
+ ib_uverbs_query_port,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_port,
+ struct ib_uverbs_query_port_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_port)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_QUERY_DEVICE,
+ ib_uverbs_ex_query_device,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_query_device,
+ reserved,
+ struct ib_uverbs_ex_query_device_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(query_device)),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_ucontext),
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_ucontext)),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_FLOW,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_FLOW,
+ ib_uverbs_ex_create_flow,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_create_flow,
+ flow_attr,
+ struct ib_uverbs_create_flow_resp,
+ flow_handle),
+ UAPI_DEF_METHOD_NEEDS_FN(create_flow)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
+ ib_uverbs_ex_destroy_flow,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_flow),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_flow))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MR,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_DEREG_MR,
+ ib_uverbs_dereg_mr,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dereg_mr),
+ UAPI_DEF_METHOD_NEEDS_FN(dereg_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REG_MR,
+ ib_uverbs_reg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr,
+ struct ib_uverbs_reg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REREG_MR,
+ ib_uverbs_rereg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr,
+ struct ib_uverbs_rereg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MW,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_MW,
+ ib_uverbs_alloc_mw,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_mw,
+ struct ib_uverbs_alloc_mw_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_mw)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_MW,
+ ib_uverbs_dealloc_mw,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_mw),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_mw))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_PD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_PD,
+ ib_uverbs_alloc_pd,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_pd,
+ struct ib_uverbs_alloc_pd_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_pd)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_PD,
+ ib_uverbs_dealloc_pd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_pd),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_pd))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_QP,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ATTACH_MCAST,
+ ib_uverbs_attach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_QP,
+ ib_uverbs_create_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_qp,
+ struct ib_uverbs_create_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_QP,
+ ib_uverbs_destroy_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_qp,
+ struct ib_uverbs_destroy_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DETACH_MCAST,
+ ib_uverbs_detach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_detach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_QP,
+ ib_uverbs_modify_qp,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_modify_qp),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_RECV,
+ ib_uverbs_post_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_recv,
+ struct ib_uverbs_post_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SEND,
+ ib_uverbs_post_send,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_send,
+ struct ib_uverbs_post_send_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_send)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_QP,
+ ib_uverbs_query_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_qp,
+ struct ib_uverbs_query_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_QP,
+ ib_uverbs_ex_create_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_qp,
+ comp_mask,
+ struct ib_uverbs_ex_create_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_QP,
+ ib_uverbs_ex_modify_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_modify_qp,
+ base,
+ struct ib_uverbs_ex_modify_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
+ ib_uverbs_ex_create_rwq_ind_table,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_create_rwq_ind_table,
+ log_ind_tbl_size,
+ struct ib_uverbs_ex_create_rwq_ind_table_resp,
+ ind_tbl_num),
+ UAPI_DEF_METHOD_NEEDS_FN(create_rwq_ind_table)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL,
+ ib_uverbs_ex_destroy_rwq_ind_table,
+ UAPI_DEF_WRITE_I(
+ struct ib_uverbs_ex_destroy_rwq_ind_table),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_rwq_ind_table))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_WQ,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_WQ,
+ ib_uverbs_ex_create_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_wq,
+ max_sge,
+ struct ib_uverbs_ex_create_wq_resp,
+ wqn),
+ UAPI_DEF_METHOD_NEEDS_FN(create_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_WQ,
+ ib_uverbs_ex_destroy_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_destroy_wq,
+ wq_handle,
+ struct ib_uverbs_ex_destroy_wq_resp,
+ reserved),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_WQ,
+ ib_uverbs_ex_modify_wq,
+ UAPI_DEF_WRITE_I_EX(struct ib_uverbs_ex_modify_wq,
+ curr_wq_state),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_wq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_SRQ,
+ ib_uverbs_create_srq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_srq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_XSRQ,
+ ib_uverbs_create_xsrq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_xsrq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_SRQ,
+ ib_uverbs_destroy_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_srq,
+ struct ib_uverbs_destroy_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_SRQ,
+ ib_uverbs_modify_srq,
+ UAPI_DEF_WRITE_UDATA_I(struct ib_uverbs_modify_srq),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ ib_uverbs_post_srq_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_srq_recv,
+ struct ib_uverbs_post_srq_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_srq_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_SRQ,
+ ib_uverbs_query_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_srq,
+ struct ib_uverbs_query_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_srq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_XRCD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CLOSE_XRCD,
+ ib_uverbs_close_xrcd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP,
+ ib_uverbs_open_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_qp,
+ struct ib_uverbs_create_qp_resp)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_XRCD,
+ ib_uverbs_open_xrcd,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_xrcd,
+ struct ib_uverbs_open_xrcd_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))),
+
+ {},
+};
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index b0e493e8d860..8c81ff698052 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -404,8 +404,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
static int ib_uverbs_run_method(struct bundle_priv *pbundle,
unsigned int num_attrs)
{
- int (*handler)(struct ib_uverbs_file *ufile,
- struct uverbs_attr_bundle *ctx);
+ int (*handler)(struct uverbs_attr_bundle *attrs);
size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
unsigned int i;
@@ -436,6 +435,11 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
pbundle->method_elm->key_bitmap_len)))
return -EINVAL;
+ if (pbundle->method_elm->has_udata)
+ uverbs_fill_udata(&pbundle->bundle,
+ &pbundle->bundle.driver_udata,
+ UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT);
+
if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
struct uverbs_obj_attr *destroy_attr =
&pbundle->bundle.attrs[destroy_bkey].obj_attr;
@@ -445,10 +449,10 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
return ret;
__clear_bit(destroy_bkey, pbundle->uobj_finalize);
- ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+ ret = handler(&pbundle->bundle);
uobj_put_destroy(destroy_attr->uobject);
} else {
- ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+ ret = handler(&pbundle->bundle);
}
/*
@@ -662,35 +666,37 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
EXPORT_SYMBOL(uverbs_get_flags32);
/*
- * This is for ease of conversion. The purpose is to convert all drivers to
- * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and
- * attr == 1 is output.
+ * Fill a ib_udata struct (core or uhw) using the given attribute IDs.
+ * This is primarily used to convert the UVERBS_ATTR_UHW() into the
+ * ib_udata format used by the drivers.
*/
-void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata)
+void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
+ struct ib_udata *udata, unsigned int attr_in,
+ unsigned int attr_out)
{
struct bundle_priv *pbundle =
container_of(bundle, struct bundle_priv, bundle);
- const struct uverbs_attr *uhw_in =
- uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN);
- const struct uverbs_attr *uhw_out =
- uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT);
-
- if (!IS_ERR(uhw_in)) {
- udata->inlen = uhw_in->ptr_attr.len;
- if (uverbs_attr_ptr_is_inline(uhw_in))
+ const struct uverbs_attr *in =
+ uverbs_attr_get(&pbundle->bundle, attr_in);
+ const struct uverbs_attr *out =
+ uverbs_attr_get(&pbundle->bundle, attr_out);
+
+ if (!IS_ERR(in)) {
+ udata->inlen = in->ptr_attr.len;
+ if (uverbs_attr_ptr_is_inline(in))
udata->inbuf =
- &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx]
+ &pbundle->user_attrs[in->ptr_attr.uattr_idx]
.data;
else
- udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
+ udata->inbuf = u64_to_user_ptr(in->ptr_attr.data);
} else {
udata->inbuf = NULL;
udata->inlen = 0;
}
- if (!IS_ERR(uhw_out)) {
- udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
- udata->outlen = uhw_out->ptr_attr.len;
+ if (!IS_ERR(out)) {
+ udata->outbuf = u64_to_user_ptr(out->ptr_attr.data);
+ udata->outlen = out->ptr_attr.len;
} else {
udata->outbuf = NULL;
udata->outlen = 0;
@@ -745,3 +751,14 @@ int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
return 0;
}
EXPORT_SYMBOL(_uverbs_get_const);
+
+int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+
+ if (clear_user(u64_to_user_ptr(attr->ptr_attr.data),
+ attr->ptr_attr.len))
+ return -EFAULT;
+ return uverbs_copy_to(bundle, idx, from, size);
+}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 6d373f5515b7..9f9172eb1512 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -74,64 +74,6 @@ static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
static DEFINE_IDA(uverbs_ida);
-
-static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len) = {
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
- [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
- [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
- [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
- [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
- [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
- [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
- [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
- [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
- [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
- [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
- [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
- [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
- [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
-};
-
-static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw) = {
- [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
- [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
- [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
- [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
- [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
- [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
- [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
- [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
- [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
- [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
- [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
- [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq,
-};
-
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
@@ -139,7 +81,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
* Must be called with the ufile->device->disassociate_srcu held, and the lock
* must be held until use of the ucontext is finished.
*/
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
{
/*
* We do not hold the hw_destroy_rwsem lock for this flow, instead
@@ -157,14 +99,14 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
return ucontext;
}
-EXPORT_SYMBOL(ib_uverbs_get_ucontext);
+EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
int uverbs_dealloc_mw(struct ib_mw *mw)
{
struct ib_pd *pd = mw->pd;
int ret;
- ret = mw->device->dealloc_mw(mw);
+ ret = mw->device->ops.dealloc_mw(mw);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
@@ -255,7 +197,7 @@ void ib_uverbs_release_file(struct kref *ref)
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
- if (ib_dev && !ib_dev->disassociate_ucontext)
+ if (ib_dev && !ib_dev->ops.disassociate_ucontext)
module_put(ib_dev->owner);
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
@@ -646,51 +588,19 @@ err_put_refs:
return filp;
}
-static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command,
- bool extended)
-{
- if (!extended)
- return ufile->uverbs_cmd_mask & BIT_ULL(command);
-
- return ufile->uverbs_ex_cmd_mask & BIT_ULL(command);
-}
-
-static bool verify_command_idx(u32 command, bool extended)
-{
- if (extended)
- return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
- uverbs_ex_cmd_table[command];
-
- return command < ARRAY_SIZE(uverbs_cmd_table) &&
- uverbs_cmd_table[command];
-}
-
-static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
- u32 *command, bool *extended)
-{
- if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
- IB_USER_VERBS_CMD_COMMAND_MASK))
- return -EINVAL;
-
- *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
- *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;
-
- if (!verify_command_idx(*command, *extended))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
- struct ib_uverbs_ex_cmd_hdr *ex_hdr,
- size_t count, bool extended)
+ struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
+ const struct uverbs_api_write_method *method_elm)
{
- if (extended) {
+ if (method_elm->is_ex) {
count -= sizeof(*hdr) + sizeof(*ex_hdr);
if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
return -EINVAL;
+ if (hdr->in_words * 8 < method_elm->req_size)
+ return -ENOSPC;
+
if (ex_hdr->cmd_hdr_reserved)
return -EINVAL;
@@ -698,6 +608,9 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
if (!hdr->out_words && !ex_hdr->provider_out_words)
return -EINVAL;
+ if (hdr->out_words * 8 < method_elm->resp_size)
+ return -ENOSPC;
+
if (!access_ok(VERIFY_WRITE,
u64_to_user_ptr(ex_hdr->response),
(hdr->out_words + ex_hdr->provider_out_words) * 8))
@@ -714,6 +627,24 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
if (hdr->in_words * 4 != count)
return -EINVAL;
+ if (count < method_elm->req_size + sizeof(hdr)) {
+ /*
+ * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
+ * with a 16 byte write instead of 24. Old kernels didn't
+ * check the size so they allowed this. Now that the size is
+ * checked provide a compatibility work around to not break
+ * those userspaces.
+ */
+ if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
+ count == 16) {
+ hdr->in_words = 6;
+ return 0;
+ }
+ return -ENOSPC;
+ }
+ if (hdr->out_words * 4 < method_elm->resp_size)
+ return -ENOSPC;
+
return 0;
}
@@ -721,11 +652,12 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
+ const struct uverbs_api_write_method *method_elm;
+ struct uverbs_api *uapi = file->device->uapi;
struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_uverbs_cmd_hdr hdr;
- bool extended;
+ struct uverbs_attr_bundle bundle;
int srcu_key;
- u32 command;
ssize_t ret;
if (!ib_safe_file_access(filp)) {
@@ -740,57 +672,92 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- ret = process_hdr(&hdr, &command, &extended);
- if (ret)
- return ret;
+ method_elm = uapi_get_method(uapi, hdr.command);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
- if (extended) {
+ if (method_elm->is_ex) {
if (count < (sizeof(hdr) + sizeof(ex_hdr)))
return -EINVAL;
if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
return -EFAULT;
}
- ret = verify_hdr(&hdr, &ex_hdr, count, extended);
+ ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
if (ret)
return ret;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- if (!verify_command_mask(file, command, extended)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
-
buf += sizeof(hdr);
- if (!extended) {
- ret = uverbs_cmd_table[command](file, buf,
- hdr.in_words * 4,
- hdr.out_words * 4);
- } else {
- struct ib_udata ucore;
- struct ib_udata uhw;
+ bundle.ufile = file;
+ if (!method_elm->is_ex) {
+ size_t in_len = hdr.in_words * 4 - sizeof(hdr);
+ size_t out_len = hdr.out_words * 4;
+ u64 response = 0;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.inlen =
+ in_len - method_elm->req_size;
+ in_len = method_elm->req_size;
+ if (bundle.driver_udata.inlen)
+ bundle.driver_udata.inbuf = buf + in_len;
+ else
+ bundle.driver_udata.inbuf = NULL;
+ } else {
+ memset(&bundle.driver_udata, 0,
+ sizeof(bundle.driver_udata));
+ }
+
+ if (method_elm->has_resp) {
+ /*
+ * The macros check that if has_resp is set
+ * then the command request structure starts
+ * with a '__aligned u64 response' member.
+ */
+ ret = get_user(response, (const u64 *)buf);
+ if (ret)
+ goto out_unlock;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.outlen =
+ out_len - method_elm->resp_size;
+ out_len = method_elm->resp_size;
+ if (bundle.driver_udata.outlen)
+ bundle.driver_udata.outbuf =
+ u64_to_user_ptr(response +
+ out_len);
+ else
+ bundle.driver_udata.outbuf = NULL;
+ }
+ } else {
+ bundle.driver_udata.outlen = 0;
+ bundle.driver_udata.outbuf = NULL;
+ }
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.ucore, buf, u64_to_user_ptr(response),
+ in_len, out_len);
+ } else {
buf += sizeof(ex_hdr);
- ib_uverbs_init_udata_buf_or_null(&ucore, buf,
+ ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
u64_to_user_ptr(ex_hdr.response),
hdr.in_words * 8, hdr.out_words * 8);
- ib_uverbs_init_udata_buf_or_null(&uhw,
- buf + ucore.inlen,
- u64_to_user_ptr(ex_hdr.response) + ucore.outlen,
- ex_hdr.provider_in_words * 8,
- ex_hdr.provider_out_words * 8);
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.driver_udata, buf + bundle.ucore.inlen,
+ u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
- ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
- ret = (ret) ? : count;
}
-out:
+ ret = method_elm->handler(&bundle);
+out_unlock:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
- return ret;
+ return (ret) ? : count;
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -801,13 +768,13 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
int srcu_key;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext_file(file);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
goto out;
}
- ret = ucontext->device->mmap(ucontext, vma);
+ ret = ucontext->device->ops.mmap(ucontext, vma);
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
@@ -1069,7 +1036,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
/* In case IB device supports disassociate ucontext, there is no hard
* dependency between uverbs device and its low level device.
*/
- module_dependent = !(ib_dev->disassociate_ucontext);
+ module_dependent = !(ib_dev->ops.disassociate_ucontext);
if (module_dependent) {
if (!try_module_get(ib_dev->owner)) {
@@ -1102,9 +1069,6 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
- file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask;
- file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask;
-
setup_ufile_idr_uobject(file);
return nonseekable_open(inode, filp);
@@ -1224,7 +1188,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
{
struct uverbs_api *uapi;
- uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
+ uapi = uverbs_alloc_api(device);
if (IS_ERR(uapi))
return PTR_ERR(uapi);
@@ -1239,7 +1203,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
struct ib_uverbs_device *uverbs_dev;
int ret;
- if (!device->alloc_ucontext)
+ if (!device->ops.alloc_ucontext)
return;
uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
@@ -1285,7 +1249,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
cdev_init(&uverbs_dev->cdev,
- device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
+ device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops);
uverbs_dev->cdev.owner = THIS_MODULE;
ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
@@ -1373,7 +1337,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
ida_free(&uverbs_ida, uverbs_dev->devnum);
- if (device->disassociate_ucontext) {
+ if (device->ops.disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
* Userspace will see a EIO errno for all future access.
* Upon returning, ib_device may be freed internally and is not
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 203cc96ac6f5..cbc72312eb41 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -42,7 +42,8 @@
static int uverbs_free_ah(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
- return rdma_destroy_ah((struct ib_ah *)uobject->object);
+ return rdma_destroy_ah((struct ib_ah *)uobject->object,
+ RDMA_DESTROY_AH_SLEEPABLE);
}
static int uverbs_free_flow(struct ib_uobject *uobject,
@@ -54,7 +55,7 @@ static int uverbs_free_flow(struct ib_uobject *uobject,
struct ib_qp *qp = flow->qp;
int ret;
- ret = flow->device->destroy_flow(flow);
+ ret = flow->device->ops.destroy_flow(flow);
if (!ret) {
if (qp)
atomic_dec(&qp->usecnt);
@@ -210,8 +211,7 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
return 0;
};
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
{
return 0;
}
@@ -229,58 +229,106 @@ DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_QP,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_MW_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE,
+ UVERBS_OBJECT_MW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
- UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw),
+ &UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY));
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_SRQ,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
uverbs_free_srq));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_AH_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE,
+ UVERBS_OBJECT_AH,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
- UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah),
+ &UVERBS_METHOD(UVERBS_METHOD_AH_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_FLOW_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_FLOW,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
- uverbs_free_flow));
+ uverbs_free_flow),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY));
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_WQ,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_RWQ_IND_TBL_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
- UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl),
+ &UVERBS_METHOD(UVERBS_METHOD_RWQ_IND_TBL_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_XRCD_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_XRCD,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
- uverbs_free_xrcd));
+ uverbs_free_xrcd),
+ &UVERBS_METHOD(UVERBS_METHOD_XRCD_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_PD_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
- UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
-
-DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE);
-
-DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
- &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
- &UVERBS_OBJECT(UVERBS_OBJECT_PD),
- &UVERBS_OBJECT(UVERBS_OBJECT_MR),
- &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
- &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_QP),
- &UVERBS_OBJECT(UVERBS_OBJECT_AH),
- &UVERBS_OBJECT(UVERBS_OBJECT_MW),
- &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
- &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
- &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
- &UVERBS_OBJECT(UVERBS_OBJECT_DM),
- &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
-
-const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
- return &uverbs_default_objects;
-}
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd),
+ &UVERBS_METHOD(UVERBS_METHOD_PD_DESTROY));
+
+const struct uapi_definition uverbs_def_obj_intf[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_PD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_XRCD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index a0ffdcf9a51c..309c5e80988d 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -44,11 +44,11 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
if (ret)
return ret;
- return counters->device->destroy_counters(counters);
+ return counters->device->ops.destroy_counters(counters);
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
@@ -61,10 +61,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
* have the ability to remove methods from parse tree once
* such condition is met.
*/
- if (!ib_dev->create_counters)
+ if (!ib_dev->ops.create_counters)
return -EOPNOTSUPP;
- counters = ib_dev->create_counters(ib_dev, attrs);
+ counters = ib_dev->ops.create_counters(ib_dev, attrs);
if (IS_ERR(counters)) {
ret = PTR_ERR(counters);
goto err_create_counters;
@@ -82,7 +82,7 @@ err_create_counters:
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_counters_read_attr read_attr = {};
const struct uverbs_attr *uattr;
@@ -90,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
int ret;
- if (!counters->device->read_counters)
+ if (!counters->device->ops.read_counters)
return -EOPNOTSUPP;
if (!atomic_read(&counters->usecnt))
@@ -109,7 +109,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
if (IS_ERR(read_attr.counters_buff))
return PTR_ERR(read_attr.counters_buff);
- ret = counters->device->read_counters(counters, &read_attr, attrs);
+ ret = counters->device->ops.read_counters(counters, &read_attr, attrs);
if (ret)
return ret;
@@ -149,3 +149,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
+
+const struct uapi_definition uverbs_def_obj_counters[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COUNTERS,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_counters)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 5b5f2052cd52..a59ea89e3f2b 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -58,13 +58,12 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_ucq_object *obj = container_of(
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
typeof(*obj), uobject);
struct ib_device *ib_dev = obj->uobject.context->device;
- struct ib_udata uhw;
int ret;
u64 user_handle;
struct ib_cq_init_attr attr = {};
@@ -72,7 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_uobject *ev_file_uobj;
- if (!ib_dev->create_cq || !ib_dev->destroy_cq)
+ if (!ib_dev->ops.create_cq || !ib_dev->ops.destroy_cq)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.comp_vector, attrs,
@@ -101,7 +100,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
uverbs_uobject_get(ev_file_uobj);
}
- if (attr.comp_vector >= file->device->num_comp_vectors) {
+ if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) {
ret = -EINVAL;
goto err_event_file;
}
@@ -111,10 +110,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
INIT_LIST_HEAD(&obj->comp_list);
INIT_LIST_HEAD(&obj->async_list);
- /* Temporary, only until drivers get the new uverbs_attr_bundle */
- create_udata(attrs, &uhw);
-
- cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw);
+ cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context,
+ &attrs->driver_udata);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_event_file;
@@ -129,7 +126,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
obj->uobject.user_handle = user_handle;
atomic_set(&cq->usecnt, 0);
cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_add(&cq->res);
+ rdma_restrack_uadd(&cq->res);
ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
sizeof(cq->cqe));
@@ -173,7 +170,7 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_UHW());
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj =
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
@@ -207,3 +204,9 @@ DECLARE_UVERBS_NAMED_OBJECT(
&UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
#endif
);
+
+const struct uapi_definition uverbs_def_obj_cq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_CQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_cq)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
new file mode 100644
index 000000000000..5030ec480370
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/opa_addr.h>
+
+/*
+ * This ioctl method allows calling any defined write or write_ex
+ * handler. This essentially replaces the hdr/ex_hdr system with the ioctl
+ * marshalling, and brings the non-ex path into the same marshalling as the ex
+ * path.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct uverbs_api *uapi = attrs->ufile->device->uapi;
+ const struct uverbs_api_write_method *method_elm;
+ u32 cmd;
+ int rc;
+
+ rc = uverbs_get_const(&cmd, attrs, UVERBS_ATTR_WRITE_CMD);
+ if (rc)
+ return rc;
+
+ method_elm = uapi_get_method(uapi, cmd);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+
+ uverbs_fill_udata(attrs, &attrs->ucore, UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_CORE_OUT);
+
+ if (attrs->ucore.inlen < method_elm->req_size ||
+ attrs->ucore.outlen < method_elm->resp_size)
+ return -ENOSPC;
+
+ return method_elm->handler(attrs);
+}
+
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_WRITE_CMD,
+ enum ib_uverbs_write_cmds,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CORE_OUT,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static uint32_t *
+gather_objects_handle(struct ib_uverbs_file *ufile,
+ const struct uverbs_api_object *uapi_object,
+ struct uverbs_attr_bundle *attrs,
+ ssize_t out_len,
+ u64 *total)
+{
+ u64 max_count = out_len / sizeof(u32);
+ struct ib_uobject *obj;
+ u64 count = 0;
+ u32 *handles;
+
+ /* Allocated memory that cannot page out where we gather
+ * all object ids under a spin_lock.
+ */
+ handles = uverbs_zalloc(attrs, out_len);
+ if (IS_ERR(handles))
+ return handles;
+
+ spin_lock_irq(&ufile->uobjects_lock);
+ list_for_each_entry(obj, &ufile->uobjects, list) {
+ u32 obj_id = obj->id;
+
+ if (obj->uapi_object != uapi_object)
+ continue;
+
+ if (count >= max_count)
+ break;
+
+ handles[count] = obj_id;
+ count++;
+ }
+ spin_unlock_irq(&ufile->uobjects_lock);
+
+ *total = count;
+ return handles;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)(
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_api_object *uapi_object;
+ ssize_t out_len;
+ u64 total = 0;
+ u16 object_id;
+ u32 *handles;
+ int ret;
+
+ out_len = uverbs_attr_get_len(attrs, UVERBS_ATTR_INFO_HANDLES_LIST);
+ if (out_len <= 0 || (out_len % sizeof(u32) != 0))
+ return -EINVAL;
+
+ ret = uverbs_get_const(&object_id, attrs, UVERBS_ATTR_INFO_OBJECT_ID);
+ if (ret)
+ return ret;
+
+ uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id);
+ if (!uapi_object)
+ return -EINVAL;
+
+ handles = gather_objects_handle(attrs->ufile, uapi_object, attrs,
+ out_len, &total);
+ if (IS_ERR(handles))
+ return PTR_ERR(handles);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_HANDLES_LIST, handles,
+ sizeof(u32) * total);
+ if (ret)
+ goto err;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_TOTAL_HANDLES, &total,
+ sizeof(total));
+err:
+ return ret;
+}
+
+void copy_port_attr_to_resp(struct ib_port_attr *attr,
+ struct ib_uverbs_query_port_resp *resp,
+ struct ib_device *ib_dev, u8 port_num)
+{
+ resp->state = attr->state;
+ resp->max_mtu = attr->max_mtu;
+ resp->active_mtu = attr->active_mtu;
+ resp->gid_tbl_len = attr->gid_tbl_len;
+ resp->port_cap_flags = make_port_cap_flags(attr);
+ resp->max_msg_sz = attr->max_msg_sz;
+ resp->bad_pkey_cntr = attr->bad_pkey_cntr;
+ resp->qkey_viol_cntr = attr->qkey_viol_cntr;
+ resp->pkey_tbl_len = attr->pkey_tbl_len;
+
+ if (rdma_is_grh_required(ib_dev, port_num))
+ resp->flags |= IB_UVERBS_QPF_GRH_REQUIRED;
+
+ if (rdma_cap_opa_ah(ib_dev, port_num)) {
+ resp->lid = OPA_TO_IB_UCAST_LID(attr->lid);
+ resp->sm_lid = OPA_TO_IB_UCAST_LID(attr->sm_lid);
+ } else {
+ resp->lid = ib_lid_cpu16(attr->lid);
+ resp->sm_lid = ib_lid_cpu16(attr->sm_lid);
+ }
+
+ resp->lmc = attr->lmc;
+ resp->max_vl_num = attr->max_vl_num;
+ resp->sm_sl = attr->sm_sl;
+ resp->subnet_timeout = attr->subnet_timeout;
+ resp->init_type_reply = attr->init_type_reply;
+ resp->active_width = attr->active_width;
+ resp->active_speed = attr->active_speed;
+ resp->phys_state = attr->phys_state;
+ resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_device *ib_dev = attrs->ufile->device->ib_dev;
+ struct ib_port_attr attr = {};
+ struct ib_uverbs_query_port_resp_ex resp = {};
+ int ret;
+ u8 port_num;
+
+ /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
+ if (!ib_dev->ops.query_port)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_get_const(&port_num, attrs,
+ UVERBS_ATTR_QUERY_PORT_PORT_NUM);
+ if (ret)
+ return ret;
+
+ ret = ib_query_port(ib_dev, port_num, &attr);
+ if (ret)
+ return ret;
+
+ copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num);
+ resp.port_cap_flags2 = attr.port_cap_flags2;
+
+ return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP,
+ &resp, sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_INFO_HANDLES,
+ /* Also includes any device specific object ids */
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID,
+ enum uverbs_default_objects, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_TOTAL_HANDLES,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_HANDLES_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)), UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_PORT,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_PORT_PORT_NUM, u8, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(
+ UVERBS_ATTR_QUERY_PORT_RESP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex,
+ reserved),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
+ &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
+ &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT));
+
+const struct uapi_definition uverbs_def_obj_device[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
+ {},
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index edc3ff7733d4..2ef70637bee1 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -43,12 +43,11 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
if (ret)
return ret;
- return dm->device->dealloc_dm(dm);
+ return dm->device->ops.dealloc_dm(dm);
}
-static int
-UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
{
struct ib_dm_alloc_attr attr = {};
struct ib_uobject *uobj =
@@ -58,7 +57,7 @@ UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
struct ib_dm *dm;
int ret;
- if (!ib_dev->alloc_dm)
+ if (!ib_dev->ops.alloc_dm)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.length, attrs,
@@ -71,7 +70,7 @@ UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
if (ret)
return ret;
- dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs);
+ dm = ib_dev->ops.alloc_dm(ib_dev, uobj->context, &attr, attrs);
if (IS_ERR(dm))
return PTR_ERR(dm);
@@ -109,3 +108,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
&UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
&UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
+
+const struct uapi_definition uverbs_def_obj_dm[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_dm)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index cb9486ad5c67..4962b87fa600 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -43,7 +43,7 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
if (ret)
return ret;
- return action->device->destroy_flow_action(action);
+ return action->device->ops.destroy_flow_action(action);
}
static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
@@ -223,7 +223,6 @@ struct ib_flow_action_esp_attr {
#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
static int parse_flow_action_esp(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs,
struct ib_flow_action_esp_attr *esp_attr,
bool is_modify)
@@ -305,7 +304,7 @@ static int parse_flow_action_esp(struct ib_device *ib_dev,
}
static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
@@ -314,15 +313,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
struct ib_flow_action *action;
struct ib_flow_action_esp_attr esp_attr = {};
- if (!ib_dev->create_flow_action_esp)
+ if (!ib_dev->ops.create_flow_action_esp)
return -EOPNOTSUPP;
- ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false);
+ ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false);
if (ret)
return ret;
/* No need to check as this attribute is marked as MANDATORY */
- action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
+ action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr,
+ attrs);
if (IS_ERR(action))
return PTR_ERR(action);
@@ -333,7 +333,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
}
static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
@@ -341,19 +341,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
int ret;
struct ib_flow_action_esp_attr esp_attr = {};
- if (!action->device->modify_flow_action_esp)
+ if (!action->device->ops.modify_flow_action_esp)
return -EOPNOTSUPP;
- ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr,
- true);
+ ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true);
if (ret)
return ret;
if (action->type != IB_FLOW_ACTION_ESP)
return -EINVAL;
- return action->device->modify_flow_action_esp(action, &esp_attr.hdr,
- attrs);
+ return action->device->ops.modify_flow_action_esp(action,
+ &esp_attr.hdr,
+ attrs);
}
static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
@@ -438,3 +438,10 @@ DECLARE_UVERBS_NAMED_OBJECT(
&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+
+const struct uapi_definition uverbs_def_obj_flow_action[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_FLOW_ACTION,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow_action)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index cf02e774303e..4d4be0c2b752 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -39,8 +39,44 @@ static int uverbs_free_mr(struct ib_uobject *uobject,
return ib_dereg_mr((struct ib_mr *)uobject->object);
}
+static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_ADVISE_MR_PD_HANDLE);
+ enum ib_uverbs_advise_mr_advice advice;
+ struct ib_device *ib_dev = pd->device;
+ struct ib_sge *sg_list;
+ int num_sge;
+ u32 flags;
+ int ret;
+
+ /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
+ if (!ib_dev->ops.advise_mr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_get_const(&advice, attrs, UVERBS_ATTR_ADVISE_MR_ADVICE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&flags, attrs, UVERBS_ATTR_ADVISE_MR_FLAGS,
+ IB_UVERBS_ADVISE_MR_FLAG_FLUSH);
+ if (ret)
+ return ret;
+
+ num_sge = uverbs_attr_ptr_get_array_size(
+ attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge));
+ if (num_sge < 0)
+ return num_sge;
+
+ sg_list = uverbs_attr_get_alloced_ptr(attrs,
+ UVERBS_ATTR_ADVISE_MR_SGE_LIST);
+ return ib_dev->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
+ attrs);
+}
+
static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_dm_mr_attr attr = {};
struct ib_uobject *uobj =
@@ -54,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
struct ib_mr *mr;
int ret;
- if (!ib_dev->reg_dm_mr)
+ if (!ib_dev->ops.reg_dm_mr)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET);
@@ -83,7 +119,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
attr.length > dm->length - attr.offset)
return -EINVAL;
- mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs);
+ mr = pd->device->ops.reg_dm_mr(pd, dm, &attr, attrs);
if (IS_ERR(mr))
return PTR_ERR(mr);
@@ -115,6 +151,23 @@ err_dereg:
}
DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_ADVISE_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_ADVISE_MR_ADVICE,
+ enum ib_uverbs_advise_mr_advice,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_ADVISE_MR_FLAGS,
+ enum ib_uverbs_advise_mr_flag,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ADVISE_MR_SGE_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(struct ib_uverbs_sge)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY));
+
+DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_DM_MR_REG,
UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
UVERBS_OBJECT_MR,
@@ -143,7 +196,22 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_MR_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_MR,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
- &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
+ &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
+ &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR));
+
+const struct uapi_definition uverbs_def_obj_mr[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
+ UAPI_DEF_OBJ_NEEDS_FN(dereg_mr)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 86f3fc5e04b4..9ae08e4b78a3 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -8,6 +8,11 @@
#include "rdma_core.h"
#include "uverbs.h"
+static int ib_uverbs_notsupp(struct uverbs_attr_bundle *attrs)
+{
+ return -EOPNOTSUPP;
+}
+
static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
{
void *elm;
@@ -26,6 +31,70 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
return elm;
}
+static void *uapi_add_get_elm(struct uverbs_api *uapi, u32 key,
+ size_t alloc_size, bool *exists)
+{
+ void *elm;
+
+ elm = uapi_add_elm(uapi, key, alloc_size);
+ if (!IS_ERR(elm)) {
+ *exists = false;
+ return elm;
+ }
+
+ if (elm != ERR_PTR(-EEXIST))
+ return elm;
+
+ elm = radix_tree_lookup(&uapi->radix, key);
+ if (WARN_ON(!elm))
+ return ERR_PTR(-EINVAL);
+ *exists = true;
+ return elm;
+}
+
+static int uapi_create_write(struct uverbs_api *uapi,
+ struct ib_device *ibdev,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 *cur_method_key)
+{
+ struct uverbs_api_write_method *method_elm;
+ u32 method_key = obj_key;
+ bool exists;
+
+ if (def->write.is_ex)
+ method_key |= uapi_key_write_ex_method(def->write.command_num);
+ else
+ method_key |= uapi_key_write_method(def->write.command_num);
+
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+
+ if (WARN_ON(exists && (def->write.is_ex != method_elm->is_ex)))
+ return -EINVAL;
+
+ method_elm->is_ex = def->write.is_ex;
+ method_elm->handler = def->func_write;
+ if (def->write.is_ex)
+ method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask &
+ BIT_ULL(def->write.command_num));
+ else
+ method_elm->disabled = !(ibdev->uverbs_cmd_mask &
+ BIT_ULL(def->write.command_num));
+
+ if (!def->write.is_ex && def->func_write) {
+ method_elm->has_udata = def->write.has_udata;
+ method_elm->has_resp = def->write.has_resp;
+ method_elm->req_size = def->write.req_size;
+ method_elm->resp_size = def->write.resp_size;
+ }
+
+ *cur_method_key = method_key;
+ return 0;
+}
+
static int uapi_merge_method(struct uverbs_api *uapi,
struct uverbs_api_object *obj_elm, u32 obj_key,
const struct uverbs_method_def *method,
@@ -34,23 +103,21 @@ static int uapi_merge_method(struct uverbs_api *uapi,
u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
struct uverbs_api_ioctl_method *method_elm;
unsigned int i;
+ bool exists;
if (!method->attrs)
return 0;
- method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm));
- if (IS_ERR(method_elm)) {
- if (method_elm != ERR_PTR(-EEXIST))
- return PTR_ERR(method_elm);
-
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ if (exists) {
/*
* This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
*/
if (WARN_ON(method->handler))
return -EINVAL;
- method_elm = radix_tree_lookup(&uapi->radix, method_key);
- if (WARN_ON(!method_elm))
- return -EINVAL;
} else {
WARN_ON(!method->handler);
rcu_assign_pointer(method_elm->handler, method->handler);
@@ -98,72 +165,183 @@ static int uapi_merge_method(struct uverbs_api *uapi,
return 0;
}
-static int uapi_merge_tree(struct uverbs_api *uapi,
- const struct uverbs_object_tree_def *tree,
- bool is_driver)
+static int uapi_merge_obj_tree(struct uverbs_api *uapi,
+ const struct uverbs_object_def *obj,
+ bool is_driver)
{
- unsigned int i, j;
+ struct uverbs_api_object *obj_elm;
+ unsigned int i;
+ u32 obj_key;
+ bool exists;
int rc;
- if (!tree->objects)
+ obj_key = uapi_key_obj(obj->id);
+ obj_elm = uapi_add_get_elm(uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+
+ if (obj->type_attrs) {
+ if (WARN_ON(obj_elm->type_attrs))
+ return -EINVAL;
+
+ obj_elm->id = obj->id;
+ obj_elm->type_attrs = obj->type_attrs;
+ obj_elm->type_class = obj->type_attrs->type_class;
+ /*
+ * Today drivers are only permitted to use idr_class
+ * types. They cannot use FD types because we currently have
+ * no way to revoke the fops pointer after device
+ * disassociation.
+ */
+ if (WARN_ON(is_driver &&
+ obj->type_attrs->type_class != &uverbs_idr_class))
+ return -EINVAL;
+ }
+
+ if (!obj->methods)
return 0;
- for (i = 0; i != tree->num_objects; i++) {
- const struct uverbs_object_def *obj = (*tree->objects)[i];
- struct uverbs_api_object *obj_elm;
- u32 obj_key;
+ for (i = 0; i != obj->num_methods; i++) {
+ const struct uverbs_method_def *method = (*obj->methods)[i];
- if (!obj)
+ if (!method)
continue;
- obj_key = uapi_key_obj(obj->id);
- obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm));
- if (IS_ERR(obj_elm)) {
- if (obj_elm != ERR_PTR(-EEXIST))
- return PTR_ERR(obj_elm);
+ rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
+ is_driver);
+ if (rc)
+ return rc;
+ }
- /* This occurs when a driver uses ADD_UVERBS_METHODS */
- if (WARN_ON(obj->type_attrs))
- return -EINVAL;
- obj_elm = radix_tree_lookup(&uapi->radix, obj_key);
- if (WARN_ON(!obj_elm))
+ return 0;
+}
+
+static int uapi_disable_elm(struct uverbs_api *uapi,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 method_key)
+{
+ bool exists;
+
+ if (def->scope == UAPI_SCOPE_OBJECT) {
+ struct uverbs_api_object *obj_elm;
+
+ obj_elm = uapi_add_get_elm(
+ uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ obj_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ uapi_key_is_ioctl_method(method_key)) {
+ struct uverbs_api_ioctl_method *method_elm;
+
+ method_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*method_elm), &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ method_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ (uapi_key_is_write_method(method_key) ||
+ uapi_key_is_write_ex_method(method_key))) {
+ struct uverbs_api_write_method *write_elm;
+
+ write_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*write_elm), &exists);
+ if (IS_ERR(write_elm))
+ return PTR_ERR(write_elm);
+ write_elm->disabled = 1;
+ return 0;
+ }
+
+ WARN_ON(true);
+ return -EINVAL;
+}
+
+static int uapi_merge_def(struct uverbs_api *uapi, struct ib_device *ibdev,
+ const struct uapi_definition *def_list,
+ bool is_driver)
+{
+ const struct uapi_definition *def = def_list;
+ u32 cur_obj_key = UVERBS_API_KEY_ERR;
+ u32 cur_method_key = UVERBS_API_KEY_ERR;
+ bool exists;
+ int rc;
+
+ if (!def_list)
+ return 0;
+
+ for (;; def++) {
+ switch ((enum uapi_definition_kind)def->kind) {
+ case UAPI_DEF_CHAIN:
+ rc = uapi_merge_def(uapi, ibdev, def->chain, is_driver);
+ if (rc)
+ return rc;
+ continue;
+
+ case UAPI_DEF_CHAIN_OBJ_TREE:
+ if (WARN_ON(def->object_start.object_id !=
+ def->chain_obj_tree->id))
return -EINVAL;
- } else {
- obj_elm->type_attrs = obj->type_attrs;
- if (obj->type_attrs) {
- obj_elm->type_class =
- obj->type_attrs->type_class;
- /*
- * Today drivers are only permitted to use
- * idr_class types. They cannot use FD types
- * because we currently have no way to revoke
- * the fops pointer after device
- * disassociation.
- */
- if (WARN_ON(is_driver &&
- obj->type_attrs->type_class !=
- &uverbs_idr_class))
- return -EINVAL;
- }
- }
- if (!obj->methods)
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ rc = uapi_merge_obj_tree(uapi, def->chain_obj_tree,
+ is_driver);
+ if (rc)
+ return rc;
continue;
- for (j = 0; j != obj->num_methods; j++) {
- const struct uverbs_method_def *method =
- (*obj->methods)[j];
- if (!method)
+ case UAPI_DEF_END:
+ return 0;
+
+ case UAPI_DEF_IS_SUPPORTED_DEV_FN: {
+ void **ibdev_fn =
+ (void *)(&ibdev->ops) + def->needs_fn_offset;
+
+ if (*ibdev_fn)
continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
- rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
- is_driver);
+ case UAPI_DEF_IS_SUPPORTED_FUNC:
+ if (def->func_is_supported(ibdev))
+ continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
if (rc)
return rc;
+ continue;
+
+ case UAPI_DEF_OBJECT_START: {
+ struct uverbs_api_object *obj_elm;
+
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ obj_elm = uapi_add_get_elm(uapi, cur_obj_key,
+ sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ continue;
}
- }
- return 0;
+ case UAPI_DEF_WRITE:
+ rc = uapi_create_write(
+ uapi, ibdev, def, cur_obj_key, &cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
+ WARN_ON(true);
+ return -EINVAL;
+ }
}
static int
@@ -186,13 +364,16 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
u32 attr_bkey = uapi_bkey_attr(attr_key);
u8 type = elm->spec.type;
- if (uapi_key_attr_to_method(iter.index) !=
- uapi_key_attr_to_method(method_key))
+ if (uapi_key_attr_to_ioctl_method(iter.index) !=
+ uapi_key_attr_to_ioctl_method(method_key))
break;
if (elm->spec.mandatory)
__set_bit(attr_bkey, method_elm->attr_mandatory);
+ if (elm->spec.is_udata)
+ method_elm->has_udata = true;
+
if (type == UVERBS_ATTR_TYPE_IDR ||
type == UVERBS_ATTR_TYPE_FD) {
u8 access = elm->spec.u.obj.access;
@@ -229,9 +410,13 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
static int uapi_finalize(struct uverbs_api *uapi)
{
+ const struct uverbs_api_write_method **data;
+ unsigned long max_write_ex = 0;
+ unsigned long max_write = 0;
struct radix_tree_iter iter;
void __rcu **slot;
int rc;
+ int i;
radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
struct uverbs_api_ioctl_method *method_elm =
@@ -243,29 +428,209 @@ static int uapi_finalize(struct uverbs_api *uapi)
if (rc)
return rc;
}
+
+ if (uapi_key_is_write_method(iter.index))
+ max_write = max(max_write,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ if (uapi_key_is_write_ex_method(iter.index))
+ max_write_ex =
+ max(max_write_ex,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ }
+
+ uapi->notsupp_method.handler = ib_uverbs_notsupp;
+ uapi->num_write = max_write + 1;
+ uapi->num_write_ex = max_write_ex + 1;
+ data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
+ sizeof(*uapi->write_methods), GFP_KERNEL);
+ for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
+ data[i] = &uapi->notsupp_method;
+ uapi->write_methods = data;
+ uapi->write_ex_methods = data + uapi->num_write;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_write_method(iter.index))
+ uapi->write_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
+ if (uapi_key_is_write_ex_method(iter.index))
+ uapi->write_ex_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
}
return 0;
}
-void uverbs_destroy_api(struct uverbs_api *uapi)
+static void uapi_remove_range(struct uverbs_api *uapi, u32 start, u32 last)
{
struct radix_tree_iter iter;
void __rcu **slot;
- if (!uapi)
- return;
-
- radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, start) {
+ if (iter.index > last)
+ return;
kfree(rcu_dereference_protected(*slot, true));
radix_tree_iter_delete(&uapi->radix, &iter, slot);
}
+}
+
+static void uapi_remove_object(struct uverbs_api *uapi, u32 obj_key)
+{
+ uapi_remove_range(uapi, obj_key,
+ obj_key | UVERBS_API_METHOD_KEY_MASK |
+ UVERBS_API_ATTR_KEY_MASK);
+}
+
+static void uapi_remove_method(struct uverbs_api *uapi, u32 method_key)
+{
+ uapi_remove_range(uapi, method_key,
+ method_key | UVERBS_API_ATTR_KEY_MASK);
+}
+
+
+static u32 uapi_get_obj_id(struct uverbs_attr_spec *spec)
+{
+ if (spec->type == UVERBS_ATTR_TYPE_IDR ||
+ spec->type == UVERBS_ATTR_TYPE_FD)
+ return spec->u.obj.obj_type;
+ if (spec->type == UVERBS_ATTR_TYPE_IDRS_ARRAY)
+ return spec->u2.objs_arr.obj_type;
+ return UVERBS_API_KEY_ERR;
+}
+
+static void uapi_key_okay(u32 key)
+{
+ unsigned int count = 0;
+
+ if (uapi_key_is_object(key))
+ count++;
+ if (uapi_key_is_ioctl_method(key))
+ count++;
+ if (uapi_key_is_write_method(key))
+ count++;
+ if (uapi_key_is_write_ex_method(key))
+ count++;
+ if (uapi_key_is_attr(key))
+ count++;
+ WARN(count != 1, "Bad count %d key=%x", count, key);
+}
+
+static void uapi_finalize_disable(struct uverbs_api *uapi)
+{
+ struct radix_tree_iter iter;
+ u32 starting_key = 0;
+ bool scan_again = false;
+ void __rcu **slot;
+
+again:
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, starting_key) {
+ uapi_key_okay(iter.index);
+
+ if (uapi_key_is_object(iter.index)) {
+ struct uverbs_api_object *obj_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (obj_elm->disabled) {
+ /* Have to check all the attrs again */
+ scan_again = true;
+ starting_key = iter.index;
+ uapi_remove_object(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ starting_key = iter.index;
+ uapi_remove_method(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_write_method(iter.index) ||
+ uapi_key_is_write_ex_method(iter.index)) {
+ struct uverbs_api_write_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ kfree(method_elm);
+ radix_tree_iter_delete(&uapi->radix, &iter, slot);
+ }
+ continue;
+ }
+
+ if (uapi_key_is_attr(iter.index)) {
+ struct uverbs_api_attr *attr_elm =
+ rcu_dereference_protected(*slot, true);
+ const struct uverbs_api_object *tmp_obj;
+ u32 obj_key;
+
+ /*
+ * If the method has a mandatory object handle
+ * attribute which relies on an object which is not
+ * present then the entire method is uncallable.
+ */
+ if (!attr_elm->spec.mandatory)
+ continue;
+ obj_key = uapi_get_obj_id(&attr_elm->spec);
+ if (obj_key == UVERBS_API_KEY_ERR)
+ continue;
+ tmp_obj = uapi_get_object(uapi, obj_key);
+ if (IS_ERR(tmp_obj)) {
+ if (PTR_ERR(tmp_obj) == -ENOMSG)
+ continue;
+ } else {
+ if (!tmp_obj->disabled)
+ continue;
+ }
+
+ starting_key = iter.index;
+ uapi_remove_method(
+ uapi,
+ iter.index & (UVERBS_API_OBJ_KEY_MASK |
+ UVERBS_API_METHOD_KEY_MASK));
+ goto again;
+ }
+
+ WARN_ON(false);
+ }
+
+ if (!scan_again)
+ return;
+ scan_again = false;
+ starting_key = 0;
+ goto again;
+}
+
+void uverbs_destroy_api(struct uverbs_api *uapi)
+{
+ if (!uapi)
+ return;
+
+ uapi_remove_range(uapi, 0, U32_MAX);
+ kfree(uapi->write_methods);
kfree(uapi);
}
-struct uverbs_api *uverbs_alloc_api(
- const struct uverbs_object_tree_def *const *driver_specs,
- enum rdma_driver_id driver_id)
+static const struct uapi_definition uverbs_core_api[] = {
+ UAPI_DEF_CHAIN(uverbs_def_obj_counters),
+ UAPI_DEF_CHAIN(uverbs_def_obj_cq),
+ UAPI_DEF_CHAIN(uverbs_def_obj_device),
+ UAPI_DEF_CHAIN(uverbs_def_obj_dm),
+ UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
+ UAPI_DEF_CHAIN(uverbs_def_obj_intf),
+ UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+ UAPI_DEF_CHAIN(uverbs_def_write_intf),
+ {},
+};
+
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev)
{
struct uverbs_api *uapi;
int rc;
@@ -275,18 +640,16 @@ struct uverbs_api *uverbs_alloc_api(
return ERR_PTR(-ENOMEM);
INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
- uapi->driver_id = driver_id;
+ uapi->driver_id = ibdev->driver_id;
- rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false);
+ rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false);
+ if (rc)
+ goto err;
+ rc = uapi_merge_def(uapi, ibdev, ibdev->driver_def, true);
if (rc)
goto err;
- for (; driver_specs && *driver_specs; driver_specs++) {
- rc = uapi_merge_tree(uapi, *driver_specs, true);
- if (rc)
- goto err;
- }
-
+ uapi_finalize_disable(uapi);
rc = uapi_finalize(uapi);
if (rc)
goto err;
@@ -294,8 +657,9 @@ struct uverbs_api *uverbs_alloc_api(
return uapi;
err:
if (rc != -ENOMEM)
- pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
- rc);
+ dev_err(&ibdev->dev,
+ "Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
+ rc);
uverbs_destroy_api(uapi);
return ERR_PTR(rc);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 178899e3ce73..ac011836bb54 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -141,6 +141,10 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
case IB_RATE_100_GBPS: return 40;
case IB_RATE_200_GBPS: return 80;
case IB_RATE_300_GBPS: return 120;
+ case IB_RATE_28_GBPS: return 11;
+ case IB_RATE_50_GBPS: return 20;
+ case IB_RATE_400_GBPS: return 160;
+ case IB_RATE_600_GBPS: return 240;
default: return -1;
}
}
@@ -166,6 +170,10 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
case 40: return IB_RATE_100_GBPS;
case 80: return IB_RATE_200_GBPS;
case 120: return IB_RATE_300_GBPS;
+ case 11: return IB_RATE_28_GBPS;
+ case 20: return IB_RATE_50_GBPS;
+ case 160: return IB_RATE_400_GBPS;
+ case 240: return IB_RATE_600_GBPS;
default: return IB_RATE_PORT_CURRENT;
}
}
@@ -191,6 +199,10 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
case IB_RATE_100_GBPS: return 103125;
case IB_RATE_200_GBPS: return 206250;
case IB_RATE_300_GBPS: return 309375;
+ case IB_RATE_28_GBPS: return 28125;
+ case IB_RATE_50_GBPS: return 53125;
+ case IB_RATE_400_GBPS: return 425000;
+ case IB_RATE_600_GBPS: return 637500;
default: return -1;
}
}
@@ -214,8 +226,8 @@ EXPORT_SYMBOL(rdma_node_get_transport);
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
{
enum rdma_transport_type lt;
- if (device->get_link_layer)
- return device->get_link_layer(device, port_num);
+ if (device->ops.get_link_layer)
+ return device->ops.get_link_layer(device, port_num);
lt = rdma_node_get_transport(device->node_type);
if (lt == RDMA_TRANSPORT_IB)
@@ -243,7 +255,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
struct ib_pd *pd;
int mr_access_flags = 0;
- pd = device->alloc_pd(device, NULL, NULL);
+ pd = device->ops.alloc_pd(device, NULL, NULL);
if (IS_ERR(pd))
return pd;
@@ -265,12 +277,12 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
pd->res.type = RDMA_RESTRACK_PD;
rdma_restrack_set_task(&pd->res, caller);
- rdma_restrack_add(&pd->res);
+ rdma_restrack_kadd(&pd->res);
if (mr_access_flags) {
struct ib_mr *mr;
- mr = pd->device->get_dma_mr(pd, mr_access_flags);
+ mr = pd->device->ops.get_dma_mr(pd, mr_access_flags);
if (IS_ERR(mr)) {
ib_dealloc_pd(pd);
return ERR_CAST(mr);
@@ -307,7 +319,7 @@ void ib_dealloc_pd(struct ib_pd *pd)
int ret;
if (pd->__internal_mr) {
- ret = pd->device->dereg_mr(pd->__internal_mr);
+ ret = pd->device->ops.dereg_mr(pd->__internal_mr);
WARN_ON(ret);
pd->__internal_mr = NULL;
}
@@ -319,7 +331,7 @@ void ib_dealloc_pd(struct ib_pd *pd)
rdma_restrack_del(&pd->res);
/* Making delalloc_pd a void return is a WIP, no driver should return
an error here. */
- ret = pd->device->dealloc_pd(pd);
+ ret = pd->device->ops.dealloc_pd(pd);
WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
}
EXPORT_SYMBOL(ib_dealloc_pd);
@@ -475,14 +487,17 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
struct ib_ah *ah;
- if (!pd->device->create_ah)
+ might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
+
+ if (!pd->device->ops.create_ah)
return ERR_PTR(-EOPNOTSUPP);
- ah = pd->device->create_ah(pd, ah_attr, udata);
+ ah = pd->device->ops.create_ah(pd, ah_attr, flags, udata);
if (!IS_ERR(ah)) {
ah->device = pd->device;
@@ -502,12 +517,14 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
* given address vector.
* @pd: The protection domain associated with the address handle.
* @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
*
* It returns 0 on success and returns appropriate error code on error.
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
-struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+ u32 flags)
{
const struct ib_gid_attr *old_sgid_attr;
struct ib_ah *ah;
@@ -517,7 +534,7 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
if (ret)
return ERR_PTR(ret);
- ah = _rdma_create_ah(pd, ah_attr, NULL);
+ ah = _rdma_create_ah(pd, ah_attr, flags, NULL);
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
return ah;
@@ -557,7 +574,7 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
}
}
- ah = _rdma_create_ah(pd, ah_attr, udata);
+ ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata);
out:
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
@@ -869,7 +886,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
if (ret)
return ERR_PTR(ret);
- ah = rdma_create_ah(pd, &ah_attr);
+ ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
rdma_destroy_ah_attr(&ah_attr);
return ah;
@@ -888,8 +905,8 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
if (ret)
return ret;
- ret = ah->device->modify_ah ?
- ah->device->modify_ah(ah, ah_attr) :
+ ret = ah->device->ops.modify_ah ?
+ ah->device->ops.modify_ah(ah, ah_attr) :
-EOPNOTSUPP;
ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
@@ -902,20 +919,22 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
ah_attr->grh.sgid_attr = NULL;
- return ah->device->query_ah ?
- ah->device->query_ah(ah, ah_attr) :
+ return ah->device->ops.query_ah ?
+ ah->device->ops.query_ah(ah, ah_attr) :
-EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_query_ah);
-int rdma_destroy_ah(struct ib_ah *ah)
+int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
struct ib_pd *pd;
int ret;
+ might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
+
pd = ah->pd;
- ret = ah->device->destroy_ah(ah);
+ ret = ah->device->ops.destroy_ah(ah, flags);
if (!ret) {
atomic_dec(&pd->usecnt);
if (sgid_attr)
@@ -933,10 +952,10 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
{
struct ib_srq *srq;
- if (!pd->device->create_srq)
+ if (!pd->device->ops.create_srq)
return ERR_PTR(-EOPNOTSUPP);
- srq = pd->device->create_srq(pd, srq_init_attr, NULL);
+ srq = pd->device->ops.create_srq(pd, srq_init_attr, NULL);
if (!IS_ERR(srq)) {
srq->device = pd->device;
@@ -965,17 +984,17 @@ int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask)
{
- return srq->device->modify_srq ?
- srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
- -EOPNOTSUPP;
+ return srq->device->ops.modify_srq ?
+ srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask,
+ NULL) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_modify_srq);
int ib_query_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr)
{
- return srq->device->query_srq ?
- srq->device->query_srq(srq, srq_attr) : -EOPNOTSUPP;
+ return srq->device->ops.query_srq ?
+ srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_srq);
@@ -997,7 +1016,7 @@ int ib_destroy_srq(struct ib_srq *srq)
if (srq_type == IB_SRQT_XRC)
xrcd = srq->ext.xrc.xrcd;
- ret = srq->device->destroy_srq(srq);
+ ret = srq->device->ops.destroy_srq(srq);
if (!ret) {
atomic_dec(&pd->usecnt);
if (srq_type == IB_SRQT_XRC)
@@ -1106,7 +1125,7 @@ static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
if (!IS_ERR(qp))
__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
else
- real_qp->device->destroy_qp(real_qp);
+ real_qp->device->ops.destroy_qp(real_qp);
return qp;
}
@@ -1692,10 +1711,10 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
return -EINVAL;
- if (!dev->get_netdev)
+ if (!dev->ops.get_netdev)
return -EOPNOTSUPP;
- netdev = dev->get_netdev(dev, port_num);
+ netdev = dev->ops.get_netdev(dev, port_num);
if (!netdev)
return -ENODEV;
@@ -1753,9 +1772,9 @@ int ib_query_qp(struct ib_qp *qp,
qp_attr->ah_attr.grh.sgid_attr = NULL;
qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
- return qp->device->query_qp ?
- qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
- -EOPNOTSUPP;
+ return qp->device->ops.query_qp ?
+ qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask,
+ qp_init_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_qp);
@@ -1841,7 +1860,7 @@ int ib_destroy_qp(struct ib_qp *qp)
rdma_rw_cleanup_mrs(qp);
rdma_restrack_del(&qp->res);
- ret = qp->device->destroy_qp(qp);
+ ret = qp->device->ops.destroy_qp(qp);
if (!ret) {
if (alt_path_sgid_attr)
rdma_put_gid_attr(alt_path_sgid_attr);
@@ -1879,7 +1898,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
{
struct ib_cq *cq;
- cq = device->create_cq(device, cq_attr, NULL, NULL);
+ cq = device->ops.create_cq(device, cq_attr, NULL, NULL);
if (!IS_ERR(cq)) {
cq->device = device;
@@ -1890,7 +1909,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
atomic_set(&cq->usecnt, 0);
cq->res.type = RDMA_RESTRACK_CQ;
rdma_restrack_set_task(&cq->res, caller);
- rdma_restrack_add(&cq->res);
+ rdma_restrack_kadd(&cq->res);
}
return cq;
@@ -1899,8 +1918,9 @@ EXPORT_SYMBOL(__ib_create_cq);
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
- return cq->device->modify_cq ?
- cq->device->modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP;
+ return cq->device->ops.modify_cq ?
+ cq->device->ops.modify_cq(cq, cq_count,
+ cq_period) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_set_cq_moderation);
@@ -1910,14 +1930,14 @@ int ib_destroy_cq(struct ib_cq *cq)
return -EBUSY;
rdma_restrack_del(&cq->res);
- return cq->device->destroy_cq(cq);
+ return cq->device->ops.destroy_cq(cq);
}
EXPORT_SYMBOL(ib_destroy_cq);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
- return cq->device->resize_cq ?
- cq->device->resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
+ return cq->device->ops.resize_cq ?
+ cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_resize_cq);
@@ -1930,7 +1950,7 @@ int ib_dereg_mr(struct ib_mr *mr)
int ret;
rdma_restrack_del(&mr->res);
- ret = mr->device->dereg_mr(mr);
+ ret = mr->device->ops.dereg_mr(mr);
if (!ret) {
atomic_dec(&pd->usecnt);
if (dm)
@@ -1959,10 +1979,10 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
{
struct ib_mr *mr;
- if (!pd->device->alloc_mr)
+ if (!pd->device->ops.alloc_mr)
return ERR_PTR(-EOPNOTSUPP);
- mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
+ mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg);
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
@@ -1971,7 +1991,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
atomic_inc(&pd->usecnt);
mr->need_inval = false;
mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_add(&mr->res);
+ rdma_restrack_kadd(&mr->res);
}
return mr;
@@ -1986,10 +2006,10 @@ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
{
struct ib_fmr *fmr;
- if (!pd->device->alloc_fmr)
+ if (!pd->device->ops.alloc_fmr)
return ERR_PTR(-EOPNOTSUPP);
- fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
+ fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr);
if (!IS_ERR(fmr)) {
fmr->device = pd->device;
fmr->pd = pd;
@@ -2008,7 +2028,7 @@ int ib_unmap_fmr(struct list_head *fmr_list)
return 0;
fmr = list_entry(fmr_list->next, struct ib_fmr, list);
- return fmr->device->unmap_fmr(fmr_list);
+ return fmr->device->ops.unmap_fmr(fmr_list);
}
EXPORT_SYMBOL(ib_unmap_fmr);
@@ -2018,7 +2038,7 @@ int ib_dealloc_fmr(struct ib_fmr *fmr)
int ret;
pd = fmr->pd;
- ret = fmr->device->dealloc_fmr(fmr);
+ ret = fmr->device->ops.dealloc_fmr(fmr);
if (!ret)
atomic_dec(&pd->usecnt);
@@ -2070,14 +2090,14 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
- if (!qp->device->attach_mcast)
+ if (!qp->device->ops.attach_mcast)
return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
- ret = qp->device->attach_mcast(qp, gid, lid);
+ ret = qp->device->ops.attach_mcast(qp, gid, lid);
if (!ret)
atomic_inc(&qp->usecnt);
return ret;
@@ -2088,14 +2108,14 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
- if (!qp->device->detach_mcast)
+ if (!qp->device->ops.detach_mcast)
return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
- ret = qp->device->detach_mcast(qp, gid, lid);
+ ret = qp->device->ops.detach_mcast(qp, gid, lid);
if (!ret)
atomic_dec(&qp->usecnt);
return ret;
@@ -2106,10 +2126,10 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
{
struct ib_xrcd *xrcd;
- if (!device->alloc_xrcd)
+ if (!device->ops.alloc_xrcd)
return ERR_PTR(-EOPNOTSUPP);
- xrcd = device->alloc_xrcd(device, NULL, NULL);
+ xrcd = device->ops.alloc_xrcd(device, NULL, NULL);
if (!IS_ERR(xrcd)) {
xrcd->device = device;
xrcd->inode = NULL;
@@ -2137,7 +2157,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
return ret;
}
- return xrcd->device->dealloc_xrcd(xrcd);
+ return xrcd->device->ops.dealloc_xrcd(xrcd);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
@@ -2160,10 +2180,10 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
{
struct ib_wq *wq;
- if (!pd->device->create_wq)
+ if (!pd->device->ops.create_wq)
return ERR_PTR(-EOPNOTSUPP);
- wq = pd->device->create_wq(pd, wq_attr, NULL);
+ wq = pd->device->ops.create_wq(pd, wq_attr, NULL);
if (!IS_ERR(wq)) {
wq->event_handler = wq_attr->event_handler;
wq->wq_context = wq_attr->wq_context;
@@ -2193,7 +2213,7 @@ int ib_destroy_wq(struct ib_wq *wq)
if (atomic_read(&wq->usecnt))
return -EBUSY;
- err = wq->device->destroy_wq(wq);
+ err = wq->device->ops.destroy_wq(wq);
if (!err) {
atomic_dec(&pd->usecnt);
atomic_dec(&cq->usecnt);
@@ -2215,10 +2235,10 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
{
int err;
- if (!wq->device->modify_wq)
+ if (!wq->device->ops.modify_wq)
return -EOPNOTSUPP;
- err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
+ err = wq->device->ops.modify_wq(wq, wq_attr, wq_attr_mask, NULL);
return err;
}
EXPORT_SYMBOL(ib_modify_wq);
@@ -2240,12 +2260,12 @@ struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
int i;
u32 table_size;
- if (!device->create_rwq_ind_table)
+ if (!device->ops.create_rwq_ind_table)
return ERR_PTR(-EOPNOTSUPP);
table_size = (1 << init_attr->log_ind_tbl_size);
- rwq_ind_table = device->create_rwq_ind_table(device,
- init_attr, NULL);
+ rwq_ind_table = device->ops.create_rwq_ind_table(device,
+ init_attr, NULL);
if (IS_ERR(rwq_ind_table))
return rwq_ind_table;
@@ -2275,7 +2295,7 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
if (atomic_read(&rwq_ind_table->usecnt))
return -EBUSY;
- err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table);
+ err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table);
if (!err) {
for (i = 0; i < table_size; i++)
atomic_dec(&ind_tbl[i]->usecnt);
@@ -2288,48 +2308,50 @@ EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
- return mr->device->check_mr_status ?
- mr->device->check_mr_status(mr, check_mask, mr_status) : -EOPNOTSUPP;
+ if (!mr->device->ops.check_mr_status)
+ return -EOPNOTSUPP;
+
+ return mr->device->ops.check_mr_status(mr, check_mask, mr_status);
}
EXPORT_SYMBOL(ib_check_mr_status);
int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state)
{
- if (!device->set_vf_link_state)
+ if (!device->ops.set_vf_link_state)
return -EOPNOTSUPP;
- return device->set_vf_link_state(device, vf, port, state);
+ return device->ops.set_vf_link_state(device, vf, port, state);
}
EXPORT_SYMBOL(ib_set_vf_link_state);
int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
struct ifla_vf_info *info)
{
- if (!device->get_vf_config)
+ if (!device->ops.get_vf_config)
return -EOPNOTSUPP;
- return device->get_vf_config(device, vf, port, info);
+ return device->ops.get_vf_config(device, vf, port, info);
}
EXPORT_SYMBOL(ib_get_vf_config);
int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
struct ifla_vf_stats *stats)
{
- if (!device->get_vf_stats)
+ if (!device->ops.get_vf_stats)
return -EOPNOTSUPP;
- return device->get_vf_stats(device, vf, port, stats);
+ return device->ops.get_vf_stats(device, vf, port, stats);
}
EXPORT_SYMBOL(ib_get_vf_stats);
int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
int type)
{
- if (!device->set_vf_guid)
+ if (!device->ops.set_vf_guid)
return -EOPNOTSUPP;
- return device->set_vf_guid(device, vf, port, guid, type);
+ return device->ops.set_vf_guid(device, vf, port, guid, type);
}
EXPORT_SYMBOL(ib_set_vf_guid);
@@ -2361,12 +2383,12 @@ EXPORT_SYMBOL(ib_set_vf_guid);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size)
{
- if (unlikely(!mr->device->map_mr_sg))
+ if (unlikely(!mr->device->ops.map_mr_sg))
return -EOPNOTSUPP;
mr->page_size = page_size;
- return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset);
+ return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset);
}
EXPORT_SYMBOL(ib_map_mr_sg);
@@ -2565,8 +2587,8 @@ static void __ib_drain_rq(struct ib_qp *qp)
*/
void ib_drain_sq(struct ib_qp *qp)
{
- if (qp->device->drain_sq)
- qp->device->drain_sq(qp);
+ if (qp->device->ops.drain_sq)
+ qp->device->ops.drain_sq(qp);
else
__ib_drain_sq(qp);
}
@@ -2593,8 +2615,8 @@ EXPORT_SYMBOL(ib_drain_sq);
*/
void ib_drain_rq(struct ib_qp *qp)
{
- if (qp->device->drain_rq)
- qp->device->drain_rq(qp);
+ if (qp->device->ops.drain_rq)
+ qp->device->ops.drain_rq(qp);
else
__ib_drain_rq(qp);
}
@@ -2632,10 +2654,11 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
struct net_device *netdev;
int rc;
- if (!device->rdma_netdev_get_params)
+ if (!device->ops.rdma_netdev_get_params)
return ERR_PTR(-EOPNOTSUPP);
- rc = device->rdma_netdev_get_params(device, port_num, type, &params);
+ rc = device->ops.rdma_netdev_get_params(device, port_num, type,
+ &params);
if (rc)
return ERR_PTR(rc);
@@ -2657,10 +2680,11 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num,
struct rdma_netdev_alloc_params params;
int rc;
- if (!device->rdma_netdev_get_params)
+ if (!device->ops.rdma_netdev_get_params)
return -EOPNOTSUPP;
- rc = device->rdma_netdev_get_params(device, port_num, type, &params);
+ rc = device->ops.rdma_netdev_get_params(device, port_num, type,
+ &params);
if (rc)
return rc;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 54fdd4cf5288..1e2515e2eb62 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -647,13 +647,14 @@ fail:
}
/* Address Handles */
-int bnxt_re_destroy_ah(struct ib_ah *ib_ah)
+int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
{
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
struct bnxt_re_dev *rdev = ah->rdev;
int rc;
- rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah);
+ rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah,
+ !(flags & RDMA_DESTROY_AH_SLEEPABLE));
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH");
return rc;
@@ -664,6 +665,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah)
struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
@@ -698,7 +700,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
ah->qplib_ah.flow_label = grh->flow_label;
ah->qplib_ah.hop_limit = grh->hop_limit;
ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr);
- if (ib_pd->uobject &&
+ if (udata &&
!rdma_is_multicast_addr((struct in6_addr *)
grh->dgid.raw) &&
!rdma_link_local_addr((struct in6_addr *)
@@ -722,14 +724,15 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
}
memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
- rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah,
+ !(flags & RDMA_CREATE_AH_SLEEPABLE));
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH");
goto fail;
}
/* Write AVID to shared page. */
- if (ib_pd->uobject) {
+ if (udata) {
struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
struct bnxt_re_ucontext *uctx;
unsigned long flag;
@@ -818,7 +821,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
rc = bnxt_qplib_destroy_ah(&rdev->qplib_res,
- &rdev->sqp_ah->qplib_ah);
+ &rdev->sqp_ah->qplib_ah, false);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to destroy HW AH for shadow QP");
@@ -958,7 +961,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
/* Have DMAC same as SMAC */
ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr);
- rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to allocate HW AH for Shadow QP");
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index aa33e7b82c84..c4af72604b4f 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -169,10 +169,11 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
int bnxt_re_dealloc_pd(struct ib_pd *pd);
struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata);
int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-int bnxt_re_destroy_ah(struct ib_ah *ah);
+int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 77f095e5fbe3..e7a997f2a537 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -568,6 +568,50 @@ static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
ib_unregister_device(&rdev->ibdev);
}
+static const struct ib_device_ops bnxt_re_dev_ops = {
+ .add_gid = bnxt_re_add_gid,
+ .alloc_hw_stats = bnxt_re_ib_alloc_hw_stats,
+ .alloc_mr = bnxt_re_alloc_mr,
+ .alloc_pd = bnxt_re_alloc_pd,
+ .alloc_ucontext = bnxt_re_alloc_ucontext,
+ .create_ah = bnxt_re_create_ah,
+ .create_cq = bnxt_re_create_cq,
+ .create_qp = bnxt_re_create_qp,
+ .create_srq = bnxt_re_create_srq,
+ .dealloc_pd = bnxt_re_dealloc_pd,
+ .dealloc_ucontext = bnxt_re_dealloc_ucontext,
+ .del_gid = bnxt_re_del_gid,
+ .dereg_mr = bnxt_re_dereg_mr,
+ .destroy_ah = bnxt_re_destroy_ah,
+ .destroy_cq = bnxt_re_destroy_cq,
+ .destroy_qp = bnxt_re_destroy_qp,
+ .destroy_srq = bnxt_re_destroy_srq,
+ .get_dev_fw_str = bnxt_re_query_fw_str,
+ .get_dma_mr = bnxt_re_get_dma_mr,
+ .get_hw_stats = bnxt_re_ib_get_hw_stats,
+ .get_link_layer = bnxt_re_get_link_layer,
+ .get_netdev = bnxt_re_get_netdev,
+ .get_port_immutable = bnxt_re_get_port_immutable,
+ .map_mr_sg = bnxt_re_map_mr_sg,
+ .mmap = bnxt_re_mmap,
+ .modify_ah = bnxt_re_modify_ah,
+ .modify_device = bnxt_re_modify_device,
+ .modify_qp = bnxt_re_modify_qp,
+ .modify_srq = bnxt_re_modify_srq,
+ .poll_cq = bnxt_re_poll_cq,
+ .post_recv = bnxt_re_post_recv,
+ .post_send = bnxt_re_post_send,
+ .post_srq_recv = bnxt_re_post_srq_recv,
+ .query_ah = bnxt_re_query_ah,
+ .query_device = bnxt_re_query_device,
+ .query_pkey = bnxt_re_query_pkey,
+ .query_port = bnxt_re_query_port,
+ .query_qp = bnxt_re_query_qp,
+ .query_srq = bnxt_re_query_srq,
+ .reg_user_mr = bnxt_re_reg_user_mr,
+ .req_notify_cq = bnxt_re_req_notify_cq,
+};
+
static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
{
struct ib_device *ibdev = &rdev->ibdev;
@@ -614,60 +658,10 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
(1ull << IB_USER_VERBS_CMD_DESTROY_AH);
/* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */
- /* Kernel verbs */
- ibdev->query_device = bnxt_re_query_device;
- ibdev->modify_device = bnxt_re_modify_device;
-
- ibdev->query_port = bnxt_re_query_port;
- ibdev->get_port_immutable = bnxt_re_get_port_immutable;
- ibdev->get_dev_fw_str = bnxt_re_query_fw_str;
- ibdev->query_pkey = bnxt_re_query_pkey;
- ibdev->get_netdev = bnxt_re_get_netdev;
- ibdev->add_gid = bnxt_re_add_gid;
- ibdev->del_gid = bnxt_re_del_gid;
- ibdev->get_link_layer = bnxt_re_get_link_layer;
-
- ibdev->alloc_pd = bnxt_re_alloc_pd;
- ibdev->dealloc_pd = bnxt_re_dealloc_pd;
-
- ibdev->create_ah = bnxt_re_create_ah;
- ibdev->modify_ah = bnxt_re_modify_ah;
- ibdev->query_ah = bnxt_re_query_ah;
- ibdev->destroy_ah = bnxt_re_destroy_ah;
-
- ibdev->create_srq = bnxt_re_create_srq;
- ibdev->modify_srq = bnxt_re_modify_srq;
- ibdev->query_srq = bnxt_re_query_srq;
- ibdev->destroy_srq = bnxt_re_destroy_srq;
- ibdev->post_srq_recv = bnxt_re_post_srq_recv;
-
- ibdev->create_qp = bnxt_re_create_qp;
- ibdev->modify_qp = bnxt_re_modify_qp;
- ibdev->query_qp = bnxt_re_query_qp;
- ibdev->destroy_qp = bnxt_re_destroy_qp;
-
- ibdev->post_send = bnxt_re_post_send;
- ibdev->post_recv = bnxt_re_post_recv;
-
- ibdev->create_cq = bnxt_re_create_cq;
- ibdev->destroy_cq = bnxt_re_destroy_cq;
- ibdev->poll_cq = bnxt_re_poll_cq;
- ibdev->req_notify_cq = bnxt_re_req_notify_cq;
-
- ibdev->get_dma_mr = bnxt_re_get_dma_mr;
- ibdev->dereg_mr = bnxt_re_dereg_mr;
- ibdev->alloc_mr = bnxt_re_alloc_mr;
- ibdev->map_mr_sg = bnxt_re_map_mr_sg;
-
- ibdev->reg_user_mr = bnxt_re_reg_user_mr;
- ibdev->alloc_ucontext = bnxt_re_alloc_ucontext;
- ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext;
- ibdev->mmap = bnxt_re_mmap;
- ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
- ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
+ ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
return ib_register_device(ibdev, "bnxt_re%d", NULL);
}
@@ -1203,6 +1197,35 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
return 0;
}
+static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ver_get_output resp = {0};
+ struct hwrm_ver_get_input req = {0};
+ struct bnxt_fw_msg fw_msg;
+ int rc = 0;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
+ HWRM_VER_GET, -1, -1);
+ req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
+ req.hwrm_intf_min = HWRM_VERSION_MINOR;
+ req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to query HW version, rc = 0x%x", rc);
+ return;
+ }
+ rdev->qplib_ctx.hwrm_intf_ver =
+ (u64)resp.hwrm_intf_major << 48 |
+ (u64)resp.hwrm_intf_minor << 32 |
+ (u64)resp.hwrm_intf_build << 16 |
+ resp.hwrm_intf_patch;
+}
+
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
{
int rc;
@@ -1285,10 +1308,13 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
+ bnxt_re_query_hwrm_intf_version(rdev);
+
/* Establish RCFW Communication Channel to initialize the context
* memory for the function and all child VFs
*/
rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
+ &rdev->qplib_ctx,
BNXT_RE_MAX_QPC_COUNT);
if (rc) {
pr_err("Failed to allocate RCFW Channel: %#x\n", rc);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index be4e33e9f962..326805461265 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -58,7 +58,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
u16 cbit;
int rc;
- cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ cbit = cookie % rcfw->cmdq_depth;
rc = wait_event_timeout(rcfw->waitq,
!test_bit(cbit, rcfw->cmdq_bitmap),
msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
@@ -70,7 +70,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
u16 cbit;
- cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ cbit = cookie % rcfw->cmdq_depth;
if (!test_bit(cbit, rcfw->cmdq_bitmap))
goto done;
do {
@@ -86,6 +86,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
{
struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+ u32 cmdq_depth = rcfw->cmdq_depth;
struct bnxt_qplib_crsq *crsqe;
u32 sw_prod, cmdq_prod;
unsigned long flags;
@@ -124,7 +125,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
- cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ cbit = cookie % rcfw->cmdq_depth;
if (is_block)
cookie |= RCFW_CMD_IS_BLOCKING;
@@ -153,7 +154,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
do {
/* Locate the next cmdq slot */
sw_prod = HWQ_CMP(cmdq->prod, cmdq);
- cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
+ cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)]
+ [get_cmdq_idx(sw_prod, cmdq_depth)];
if (!cmdqe) {
dev_err(&rcfw->pdev->dev,
"RCFW request failed with no cmdqe!\n");
@@ -326,7 +328,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING;
cookie &= RCFW_MAX_COOKIE_VALUE;
- cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ cbit = cookie % rcfw->cmdq_depth;
crsqe = &rcfw->crsqe_tbl[cbit];
if (crsqe->resp &&
crsqe->resp->cookie == mcookie) {
@@ -555,6 +557,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx,
int qp_tbl_sz)
{
rcfw->pdev = pdev;
@@ -567,11 +570,18 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
"HW channel CREQ allocation failed\n");
goto fail;
}
- rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
- if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0,
- &rcfw->cmdq.max_elements,
- BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
- HWQ_TYPE_CTX)) {
+ if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK)
+ rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256;
+ else
+ rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192;
+
+ rcfw->cmdq.max_elements = rcfw->cmdq_depth;
+ if (bnxt_qplib_alloc_init_hwq
+ (rcfw->pdev, &rcfw->cmdq, NULL, 0,
+ &rcfw->cmdq.max_elements,
+ BNXT_QPLIB_CMDQE_UNITS, 0,
+ bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth),
+ HWQ_TYPE_CTX)) {
dev_err(&rcfw->pdev->dev,
"HW channel CMDQ allocation failed\n");
goto fail;
@@ -674,7 +684,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
/* General */
rcfw->seq_num = 0;
set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags);
- bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
+ bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth *
sizeof(unsigned long));
rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
if (!rcfw->cmdq_bitmap)
@@ -734,7 +744,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]);
init.cmdq_size_cmdq_lvl = cpu_to_le16(
- ((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) &
+ ((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) &
CMDQ_INIT_CMDQ_SIZE_MASK) |
((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) &
CMDQ_INIT_CMDQ_LVL_MASK));
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 9a8687dc0a79..be0ef0e8c53e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -63,32 +63,60 @@
#define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */
+/* Cmdq contains a fix number of a 16-Byte slots */
+struct bnxt_qplib_cmdqe {
+ u8 data[16];
+};
+
/* CMDQ elements */
-#define BNXT_QPLIB_CMDQE_MAX_CNT 256
+#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256
+#define BNXT_QPLIB_CMDQE_MAX_CNT_8192 8192
#define BNXT_QPLIB_CMDQE_UNITS sizeof(struct bnxt_qplib_cmdqe)
-#define BNXT_QPLIB_CMDQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS)
+#define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS)
+
+static inline u32 bnxt_qplib_cmdqe_npages(u32 depth)
+{
+ u32 npages;
+
+ npages = BNXT_QPLIB_CMDQE_BYTES(depth) / PAGE_SIZE;
+ if (BNXT_QPLIB_CMDQE_BYTES(depth) % PAGE_SIZE)
+ npages++;
+ return npages;
+}
+
+static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth)
+{
+ return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE);
+}
+
+static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth)
+{
+ return (bnxt_qplib_cmdqe_page_size(depth) /
+ BNXT_QPLIB_CMDQE_UNITS);
+}
-#define MAX_CMDQ_IDX (BNXT_QPLIB_CMDQE_MAX_CNT - 1)
-#define MAX_CMDQ_IDX_PER_PG (BNXT_QPLIB_CMDQE_CNT_PER_PG - 1)
+#define MAX_CMDQ_IDX(depth) ((depth) - 1)
+
+static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth)
+{
+ return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1);
+}
-#define RCFW_MAX_OUTSTANDING_CMD BNXT_QPLIB_CMDQE_MAX_CNT
#define RCFW_MAX_COOKIE_VALUE 0x7FFF
#define RCFW_CMD_IS_BLOCKING 0x8000
#define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20
-/* Cmdq contains a fix number of a 16-Byte slots */
-struct bnxt_qplib_cmdqe {
- u8 data[16];
-};
+#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL
-static inline u32 get_cmdq_pg(u32 val)
+static inline u32 get_cmdq_pg(u32 val, u32 depth)
{
- return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG;
+ return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) /
+ (bnxt_qplib_cmdqe_cnt_per_pg(depth));
}
-static inline u32 get_cmdq_idx(u32 val)
+static inline u32 get_cmdq_idx(u32 val, u32 depth)
{
- return val & MAX_CMDQ_IDX_PER_PG;
+ return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth));
}
/* Crsq buf is 1024-Byte */
@@ -194,11 +222,14 @@ struct bnxt_qplib_rcfw {
struct bnxt_qplib_qp_node *qp_tbl;
u64 oos_prev;
u32 init_oos_stats;
+ u32 cmdq_depth;
};
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
- struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz);
+ struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx,
+ int qp_tbl_sz);
void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 2e5c052da5a9..1e80aa7bbcce 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -177,6 +177,7 @@ struct bnxt_qplib_ctx {
struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ];
struct bnxt_qplib_stats stats;
struct bnxt_qplib_vf_res vf_res;
+ u64 hwrm_intf_ver;
};
struct bnxt_qplib_res {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 5216b5f844cc..be03b5738f71 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -488,7 +488,8 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
}
/* AH */
-int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_create_ah req;
@@ -522,7 +523,7 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
req.dest_mac[2] = cpu_to_le16(temp16[2]);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- NULL, 1);
+ NULL, block);
if (rc)
return rc;
@@ -530,7 +531,8 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
return 0;
}
-int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_destroy_ah req;
@@ -544,7 +546,7 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
req.ah_cid = cpu_to_le32(ah->id);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- NULL, 1);
+ NULL, block);
if (rc)
return rc;
return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 8079d7f5a008..39454b3f738d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -241,8 +241,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx);
-int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
-int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block);
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block);
int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
struct bnxt_qplib_mrw *mrw);
int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index dcb4bba522ba..df4f7a3f043d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -291,13 +291,12 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
if (!wq->sq)
goto err3;
- wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+ wq->queue = dma_zalloc_coherent(&(rdev_p->rnic_info.pdev->dev),
depth * sizeof(union t3_wr),
&(wq->dma_addr), GFP_KERNEL);
if (!wq->queue)
goto err4;
- memset(wq->queue, 0, depth * sizeof(union t3_wr));
dma_unmap_addr_set(wq, mapping, wq->dma_addr);
wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
if (!kernel_domain)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index ebbec02cebe0..b34b1a1bd94b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -836,7 +836,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
* Kernel users need more wq space for fastreg WRs which can take
* 2 WR fragments.
*/
- ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
+ ucontext = udata ? to_iwch_ucontext(pd->uobject->context) : NULL;
if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
wqsize = roundup_pow_of_two(rqsize +
roundup_pow_of_two(attrs->cap.max_send_wr * 2));
@@ -1317,6 +1317,39 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
}
+static const struct ib_device_ops iwch_dev_ops = {
+ .alloc_hw_stats = iwch_alloc_stats,
+ .alloc_mr = iwch_alloc_mr,
+ .alloc_mw = iwch_alloc_mw,
+ .alloc_pd = iwch_allocate_pd,
+ .alloc_ucontext = iwch_alloc_ucontext,
+ .create_cq = iwch_create_cq,
+ .create_qp = iwch_create_qp,
+ .dealloc_mw = iwch_dealloc_mw,
+ .dealloc_pd = iwch_deallocate_pd,
+ .dealloc_ucontext = iwch_dealloc_ucontext,
+ .dereg_mr = iwch_dereg_mr,
+ .destroy_cq = iwch_destroy_cq,
+ .destroy_qp = iwch_destroy_qp,
+ .get_dev_fw_str = get_dev_fw_ver_str,
+ .get_dma_mr = iwch_get_dma_mr,
+ .get_hw_stats = iwch_get_mib,
+ .get_port_immutable = iwch_port_immutable,
+ .map_mr_sg = iwch_map_mr_sg,
+ .mmap = iwch_mmap,
+ .modify_qp = iwch_ib_modify_qp,
+ .poll_cq = iwch_poll_cq,
+ .post_recv = iwch_post_receive,
+ .post_send = iwch_post_send,
+ .query_device = iwch_query_device,
+ .query_gid = iwch_query_gid,
+ .query_pkey = iwch_query_pkey,
+ .query_port = iwch_query_port,
+ .reg_user_mr = iwch_reg_user_mr,
+ .req_notify_cq = iwch_arm_cq,
+ .resize_cq = iwch_resize_cq,
+};
+
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
@@ -1356,37 +1389,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
dev->ibdev.num_comp_vectors = 1;
dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
- dev->ibdev.query_device = iwch_query_device;
- dev->ibdev.query_port = iwch_query_port;
- dev->ibdev.query_pkey = iwch_query_pkey;
- dev->ibdev.query_gid = iwch_query_gid;
- dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
- dev->ibdev.mmap = iwch_mmap;
- dev->ibdev.alloc_pd = iwch_allocate_pd;
- dev->ibdev.dealloc_pd = iwch_deallocate_pd;
- dev->ibdev.create_qp = iwch_create_qp;
- dev->ibdev.modify_qp = iwch_ib_modify_qp;
- dev->ibdev.destroy_qp = iwch_destroy_qp;
- dev->ibdev.create_cq = iwch_create_cq;
- dev->ibdev.destroy_cq = iwch_destroy_cq;
- dev->ibdev.resize_cq = iwch_resize_cq;
- dev->ibdev.poll_cq = iwch_poll_cq;
- dev->ibdev.get_dma_mr = iwch_get_dma_mr;
- dev->ibdev.reg_user_mr = iwch_reg_user_mr;
- dev->ibdev.dereg_mr = iwch_dereg_mr;
- dev->ibdev.alloc_mw = iwch_alloc_mw;
- dev->ibdev.dealloc_mw = iwch_dealloc_mw;
- dev->ibdev.alloc_mr = iwch_alloc_mr;
- dev->ibdev.map_mr_sg = iwch_map_mr_sg;
- dev->ibdev.req_notify_cq = iwch_arm_cq;
- dev->ibdev.post_send = iwch_post_send;
- dev->ibdev.post_recv = iwch_post_receive;
- dev->ibdev.alloc_hw_stats = iwch_alloc_stats;
- dev->ibdev.get_hw_stats = iwch_get_mib;
dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
- dev->ibdev.get_port_immutable = iwch_port_immutable;
- dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
@@ -1405,6 +1408,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
+ ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
if (ret)
kfree(dev->ibdev.iwcm);
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 97ecc8c684f5..8221813219e5 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2793,7 +2793,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
break;
case MPA_REQ_SENT:
(void)stop_ep_timer(ep);
- if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
+ if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
+ (mpa_rev == 2 && ep->tried_with_mpa_v1))
connect_reply_upcall(ep, -ECONNRESET);
else {
/*
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index cbb3c0ddd990..586b0c37481f 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -531,6 +531,44 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
c4iw_restrack_funcs[res->type](msg, res) : 0;
}
+static const struct ib_device_ops c4iw_dev_ops = {
+ .alloc_hw_stats = c4iw_alloc_stats,
+ .alloc_mr = c4iw_alloc_mr,
+ .alloc_mw = c4iw_alloc_mw,
+ .alloc_pd = c4iw_allocate_pd,
+ .alloc_ucontext = c4iw_alloc_ucontext,
+ .create_cq = c4iw_create_cq,
+ .create_qp = c4iw_create_qp,
+ .create_srq = c4iw_create_srq,
+ .dealloc_mw = c4iw_dealloc_mw,
+ .dealloc_pd = c4iw_deallocate_pd,
+ .dealloc_ucontext = c4iw_dealloc_ucontext,
+ .dereg_mr = c4iw_dereg_mr,
+ .destroy_cq = c4iw_destroy_cq,
+ .destroy_qp = c4iw_destroy_qp,
+ .destroy_srq = c4iw_destroy_srq,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = c4iw_get_dma_mr,
+ .get_hw_stats = c4iw_get_mib,
+ .get_netdev = get_netdev,
+ .get_port_immutable = c4iw_port_immutable,
+ .map_mr_sg = c4iw_map_mr_sg,
+ .mmap = c4iw_mmap,
+ .modify_qp = c4iw_ib_modify_qp,
+ .modify_srq = c4iw_modify_srq,
+ .poll_cq = c4iw_poll_cq,
+ .post_recv = c4iw_post_receive,
+ .post_send = c4iw_post_send,
+ .post_srq_recv = c4iw_post_srq_recv,
+ .query_device = c4iw_query_device,
+ .query_gid = c4iw_query_gid,
+ .query_pkey = c4iw_query_pkey,
+ .query_port = c4iw_query_port,
+ .query_qp = c4iw_ib_query_qp,
+ .reg_user_mr = c4iw_reg_user_mr,
+ .req_notify_cq = c4iw_arm_cq,
+};
+
void c4iw_register_device(struct work_struct *work)
{
int ret;
@@ -573,42 +611,7 @@ void c4iw_register_device(struct work_struct *work)
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
- dev->ibdev.query_device = c4iw_query_device;
- dev->ibdev.query_port = c4iw_query_port;
- dev->ibdev.query_pkey = c4iw_query_pkey;
- dev->ibdev.query_gid = c4iw_query_gid;
- dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext;
- dev->ibdev.mmap = c4iw_mmap;
- dev->ibdev.alloc_pd = c4iw_allocate_pd;
- dev->ibdev.dealloc_pd = c4iw_deallocate_pd;
- dev->ibdev.create_qp = c4iw_create_qp;
- dev->ibdev.modify_qp = c4iw_ib_modify_qp;
- dev->ibdev.query_qp = c4iw_ib_query_qp;
- dev->ibdev.destroy_qp = c4iw_destroy_qp;
- dev->ibdev.create_srq = c4iw_create_srq;
- dev->ibdev.modify_srq = c4iw_modify_srq;
- dev->ibdev.destroy_srq = c4iw_destroy_srq;
- dev->ibdev.create_cq = c4iw_create_cq;
- dev->ibdev.destroy_cq = c4iw_destroy_cq;
- dev->ibdev.poll_cq = c4iw_poll_cq;
- dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
- dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
- dev->ibdev.dereg_mr = c4iw_dereg_mr;
- dev->ibdev.alloc_mw = c4iw_alloc_mw;
- dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
- dev->ibdev.alloc_mr = c4iw_alloc_mr;
- dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
- dev->ibdev.req_notify_cq = c4iw_arm_cq;
- dev->ibdev.post_send = c4iw_post_send;
- dev->ibdev.post_recv = c4iw_post_receive;
- dev->ibdev.post_srq_recv = c4iw_post_srq_recv;
- dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
- dev->ibdev.get_hw_stats = c4iw_get_mib;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
- dev->ibdev.get_port_immutable = c4iw_port_immutable;
- dev->ibdev.get_dev_fw_str = get_dev_fw_str;
- dev->ibdev.get_netdev = get_netdev;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm) {
@@ -630,6 +633,7 @@ void c4iw_register_device(struct work_struct *work)
rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
+ ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
if (ret)
goto err_kfree_iwcm;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 13478f3b7057..981ff5cfb5d1 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2163,7 +2163,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
if (sqsize < 8)
sqsize = 8;
- ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+ ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
if (!qhp)
@@ -2564,13 +2564,12 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
T4_RQT_ENTRY_SHIFT;
- wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+ wq->queue = dma_zalloc_coherent(&rdev->lldi.pdev->dev,
wq->memsize, &wq->dma_addr,
GFP_KERNEL);
if (!wq->queue)
goto err_free_rqtpool;
- memset(wq->queue, 0, wq->memsize);
dma_unmap_addr_set(wq, mapping, wq->dma_addr);
wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
@@ -2713,7 +2712,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
rqsize = attrs->attr.max_wr + 1;
rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
- ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+ ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
srq = kzalloc(sizeof(*srq), GFP_KERNEL);
if (!srq)
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index ff790390c91a..3ce9dc8c3463 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -34,6 +34,7 @@ hfi1-y := \
ruc.o \
sdma.o \
sysfs.o \
+ tid_rdma.o \
trace.o \
uc.o \
ud.o \
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 7e6d70936c63..b443642eac02 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
static void log_physical_state(struct hfi1_pportdata *ppd, u32 state);
static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
@@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
break;
ppd->port_error_action = 0;
- ppd->host_link_state = HLS_DN_POLL;
if (quick_linkup) {
/* quick linkup does not go into polling */
ret = do_quick_linkup(dd);
} else {
ret1 = set_physical_link_state(dd, PLS_POLLING);
+ if (!ret1)
+ ret1 = wait_phys_link_out_of_offline(ppd,
+ 3000);
if (ret1 != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to transition to Polling link state, return 0x%x\n",
@@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret = -EINVAL;
}
}
+
+ /*
+ * Change the host link state after requesting DC8051 to
+ * change its physical state so that we can ignore any
+ * interrupt with stale LNI(XX) error, which will not be
+ * cleared until DC8051 transitions to Polling state.
+ */
+ ppd->host_link_state = HLS_DN_POLL;
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
/*
@@ -12928,6 +12940,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
return read_state;
}
+/*
+ * wait_phys_link_out_of_offline - wait for any out of offline state
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for any out of offline physical link
+ * state change to occur.
+ * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if ((read_state & 0xF0) != PLS_OFFLINE)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n",
+ read_state, msecs);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ log_state_transition(ppd, read_state);
+ return read_state;
+}
+
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index c6163a347e93..c0800ea5a3f8 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -935,6 +935,10 @@
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018)
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32
+#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull
#define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028)
#define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull
#define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020)
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 7108d4d92259..40d3cfb58bd1 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -136,18 +136,21 @@
HFI1_CAP_ALLOW_PERM_JKEY | \
HFI1_CAP_STATIC_RATE_CTRL | \
HFI1_CAP_PRINT_UNIMPL | \
- HFI1_CAP_TID_UNMAP)
+ HFI1_CAP_TID_UNMAP | \
+ HFI1_CAP_OPFN)
/*
* A set of capability bits that are "global" and are not allowed to be
* set in the user bitmask.
*/
#define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \
- HFI1_CAP_USE_SDMA_HEAD | \
- HFI1_CAP_EXTENDED_PSN | \
- HFI1_CAP_PRINT_UNIMPL | \
- HFI1_CAP_NO_INTEGRITY | \
- HFI1_CAP_PKEY_CHECK) << \
- HFI1_CAP_USER_SHIFT)
+ HFI1_CAP_USE_SDMA_HEAD | \
+ HFI1_CAP_EXTENDED_PSN | \
+ HFI1_CAP_PRINT_UNIMPL | \
+ HFI1_CAP_NO_INTEGRITY | \
+ HFI1_CAP_PKEY_CHECK | \
+ HFI1_CAP_TID_RDMA | \
+ HFI1_CAP_OPFN) << \
+ HFI1_CAP_USER_SHIFT)
/*
* Set of capabilities that need to be enabled for kernel context in
* order to be allowed for user contexts, as well.
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 9f992ae36c89..0a557795563c 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -407,6 +407,54 @@ DEBUGFS_SEQ_FILE_OPS(rcds);
DEBUGFS_SEQ_FILE_OPEN(rcds)
DEBUGFS_FILE_OPS(rcds);
+static void *_pios_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd;
+ struct hfi1_devdata *dd;
+
+ ibd = (struct hfi1_ibdev *)s->private;
+ dd = dd_from_dev(ibd);
+ if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+ return NULL;
+ return pos;
+}
+
+static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ ++*pos;
+ if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+ return NULL;
+ return pos;
+}
+
+static void _pios_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _pios_seq_show(struct seq_file *s, void *v)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct send_context_info *sci;
+ loff_t *spos = v;
+ loff_t i = *spos;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sc_lock, flags);
+ sci = &dd->send_contexts[i];
+ if (sci && sci->type != SC_USER && sci->allocated && sci->sc)
+ seqfile_dump_sci(s, i, sci);
+ spin_unlock_irqrestore(&dd->sc_lock, flags);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(pios);
+DEBUGFS_SEQ_FILE_OPEN(pios)
+DEBUGFS_FILE_OPS(pios);
+
/* read the per-device counters */
static ssize_t dev_counters_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -1143,6 +1191,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd);
+ DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index a41f85558312..a8ad70730203 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -430,40 +430,60 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = {
[HFI1_PKT_TYPE_16B] = &return_cnp_16B
};
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp)
+/**
+ * hfi1_process_ecn_slowpath - Process FECN or BECN bits
+ * @qp: The packet's destination QP
+ * @pkt: The packet itself.
+ * @prescan: Is the caller the RXQ prescan
+ *
+ * Process the packet's FECN or BECN bits. By now, the packet
+ * has already been evaluated whether processing of those bit should
+ * be done.
+ * The significance of the @prescan argument is that if the caller
+ * is the RXQ prescan, a CNP will be send out instead of waiting for the
+ * normal packet processing to send an ACK with BECN set (or a CNP).
+ */
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool prescan)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = pkt->grh;
- u32 rqpn = 0, bth1;
+ u32 rqpn = 0;
u16 pkey;
u32 rlid, slid, dlid = 0;
- u8 hdr_type, sc, svc_type;
- bool is_mcast = false;
+ u8 hdr_type, sc, svc_type, opcode;
+ bool is_mcast = false, ignore_fecn = false, do_cnp = false,
+ fecn, becn;
/* can be called from prescan */
if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
- is_mcast = hfi1_is_16B_mcast(dlid);
pkey = hfi1_16B_get_pkey(pkt->hdr);
sc = hfi1_16B_get_sc(pkt->hdr);
dlid = hfi1_16B_get_dlid(pkt->hdr);
slid = hfi1_16B_get_slid(pkt->hdr);
+ is_mcast = hfi1_is_16B_mcast(dlid);
+ opcode = ib_bth_get_opcode(ohdr);
hdr_type = HFI1_PKT_TYPE_16B;
+ fecn = hfi1_16B_get_fecn(pkt->hdr);
+ becn = hfi1_16B_get_becn(pkt->hdr);
} else {
- is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
- (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
pkey = ib_bth_get_pkey(ohdr);
sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
- dlid = ib_get_dlid(pkt->hdr);
+ dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) :
+ ppd->lid;
slid = ib_get_slid(pkt->hdr);
+ is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
+ opcode = ib_bth_get_opcode(ohdr);
hdr_type = HFI1_PKT_TYPE_9B;
+ fecn = ib_bth_get_fecn(ohdr);
+ becn = ib_bth_get_becn(ohdr);
}
switch (qp->ibqp.qp_type) {
case IB_QPT_UD:
- dlid = ppd->lid;
rlid = slid;
rqpn = ib_get_sqpn(pkt->ohdr);
svc_type = IB_CC_SVCTYPE_UD;
@@ -485,22 +505,31 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
svc_type = IB_CC_SVCTYPE_RC;
break;
default:
- return;
+ return false;
}
- bth1 = be32_to_cpu(ohdr->bth[1]);
+ ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) ||
+ (opcode == IB_OPCODE_RC_ACKNOWLEDGE);
+ /*
+ * ACKNOWLEDGE packets do not get a CNP but this will be
+ * guarded by ignore_fecn above.
+ */
+ do_cnp = prescan ||
+ (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST &&
+ opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE);
+
/* Call appropriate CNP handler */
- if (do_cnp && (bth1 & IB_FECN_SMASK))
+ if (!ignore_fecn && do_cnp && fecn)
hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey,
dlid, rlid, sc, grh);
- if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
- u32 lqpn = bth1 & RVT_QPN_MASK;
+ if (becn) {
+ u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
}
-
+ return !ignore_fecn && fecn;
}
struct ps_mdata {
@@ -599,7 +628,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
- int is_ecn = 0;
u8 lnh;
if (ps_done(&mdata, rhf, rcd))
@@ -625,12 +653,10 @@ static void __prescan_rxq(struct hfi1_packet *packet)
goto next; /* just in case */
}
- bth1 = be32_to_cpu(packet->ohdr->bth[1]);
- is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
-
- if (!is_ecn)
+ if (!hfi1_may_ecn(packet))
goto next;
+ bth1 = be32_to_cpu(packet->ohdr->bth[1]);
qpn = bth1 & RVT_QPN_MASK;
rcu_read_lock();
qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
@@ -640,7 +666,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
goto next;
}
- process_ecn(qp, packet, true);
+ hfi1_process_ecn_slowpath(qp, packet, true);
rcu_read_unlock();
/* turn off BECN, FECN */
@@ -1400,7 +1426,7 @@ static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet)
if ((!(hfi1_is_16B_mcast(packet->dlid))) &&
(packet->dlid !=
opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) {
- if (packet->dlid != ppd->lid)
+ if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid)
return -EINVAL;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 2b882347d0c2..6db2276f5c13 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1804,13 +1804,20 @@ static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd)
return &rcd->ppd->ibport_data;
}
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp);
-static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp)
+/**
+ * hfi1_may_ecn - Check whether FECN or BECN processing should be done
+ * @pkt: the packet to be evaluated
+ *
+ * Check whether the FECN or BECN bits in the packet's header are
+ * enabled, depending on packet type.
+ *
+ * This function only checks for FECN and BECN bits. Additional checks
+ * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to
+ * ensure correct handling.
+ */
+static inline bool hfi1_may_ecn(struct hfi1_packet *pkt)
{
- bool becn;
- bool fecn;
+ bool fecn, becn;
if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
fecn = hfi1_16B_get_fecn(pkt->hdr);
@@ -1819,10 +1826,18 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
fecn = ib_bth_get_fecn(pkt->ohdr);
becn = ib_bth_get_becn(pkt->ohdr);
}
- if (unlikely(fecn || becn)) {
- hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
- return fecn;
- }
+ return fecn || becn;
+}
+
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool prescan);
+static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt)
+{
+ bool do_work;
+
+ do_work = hfi1_may_ecn(pkt);
+ if (unlikely(do_work))
+ return hfi1_process_ecn_slowpath(qp, pkt, false);
return false;
}
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 88a0cf930136..4228393e6c4c 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -305,7 +305,7 @@ static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = rdma_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
rcu_read_unlock();
return ah;
}
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 9ab50d2308dc..dd5a5c030066 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -742,6 +742,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
spin_lock_init(&sc->alloc_lock);
spin_lock_init(&sc->release_lock);
spin_lock_init(&sc->credit_ctrl_lock);
+ seqlock_init(&sc->waitlock);
INIT_LIST_HEAD(&sc->piowait);
INIT_WORK(&sc->halt_work, sc_halted);
init_waitqueue_head(&sc->halt_wait);
@@ -1593,7 +1594,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
static void sc_piobufavail(struct send_context *sc)
{
struct hfi1_devdata *dd = sc->dd;
- struct hfi1_ibdev *dev = &dd->verbs_dev;
struct list_head *list;
struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
struct rvt_qp *qp;
@@ -1612,7 +1612,7 @@ static void sc_piobufavail(struct send_context *sc)
* could end up with QPs on the wait list with the interrupt
* disabled.
*/
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ write_seqlock_irqsave(&sc->waitlock, flags);
while (!list_empty(list)) {
struct iowait *wait;
@@ -1636,7 +1636,7 @@ static void sc_piobufavail(struct send_context *sc)
if (!list_empty(list))
hfi1_sc_wantpiobuf_intr(sc, 1);
}
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(&sc->waitlock, flags);
/* Wake up the most starved one first */
if (n)
@@ -2137,3 +2137,28 @@ void free_credit_return(struct hfi1_devdata *dd)
kfree(dd->cr_base);
dd->cr_base = NULL;
}
+
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+ struct send_context_info *sci)
+{
+ struct send_context *sc = sci->sc;
+ u64 reg;
+
+ seq_printf(s, "SCI %u: type %u base %u credits %u\n",
+ i, sci->type, sci->base, sci->credits);
+ seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n",
+ sc->flags, sc->sw_index, sc->hw_context, sc->group);
+ seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n",
+ sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail);
+ seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n",
+ sc->fill, sc->free, sc->fill_wrap, sc->alloc_free);
+ seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n",
+ sc->credit_intr_count, sc->credit_ctrl);
+ reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS));
+ seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n",
+ (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >>
+ CR_COUNTER_SHIFT,
+ (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) &
+ SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK),
+ reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK));
+}
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index aaf372c3e5d6..c9a58b642bdd 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -127,6 +127,8 @@ struct send_context {
volatile __le64 *hw_free; /* HW free counter */
/* list for PIO waiters */
struct list_head piowait ____cacheline_aligned_in_smp;
+ seqlock_t waitlock;
+
spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
u32 credit_intr_count; /* count of credit intr users */
u64 credit_ctrl; /* cache for credit control */
@@ -329,4 +331,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
void seg_pio_copy_end(struct pio_buf *pbuf);
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+ struct send_context_info *sci);
+
#endif /* _PIO_H */
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 1a016248039f..5344e8993b28 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -375,20 +375,18 @@ bool _hfi1_schedule_send(struct rvt_qp *qp)
static void qp_pio_drain(struct rvt_qp *qp)
{
- struct hfi1_ibdev *dev;
struct hfi1_qp_priv *priv = qp->priv;
if (!priv->s_sendcontext)
return;
- dev = to_idev(qp->ibqp.device);
while (iowait_pio_pending(&priv->s_iowait)) {
- write_seqlock_irq(&dev->iowait_lock);
+ write_seqlock_irq(&priv->s_sendcontext->waitlock);
hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
- write_sequnlock_irq(&dev->iowait_lock);
+ write_sequnlock_irq(&priv->s_sendcontext->waitlock);
iowait_pio_drain(&priv->s_iowait);
- write_seqlock_irq(&dev->iowait_lock);
+ write_seqlock_irq(&priv->s_sendcontext->waitlock);
hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
- write_sequnlock_irq(&dev->iowait_lock);
+ write_sequnlock_irq(&priv->s_sendcontext->waitlock);
}
}
@@ -459,7 +457,6 @@ static int iowait_sleep(
struct hfi1_qp_priv *priv;
unsigned long flags;
int ret = 0;
- struct hfi1_ibdev *dev;
qp = tx->qp;
priv = qp->priv;
@@ -472,9 +469,8 @@ static int iowait_sleep(
* buffer and undoing the side effects of the copy.
*/
/* Make a common routine? */
- dev = &sde->dd->verbs_dev;
list_add_tail(&stx->list, &wait->tx_head);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
if (sdma_progress(sde, seq, stx))
goto eagain;
if (list_empty(&priv->s_iowait.list)) {
@@ -485,11 +481,11 @@ static int iowait_sleep(
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
iowait_queue(pkts_sent, &priv->s_iowait,
&sde->dmawait);
- priv->s_iowait.lock = &dev->iowait_lock;
+ priv->s_iowait.lock = &sde->waitlock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
hfi1_qp_unbusy(qp, wait);
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY;
@@ -499,7 +495,7 @@ static int iowait_sleep(
}
return ret;
eagain:
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
spin_unlock_irqrestore(&qp->s_lock, flags);
list_del_init(&stx->list);
return -EAGAIN;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 188aa4f686a0..be603f35d7e4 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1157,6 +1157,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
+ rvt_qp_wqe_unreserve(qp, wqe);
s_last = qp->s_last;
trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
@@ -1209,6 +1210,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
u32 s_last;
rvt_put_swqe(wqe);
+ rvt_qp_wqe_unreserve(qp, wqe);
s_last = qp->s_last;
trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
@@ -2049,8 +2051,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
struct ib_reth *reth;
unsigned long flags;
int ret;
- bool is_fecn = false;
- bool copy_last = false;
+ bool copy_last = false, fecn;
u32 rkey;
u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
@@ -2059,7 +2060,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (hfi1_ruc_check_hdr(ibp, packet))
return;
- is_fecn = process_ecn(qp, packet, false);
+ fecn = process_ecn(qp, packet);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2070,8 +2071,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
rc_rcv_resp(packet);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2347,11 +2346,11 @@ send_last:
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2413,11 +2412,11 @@ send_last:
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2430,16 +2429,9 @@ send_last:
qp->r_ack_psn = psn;
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
- if (psn & IB_BTH_REQ_ACK) {
- if (packet->numpkt == 0) {
- rc_cancel_ack(qp);
- goto send_ack;
- }
- if (qp->r_adefered >= HFI1_PSN_CREDIT) {
- rc_cancel_ack(qp);
- goto send_ack;
- }
- if (unlikely(is_fecn)) {
+ if (psn & IB_BTH_REQ_ACK || fecn) {
+ if (packet->numpkt == 0 || fecn ||
+ qp->r_adefered >= HFI1_PSN_CREDIT) {
rc_cancel_ack(qp);
goto send_ack;
}
@@ -2480,7 +2472,7 @@ nack_acc:
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
- hfi1_send_rc_ack(packet, is_fecn);
+ hfi1_send_rc_ack(packet, fecn);
}
void hfi1_rc_hdrerr(
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 891d2386d1ca..b84356e1a4c1 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1424,6 +1424,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
seqlock_init(&sde->head_lock);
spin_lock_init(&sde->senddmactrl_lock);
spin_lock_init(&sde->flushlist_lock);
+ seqlock_init(&sde->waitlock);
/* insure there is always a zero bit */
sde->ahg_bits = 0xfffffffe00000000ULL;
@@ -1758,7 +1759,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
- struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
u8 max_starved_cnt = 0;
#ifdef CONFIG_SDMA_VERBOSITY
@@ -1768,10 +1768,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
#endif
do {
- seq = read_seqbegin(&dev->iowait_lock);
+ seq = read_seqbegin(&sde->waitlock);
if (!list_empty(&sde->dmawait)) {
/* at least one item */
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
/* Harvest waiters wanting DMA descriptors */
list_for_each_entry_safe(
wait,
@@ -1794,10 +1794,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
list_del_init(&wait->list);
waits[n++] = wait;
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
break;
}
- } while (read_seqretry(&dev->iowait_lock, seq));
+ } while (read_seqretry(&sde->waitlock, seq));
/* Schedule the most starved one first */
if (n)
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 6dc63d7c5685..1e2e40f79cb2 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -382,6 +382,7 @@ struct sdma_engine {
u64 progress_int_cnt;
/* private: */
+ seqlock_t waitlock;
struct list_head dmawait;
/* CONFIG SDMA for now, just blindly duplicate */
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
new file mode 100644
index 000000000000..da1ecb68a928
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#include "hfi.h"
+#include "verbs.h"
+#include "tid_rdma.h"
+
+/**
+ * qp_to_rcd - determine the receive context used by a qp
+ * @qp - the qp
+ *
+ * This routine returns the receive context associated
+ * with a a qp's qpn.
+ *
+ * Returns the context.
+ */
+static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
+ struct rvt_qp *qp)
+{
+ struct hfi1_ibdev *verbs_dev = container_of(rdi,
+ struct hfi1_ibdev,
+ rdi);
+ struct hfi1_devdata *dd = container_of(verbs_dev,
+ struct hfi1_devdata,
+ verbs_dev);
+ unsigned int ctxt;
+
+ if (qp->ibqp.qp_num == 0)
+ ctxt = 0;
+ else
+ ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
+ (dd->n_krcv_queues - 1)) + 1;
+
+ return dd->rcd[ctxt];
+}
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+
+ qpriv->rcd = qp_to_rcd(rdi, qp);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
new file mode 100644
index 000000000000..6fcd3adcdcc3
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#ifndef HFI1_TID_RDMA_H
+#define HFI1_TID_RDMA_H
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr);
+
+#endif /* HFI1_TID_RDMA_H */
+
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 6aca0c5a7f97..6ba47037c424 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -321,7 +321,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
if (hfi1_ruc_check_hdr(ibp, packet))
return;
- process_ecn(qp, packet, true);
+ process_ecn(qp, packet);
psn = ib_bth_get_psn(ohdr);
/* Compare the PSN verses the expected PSN. */
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 4baa8f4d49de..88242fe95eaa 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -51,6 +51,7 @@
#include "hfi.h"
#include "mad.h"
#include "verbs_txreq.h"
+#include "trace_ibhdrs.h"
#include "qp.h"
/* We support only two types - 9B and 16B for now */
@@ -656,18 +657,19 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
u32 bth0, plen, vl, hwords = 7;
u16 len;
u8 l4;
- struct hfi1_16b_header hdr;
+ struct hfi1_opa_header hdr;
struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 nwords;
+ hdr.hdr_type = HFI1_PKT_TYPE_16B;
/* Populate length */
nwords = ((hfi1_get_16b_padding(hwords << 2, 0) +
SIZE_OF_LT) >> 2) + SIZE_OF_CRC;
if (old_grh) {
- struct ib_grh *grh = &hdr.u.l.grh;
+ struct ib_grh *grh = &hdr.opah.u.l.grh;
grh->version_tclass_flow = old_grh->version_tclass_flow;
grh->paylen = cpu_to_be16(
@@ -675,11 +677,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
grh->hop_limit = 0xff;
grh->sgid = old_grh->dgid;
grh->dgid = old_grh->sgid;
- ohdr = &hdr.u.l.oth;
+ ohdr = &hdr.opah.u.l.oth;
l4 = OPA_16B_L4_IB_GLOBAL;
hwords += sizeof(struct ib_grh) / sizeof(u32);
} else {
- ohdr = &hdr.u.oth;
+ ohdr = &hdr.opah.u.oth;
l4 = OPA_16B_L4_IB_LOCAL;
}
@@ -693,7 +695,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
/* Convert dwords to flits */
len = (hwords + nwords) >> 1;
- hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5);
+ hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5);
plen = 2 /* PBC */ + hwords + nwords;
pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
@@ -701,9 +703,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
- if (pbuf)
+ if (pbuf) {
+ trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
+ }
}
}
@@ -715,14 +719,15 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 bth0, plen, vl, hwords = 5;
u16 lrh0;
u8 sl = ibp->sc_to_sl[sc5];
- struct ib_header hdr;
+ struct hfi1_opa_header hdr;
struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ hdr.hdr_type = HFI1_PKT_TYPE_9B;
if (old_grh) {
- struct ib_grh *grh = &hdr.u.l.grh;
+ struct ib_grh *grh = &hdr.ibh.u.l.grh;
grh->version_tclass_flow = old_grh->version_tclass_flow;
grh->paylen = cpu_to_be16(
@@ -730,11 +735,11 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
grh->hop_limit = 0xff;
grh->sgid = old_grh->dgid;
grh->dgid = old_grh->sgid;
- ohdr = &hdr.u.l.oth;
+ ohdr = &hdr.ibh.u.l.oth;
lrh0 = HFI1_LRH_GRH;
hwords += sizeof(struct ib_grh) / sizeof(u32);
} else {
- ohdr = &hdr.u.oth;
+ ohdr = &hdr.ibh.u.oth;
lrh0 = HFI1_LRH_BTH;
}
@@ -746,16 +751,18 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
- hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
+ hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
plen = 2 /* PBC */ + hwords;
pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
vl = sc_to_vlt(ppd->dd, sc5);
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
- if (pbuf)
+ if (pbuf) {
+ trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
+ }
}
}
@@ -912,7 +919,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
}
- process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
+ process_ecn(qp, packet);
/*
* Get the number of bytes the message was padded by
* and drop incomplete packets.
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 3f0aadccd9f6..e5e7fad09f32 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -130,7 +130,6 @@ static int defer_packet_queue(
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
- struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq);
@@ -144,10 +143,10 @@ static int defer_packet_queue(
* it is supposed to be enqueued.
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
if (list_empty(&pq->busy.list))
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
return -EBUSY;
eagain:
return -EAGAIN;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index a365089a9305..ec582d86025f 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -765,7 +765,6 @@ static int pio_wait(struct rvt_qp *qp,
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_devdata *dd = sc->dd;
- struct hfi1_ibdev *dev = &dd->verbs_dev;
unsigned long flags;
int ret = 0;
@@ -777,7 +776,7 @@ static int pio_wait(struct rvt_qp *qp,
*/
spin_lock_irqsave(&qp->s_lock, flags);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sc->waitlock);
list_add_tail(&ps->s_txreq->txreq.list,
&ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
@@ -790,14 +789,14 @@ static int pio_wait(struct rvt_qp *qp,
was_empty = list_empty(&sc->piowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
&sc->piowait);
- priv->s_iowait.lock = &dev->iowait_lock;
+ priv->s_iowait.lock = &sc->waitlock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
if (was_empty)
hfi1_sc_wantpiobuf_intr(sc, 1);
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sc->waitlock);
hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
@@ -919,6 +918,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (slen > len)
slen = len;
+ if (slen > ss->sge.sge_length)
+ slen = ss->sge.sge_length;
rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
@@ -1616,6 +1617,16 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return count;
}
+static const struct ib_device_ops hfi1_dev_ops = {
+ .alloc_hw_stats = alloc_hw_stats,
+ .alloc_rdma_netdev = hfi1_vnic_alloc_rn,
+ .get_dev_fw_str = hfi1_get_dev_fw_str,
+ .get_hw_stats = get_hw_stats,
+ .modify_device = modify_device,
+ /* keep process mad in the driver */
+ .process_mad = hfi1_process_mad,
+};
+
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@@ -1659,14 +1670,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
- ibdev->modify_device = modify_device;
- ibdev->alloc_hw_stats = alloc_hw_stats;
- ibdev->get_hw_stats = get_hw_stats;
- ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn;
- /* keep process mad in the driver */
- ibdev->process_mad = hfi1_process_mad;
- ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
+ ib_set_device_ops(ibdev, &hfi1_dev_ops);
strlcpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
@@ -1704,6 +1709,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;
dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
+ dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init;
dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 64c9054db5f3..1ad0b14bdb3c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -71,6 +71,7 @@ struct hfi1_devdata;
struct hfi1_packet;
#include "iowait.h"
+#include "tid_rdma.h"
#define HFI1_MAX_RDMA_ATOMIC 16
@@ -156,6 +157,7 @@ struct hfi1_qp_priv {
struct hfi1_ahg_info *s_ahg; /* ahg info for next header */
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
+ struct hfi1_ctxtdata *rcd; /* QP's receive context */
u8 s_sc; /* SC[0..4] for next packet */
struct iowait s_iowait;
struct rvt_qp *owner;
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index c9876d9e3cb9..a922db58be14 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -816,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
- chip_sdma_engines(dd), dd->num_vnic_contexts);
+ dd->num_sdma, dd->num_vnic_contexts);
if (!netdev)
return ERR_PTR(-ENOMEM);
rn = netdev_priv(netdev);
vinfo = opa_vnic_dev_priv(netdev);
vinfo->dd = dd;
- vinfo->num_tx_q = chip_sdma_engines(dd);
+ vinfo->num_tx_q = dd->num_sdma;
vinfo->num_rx_q = dd->num_vnic_contexts;
vinfo->netdev = netdev;
rn->free_rdma_netdev = hfi1_vnic_free_rn;
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index 97bd940a056a..1f81c480e028 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -57,7 +57,6 @@
#define HFI1_VNIC_TXREQ_NAME_LEN 32
#define HFI1_VNIC_SDMA_DESC_WTRMRK 64
-#define HFI1_VNIC_SDMA_RETRY_COUNT 1
/*
* struct vnic_txreq - VNIC transmit descriptor
@@ -67,7 +66,6 @@
* @pad: pad buffer
* @plen: pad length
* @pbc_val: pbc value
- * @retry_count: tx retry count
*/
struct vnic_txreq {
struct sdma_txreq txreq;
@@ -77,8 +75,6 @@ struct vnic_txreq {
unsigned char pad[HFI1_VNIC_MAX_PAD];
u16 plen;
__le64 pbc_val;
-
- u32 retry_count;
};
static void vnic_sdma_complete(struct sdma_txreq *txreq,
@@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
ret = build_vnic_tx_desc(sde, tx, pbc);
if (unlikely(ret))
goto free_desc;
- tx->retry_count = 0;
ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
&tx->txreq, vnic_sdma->pkts_sent);
@@ -237,18 +232,17 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
{
struct hfi1_vnic_sdma *vnic_sdma =
container_of(wait->iow, struct hfi1_vnic_sdma, wait);
- struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
- struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
- if (sdma_progress(sde, seq, txreq))
- if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT)
- return -EAGAIN;
+ write_seqlock(&sde->waitlock);
+ if (sdma_progress(sde, seq, txreq)) {
+ write_sequnlock(&sde->waitlock);
+ return -EAGAIN;
+ }
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
- write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
return -EBUSY;
}
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index cf03404b9d58..004c88b32e13 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -7,7 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
- hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o
+ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o
obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
hns-roce-hw-v1-objs := hns_roce_hw_v1.o
obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 9990dc9eb96a..b3c8c45ec1e3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -41,6 +41,7 @@
struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
@@ -110,7 +111,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-int hns_roce_destroy_ah(struct ib_ah *ah)
+int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
{
kfree(to_hr_ah(ah));
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 46f65f9f59d0..6300033a448f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -239,6 +239,8 @@ err_free:
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
{
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_srq_table(hr_dev);
hns_roce_cleanup_qp_table(hr_dev);
hns_roce_cleanup_cq_table(hr_dev);
hns_roce_cleanup_mr_table(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 9549ae51a0dd..927701df5eff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -120,6 +120,10 @@ enum {
HNS_ROCE_CMD_SQD2RTS_QP = 0x20,
HNS_ROCE_CMD_2RST_QP = 0x21,
HNS_ROCE_CMD_QUERY_QP = 0x22,
+ HNS_ROCE_CMD_SW2HW_SRQ = 0x70,
+ HNS_ROCE_CMD_MODIFY_SRQC = 0x72,
+ HNS_ROCE_CMD_QUERY_SRQC = 0x73,
+ HNS_ROCE_CMD_HW2SW_SRQ = 0x74,
};
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 93d4b4ec002d..f4c92a7ac1ce 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -376,9 +376,6 @@
#define ROCEE_RX_CMQ_TAIL_REG 0x07024
#define ROCEE_RX_CMQ_HEAD_REG 0x07028
-#define ROCEE_VF_MB_CFG0_REG 0x40
-#define ROCEE_VF_MB_STATUS_REG 0x58
-
#define ROCEE_VF_EQ_DB_CFG0_REG 0x238
#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index d39bdfdb5de9..509e467843f6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -111,6 +111,9 @@
#define PAGES_SHIFT_24 24
#define PAGES_SHIFT_32 32
+#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
+#define SRQ_DB_REG 0x230
+
enum {
HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
@@ -196,6 +199,7 @@ enum {
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
+ HNS_ROCE_CAP_FLAG_SRQ = BIT(5),
HNS_ROCE_CAP_FLAG_MW = BIT(7),
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
@@ -204,6 +208,8 @@ enum {
enum hns_roce_mtt_type {
MTT_TYPE_WQE,
MTT_TYPE_CQE,
+ MTT_TYPE_SRQWQE,
+ MTT_TYPE_IDX
};
enum {
@@ -339,6 +345,10 @@ struct hns_roce_mr_table {
struct hns_roce_hem_table mtpt_table;
struct hns_roce_buddy mtt_cqe_buddy;
struct hns_roce_hem_table mtt_cqe_table;
+ struct hns_roce_buddy mtt_srqwqe_buddy;
+ struct hns_roce_hem_table mtt_srqwqe_table;
+ struct hns_roce_buddy mtt_idx_buddy;
+ struct hns_roce_hem_table mtt_idx_table;
};
struct hns_roce_wq {
@@ -429,9 +439,37 @@ struct hns_roce_cq {
struct completion free;
};
+struct hns_roce_idx_que {
+ struct hns_roce_buf idx_buf;
+ int entry_sz;
+ u32 buf_size;
+ struct ib_umem *umem;
+ struct hns_roce_mtt mtt;
+ u64 *bitmap;
+};
+
struct hns_roce_srq {
struct ib_srq ibsrq;
- int srqn;
+ void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
+ unsigned long srqn;
+ int max;
+ int max_gs;
+ int wqe_shift;
+ void __iomem *db_reg_l;
+
+ atomic_t refcount;
+ struct completion free;
+
+ struct hns_roce_buf buf;
+ u64 *wrid;
+ struct ib_umem *umem;
+ struct hns_roce_mtt mtt;
+ struct hns_roce_idx_que idx_que;
+ spinlock_t lock;
+ int head;
+ int tail;
+ u16 wqe_ctr;
+ struct mutex mutex;
};
struct hns_roce_uar_table {
@@ -453,6 +491,12 @@ struct hns_roce_cq_table {
struct hns_roce_hem_table table;
};
+struct hns_roce_srq_table {
+ struct hns_roce_bitmap bitmap;
+ struct xarray xa;
+ struct hns_roce_hem_table table;
+};
+
struct hns_roce_raq_table {
struct hns_roce_buf_list *e_raq_buf;
};
@@ -603,6 +647,12 @@ struct hns_roce_aeqe {
} qp_event;
struct {
+ __le32 srq;
+ u32 rsv0;
+ u32 rsv1;
+ } srq_event;
+
+ struct {
__le32 cq;
u32 rsv0;
u32 rsv1;
@@ -679,7 +729,12 @@ struct hns_roce_caps {
u32 max_extend_sg;
int num_qps; /* 256k */
int reserved_qps;
+ u32 max_srq_sg;
+ int num_srqs;
u32 max_wqes; /* 16k */
+ u32 max_srqs;
+ u32 max_srq_wrs;
+ u32 max_srq_sges;
u32 max_sq_desc_sz; /* 64 */
u32 max_rq_desc_sz; /* 64 */
u32 max_srq_desc_sz;
@@ -690,12 +745,16 @@ struct hns_roce_caps {
int min_cqes;
u32 min_wqes;
int reserved_cqs;
+ int reserved_srqs;
+ u32 max_srqwqes;
int num_aeq_vectors; /* 1 */
int num_comp_vectors;
int num_other_vectors;
int num_mtpts;
u32 num_mtt_segs;
u32 num_cqe_segs;
+ u32 num_srqwqe_segs;
+ u32 num_idx_segs;
int reserved_mrws;
int reserved_uars;
int num_pds;
@@ -709,6 +768,8 @@ struct hns_roce_caps {
int irrl_entry_sz;
int trrl_entry_sz;
int cqc_entry_sz;
+ int srqc_entry_sz;
+ int idx_entry_sz;
u32 pbl_ba_pg_sz;
u32 pbl_buf_pg_sz;
u32 pbl_hop_num;
@@ -737,6 +798,12 @@ struct hns_roce_caps {
u32 cqe_ba_pg_sz;
u32 cqe_buf_pg_sz;
u32 cqe_hop_num;
+ u32 srqwqe_ba_pg_sz;
+ u32 srqwqe_buf_pg_sz;
+ u32 srqwqe_hop_num;
+ u32 idx_ba_pg_sz;
+ u32 idx_buf_pg_sz;
+ u32 idx_hop_num;
u32 eqe_ba_pg_sz;
u32 eqe_buf_pg_sz;
u32 eqe_hop_num;
@@ -805,6 +872,19 @@ struct hns_roce_hw {
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
+ void (*write_srqc)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn,
+ void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx,
+ dma_addr_t dma_handle_wqe,
+ dma_addr_t dma_handle_idx);
+ int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+ int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+ int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+ const struct ib_device_ops *hns_roce_dev_ops;
+ const struct ib_device_ops *hns_roce_dev_srq_ops;
};
struct hns_roce_dev {
@@ -839,6 +919,7 @@ struct hns_roce_dev {
struct hns_roce_uar_table uar_table;
struct hns_roce_mr_table mr_table;
struct hns_roce_cq_table cq_table;
+ struct hns_roce_srq_table srq_table;
struct hns_roce_qp_table qp_table;
struct hns_roce_eq_table eq_table;
@@ -951,12 +1032,14 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
+int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
+void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev);
int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
@@ -973,9 +1056,10 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
struct ib_ah *hns_roce_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata);
int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-int hns_roce_destroy_ah(struct ib_ah *ah);
+int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
struct ib_ucontext *context,
@@ -1011,6 +1095,14 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_mtt *mtt, struct ib_umem *umem);
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq);
+
struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
@@ -1052,6 +1144,7 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index f6faefed96e8..4cdbcafa5915 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -46,7 +46,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
(hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
(hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
(hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
- (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT))
+ (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
+ (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
+ (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX))
return true;
return false;
@@ -147,6 +149,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
mhop->ba_l0_num = mhop->bt_chunk_size / 8;
mhop->hop_num = hr_dev->caps.cqe_hop_num;
break;
+ case HEM_TYPE_SRQWQE:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->hop_num = hr_dev->caps.srqwqe_hop_num;
+ break;
+ case HEM_TYPE_IDX:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->hop_num = hr_dev->caps.idx_hop_num;
+ break;
default:
dev_err(dev, "Table %d not support multi-hop addressing!\n",
table->type);
@@ -906,6 +924,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
bt_chunk_size = buf_chunk_size;
hop_num = hr_dev->caps.cqe_hop_num;
break;
+ case HEM_TYPE_SRQWQE:
+ buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
+ + PAGE_SHIFT);
+ bt_chunk_size = buf_chunk_size;
+ hop_num = hr_dev->caps.srqwqe_hop_num;
+ break;
+ case HEM_TYPE_IDX:
+ buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
+ + PAGE_SHIFT);
+ bt_chunk_size = buf_chunk_size;
+ hop_num = hr_dev->caps.idx_hop_num;
+ break;
default:
dev_err(dev,
"Table %d not support to init hem table here!\n",
@@ -1041,6 +1071,15 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
+ if ((hr_dev->caps.num_idx_segs))
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_idx_table);
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table);
+ if (hr_dev->caps.srqc_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->srq_table.table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index e8850d59e780..a650278c6fbd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -48,6 +48,8 @@ enum {
/* UNMAP HEM */
HEM_TYPE_MTT,
HEM_TYPE_CQE,
+ HEM_TYPE_SRQWQE,
+ HEM_TYPE_IDX,
HEM_TYPE_IRRL,
HEM_TYPE_TRRL,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index ca05810c92dc..b74c742b000c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -3926,7 +3926,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
struct hns_roce_qp_work *qp_work;
struct hns_roce_v1_priv *priv;
struct hns_roce_cq *send_cq, *recv_cq;
- int is_user = !!ibqp->pd->uobject;
+ bool is_user = ibqp->uobject;
int is_timeout = 0;
int ret;
@@ -4793,6 +4793,16 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev)
kfree(eq_table->eq);
}
+static const struct ib_device_ops hns_roce_v1_dev_ops = {
+ .destroy_qp = hns_roce_v1_destroy_qp,
+ .modify_cq = hns_roce_v1_modify_cq,
+ .poll_cq = hns_roce_v1_poll_cq,
+ .post_recv = hns_roce_v1_post_recv,
+ .post_send = hns_roce_v1_post_send,
+ .query_qp = hns_roce_v1_query_qp,
+ .req_notify_cq = hns_roce_v1_req_notify_cq,
+};
+
static const struct hns_roce_hw hns_roce_hw_v1 = {
.reset = hns_roce_v1_reset,
.hw_profile = hns_roce_v1_profile,
@@ -4818,6 +4828,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = {
.destroy_cq = hns_roce_v1_destroy_cq,
.init_eq = hns_roce_v1_init_eq_table,
.cleanup_eq = hns_roce_v1_cleanup_eq_table,
+ .hns_roce_dev_ops = &hns_roce_v1_dev_ops,
};
static const struct of_device_id hns_roce_of_match[] = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 3beb1523e17c..3a669451cf86 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1082,6 +1082,33 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
return 0;
}
+static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
+ int vf_id)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_vf_switch *swt;
+ int ret;
+
+ swt = (struct hns_roce_vf_switch *)desc.data;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
+ swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+ roce_set_field(swt->fun_id,
+ VF_SWITCH_DATA_FUN_ID_VF_ID_M,
+ VF_SWITCH_DATA_FUN_ID_VF_ID_S,
+ vf_id);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+ desc.flag =
+ cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
+ desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmq_desc desc[2];
@@ -1269,6 +1296,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
+ if (hr_dev->pci_dev->revision == 0x21) {
+ ret = hns_roce_set_vf_switch_param(hr_dev, 0);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "Set function switch param fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ }
hr_dev->vendor_part_id = hr_dev->pci_dev->device;
hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
@@ -1276,11 +1312,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
+ caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
+ caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
+ caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM;
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM;
caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM;
@@ -1289,6 +1328,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
+ caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+ caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
@@ -1299,8 +1340,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ;
caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
+ caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ;
caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ;
caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
+ caps->idx_entry_sz = 4;
caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE;
caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
caps->reserved_lkey = 0;
@@ -1308,6 +1351,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->reserved_mrws = 1;
caps->reserved_uars = 0;
caps->reserved_cqs = 0;
+ caps->reserved_srqs = 0;
caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
caps->qpc_ba_pg_sz = 0;
@@ -1331,6 +1375,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->cqe_ba_pg_sz = 0;
caps->cqe_buf_pg_sz = 0;
caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM;
+ caps->srqwqe_ba_pg_sz = 0;
+ caps->srqwqe_buf_pg_sz = 0;
+ caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM;
+ caps->idx_ba_pg_sz = 0;
+ caps->idx_buf_pg_sz = 0;
+ caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM;
caps->eqe_ba_pg_sz = 0;
caps->eqe_buf_pg_sz = 0;
caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
@@ -1354,8 +1404,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
+ caps->max_srqs = HNS_ROCE_V2_MAX_SRQ;
+ caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR;
+ caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE;
+
if (hr_dev->pci_dev->revision == 0x21)
- caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
+ caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
+ HNS_ROCE_CAP_FLAG_SRQ;
ret = hns_roce_v2_set_bt(hr_dev);
if (ret)
@@ -1587,30 +1642,62 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
hns_roce_free_link_table(hr_dev, &priv->tsq);
}
+static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_mbox_status *mb_st =
+ (struct hns_roce_mbox_status *)desc.data;
+ enum hns_roce_cmd_return_status status;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true);
+
+ status = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (status)
+ return status;
+
+ return cpu_to_le32(mb_st->mb_status_hw_run);
+}
+
static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
{
- u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
+ u32 status = hns_roce_query_mbox_status(hr_dev);
return status >> HNS_ROCE_HW_RUN_BIT_SHIFT;
}
static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev)
{
- u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
+ u32 status = hns_roce_query_mbox_status(hr_dev);
return status & HNS_ROCE_HW_MB_STATUS_MASK;
}
+static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param,
+ u64 out_param, u32 in_modifier, u8 op_modifier,
+ u16 op, u16 token, int event)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
+
+ mb->in_param_l = cpu_to_le64(in_param);
+ mb->in_param_h = cpu_to_le64(in_param) >> 32;
+ mb->out_param_l = cpu_to_le64(out_param);
+ mb->out_param_h = cpu_to_le64(out_param) >> 32;
+ mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op);
+ mb->token_event_en = cpu_to_le32(event << 16 | token);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, u32 in_modifier, u8 op_modifier,
u16 op, u16 token, int event)
{
struct device *dev = hr_dev->dev;
- u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base +
- ROCEE_VF_MB_CFG0_REG);
unsigned long end;
- u32 val0 = 0;
- u32 val1 = 0;
+ int ret;
end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies;
while (hns_roce_v2_cmd_pending(hr_dev)) {
@@ -1622,27 +1709,12 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
cond_resched();
}
- roce_set_field(val0, HNS_ROCE_VF_MB4_TAG_MASK,
- HNS_ROCE_VF_MB4_TAG_SHIFT, in_modifier);
- roce_set_field(val0, HNS_ROCE_VF_MB4_CMD_MASK,
- HNS_ROCE_VF_MB4_CMD_SHIFT, op);
- roce_set_field(val1, HNS_ROCE_VF_MB5_EVENT_MASK,
- HNS_ROCE_VF_MB5_EVENT_SHIFT, event);
- roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK,
- HNS_ROCE_VF_MB5_TOKEN_SHIFT, token);
-
- writeq(in_param, hcr + 0);
- writeq(out_param, hcr + 2);
-
- /* Memory barrier */
- wmb();
-
- writel(val0, hcr + 4);
- writel(val1, hcr + 5);
-
- mmiowb();
+ ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier,
+ op_modifier, op, token, event);
+ if (ret)
+ dev_err(dev, "Post mailbox fail(%d)\n", ret);
- return 0;
+ return ret;
}
static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
@@ -2007,6 +2079,27 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
return get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
}
+static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+{
+ return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift);
+}
+
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+{
+ u32 bitmap_num;
+ int bit_num;
+
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
+
+ bitmap_num = wqe_index / (sizeof(u64) * 8);
+ bit_num = wqe_index % (sizeof(u64) * 8);
+ srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
+ srq->tail++;
+
+ spin_unlock(&srq->lock);
+}
+
static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
{
*hr_cq->set_ci_db = cons_index & 0xffffff;
@@ -2018,6 +2111,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
struct hns_roce_v2_cqe *cqe, *dest;
u32 prod_index;
int nfreed = 0;
+ int wqe_index;
u8 owner_bit;
for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
@@ -2035,7 +2129,13 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M,
V2_CQE_BYTE_16_LCL_QPN_S) &
HNS_ROCE_V2_CQE_QPN_MASK) == qpn) {
- /* In v1 engine, not support SRQ */
+ if (srq &&
+ roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) {
+ wqe_index = roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S);
+ hns_roce_free_srq_wqe(srq, wqe_index);
+ }
++nfreed;
} else if (nfreed) {
dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
@@ -2212,6 +2312,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
struct hns_roce_qp **cur_qp, struct ib_wc *wc)
{
+ struct hns_roce_srq *srq = NULL;
struct hns_roce_dev *hr_dev;
struct hns_roce_v2_cqe *cqe;
struct hns_roce_qp *hr_qp;
@@ -2254,6 +2355,37 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->qp = &(*cur_qp)->ibqp;
wc->vendor_err = 0;
+ if (is_send) {
+ wq = &(*cur_qp)->sq;
+ if ((*cur_qp)->sq_signal_bits) {
+ /*
+ * If sg_signal_bit is 1,
+ * firstly tail pointer updated to wqe
+ * which current cqe correspond to
+ */
+ wqe_ctr = (u16)roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S);
+ wq->tail += (wqe_ctr - (u16)wq->tail) &
+ (wq->wqe_cnt - 1);
+ }
+
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ } else if ((*cur_qp)->ibqp.srq) {
+ srq = to_hr_srq((*cur_qp)->ibqp.srq);
+ wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S));
+ wc->wr_id = srq->wrid[wqe_ctr];
+ hns_roce_free_srq_wqe(srq, wqe_ctr);
+ } else {
+ /* Update tail pointer, record wr_id */
+ wq = &(*cur_qp)->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ }
+
status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
V2_CQE_BYTE_4_STATUS_S);
switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) {
@@ -2373,23 +2505,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->status = IB_WC_GENERAL_ERR;
break;
}
-
- wq = &(*cur_qp)->sq;
- if ((*cur_qp)->sq_signal_bits) {
- /*
- * If sg_signal_bit is 1,
- * firstly tail pointer updated to wqe
- * which current cqe correspond to
- */
- wqe_ctr = (u16)roce_get_field(cqe->byte_4,
- V2_CQE_BYTE_4_WQE_INDX_M,
- V2_CQE_BYTE_4_WQE_INDX_S);
- wq->tail += (wqe_ctr - (u16)wq->tail) &
- (wq->wqe_cnt - 1);
- }
-
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
} else {
/* RQ correspond to CQE */
wc->byte_len = le32_to_cpu(cqe->byte_cnt);
@@ -2434,11 +2549,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
return -EAGAIN;
}
- /* Update tail pointer, record wr_id */
- wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
-
wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M,
V2_CQE_BYTE_32_SL_S);
wc->src_qp = (u8)roce_get_field(cqe->byte_32,
@@ -2747,6 +2857,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+ (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
ilog2((unsigned int)hr_qp->rq.wqe_cnt));
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3088,6 +3200,8 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+ (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
ilog2((unsigned int)hr_qp->rq.wqe_cnt));
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3601,6 +3715,21 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
return 0;
}
+static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+
+ if ((cur_state != IB_QPS_RESET &&
+ (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) ||
+ ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) &&
+ (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) ||
+ (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS))
+ return true;
+
+ return false;
+
+}
+
static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
@@ -3626,6 +3755,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
*/
memset(qpc_mask, 0xff, sizeof(*qpc_mask));
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ memset(qpc_mask, 0, sizeof(*qpc_mask));
modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@ -3641,21 +3771,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
qpc_mask);
if (ret)
goto out;
- } else if ((cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) ||
- (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD) ||
- (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD) ||
- (cur_state == IB_QPS_SQD && new_state == IB_QPS_RTS) ||
- (cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) {
+ } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) {
/* Nothing */
;
} else {
@@ -3789,6 +3905,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
+ roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
+ ibqp->srq ? 1 : 0);
+ roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
+ V2_QPC_BYTE_108_INV_CREDIT_S, 0);
+
/* Every status migrate must change state */
roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, new_state);
@@ -4012,7 +4133,7 @@ out:
static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
- int is_user)
+ bool is_user)
{
struct hns_roce_cq *send_cq, *recv_cq;
struct device *dev = hr_dev->dev;
@@ -4074,7 +4195,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
hns_roce_free_db(hr_dev, &hr_qp->rdb);
}
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+ hr_qp->rq.wqe_cnt) {
kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
kfree(hr_qp->rq_inl_buf.wqe_list);
}
@@ -4088,7 +4210,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp)
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
int ret;
- ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject);
+ ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, ibqp->uobject);
if (ret) {
dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret);
return ret;
@@ -4384,6 +4506,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
int aeqe_found = 0;
int event_type;
int sub_type;
+ u32 srqn;
u32 qpn;
u32 cqn;
@@ -4406,6 +4529,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
cqn = roce_get_field(aeqe->event.cq_event.cq,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+ srqn = roce_get_field(aeqe->event.srq_event.srq,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -4413,13 +4539,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
case HNS_ROCE_EVENT_TYPE_COMM_EST:
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
hns_roce_qp_event(hr_dev, qpn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ hns_roce_srq_event(hr_dev, srqn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
@@ -4964,13 +5091,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
size = (eq->entries - eqe_alloc) * eq->eqe_size;
}
- eq->buf[i] = dma_alloc_coherent(dev, size,
+ eq->buf[i] = dma_zalloc_coherent(dev, size,
&(eq->buf_dma[i]),
GFP_KERNEL);
if (!eq->buf[i])
goto err_dma_alloc_buf;
- memset(eq->buf[i], 0, size);
*(eq->bt_l0 + i) = eq->buf_dma[i];
eq_buf_cnt++;
@@ -5000,13 +5126,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
size = (eq->entries - eqe_alloc)
* eq->eqe_size;
}
- eq->buf[idx] = dma_alloc_coherent(dev, size,
+ eq->buf[idx] = dma_zalloc_coherent(dev, size,
&(eq->buf_dma[idx]),
GFP_KERNEL);
if (!eq->buf[idx])
goto err_dma_alloc_buf;
- memset(eq->buf[idx], 0, size);
*(eq->bt_l1[i] + j) = eq->buf_dma[idx];
eq_buf_cnt++;
@@ -5116,7 +5241,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
goto free_cmd_mbox;
}
- eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
+ eq->buf_list->buf = dma_zalloc_coherent(dev, buf_chk_sz,
&(eq->buf_list->map),
GFP_KERNEL);
if (!eq->buf_list->buf) {
@@ -5124,7 +5249,6 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
goto err_alloc_buf;
}
- memset(eq->buf_list->buf, 0, buf_chk_sz);
} else {
ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
if (ret) {
@@ -5332,6 +5456,300 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
destroy_workqueue(hr_dev->irq_workq);
}
+static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
+ u32 cqn, void *mb_buf, u64 *mtts_wqe,
+ u64 *mtts_idx, dma_addr_t dma_handle_wqe,
+ dma_addr_t dma_handle_idx)
+{
+ struct hns_roce_srq_context *srq_context;
+
+ srq_context = mb_buf;
+ memset(srq_context, 0, sizeof(*srq_context));
+
+ roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
+ SRQC_BYTE_4_SRQ_ST_S, 1);
+
+ roce_set_field(srq_context->byte_4_srqn_srqst,
+ SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
+ SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
+ (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+ hr_dev->caps.srqwqe_hop_num));
+ roce_set_field(srq_context->byte_4_srqn_srqst,
+ SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
+ ilog2(srq->max));
+
+ roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
+ SRQC_BYTE_4_SRQN_S, srq->srqn);
+
+ roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+ roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
+ SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
+
+ srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
+
+ roce_set_field(srq_context->byte_24_wqe_bt_ba,
+ SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
+ SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
+ cpu_to_le32(dma_handle_wqe >> 35));
+
+ roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
+ SRQC_BYTE_28_PD_S, pdn);
+ roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
+ SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
+ fls(srq->max_gs - 1));
+
+ srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3);
+ srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba);
+ roce_set_field(srq_context->rsv_idx_bt_ba,
+ SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
+ SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
+ cpu_to_le32(dma_handle_idx >> 35));
+
+ srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT);
+ srq_context->idx_cur_blk_addr =
+ cpu_to_le32(srq_context->idx_cur_blk_addr);
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
+ SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
+ cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT)));
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
+ SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
+ hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+ hr_dev->caps.idx_hop_num);
+
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
+ SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
+ hr_dev->caps.idx_ba_pg_sz);
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
+ SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
+ hr_dev->caps.idx_buf_pg_sz);
+
+ srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT);
+ srq_context->idx_nxt_blk_addr =
+ cpu_to_le32(srq_context->idx_nxt_blk_addr);
+ roce_set_field(srq_context->rsv_idxnxtblkaddr,
+ SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
+ SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
+ cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT)));
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
+ cqn);
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
+ SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
+ hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET);
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
+ SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
+ hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET);
+
+ roce_set_bit(srq_context->db_record_addr_record_en,
+ SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
+}
+
+static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_srq_context *srqc_mask;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ if (srq_attr_mask & IB_SRQ_LIMIT) {
+ if (srq_attr->srq_limit >= srq->max)
+ return -EINVAL;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
+
+ memset(srqc_mask, 0xff, sizeof(*srqc_mask));
+
+ roce_set_field(srq_context->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit);
+ roce_set_field(srqc_mask->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0,
+ HNS_ROCE_CMD_MODIFY_SRQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "MODIFY SRQ Failed to cmd mailbox.\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int limit_wl;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0,
+ HNS_ROCE_CMD_QUERY_SRQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ if (ret) {
+ dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n");
+ goto out;
+ }
+
+ limit_wl = roce_get_field(srq_context->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S);
+
+ attr->srq_limit = limit_wl;
+ attr->max_wr = srq->max - 1;
+ attr->max_sge = srq->max_gs;
+
+ memcpy(srq_context, mailbox->buf, sizeof(*srq_context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int find_empty_entry(struct hns_roce_idx_que *idx_que)
+{
+ int bit_num;
+ int i;
+
+ /* bitmap[i] is set zero if all bits are allocated */
+ for (i = 0; idx_que->bitmap[i] == 0; ++i)
+ ;
+ bit_num = ffs(idx_que->bitmap[i]);
+ idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));
+
+ return i * sizeof(u64) * 8 + (bit_num - 1);
+}
+
+static void fill_idx_queue(struct hns_roce_idx_que *idx_que,
+ int cur_idx, int wqe_idx)
+{
+ unsigned int *addr;
+
+ addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf,
+ cur_idx * idx_que->entry_sz);
+ *addr = wqe_idx;
+}
+
+static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ struct hns_roce_v2_db srq_db;
+ unsigned long flags;
+ int ret = 0;
+ int wqe_idx;
+ void *wqe;
+ int nreq;
+ int ind;
+ int i;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ ind = srq->head & (srq->max - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(wr->num_sge > srq->max_gs)) {
+ ret = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ if (unlikely(srq->head == srq->tail)) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ wqe_idx = find_empty_entry(&srq->idx_que);
+ fill_idx_queue(&srq->idx_que, ind, wqe_idx);
+ wqe = get_srq_wqe(srq, wqe_idx);
+ dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
+ dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
+ dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
+ }
+
+ if (i < srq->max_gs) {
+ dseg->len = 0;
+ dseg->lkey = cpu_to_le32(0x100);
+ dseg->addr = 0;
+ }
+
+ srq->wrid[wqe_idx] = wr->wr_id;
+ ind = (ind + 1) & (srq->max - 1);
+ }
+
+ if (likely(nreq)) {
+ srq->head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn;
+ srq_db.parameter = srq->head;
+
+ hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l);
+
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return ret;
+}
+
+static const struct ib_device_ops hns_roce_v2_dev_ops = {
+ .destroy_qp = hns_roce_v2_destroy_qp,
+ .modify_cq = hns_roce_v2_modify_cq,
+ .poll_cq = hns_roce_v2_poll_cq,
+ .post_recv = hns_roce_v2_post_recv,
+ .post_send = hns_roce_v2_post_send,
+ .query_qp = hns_roce_v2_query_qp,
+ .req_notify_cq = hns_roce_v2_req_notify_cq,
+};
+
+static const struct ib_device_ops hns_roce_v2_dev_srq_ops = {
+ .modify_srq = hns_roce_v2_modify_srq,
+ .post_srq_recv = hns_roce_v2_post_srq_recv,
+ .query_srq = hns_roce_v2_query_srq,
+};
+
static const struct hns_roce_hw hns_roce_hw_v2 = {
.cmq_init = hns_roce_v2_cmq_init,
.cmq_exit = hns_roce_v2_cmq_exit,
@@ -5359,6 +5777,12 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.poll_cq = hns_roce_v2_poll_cq,
.init_eq = hns_roce_v2_init_eq_table,
.cleanup_eq = hns_roce_v2_cleanup_eq_table,
+ .write_srqc = hns_roce_v2_write_srqc,
+ .modify_srq = hns_roce_v2_modify_srq,
+ .query_srq = hns_roce_v2_query_srq,
+ .post_srq_recv = hns_roce_v2_post_srq_recv,
+ .hns_roce_dev_ops = &hns_roce_v2_dev_ops,
+ .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
};
static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 8bc820635bbd..b72d0443c835 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -46,10 +46,16 @@
#define HNS_ROCE_V2_MAX_QP_NUM 0x2000
#define HNS_ROCE_V2_MAX_WQE_NUM 0x8000
+#define HNS_ROCE_V2_MAX_SRQ 0x100000
+#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100
#define HNS_ROCE_V2_MAX_CQ_NUM 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
+#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
+#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
#define HNS_ROCE_V2_UAR_NUM 256
@@ -61,6 +67,8 @@
#define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000
#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_PD_NUM 0x1000000
#define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128
#define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128
@@ -71,6 +79,7 @@
#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64
#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48
#define HNS_ROCE_V2_CQC_ENTRY_SZ 64
+#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64
#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32
@@ -84,8 +93,10 @@
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
#define HNS_ROCE_CQE_HOP_NUM 1
+#define HNS_ROCE_SRQWQE_HOP_NUM 1
#define HNS_ROCE_PBL_HOP_NUM 2
#define HNS_ROCE_EQE_HOP_NUM 2
+#define HNS_ROCE_IDX_HOP_NUM 1
#define HNS_ROCE_V2_GID_INDEX_NUM 256
@@ -113,6 +124,8 @@
((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
(step_idx == 1 && hop_num == 1) || \
(step_idx == 2 && hop_num == 2))
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT 0
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT)
#define CMD_CSQ_DESC_NUM 1024
#define CMD_CRQ_DESC_NUM 1024
@@ -213,7 +226,10 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
+ HNS_ROCE_OPC_POST_MB = 0x8504,
+ HNS_ROCE_OPC_QUERY_MB_ST = 0x8505,
HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
+ HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
enum {
@@ -325,6 +341,90 @@ struct hns_roce_v2_cq_context {
#define V2_CQC_BYTE_64_SE_CQE_IDX_S 0
#define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)
+struct hns_roce_srq_context {
+ __le32 byte_4_srqn_srqst;
+ __le32 byte_8_limit_wl;
+ __le32 byte_12_xrcd;
+ __le32 byte_16_pi_ci;
+ __le32 wqe_bt_ba;
+ __le32 byte_24_wqe_bt_ba;
+ __le32 byte_28_rqws_pd;
+ __le32 idx_bt_ba;
+ __le32 rsv_idx_bt_ba;
+ __le32 idx_cur_blk_addr;
+ __le32 byte_44_idxbufpgsz_addr;
+ __le32 idx_nxt_blk_addr;
+ __le32 rsv_idxnxtblkaddr;
+ __le32 byte_56_xrc_cqn;
+ __le32 db_record_addr_record_en;
+ __le32 db_record_addr;
+};
+
+#define SRQC_BYTE_4_SRQ_ST_S 0
+#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0)
+
+#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2
+#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2)
+
+#define SRQC_BYTE_4_SRQ_SHIFT_S 4
+#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4)
+
+#define SRQC_BYTE_4_SRQN_S 8
+#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8)
+
+#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0
+#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0)
+
+#define SRQC_BYTE_12_SRQ_XRCD_S 0
+#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0)
+
+#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0
+#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0)
+
+#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0
+#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16)
+
+#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0
+#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0)
+
+#define SRQC_BYTE_28_PD_S 0
+#define SRQC_BYTE_28_PD_M GENMASK(23, 0)
+
+#define SRQC_BYTE_28_RQWS_S 24
+#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24)
+
+#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0
+#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0)
+
+#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0
+#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0)
+
+#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22
+#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22)
+
+#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24
+#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24)
+
+#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28
+#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28)
+
+#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0
+#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0)
+
+#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0
+#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0)
+
+#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24
+#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24)
+
+#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28
+#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28)
+
+#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0
+
+#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1
+#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1)
+
enum{
V2_MPT_ST_VALID = 0x1,
V2_MPT_ST_FREE = 0x2,
@@ -1289,6 +1389,36 @@ struct hns_roce_vf_res_b {
#define VF_RES_B_DATA_3_VF_SL_NUM_S 16
#define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16)
+struct hns_roce_vf_switch {
+ __le32 rocee_sel;
+ __le32 fun_id;
+ __le32 cfg;
+ __le32 resv1;
+ __le32 resv2;
+ __le32 resv3;
+};
+
+#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3
+#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3)
+
+#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1
+#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2
+#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3
+
+struct hns_roce_post_mbox {
+ __le32 in_param_l;
+ __le32 in_param_h;
+ __le32 out_param_l;
+ __le32 out_param_h;
+ __le32 cmd_tag;
+ __le32 token_event_en;
+};
+
+struct hns_roce_mbox_status {
+ __le32 mb_status_hw_run;
+ __le32 rsv[5];
+};
+
struct hns_roce_cfg_bt_attr {
__le32 vf_qpc_cfg;
__le32 vf_srqc_cfg;
@@ -1372,18 +1502,6 @@ struct hns_roce_cmq_desc {
#define HNS_ROCE_HW_RUN_BIT_SHIFT 31
#define HNS_ROCE_HW_MB_STATUS_MASK 0xFF
-#define HNS_ROCE_VF_MB4_TAG_MASK 0xFFFFFF00
-#define HNS_ROCE_VF_MB4_TAG_SHIFT 8
-
-#define HNS_ROCE_VF_MB4_CMD_MASK 0xFF
-#define HNS_ROCE_VF_MB4_CMD_SHIFT 0
-
-#define HNS_ROCE_VF_MB5_EVENT_MASK 0x10000
-#define HNS_ROCE_VF_MB5_EVENT_SHIFT 16
-
-#define HNS_ROCE_VF_MB5_TOKEN_MASK 0xFFFF
-#define HNS_ROCE_VF_MB5_TOKEN_SHIFT 0
-
struct hns_roce_v2_cmq_ring {
dma_addr_t desc_dma_addr;
struct hns_roce_cmq_desc *desc;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 1b3ee514f2ef..c79054ba9495 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -220,6 +220,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ props->max_srq = hr_dev->caps.max_srqs;
+ props->max_srq_wr = hr_dev->caps.max_srq_wrs;
+ props->max_srq_sge = hr_dev->caps.max_srq_sges;
+ }
return 0;
}
@@ -440,6 +445,54 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
ib_unregister_device(&hr_dev->ib_dev);
}
+static const struct ib_device_ops hns_roce_dev_ops = {
+ .add_gid = hns_roce_add_gid,
+ .alloc_pd = hns_roce_alloc_pd,
+ .alloc_ucontext = hns_roce_alloc_ucontext,
+ .create_ah = hns_roce_create_ah,
+ .create_cq = hns_roce_ib_create_cq,
+ .create_qp = hns_roce_create_qp,
+ .dealloc_pd = hns_roce_dealloc_pd,
+ .dealloc_ucontext = hns_roce_dealloc_ucontext,
+ .del_gid = hns_roce_del_gid,
+ .dereg_mr = hns_roce_dereg_mr,
+ .destroy_ah = hns_roce_destroy_ah,
+ .destroy_cq = hns_roce_ib_destroy_cq,
+ .disassociate_ucontext = hns_roce_disassociate_ucontext,
+ .get_dma_mr = hns_roce_get_dma_mr,
+ .get_link_layer = hns_roce_get_link_layer,
+ .get_netdev = hns_roce_get_netdev,
+ .get_port_immutable = hns_roce_port_immutable,
+ .mmap = hns_roce_mmap,
+ .modify_device = hns_roce_modify_device,
+ .modify_port = hns_roce_modify_port,
+ .modify_qp = hns_roce_modify_qp,
+ .query_ah = hns_roce_query_ah,
+ .query_device = hns_roce_query_device,
+ .query_pkey = hns_roce_query_pkey,
+ .query_port = hns_roce_query_port,
+ .reg_user_mr = hns_roce_reg_user_mr,
+};
+
+static const struct ib_device_ops hns_roce_dev_mr_ops = {
+ .rereg_user_mr = hns_roce_rereg_user_mr,
+};
+
+static const struct ib_device_ops hns_roce_dev_mw_ops = {
+ .alloc_mw = hns_roce_alloc_mw,
+ .dealloc_mw = hns_roce_dealloc_mw,
+};
+
+static const struct ib_device_ops hns_roce_dev_frmr_ops = {
+ .alloc_mr = hns_roce_alloc_mr,
+ .map_mr_sg = hns_roce_map_mr_sg,
+};
+
+static const struct ib_device_ops hns_roce_dev_srq_ops = {
+ .create_srq = hns_roce_create_srq,
+ .destroy_srq = hns_roce_destroy_srq,
+};
+
static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
{
int ret;
@@ -479,73 +532,38 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->uverbs_ex_cmd_mask |=
(1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
- /* HCA||device||port */
- ib_dev->modify_device = hns_roce_modify_device;
- ib_dev->query_device = hns_roce_query_device;
- ib_dev->query_port = hns_roce_query_port;
- ib_dev->modify_port = hns_roce_modify_port;
- ib_dev->get_link_layer = hns_roce_get_link_layer;
- ib_dev->get_netdev = hns_roce_get_netdev;
- ib_dev->add_gid = hns_roce_add_gid;
- ib_dev->del_gid = hns_roce_del_gid;
- ib_dev->query_pkey = hns_roce_query_pkey;
- ib_dev->alloc_ucontext = hns_roce_alloc_ucontext;
- ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext;
- ib_dev->mmap = hns_roce_mmap;
-
- /* PD */
- ib_dev->alloc_pd = hns_roce_alloc_pd;
- ib_dev->dealloc_pd = hns_roce_dealloc_pd;
-
- /* AH */
- ib_dev->create_ah = hns_roce_create_ah;
- ib_dev->query_ah = hns_roce_query_ah;
- ib_dev->destroy_ah = hns_roce_destroy_ah;
-
- /* QP */
- ib_dev->create_qp = hns_roce_create_qp;
- ib_dev->modify_qp = hns_roce_modify_qp;
- ib_dev->query_qp = hr_dev->hw->query_qp;
- ib_dev->destroy_qp = hr_dev->hw->destroy_qp;
- ib_dev->post_send = hr_dev->hw->post_send;
- ib_dev->post_recv = hr_dev->hw->post_recv;
-
- /* CQ */
- ib_dev->create_cq = hns_roce_ib_create_cq;
- ib_dev->modify_cq = hr_dev->hw->modify_cq;
- ib_dev->destroy_cq = hns_roce_ib_destroy_cq;
- ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq;
- ib_dev->poll_cq = hr_dev->hw->poll_cq;
-
- /* MR */
- ib_dev->get_dma_mr = hns_roce_get_dma_mr;
- ib_dev->reg_user_mr = hns_roce_reg_user_mr;
- ib_dev->dereg_mr = hns_roce_dereg_mr;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) {
- ib_dev->rereg_user_mr = hns_roce_rereg_user_mr;
ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
}
/* MW */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
- ib_dev->alloc_mw = hns_roce_alloc_mw;
- ib_dev->dealloc_mw = hns_roce_dealloc_mw;
ib_dev->uverbs_cmd_mask |=
(1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops);
}
/* FRMR */
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
- ib_dev->alloc_mr = hns_roce_alloc_mr;
- ib_dev->map_mr_sg = hns_roce_map_mr_sg;
- }
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops);
- /* OTHERS */
- ib_dev->get_port_immutable = hns_roce_port_immutable;
- ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
+ /* SRQ */
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ib_dev->uverbs_cmd_mask |=
+ (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
+ ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
+ }
ib_dev->driver_id = RDMA_DRIVER_HNS;
+ ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
ret = ib_register_device(ib_dev, "hns_%d", NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
@@ -646,8 +664,58 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
goto err_unmap_trrl;
}
+ if (hr_dev->caps.srqc_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
+ HEM_TYPE_SRQC,
+ hr_dev->caps.srqc_entry_sz,
+ hr_dev->caps.num_srqs, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init SRQ context memory, aborting.\n");
+ goto err_unmap_cq;
+ }
+ }
+
+ if (hr_dev->caps.num_srqwqe_segs) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table,
+ HEM_TYPE_SRQWQE,
+ hr_dev->caps.mtt_entry_sz,
+ hr_dev->caps.num_srqwqe_segs, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init MTT srqwqe memory, aborting.\n");
+ goto err_unmap_srq;
+ }
+ }
+
+ if (hr_dev->caps.num_idx_segs) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_idx_table,
+ HEM_TYPE_IDX,
+ hr_dev->caps.idx_entry_sz,
+ hr_dev->caps.num_idx_segs, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init MTT idx memory, aborting.\n");
+ goto err_unmap_srqwqe;
+ }
+ }
+
return 0;
+err_unmap_srqwqe:
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table);
+
+err_unmap_srq:
+ if (hr_dev->caps.srqc_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
+
+err_unmap_cq:
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
+
err_unmap_trrl:
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
@@ -727,8 +795,21 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
goto err_cq_table_free;
}
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ret = hns_roce_init_srq_table(hr_dev);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init share receive queue table.\n");
+ goto err_qp_table_free;
+ }
+ }
+
return 0;
+err_qp_table_free:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_qp_table(hr_dev);
+
err_cq_table_free:
hns_roce_cleanup_cq_table(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 521ad2aa3a4e..ee5991bd4171 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -184,12 +184,27 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
struct hns_roce_buddy *buddy;
int ret;
- if (mtt_type == MTT_TYPE_WQE) {
+ switch (mtt_type) {
+ case MTT_TYPE_WQE:
buddy = &mr_table->mtt_buddy;
table = &mr_table->mtt_table;
- } else {
+ break;
+ case MTT_TYPE_CQE:
buddy = &mr_table->mtt_cqe_buddy;
table = &mr_table->mtt_cqe_table;
+ break;
+ case MTT_TYPE_SRQWQE:
+ buddy = &mr_table->mtt_srqwqe_buddy;
+ table = &mr_table->mtt_srqwqe_table;
+ break;
+ case MTT_TYPE_IDX:
+ buddy = &mr_table->mtt_idx_buddy;
+ table = &mr_table->mtt_idx_table;
+ break;
+ default:
+ dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n",
+ mtt_type);
+ return -EINVAL;
}
ret = hns_roce_buddy_alloc(buddy, order, seg);
@@ -242,18 +257,40 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
if (mtt->order < 0)
return;
- if (mtt->mtt_type == MTT_TYPE_WQE) {
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
mtt->order);
hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
- } else {
+ break;
+ case MTT_TYPE_CQE:
hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
mtt->order);
hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
+ break;
+ case MTT_TYPE_SRQWQE:
+ hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
+ mtt->order);
+ hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
+ mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+ break;
+ case MTT_TYPE_IDX:
+ hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
+ mtt->order);
+ hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
+ mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "Unsupport mtt type %d, clean mtt failed\n",
+ mtt->mtt_type);
+ break;
}
}
EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
@@ -713,10 +750,26 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
u32 bt_page_size;
u32 i;
- if (mtt->mtt_type == MTT_TYPE_WQE)
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
+ table = &hr_dev->mr_table.mtt_table;
bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- else
+ break;
+ case MTT_TYPE_CQE:
+ table = &hr_dev->mr_table.mtt_cqe_table;
bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_SRQWQE:
+ table = &hr_dev->mr_table.mtt_srqwqe_table;
+ bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_IDX:
+ table = &hr_dev->mr_table.mtt_idx_table;
+ bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+ break;
+ default:
+ return -EINVAL;
+ }
/* All MTTs must fit in the same page */
if (start_index / (bt_page_size / sizeof(u64)) !=
@@ -726,11 +779,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
return -EINVAL;
- if (mtt->mtt_type == MTT_TYPE_WQE)
- table = &hr_dev->mr_table.mtt_table;
- else
- table = &hr_dev->mr_table.mtt_cqe_table;
-
mtts = hns_roce_table_find(hr_dev, table,
mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
&dma_handle);
@@ -759,10 +807,25 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
if (mtt->order < 0)
return -EINVAL;
- if (mtt->mtt_type == MTT_TYPE_WQE)
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- else
+ break;
+ case MTT_TYPE_CQE:
bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_SRQWQE:
+ bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_IDX:
+ bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "Unsupport mtt type %d, write mtt failed\n",
+ mtt->mtt_type);
+ return -EINVAL;
+ }
while (npages > 0) {
chunk = min_t(int, bt_page_size / sizeof(u64), npages);
@@ -828,8 +891,31 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
if (ret)
goto err_buddy_cqe;
}
+
+ if (hr_dev->caps.num_srqwqe_segs) {
+ ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
+ ilog2(hr_dev->caps.num_srqwqe_segs));
+ if (ret)
+ goto err_buddy_srqwqe;
+ }
+
+ if (hr_dev->caps.num_idx_segs) {
+ ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
+ ilog2(hr_dev->caps.num_idx_segs));
+ if (ret)
+ goto err_buddy_idx;
+ }
+
return 0;
+err_buddy_idx:
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
+
+err_buddy_srqwqe:
+ if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
+ hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
+
err_buddy_cqe:
hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
@@ -842,6 +928,10 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+ if (hr_dev->caps.num_idx_segs)
+ hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
@@ -897,8 +987,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
u32 bt_page_size;
u32 n;
- order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz :
- hr_dev->caps.cqe_ba_pg_sz;
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
+ order = hr_dev->caps.mtt_ba_pg_sz;
+ break;
+ case MTT_TYPE_CQE:
+ order = hr_dev->caps.cqe_ba_pg_sz;
+ break;
+ case MTT_TYPE_SRQWQE:
+ order = hr_dev->caps.srqwqe_ba_pg_sz;
+ break;
+ case MTT_TYPE_IDX:
+ order = hr_dev->caps.idx_ba_pg_sz;
+ break;
+ default:
+ dev_err(dev, "Unsupport mtt type %d, write mtt failed\n",
+ mtt->mtt_type);
+ return -EINVAL;
+ }
+
bt_page_size = 1 << (order + PAGE_SHIFT);
pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
@@ -1021,14 +1128,14 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_umem;
}
} else {
- int pbl_size = 1;
+ u64 pbl_size = 1;
bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
pbl_size *= bt_size;
if (n > pbl_size) {
dev_err(dev,
- " MR len %lld err. MR page num is limited to %d!\n",
+ " MR len %lld err. MR page num is limited to %lld!\n",
length, pbl_size);
ret = -EINVAL;
goto err_umem;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 5ebf481a39d9..54031c5b53fa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -280,7 +280,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
EXPORT_SYMBOL_GPL(hns_roce_release_range_qp);
static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
- struct ib_qp_cap *cap, int is_user, int has_srq,
+ struct ib_qp_cap *cap, bool is_user, int has_rq,
struct hns_roce_qp *hr_qp)
{
struct device *dev = hr_dev->dev;
@@ -294,14 +294,12 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
return -EINVAL;
}
- /* If srq exit, set zero for relative number of rq */
- if (has_srq) {
- if (cap->max_recv_wr) {
- dev_dbg(dev, "srq no need config max_recv_wr\n");
- return -EINVAL;
- }
-
- hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0;
+ /* If srq exist, set zero for relative number of rq */
+ if (!has_rq) {
+ hr_qp->rq.wqe_cnt = 0;
+ hr_qp->rq.max_gs = 0;
+ cap->max_recv_wr = 0;
+ cap->max_recv_sge = 0;
} else {
if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
@@ -562,14 +560,15 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
else
hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR);
- ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject,
- !!init_attr->srq, hr_qp);
+ ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata,
+ hns_roce_qp_has_rq(init_attr), hr_qp);
if (ret) {
dev_err(dev, "hns_roce_set_rq_size failed\n");
goto err_out;
}
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+ hns_roce_qp_has_rq(init_attr)) {
/* allocate recv inline buf */
hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
sizeof(struct hns_roce_rinl_wqe),
@@ -599,7 +598,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
init_attr->cap.max_recv_sge];
}
- if (ib_pd->uobject) {
+ if (udata) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
dev_err(dev, "ib_copy_from_udata error for create qp\n");
ret = -EFAULT;
@@ -784,7 +783,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
else
hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn);
- if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) &&
+ if (udata && (udata->outlen >= sizeof(resp)) &&
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) {
/* indicate kernel supports rq record db */
@@ -811,7 +810,7 @@ err_qpn:
hns_roce_release_range_qp(hr_dev, qpn, 1);
err_wrid:
- if (ib_pd->uobject) {
+ if (udata) {
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
(udata->outlen >= sizeof(resp)) &&
hns_roce_qp_has_rq(init_attr))
@@ -824,7 +823,7 @@ err_wrid:
}
err_sq_dbmap:
- if (ib_pd->uobject)
+ if (udata)
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
(udata->inlen >= sizeof(ucmd)) &&
(udata->outlen >= sizeof(resp)) &&
@@ -837,13 +836,13 @@ err_mtt:
hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
err_buf:
- if (ib_pd->uobject)
+ if (hr_qp->umem)
ib_umem_release(hr_qp->umem);
else
hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
err_db:
- if (!ib_pd->uobject && hns_roce_qp_has_rq(init_attr) &&
+ if (!udata && hns_roce_qp_has_rq(init_attr) &&
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
hns_roce_free_db(hr_dev, &hr_qp->rdb);
@@ -889,7 +888,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
}
case IB_QPT_GSI: {
/* Userspace is not allowed to create special QPs: */
- if (pd->uobject) {
+ if (udata) {
dev_err(dev, "not support usr space GSI\n");
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
new file mode 100644
index 000000000000..960b1946c365
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018 Hisilicon Limited.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/hns-abi.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_srq *srq;
+
+ xa_lock(&srq_table->xa);
+ srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
+ if (srq)
+ atomic_inc(&srq->refcount);
+ xa_unlock(&srq_table->xa);
+
+ if (!srq) {
+ dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn);
+ return;
+ }
+
+ srq->event(srq, event_type);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+}
+EXPORT_SYMBOL_GPL(hns_roce_srq_event);
+
+static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
+ enum hns_roce_event event_type)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq *ibsrq = &srq->ibsrq;
+ struct ib_event event;
+
+ if (ibsrq->event_handler) {
+ event.device = ibsrq->device;
+ event.element.srq = ibsrq;
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+ event_type, srq->srqn);
+ return;
+ }
+
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+ }
+}
+
+static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long srq_num)
+{
+ return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
+ HNS_ROCE_CMD_SW2HW_SRQ,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long srq_num)
+{
+ return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
+ mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd,
+ struct hns_roce_mtt *hr_mtt, u64 db_rec_addr,
+ struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_cmd_mailbox *mailbox;
+ dma_addr_t dma_handle_wqe;
+ dma_addr_t dma_handle_idx;
+ u64 *mtts_wqe;
+ u64 *mtts_idx;
+ int ret;
+
+ /* Get the physical address of srq buf */
+ mtts_wqe = hns_roce_table_find(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table,
+ srq->mtt.first_seg,
+ &dma_handle_wqe);
+ if (!mtts_wqe) {
+ dev_err(hr_dev->dev,
+ "SRQ alloc.Failed to find srq buf addr.\n");
+ return -EINVAL;
+ }
+
+ /* Get physical address of idx que buf */
+ mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table,
+ srq->idx_que.mtt.first_seg,
+ &dma_handle_idx);
+ if (!mtts_idx) {
+ dev_err(hr_dev->dev,
+ "SRQ alloc.Failed to find idx que buf addr.\n");
+ return -EINVAL;
+ }
+
+ ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
+ if (ret == -1) {
+ dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n");
+ return -ENOMEM;
+ }
+
+ ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+ if (ret)
+ goto err_out;
+
+ ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ if (ret)
+ goto err_put;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto err_xa;
+ }
+
+ hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
+ mtts_wqe, mtts_idx, dma_handle_wqe,
+ dma_handle_idx);
+
+ ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret)
+ goto err_xa;
+
+ atomic_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+ return ret;
+
+err_xa:
+ xa_erase(&srq_table->xa, srq->srqn);
+
+err_put:
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+
+err_out:
+ hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+ return ret;
+}
+
+void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ int ret;
+
+ ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn);
+ if (ret)
+ dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n",
+ ret, srq->srqn);
+
+ xa_erase(&srq_table->xa, srq->srqn);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
+
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+ hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+}
+
+static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
+ u32 page_shift)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ u32 bitmap_num;
+ int i;
+
+ bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64));
+
+ idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL);
+ if (!idx_que->bitmap)
+ return -ENOMEM;
+
+ bitmap_num = bitmap_num / (8 * sizeof(u64));
+
+ idx_que->buf_size = srq->idx_que.buf_size;
+
+ if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
+ &idx_que->idx_buf, page_shift)) {
+ kfree(idx_que->bitmap);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < bitmap_num; i++)
+ idx_que->bitmap[i] = ~(0UL);
+
+ return 0;
+}
+
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_srq *srq;
+ int srq_desc_size;
+ int srq_buf_size;
+ u32 page_shift;
+ int ret = 0;
+ u32 npages;
+ u32 cqn;
+
+ /* Check the actual SRQ wqe and SRQ sge num */
+ if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
+ srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+ return ERR_PTR(-EINVAL);
+
+ srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+
+ srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
+ srq->max_gs = srq_init_attr->attr.max_sge;
+
+ srq_desc_size = max(16, 16 * srq->max_gs);
+
+ srq->wqe_shift = ilog2(srq_desc_size);
+
+ srq_buf_size = srq->max * srq_desc_size;
+
+ srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ;
+ srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz;
+ srq->mtt.mtt_type = MTT_TYPE_SRQWQE;
+ srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
+
+ if (udata) {
+ struct hns_roce_ib_create_srq ucmd;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ ret = -EFAULT;
+ goto err_srq;
+ }
+
+ srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ srq_buf_size, 0, 0);
+ if (IS_ERR(srq->umem)) {
+ ret = PTR_ERR(srq->umem);
+ goto err_srq;
+ }
+
+ if (hr_dev->caps.srqwqe_buf_pg_sz) {
+ npages = (ib_umem_page_count(srq->umem) +
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz);
+ page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, npages,
+ page_shift,
+ &srq->mtt);
+ } else
+ ret = hns_roce_mtt_init(hr_dev,
+ ib_umem_page_count(srq->umem),
+ srq->umem->page_shift,
+ &srq->mtt);
+ if (ret)
+ goto err_buf;
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
+ if (ret)
+ goto err_srq_mtt;
+
+ /* config index queue BA */
+ srq->idx_que.umem = ib_umem_get(pd->uobject->context,
+ ucmd.que_addr,
+ srq->idx_que.buf_size, 0, 0);
+ if (IS_ERR(srq->idx_que.umem)) {
+ dev_err(hr_dev->dev,
+ "ib_umem_get error for index queue\n");
+ ret = PTR_ERR(srq->idx_que.umem);
+ goto err_srq_mtt;
+ }
+
+ if (hr_dev->caps.idx_buf_pg_sz) {
+ npages = (ib_umem_page_count(srq->idx_que.umem) +
+ (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
+ (1 << hr_dev->caps.idx_buf_pg_sz);
+ page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, npages,
+ page_shift, &srq->idx_que.mtt);
+ } else {
+ ret = hns_roce_mtt_init(hr_dev,
+ ib_umem_page_count(srq->idx_que.umem),
+ srq->idx_que.umem->page_shift,
+ &srq->idx_que.mtt);
+ }
+
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "hns_roce_mtt_init error for idx que\n");
+ goto err_idx_mtt;
+ }
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
+ srq->idx_que.umem);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "hns_roce_ib_umem_write_mtt error for idx que\n");
+ goto err_idx_buf;
+ }
+ } else {
+ page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ if (hns_roce_buf_alloc(hr_dev, srq_buf_size,
+ (1 << page_shift) * 2,
+ &srq->buf, page_shift)) {
+ ret = -ENOMEM;
+ goto err_srq;
+ }
+
+ srq->head = 0;
+ srq->tail = srq->max - 1;
+
+ ret = hns_roce_mtt_init(hr_dev, srq->buf.npages,
+ srq->buf.page_shift, &srq->mtt);
+ if (ret)
+ goto err_buf;
+
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
+ if (ret)
+ goto err_srq_mtt;
+
+ page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_create_idx_que(pd, srq, page_shift);
+ if (ret) {
+ dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n",
+ ret);
+ goto err_srq_mtt;
+ }
+
+ /* Init mtt table for idx_que */
+ ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
+ srq->idx_que.idx_buf.page_shift,
+ &srq->idx_que.mtt);
+ if (ret)
+ goto err_create_idx;
+
+ /* Write buffer address into the mtt table */
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
+ &srq->idx_que.idx_buf);
+ if (ret)
+ goto err_idx_buf;
+
+ srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
+ if (!srq->wrid) {
+ ret = -ENOMEM;
+ goto err_idx_buf;
+ }
+ }
+
+ cqn = ib_srq_has_cq(srq_init_attr->srq_type) ?
+ to_hr_cq(srq_init_attr->ext.cq)->cqn : 0;
+
+ srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+
+ ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0,
+ &srq->mtt, 0, srq);
+ if (ret)
+ goto err_wrid;
+
+ srq->event = hns_roce_ib_srq_event;
+ srq->ibsrq.ext.xrc.srq_num = srq->srqn;
+
+ if (udata) {
+ if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
+ ret = -EFAULT;
+ goto err_wrid;
+ }
+ }
+
+ return &srq->ibsrq;
+
+err_wrid:
+ kvfree(srq->wrid);
+
+err_idx_buf:
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_idx_mtt:
+ if (udata)
+ ib_umem_release(srq->idx_que.umem);
+
+err_create_idx:
+ hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
+ &srq->idx_que.idx_buf);
+ kfree(srq->idx_que.bitmap);
+
+err_srq_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_buf:
+ if (udata)
+ ib_umem_release(srq->umem);
+ else
+ hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+
+err_srq:
+ kfree(srq);
+ return ERR_PTR(ret);
+}
+
+int hns_roce_destroy_srq(struct ib_srq *ibsrq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+
+ hns_roce_srq_free(hr_dev, srq);
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+ if (ibsrq->uobject) {
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+ ib_umem_release(srq->idx_que.umem);
+ ib_umem_release(srq->umem);
+ } else {
+ kvfree(srq->wrid);
+ hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
+ &srq->buf);
+ }
+
+ kfree(srq);
+
+ return 0;
+}
+
+int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+
+ xa_init(&srq_table->xa);
+
+ return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs,
+ hr_dev->caps.num_srqs - 1,
+ hr_dev->caps.reserved_srqs, 0);
+}
+
+void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev)
+{
+ hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 4b3999d88c9e..206cfb0016f8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -3478,7 +3478,7 @@ static void i40iw_qp_disconnect(struct i40iw_qp *iwqp)
/* Need to free the Last Streaming Mode Message */
if (iwqp->ietf_mem.va) {
if (iwqp->lsmm_mr)
- iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr);
+ iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr);
i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem);
}
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 102875872bea..0b675b0742c2 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -673,28 +673,26 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
goto error;
}
iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
- if (ibpd->uobject && ibpd->uobject->context) {
- iwqp->user_mode = 1;
- ucontext = to_ucontext(ibpd->uobject->context);
-
- if (req.user_wqe_buffers) {
- struct i40iw_pbl *iwpbl;
-
- spin_lock_irqsave(
- &ucontext->qp_reg_mem_list_lock, flags);
- iwpbl = i40iw_get_pbl(
- (unsigned long)req.user_wqe_buffers,
- &ucontext->qp_reg_mem_list);
- spin_unlock_irqrestore(
- &ucontext->qp_reg_mem_list_lock, flags);
-
- if (!iwpbl) {
- err_code = -ENODATA;
- i40iw_pr_err("no pbl info\n");
- goto error;
- }
- memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl));
+ iwqp->user_mode = 1;
+ ucontext = to_ucontext(ibpd->uobject->context);
+
+ if (req.user_wqe_buffers) {
+ struct i40iw_pbl *iwpbl;
+
+ spin_lock_irqsave(
+ &ucontext->qp_reg_mem_list_lock, flags);
+ iwpbl = i40iw_get_pbl(
+ (unsigned long)req.user_wqe_buffers,
+ &ucontext->qp_reg_mem_list);
+ spin_unlock_irqrestore(
+ &ucontext->qp_reg_mem_list_lock, flags);
+
+ if (!iwpbl) {
+ err_code = -ENODATA;
+ i40iw_pr_err("no pbl info\n");
+ goto error;
}
+ memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl));
}
err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
} else {
@@ -768,7 +766,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
iwdev->qp_table[qp_num] = iwqp;
i40iw_add_pdusecount(iwqp->iwpd);
i40iw_add_devusecount(iwdev);
- if (ibpd->uobject && udata) {
+ if (udata) {
memset(&uresp, 0, sizeof(uresp));
uresp.actual_sq_size = sq_size;
uresp.actual_rq_size = rq_size;
@@ -2092,7 +2090,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
ib_umem_release(iwmr->region);
if (iwmr->type != IW_MEMREG_TYPE_MEM) {
- if (ibpd->uobject) {
+ /* region is released. only test for userness. */
+ if (iwmr->region) {
struct i40iw_ucontext *ucontext;
ucontext = to_ucontext(ibpd->uobject->context);
@@ -2721,24 +2720,38 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
return 0;
}
-/**
- * i40iw_get_vector_affinity - report IRQ affinity mask
- * @ibdev: IB device
- * @comp_vector: completion vector index
- */
-static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev,
- int comp_vector)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_msix_vector *msix_vec;
-
- if (iwdev->msix_shared)
- msix_vec = &iwdev->iw_msixtbl[comp_vector];
- else
- msix_vec = &iwdev->iw_msixtbl[comp_vector + 1];
-
- return irq_get_affinity_mask(msix_vec->irq);
-}
+static const struct ib_device_ops i40iw_dev_ops = {
+ .alloc_hw_stats = i40iw_alloc_hw_stats,
+ .alloc_mr = i40iw_alloc_mr,
+ .alloc_pd = i40iw_alloc_pd,
+ .alloc_ucontext = i40iw_alloc_ucontext,
+ .create_cq = i40iw_create_cq,
+ .create_qp = i40iw_create_qp,
+ .dealloc_pd = i40iw_dealloc_pd,
+ .dealloc_ucontext = i40iw_dealloc_ucontext,
+ .dereg_mr = i40iw_dereg_mr,
+ .destroy_cq = i40iw_destroy_cq,
+ .destroy_qp = i40iw_destroy_qp,
+ .drain_rq = i40iw_drain_rq,
+ .drain_sq = i40iw_drain_sq,
+ .get_dev_fw_str = i40iw_get_dev_fw_str,
+ .get_dma_mr = i40iw_get_dma_mr,
+ .get_hw_stats = i40iw_get_hw_stats,
+ .get_port_immutable = i40iw_port_immutable,
+ .map_mr_sg = i40iw_map_mr_sg,
+ .mmap = i40iw_mmap,
+ .modify_qp = i40iw_modify_qp,
+ .poll_cq = i40iw_poll_cq,
+ .post_recv = i40iw_post_recv,
+ .post_send = i40iw_post_send,
+ .query_device = i40iw_query_device,
+ .query_gid = i40iw_query_gid,
+ .query_pkey = i40iw_query_pkey,
+ .query_port = i40iw_query_port,
+ .query_qp = i40iw_query_qp,
+ .reg_user_mr = i40iw_reg_user_mr,
+ .req_notify_cq = i40iw_req_notify_cq,
+};
/**
* i40iw_init_rdma_device - initialization of iwarp device
@@ -2786,30 +2799,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.phys_port_cnt = 1;
iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
iwibdev->ibdev.dev.parent = &pcidev->dev;
- iwibdev->ibdev.query_port = i40iw_query_port;
- iwibdev->ibdev.query_pkey = i40iw_query_pkey;
- iwibdev->ibdev.query_gid = i40iw_query_gid;
- iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext;
- iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext;
- iwibdev->ibdev.mmap = i40iw_mmap;
- iwibdev->ibdev.alloc_pd = i40iw_alloc_pd;
- iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd;
- iwibdev->ibdev.create_qp = i40iw_create_qp;
- iwibdev->ibdev.modify_qp = i40iw_modify_qp;
- iwibdev->ibdev.query_qp = i40iw_query_qp;
- iwibdev->ibdev.destroy_qp = i40iw_destroy_qp;
- iwibdev->ibdev.create_cq = i40iw_create_cq;
- iwibdev->ibdev.destroy_cq = i40iw_destroy_cq;
- iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr;
- iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr;
- iwibdev->ibdev.dereg_mr = i40iw_dereg_mr;
- iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats;
- iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats;
- iwibdev->ibdev.query_device = i40iw_query_device;
- iwibdev->ibdev.drain_sq = i40iw_drain_sq;
- iwibdev->ibdev.drain_rq = i40iw_drain_rq;
- iwibdev->ibdev.alloc_mr = i40iw_alloc_mr;
- iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg;
iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
if (!iwibdev->ibdev.iwcm) {
ib_dealloc_device(&iwibdev->ibdev);
@@ -2826,13 +2815,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen;
memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(iwibdev->ibdev.iwcm->ifname));
- iwibdev->ibdev.get_port_immutable = i40iw_port_immutable;
- iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str;
- iwibdev->ibdev.poll_cq = i40iw_poll_cq;
- iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
- iwibdev->ibdev.post_send = i40iw_post_send;
- iwibdev->ibdev.post_recv = i40iw_post_recv;
- iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity;
+ ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops);
return iwibdev;
}
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index e9e3a6f390db..1672808262ba 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -144,7 +144,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
}
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
struct mlx4_ib_ah *ah;
@@ -189,7 +189,7 @@ struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
slave_attr.grh.sgid_attr = NULL;
slave_attr.grh.sgid_index = slave_sgid_index;
- ah = mlx4_ib_create_ah(pd, &slave_attr, NULL);
+ ah = mlx4_ib_create_ah(pd, &slave_attr, 0, NULL);
if (IS_ERR(ah))
return ah;
@@ -250,7 +250,7 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-int mlx4_ib_destroy_ah(struct ib_ah *ah)
+int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
{
kfree(to_mah(ah));
return 0;
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 155b4dfc0ae8..782499abcd98 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -849,7 +849,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
for (i = 1; i <= dev->num_ports; ++i) {
- if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
+ if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) {
ret = -EFAULT;
goto err_unregister;
}
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 8942f5f7f04d..25439da8976c 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -202,13 +202,13 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
rdma_ah_set_port_num(&ah_attr, port_num);
new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ &ah_attr, 0);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- rdma_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1], 0);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -567,7 +567,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
return -EINVAL;
rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0);
}
- ah = rdma_create_ah(tun_ctx->pd, &attr);
+ ah = rdma_create_ah(tun_ctx->pd, &attr, 0);
if (IS_ERR(ah))
return -ENOMEM;
@@ -584,7 +584,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
if (tun_qp->tx_ring[tun_tx_ix].ah)
- rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0);
tun_qp->tx_ring[tun_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
@@ -657,7 +657,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&tun_qp->tx_lock);
tun_qp->tx_ring[tun_tx_ix].ah = NULL;
end:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, 0);
return ret;
}
@@ -1024,7 +1024,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0])
- rdma_destroy_ah(mad_send_wc->send_buf->context[0]);
+ rdma_destroy_ah(mad_send_wc->send_buf->context[0], 0);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -1079,7 +1079,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
}
if (dev->sm_ah[p])
- rdma_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p], 0);
}
}
@@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
if (sqp->tx_ring[wire_tx_ix].ah)
- rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+ rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0);
sqp->tx_ring[wire_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
sqp->tx_ring[wire_tx_ix].buf.map,
@@ -1450,7 +1450,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&sqp->tx_lock);
sqp->tx_ring[wire_tx_ix].ah = NULL;
out:
- mlx4_ib_destroy_ah(ah);
+ mlx4_ib_destroy_ah(ah, 0);
return ret;
}
@@ -1716,7 +1716,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
if (tun_qp->tx_ring[i].ah)
- rdma_destroy_ah(tun_qp->tx_ring[i].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[i].ah, 0);
}
kfree(tun_qp->tx_ring);
kfree(tun_qp->ring);
@@ -1749,7 +1749,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
"wrid=0x%llx, status=0x%x\n",
wc.wr_id, wc.status);
rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&tun_qp->tx_lock);
@@ -1766,7 +1766,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&tun_qp->tx_lock);
@@ -1903,7 +1903,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
switch (wc.opcode) {
case IB_WC_SEND:
rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
@@ -1932,7 +1932,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0def2323459c..1f15ec3e2b83 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2220,6 +2220,11 @@ static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
}
}
+static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
+ .alloc_hw_stats = mlx4_ib_alloc_hw_stats,
+ .get_hw_stats = mlx4_ib_get_hw_stats,
+};
+
static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
{
struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
@@ -2246,8 +2251,7 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
diag[i].offset, i);
}
- ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;
- ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops);
return 0;
@@ -2352,6 +2356,32 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
event == NETDEV_UP || event == NETDEV_CHANGE))
update_qps_port = port;
+ if (dev == iboe->netdevs[port - 1] &&
+ (event == NETDEV_UP || event == NETDEV_DOWN)) {
+ enum ib_port_state port_state;
+ struct ib_event ibev = { };
+
+ if (ib_get_cached_port_state(&ibdev->ib_dev, port,
+ &port_state))
+ continue;
+
+ if (event == NETDEV_UP &&
+ (port_state != IB_PORT_ACTIVE ||
+ iboe->last_port_state[port - 1] != IB_PORT_DOWN))
+ continue;
+ if (event == NETDEV_DOWN &&
+ (port_state != IB_PORT_DOWN ||
+ iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
+ continue;
+ iboe->last_port_state[port - 1] = port_state;
+
+ ibev.device = &ibdev->ib_dev;
+ ibev.element.port_num = port;
+ ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
+ IB_EVENT_PORT_ERR;
+ ib_dispatch_event(&ibev);
+ }
+
}
spin_unlock_bh(&iboe->lock);
@@ -2499,6 +2529,88 @@ static void get_fw_ver_str(struct ib_device *device, char *str)
(int) dev->dev->caps.fw_ver & 0xffff);
}
+static const struct ib_device_ops mlx4_ib_dev_ops = {
+ .add_gid = mlx4_ib_add_gid,
+ .alloc_mr = mlx4_ib_alloc_mr,
+ .alloc_pd = mlx4_ib_alloc_pd,
+ .alloc_ucontext = mlx4_ib_alloc_ucontext,
+ .attach_mcast = mlx4_ib_mcg_attach,
+ .create_ah = mlx4_ib_create_ah,
+ .create_cq = mlx4_ib_create_cq,
+ .create_qp = mlx4_ib_create_qp,
+ .create_srq = mlx4_ib_create_srq,
+ .dealloc_pd = mlx4_ib_dealloc_pd,
+ .dealloc_ucontext = mlx4_ib_dealloc_ucontext,
+ .del_gid = mlx4_ib_del_gid,
+ .dereg_mr = mlx4_ib_dereg_mr,
+ .destroy_ah = mlx4_ib_destroy_ah,
+ .destroy_cq = mlx4_ib_destroy_cq,
+ .destroy_qp = mlx4_ib_destroy_qp,
+ .destroy_srq = mlx4_ib_destroy_srq,
+ .detach_mcast = mlx4_ib_mcg_detach,
+ .disassociate_ucontext = mlx4_ib_disassociate_ucontext,
+ .drain_rq = mlx4_ib_drain_rq,
+ .drain_sq = mlx4_ib_drain_sq,
+ .get_dev_fw_str = get_fw_ver_str,
+ .get_dma_mr = mlx4_ib_get_dma_mr,
+ .get_link_layer = mlx4_ib_port_link_layer,
+ .get_netdev = mlx4_ib_get_netdev,
+ .get_port_immutable = mlx4_port_immutable,
+ .map_mr_sg = mlx4_ib_map_mr_sg,
+ .mmap = mlx4_ib_mmap,
+ .modify_cq = mlx4_ib_modify_cq,
+ .modify_device = mlx4_ib_modify_device,
+ .modify_port = mlx4_ib_modify_port,
+ .modify_qp = mlx4_ib_modify_qp,
+ .modify_srq = mlx4_ib_modify_srq,
+ .poll_cq = mlx4_ib_poll_cq,
+ .post_recv = mlx4_ib_post_recv,
+ .post_send = mlx4_ib_post_send,
+ .post_srq_recv = mlx4_ib_post_srq_recv,
+ .process_mad = mlx4_ib_process_mad,
+ .query_ah = mlx4_ib_query_ah,
+ .query_device = mlx4_ib_query_device,
+ .query_gid = mlx4_ib_query_gid,
+ .query_pkey = mlx4_ib_query_pkey,
+ .query_port = mlx4_ib_query_port,
+ .query_qp = mlx4_ib_query_qp,
+ .query_srq = mlx4_ib_query_srq,
+ .reg_user_mr = mlx4_ib_reg_user_mr,
+ .req_notify_cq = mlx4_ib_arm_cq,
+ .rereg_user_mr = mlx4_ib_rereg_user_mr,
+ .resize_cq = mlx4_ib_resize_cq,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
+ .create_rwq_ind_table = mlx4_ib_create_rwq_ind_table,
+ .create_wq = mlx4_ib_create_wq,
+ .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
+ .destroy_wq = mlx4_ib_destroy_wq,
+ .modify_wq = mlx4_ib_modify_wq,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_fmr_ops = {
+ .alloc_fmr = mlx4_ib_fmr_alloc,
+ .dealloc_fmr = mlx4_ib_fmr_dealloc,
+ .map_phys_fmr = mlx4_ib_map_phys_fmr,
+ .unmap_fmr = mlx4_ib_unmap_fmr,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
+ .alloc_mw = mlx4_ib_alloc_mw,
+ .dealloc_mw = mlx4_ib_dealloc_mw,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
+ .alloc_xrcd = mlx4_ib_alloc_xrcd,
+ .dealloc_xrcd = mlx4_ib_dealloc_xrcd,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
+ .create_flow = mlx4_ib_create_flow,
+ .destroy_flow = mlx4_ib_destroy_flow,
+};
+
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
struct mlx4_ib_dev *ibdev;
@@ -2554,9 +2666,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
- ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
- ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
- ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
@@ -2589,116 +2698,53 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
- ibdev->ib_dev.query_device = mlx4_ib_query_device;
- ibdev->ib_dev.query_port = mlx4_ib_query_port;
- ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer;
- ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
- ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
- ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
- ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
- ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
- ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
- ibdev->ib_dev.mmap = mlx4_ib_mmap;
- ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
- ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
- ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
- ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
- ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
- ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
- ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
- ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
- ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
- ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
- ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
- ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
- ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
- ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
- ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq;
- ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq;
- ibdev->ib_dev.post_send = mlx4_ib_post_send;
- ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
- ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
- ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq;
- ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq;
- ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
- ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
- ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
- ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
- ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
- ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
- ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
- ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
- ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
- ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
- ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
- ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
- ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
- ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
- ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
-
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops);
ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) ||
(mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
IB_LINK_LAYER_ETHERNET))) {
- ibdev->ib_dev.create_wq = mlx4_ib_create_wq;
- ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq;
- ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq;
- ibdev->ib_dev.create_rwq_ind_table =
- mlx4_ib_create_rwq_ind_table;
- ibdev->ib_dev.destroy_rwq_ind_table =
- mlx4_ib_destroy_rwq_ind_table;
ibdev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops);
}
- if (!mlx4_is_slave(ibdev->dev)) {
- ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
- ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
- ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
- ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
- }
+ if (!mlx4_is_slave(ibdev->dev))
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fmr_ops);
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
- ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
- ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
-
ibdev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops);
}
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
- ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
- ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
ibdev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops);
}
if (check_flow_steering_support(dev)) {
ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
- ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
- ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
-
ibdev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
}
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
-
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
@@ -2710,6 +2756,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
for (i = 0; i < ibdev->num_ports; ++i) {
mutex_init(&ibdev->counters_table[i].mutex);
INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+ iboe->last_port_state[i] = IB_PORT_DOWN;
}
num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 8850dfc3826d..e491f3eda6e7 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -519,6 +519,7 @@ struct mlx4_ib_iboe {
atomic64_t mac[MLX4_MAX_PORTS];
struct notifier_block nb;
struct mlx4_port_gid_table gids[MLX4_MAX_PORTS];
+ enum ib_port_state last_port_state[MLX4_MAX_PORTS];
};
struct pkey_mgt {
@@ -753,13 +754,13 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
+ u32 flags, struct ib_udata *udata);
struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
int slave_sgid_index, u8 *s_mac,
u16 vlan_tag);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-int mlx4_ib_destroy_ah(struct ib_ah *ah);
+int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 0711ca1dfb8f..971e9a9ebdaf 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -323,7 +323,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- int is_user, int has_rq, struct mlx4_ib_qp *qp,
+ bool is_user, int has_rq, struct mlx4_ib_qp *qp,
u32 inl_recv_sz)
{
/* Sanity check RQ size before proceeding */
@@ -401,7 +401,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
* We need to leave 2 KB + 1 WR of headroom in the SQ to
* allow HW to prefetch.
*/
- qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
+ qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift);
qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr +
qp->sq_spare_wqes);
@@ -942,7 +942,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- if (pd->uobject) {
+ if (udata) {
union {
struct mlx4_ib_create_qp qp;
struct mlx4_ib_create_wq wq;
@@ -991,7 +991,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->flags |= MLX4_IB_QP_SCATTER_FCS;
}
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
+ err = set_rq_size(dev, &init_attr->cap, udata,
qp_has_rq(init_attr), qp, qp->inl_recv_sz);
if (err)
goto err;
@@ -1043,7 +1043,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
+ err = set_rq_size(dev, &init_attr->cap, udata,
qp_has_rq(init_attr), qp, 0);
if (err)
goto err;
@@ -1189,7 +1189,7 @@ err_proxy:
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
free_proxy_bufs(pd->device, qp);
err_wrid:
- if (pd->uobject) {
+ if (udata) {
if (qp_has_rq(init_attr))
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
@@ -1201,20 +1201,20 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
err_buf:
- if (pd->uobject)
+ if (qp->umem)
ib_umem_release(qp->umem);
else
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
err_db:
- if (!pd->uobject && qp_has_rq(init_attr))
+ if (!udata && qp_has_rq(init_attr))
mlx4_db_free(dev->dev, &qp->db);
err:
- if (sqp)
- kfree(sqp);
- else if (!*caller_qp)
+ if (!sqp && !*caller_qp)
kfree(qp);
+ kfree(sqp);
+
return err;
}
@@ -1332,7 +1332,7 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
- enum mlx4_ib_source_type src, int is_user)
+ enum mlx4_ib_source_type src, bool is_user)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
unsigned long flags;
@@ -1609,10 +1609,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
if (qp->rwq_ind_tbl) {
destroy_qp_rss(dev, mqp);
} else {
- struct mlx4_ib_pd *pd;
-
- pd = get_pd(mqp);
- destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject);
+ destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, qp->uobject);
}
if (is_sqp(dev, mqp))
@@ -4044,7 +4041,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
struct mlx4_ib_create_wq ucmd;
int err, required_cmd_sz;
- if (!(udata && pd->uobject))
+ if (!udata)
return ERR_PTR(-EINVAL);
required_cmd_sz = offsetof(typeof(ucmd), comp_mask) +
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 3731b31c3653..4456f1b8921d 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -105,7 +105,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
buf_size = srq->msrq.max * desc_size;
- if (pd->uobject) {
+ if (udata) {
struct mlx4_ib_create_srq ucmd;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
@@ -191,7 +191,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
srq->msrq.event = mlx4_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject)
+ if (udata)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
@@ -202,7 +202,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
return &srq->ibsrq;
err_wrid:
- if (pd->uobject)
+ if (udata)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
else
kvfree(srq->wrid);
@@ -211,13 +211,13 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
err_buf:
- if (pd->uobject)
+ if (srq->umem)
ib_umem_release(srq->umem);
else
mlx4_buf_free(dev->dev, buf_size, &srq->buf);
err_db:
- if (!pd->uobject)
+ if (!udata)
mlx4_db_free(dev->dev, &srq->db);
err_srq:
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 752bdd536130..ea1f3a081b05 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -353,16 +353,12 @@ err:
static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
{
- char base_name[9];
-
- /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
- strlcpy(name, pci_name(dev->dev->persist->pdev), max);
- strncpy(base_name, name, 8); /*till xxxx:yy:*/
- base_name[8] = '\0';
- /* with no ARI only 3 last bits are used so when the fn is higher than 8
+ /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n
+ * with no ARI only 3 last bits are used so when the fn is higher than 8
* need to add it to the dev num, so count in the last number will be
* modulo 8 */
- sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
+ snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev),
+ i / 8, i % 8);
}
struct mlx4_port {
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index ffd03bf1a71e..420ae0897333 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -72,7 +72,7 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
}
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
struct mlx5_ib_ah *ah;
@@ -131,7 +131,7 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-int mlx5_ib_destroy_ah(struct ib_ah *ah)
+int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
{
kfree(to_mah(ah));
return 0;
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index ca060a2e2b36..356bccc715ee 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -240,6 +240,7 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
MLX5_SET(alloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(alloc_transport_domain_in, in, uid, uid);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (!err)
@@ -257,6 +258,7 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
MLX5_SET(dealloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, in, uid, uid);
MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
@@ -326,3 +328,20 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
MLX5_SET(dealloc_xrcd_in, in, uid, uid);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+
+int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
+ u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ MLX5_SET(alloc_q_counter_in, in, uid, uid);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *counter_id = MLX5_GET(alloc_q_counter_out, out,
+ counter_set_id);
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index c03c56455534..1e76dc67a369 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -61,4 +61,6 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
u32 qpn, u16 uid);
int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
+int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
+ u16 uid);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 26ab9041f94a..90f1b0bae5b5 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -330,67 +330,6 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
dump_cqe(dev, cqe);
}
-static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
-{
- /* TBD: waiting decision
- */
- return 0;
-}
-
-static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
-{
- struct mlx5_wqe_data_seg *dpseg;
- void *addr;
-
- dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_atomic_seg);
- addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
- return addr;
-}
-
-static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
- uint16_t idx)
-{
- void *addr;
- int byte_count;
- int i;
-
- if (!is_atomic_response(qp, idx))
- return;
-
- byte_count = be32_to_cpu(cqe64->byte_cnt);
- addr = mlx5_get_atomic_laddr(qp, idx);
-
- if (byte_count == 4) {
- *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
- } else {
- for (i = 0; i < byte_count; i += 8) {
- *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
- addr += 8;
- }
- }
-
- return;
-}
-
-static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
- u16 tail, u16 head)
-{
- u16 idx;
-
- do {
- idx = tail & (qp->sq.wqe_cnt - 1);
- handle_atomic(qp, cqe64, idx);
- if (idx == head)
- break;
-
- tail = qp->sq.w_list[idx].next;
- } while (1);
- tail = qp->sq.w_list[idx].next;
- qp->sq.last_poll = tail;
-}
-
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
@@ -428,45 +367,15 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
item->key = be32_to_cpu(cqe->mkey);
}
-static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
- struct ib_wc *wc, int *npolled)
-{
- struct mlx5_ib_wq *wq;
- unsigned int cur;
- unsigned int idx;
- int np;
- int i;
-
- wq = &qp->sq;
- cur = wq->head - wq->tail;
- np = *npolled;
-
- if (cur == 0)
- return;
-
- for (i = 0; i < cur && np < num_entries; i++) {
- idx = wq->last_poll & (wq->wqe_cnt - 1);
- wc->wr_id = wq->wrid[idx];
- wc->status = IB_WC_WR_FLUSH_ERR;
- wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
- wq->tail++;
- np++;
- wc->qp = &qp->ibqp;
- wc++;
- wq->last_poll = wq->w_list[idx].next;
- }
- *npolled = np;
-}
-
-static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
- struct ib_wc *wc, int *npolled)
+static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
+ int *npolled, int is_send)
{
struct mlx5_ib_wq *wq;
unsigned int cur;
int np;
int i;
- wq = &qp->rq;
+ wq = (is_send) ? &qp->sq : &qp->rq;
cur = wq->head - wq->tail;
np = *npolled;
@@ -493,13 +402,13 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
*npolled = 0;
/* Find uncompleted WQEs belonging to that cq and return mmics ones */
list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
- sw_send_comp(qp, num_entries, wc + *npolled, npolled);
+ sw_comp(qp, num_entries, wc + *npolled, npolled, true);
if (*npolled >= num_entries)
return;
}
list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
- sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
+ sw_comp(qp, num_entries, wc + *npolled, npolled, false);
if (*npolled >= num_entries)
return;
}
@@ -567,7 +476,6 @@ repoll:
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
idx = wqe_ctr & (wq->wqe_cnt - 1);
handle_good_req(wc, cqe64, wq, idx);
- handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
wc->wr_id = wq->wrid[idx];
wq->tail = wq->wqe_head[idx] + 1;
wc->status = IB_WC_SUCCESS;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 45c421c87100..5a588f3cfb1b 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -9,6 +9,7 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/ib_umem.h>
+#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
@@ -40,29 +41,32 @@ struct devx_umem_reg_cmd {
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
};
-static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
+static struct mlx5_ib_ucontext *
+devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
{
- return to_mucontext(ib_uverbs_get_ucontext(file));
+ return to_mucontext(ib_uverbs_get_ucontext(attrs));
}
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
- u64 general_obj_types;
- void *hdr;
+ void *uctx;
int err;
u16 uid;
+ u32 cap = 0;
- hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
-
- general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types);
- if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) ||
- !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
+ /* 0 means not supported */
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
return -EINVAL;
- MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
+ uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
+ if (is_user && capable(CAP_NET_RAW) &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
+ cap |= MLX5_UCTX_CAP_RAW_TX;
+
+ MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
+ MLX5_SET(uctx, uctx, cap, cap);
err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
if (err)
@@ -74,12 +78,11 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid);
+ MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
+ MLX5_SET(destroy_uctx_in, in, uid, uid);
mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
@@ -106,6 +109,21 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
}
}
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+ *counter_id = MLX5_GET(dealloc_flow_counter_in,
+ devx_obj->dinbox,
+ flow_counter_id);
+ return true;
+ }
+
+ return false;
+}
+
/*
* As the obj_id in the firmware is not globally unique the object type
* must be considered upon checking for a valid object id.
@@ -116,7 +134,7 @@ static u64 get_enc_obj_id(u16 opcode, u32 obj_id)
return ((u64)opcode << 32) | obj_id;
}
-static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
+static u64 devx_get_obj_id(const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
u64 obj_id;
@@ -290,6 +308,8 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
MLX5_GET(query_dct_in, in, dctn));
break;
case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
MLX5_GET(query_xrq_in, in, xrqn));
break;
@@ -316,17 +336,107 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
MLX5_GET(drain_dct_in, in, dctn));
break;
case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
MLX5_GET(arm_xrq_in, in, xrqn));
break;
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+ obj_id = get_enc_obj_id
+ (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
+ MLX5_GET(query_packet_reformat_context_in,
+ in, packet_reformat_id));
+ break;
default:
+ obj_id = 0;
+ }
+
+ return obj_id;
+}
+
+static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in)
+{
+ u64 obj_id = devx_get_obj_id(in);
+
+ if (!obj_id)
return false;
+
+ switch (uobj_get_object_id(uobj)) {
+ case UVERBS_OBJECT_CQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ to_mcq(uobj->object)->mcq.cqn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_SRQ:
+ {
+ struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
+ struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+ u16 opcode;
+
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
+ break;
+ case MLX5_RES_XRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRQ;
+ break;
+ default:
+ if (!dev->mdev->issi)
+ opcode = MLX5_CMD_OP_CREATE_SRQ;
+ else
+ opcode = MLX5_CMD_OP_CREATE_RMP;
+ }
+
+ return get_enc_obj_id(opcode,
+ to_msrq(uobj->object)->msrq.srqn) ==
+ obj_id;
}
- if (obj_id == obj->obj_id)
- return true;
+ case UVERBS_OBJECT_QP:
+ {
+ struct mlx5_ib_qp *qp = to_mqp(uobj->object);
+ enum ib_qp_type qp_type = qp->ibqp.qp_type;
+
+ if (qp_type == IB_QPT_RAW_PACKET ||
+ (qp->flags & MLX5_IB_QP_UNDERLAY)) {
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp =
+ &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+
+ return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ rq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ sq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ rq->tirn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ sq->tisn) == obj_id);
+ }
+
+ if (qp_type == MLX5_IB_QPT_DCT)
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ qp->dct.mdct.mqp.qpn) == obj_id;
+
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ qp->ibqp.qp_num) == obj_id;
+ }
- return false;
+ case UVERBS_OBJECT_WQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ to_mrwq(uobj->object)->core_qp.qpn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_RWQ_IND_TBL:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ to_mrwq_ind_table(uobj->object)->rqtn) ==
+ obj_id;
+
+ case MLX5_IB_OBJECT_DEVX_OBJ:
+ return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
+
+ default:
+ return false;
+ }
}
static void devx_set_umem_valid(const void *in)
@@ -494,6 +604,7 @@ static bool devx_is_obj_modify_cmd(const void *in)
case MLX5_CMD_OP_DRAIN_DCT:
case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
return true;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
{
@@ -535,6 +646,9 @@ static bool devx_is_obj_query_cmd(const void *in)
case MLX5_CMD_OP_QUERY_XRC_SRQ:
case MLX5_CMD_OP_QUERY_DCT:
case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
return true;
default:
return false;
@@ -572,15 +686,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
if (!c->devx_uid)
return -EINVAL;
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
return c->devx_uid;
}
static bool devx_is_general_cmd(void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ if (opcode >= MLX5_CMD_OP_GENERAL_START &&
+ opcode < MLX5_CMD_OP_GENERAL_END)
+ return true;
+
switch (opcode) {
case MLX5_CMD_OP_QUERY_HCA_CAP:
case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
@@ -603,7 +718,7 @@ static bool devx_is_general_cmd(void *in)
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *c;
struct mlx5_ib_dev *dev;
@@ -616,7 +731,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
return -EFAULT;
- c = devx_ufile2uctx(file);
+ c = devx_ufile2uctx(attrs);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
@@ -653,14 +768,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
* queue or arm its CQ for event generation), no further harm is expected.
*/
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *c;
struct mlx5_ib_dev *dev;
u32 user_idx;
s32 dev_idx;
- c = devx_ufile2uctx(file);
+ c = devx_ufile2uctx(attrs);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
@@ -681,7 +796,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *c;
struct mlx5_ib_dev *dev;
@@ -693,7 +808,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
int err;
int uid;
- c = devx_ufile2uctx(file);
+ c = devx_ufile2uctx(attrs);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
@@ -740,6 +855,10 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
break;
+ case MLX5_CMD_OP_CREATE_UMEM:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_UMEM);
+ break;
case MLX5_CMD_OP_CREATE_MKEY:
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
break;
@@ -908,7 +1027,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -970,7 +1089,7 @@ obj_free:
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -978,7 +1097,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
- struct devx_obj *obj = uobj->object;
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
void *cmd_out;
int err;
int uid;
@@ -990,7 +1109,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
if (!devx_is_obj_modify_cmd(cmd_in))
return -EINVAL;
- if (!devx_is_valid_obj_id(obj, cmd_in))
+ if (!devx_is_valid_obj_id(uobj, cmd_in))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1000,7 +1119,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
devx_set_umem_valid(cmd_in);
- err = mlx5_cmd_exec(obj->mdev, cmd_in,
+ err = mlx5_cmd_exec(mdev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
cmd_out, cmd_out_len);
if (err)
@@ -1011,7 +1130,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -1019,10 +1138,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
- struct devx_obj *obj = uobj->object;
void *cmd_out;
int err;
int uid;
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
@@ -1031,7 +1150,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
if (!devx_is_obj_query_cmd(cmd_in))
return -EINVAL;
- if (!devx_is_valid_obj_id(obj, cmd_in))
+ if (!devx_is_valid_obj_id(uobj, cmd_in))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1039,7 +1158,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
return PTR_ERR(cmd_out);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
- err = mlx5_cmd_exec(obj->mdev, cmd_in,
+ err = mlx5_cmd_exec(mdev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
cmd_out, cmd_out_len);
if (err)
@@ -1115,8 +1234,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
- MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM);
+ MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
MLX5_SET(umem, umem, log_page_size, obj->page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
@@ -1127,7 +1245,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct devx_umem_reg_cmd cmd;
struct devx_umem *obj;
@@ -1141,9 +1259,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
if (!c->devx_uid)
return -EINVAL;
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
if (!obj)
return -ENOMEM;
@@ -1158,7 +1273,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
devx_umem_reg_cmd_build(dev, obj, &cmd);
- MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid);
+ MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
sizeof(cmd.out));
if (err)
@@ -1279,7 +1394,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
- MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_IDR_ANY_OBJECT,
UVERBS_ACCESS_WRITE,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
@@ -1295,7 +1410,7 @@ DECLARE_UVERBS_NAMED_METHOD(
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_QUERY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
- MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_IDR_ANY_OBJECT,
UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
@@ -1325,12 +1440,22 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
-DECLARE_UVERBS_OBJECT_TREE(devx_objects,
- &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
- &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
- &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
-
-const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void)
+static bool devx_is_supported(struct ib_device *device)
{
- return &devx_objects;
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return !dev->rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx);
}
+
+const struct uapi_definition mlx5_ib_devx_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index f86cdcafdafc..e8a1e4498e3f 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -60,7 +60,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_ib_flow_handler *flow_handler;
@@ -77,6 +77,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
int len, ret, i;
+ u32 counter_id = 0;
if (!capable(CAP_NET_RAW))
return -EPERM;
@@ -92,10 +93,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
((dest_devx && dest_qp) || (!dest_devx && !dest_qp)))
return -EINVAL;
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS &&
- (dest_devx || dest_qp))
- return -EINVAL;
-
if (dest_devx) {
devx_obj = uverbs_attr_get_obj(
attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
@@ -128,8 +125,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
- if (dev->rep)
- return -ENOTSUPP;
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
+ if (len) {
+ devx_obj = arr_flow_actions[0]->object;
+
+ if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id))
+ return -EINVAL;
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ }
+
+ if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+ return -EINVAL;
cmd_in = uverbs_attr_get_alloced_ptr(
attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
@@ -164,6 +172,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
}
flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+ counter_id,
cmd_in, inlen,
dest_id, dest_type);
if (IS_ERR(flow_handler)) {
@@ -194,7 +203,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
@@ -313,7 +322,6 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
- struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
@@ -321,9 +329,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
enum mlx5_ib_uapi_flow_table_type ft_type;
struct ib_flow_action *action;
- size_t num_actions;
+ int num_actions;
void *in;
- int len;
int ret;
if (!mlx5_ib_modify_header_supported(mdev))
@@ -331,18 +338,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
in = uverbs_attr_get_alloced_ptr(attrs,
MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
- len = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
- if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto))
- return -EINVAL;
+ num_actions = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto));
+ if (num_actions < 0)
+ return num_actions;
ret = uverbs_get_const(&ft_type, attrs,
MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
if (ret)
return ret;
-
- num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto),
action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
if (IS_ERR(action))
return PTR_ERR(action);
@@ -435,7 +441,6 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
- struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
@@ -526,7 +531,11 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_OPTIONAL),
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
UVERBS_ATTR_TYPE(u32),
- UA_OPTIONAL));
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ, 1, 1,
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_DESTROY_FLOW,
@@ -610,16 +619,20 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
-DECLARE_UVERBS_OBJECT_TREE(flow_objects,
- &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER));
-
-int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
+static bool flow_is_supported(struct ib_device *device)
{
- int i = 0;
-
- root[i++] = &flow_objects;
- root[i++] = &mlx5_ib_fs;
- root[i++] = &mlx5_ib_flow_actions;
-
- return i;
+ return !to_mdev(device)->rep;
}
+
+const struct uapi_definition mlx5_ib_flow_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_FLOW,
+ &mlx5_ib_fs,
+ UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_actions),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 32a9e9228b13..558638468edb 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -526,11 +526,6 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
int ext_active_speed;
int err = -ENOMEM;
- if (port < 1 || port > dev->num_ports) {
- mlx5_ib_warn(dev, "invalid port number %d\n", port);
- return -EINVAL;
- }
-
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!in_mad || !out_mad)
@@ -568,6 +563,14 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) {
+ props->port_cap_flags2 =
+ be16_to_cpup((__be16 *)(out_mad->data + 60));
+
+ if (props->port_cap_flags2 & IB_PORT_LINK_WIDTH_2X_SUP)
+ props->active_width = out_mad->data[31] & 0x1f;
+ }
+
/* Check if extended speeds (EDR/FDR/...) are supported */
if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
ext_active_speed = out_mad->data[62] >> 4;
@@ -579,6 +582,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
case 2:
props->active_speed = 32; /* EDR */
break;
+ case 4:
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP)
+ props->active_speed = IB_SPEED_HDR;
+ break;
}
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e85974ab06c0..94fe253d4956 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -150,7 +150,7 @@ static int get_port_state(struct ib_device *ibdev,
int ret;
memset(&attr, 0, sizeof(attr));
- ret = ibdev->query_port(ibdev, port_num, &attr);
+ ret = ibdev->ops.query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
@@ -172,7 +172,6 @@ static int mlx5_netdev_event(struct notifier_block *this,
switch (event) {
case NETDEV_REGISTER:
- case NETDEV_UNREGISTER:
write_lock(&roce->netdev_lock);
if (ibdev->rep) {
struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
@@ -181,15 +180,20 @@ static int mlx5_netdev_event(struct notifier_block *this,
rep_ndev = mlx5_ib_get_rep_netdev(esw,
ibdev->rep->vport);
if (rep_ndev == ndev)
- roce->netdev = (event == NETDEV_UNREGISTER) ?
- NULL : ndev;
+ roce->netdev = ndev;
} else if (ndev->dev.parent == &mdev->pdev->dev) {
- roce->netdev = (event == NETDEV_UNREGISTER) ?
- NULL : ndev;
+ roce->netdev = ndev;
}
write_unlock(&roce->netdev_lock);
break;
+ case NETDEV_UNREGISTER:
+ write_lock(&roce->netdev_lock);
+ if (roce->netdev == ndev)
+ roce->netdev = NULL;
+ write_unlock(&roce->netdev_lock);
+ break;
+
case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
@@ -1018,6 +1022,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, cqe_128_always))
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
+ if (MLX5_CAP_GEN(mdev, qp_packet_based))
+ resp.flags |=
+ MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
}
if (field_avail(typeof(resp), sw_parsing_caps,
@@ -1105,6 +1112,8 @@ static void translate_active_width(struct ib_device *ibdev, u8 active_width,
if (active_width & MLX5_IB_WIDTH_1X)
*ib_width = IB_WIDTH_1X;
+ else if (active_width & MLX5_IB_WIDTH_2X)
+ *ib_width = IB_WIDTH_2X;
else if (active_width & MLX5_IB_WIDTH_4X)
*ib_width = IB_WIDTH_4X;
else if (active_width & MLX5_IB_WIDTH_8X)
@@ -1220,6 +1229,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
props->subnet_timeout = rep->subnet_timeout;
props->init_type_reply = rep->init_type_reply;
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
+ props->port_cap_flags2 = rep->cap_mask2;
+
err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
if (err)
goto out;
@@ -1756,7 +1768,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
#endif
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- err = mlx5_ib_devx_create(dev);
+ err = mlx5_ib_devx_create(dev, true);
if (err < 0)
goto out_uars;
context->devx_uid = err;
@@ -3710,7 +3722,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_flow_act *flow_act,
- void *cmd_in, int inlen)
+ void *cmd_in, int inlen,
+ int dst_num)
{
struct mlx5_ib_flow_handler *handler;
struct mlx5_flow_spec *spec;
@@ -3732,7 +3745,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
handler->rule = mlx5_add_flow_rules(ft, spec,
- flow_act, dst, 1);
+ flow_act, dst, dst_num);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
@@ -3795,12 +3808,14 @@ struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_flow_act *flow_act,
+ u32 counter_id,
void *cmd_in, int inlen, int dest_id,
int dest_type)
{
struct mlx5_flow_destination *dst;
struct mlx5_ib_flow_prio *ft_prio;
struct mlx5_ib_flow_handler *handler;
+ int dst_num = 0;
bool mcast;
int err;
@@ -3810,7 +3825,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL);
if (!dst)
return ERR_PTR(-ENOMEM);
@@ -3824,20 +3839,28 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
}
if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
- dst->type = dest_type;
- dst->tir_num = dest_id;
+ dst[dst_num].type = dest_type;
+ dst[dst_num].tir_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
} else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
- dst->ft_num = dest_id;
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+ dst[dst_num].ft_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
} else {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
}
+ dst_num++;
+
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst[dst_num].counter_id = counter_id;
+ dst_num++;
+ }
+
handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
- cmd_in, inlen);
+ cmd_in, inlen, dst_num);
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
@@ -5095,6 +5118,9 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
int err = 0;
int i;
+ bool is_shared;
+
+ is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
for (i = 0; i < dev->num_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
@@ -5104,8 +5130,10 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
dev->port[i].cnts.offsets);
- err = mlx5_core_alloc_q_counter(dev->mdev,
- &dev->port[i].cnts.set_id);
+ err = mlx5_cmd_alloc_q_counter(dev->mdev,
+ &dev->port[i].cnts.set_id,
+ is_shared ?
+ MLX5_SHARED_RESOURCE_UID : 0);
if (err) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
@@ -5382,14 +5410,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev)
mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
}
-static const struct cpumask *
-mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
- return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector);
-}
-
/* The mlx5_ib_multiport_mutex should be held when calling this function */
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
@@ -5617,30 +5637,17 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
enum mlx5_ib_uapi_flow_action_flags));
-static int populate_specs_root(struct mlx5_ib_dev *dev)
-{
- const struct uverbs_object_tree_def **trees = dev->driver_trees;
- size_t num_trees = 0;
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- trees[num_trees++] = &mlx5_ib_flow_action;
-
- if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
- trees[num_trees++] = &mlx5_ib_dm;
-
- if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
- MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
- trees[num_trees++] = mlx5_ib_get_devx_tree();
-
- num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
-
- WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
- trees[num_trees] = NULL;
- dev->ib_dev.driver_specs = trees;
+static const struct uapi_definition mlx5_ib_defs[] = {
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+ UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
+#endif
- return 0;
-}
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_action),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ {}
+};
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
@@ -5717,6 +5724,8 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
mlx5_ib_cleanup_multiport_master(dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
cleanup_srcu_struct(&dev->mr_srcu);
+ drain_workqueue(dev->advise_mr_wq);
+ destroy_workqueue(dev->advise_mr_wq);
#endif
kfree(dev->port);
}
@@ -5771,9 +5780,17 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->memic.dev = mdev;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0);
+ if (!dev->advise_mr_wq) {
+ err = -ENOMEM;
+ goto err_mp;
+ }
+
err = init_srcu_struct(&dev->mr_srcu);
- if (err)
- goto err_free_port;
+ if (err) {
+ destroy_workqueue(dev->advise_mr_wq);
+ goto err_mp;
+ }
#endif
return 0;
@@ -5817,6 +5834,94 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
kfree(dev->flow_db);
}
+static const struct ib_device_ops mlx5_ib_dev_ops = {
+ .add_gid = mlx5_ib_add_gid,
+ .alloc_mr = mlx5_ib_alloc_mr,
+ .alloc_pd = mlx5_ib_alloc_pd,
+ .alloc_ucontext = mlx5_ib_alloc_ucontext,
+ .attach_mcast = mlx5_ib_mcg_attach,
+ .check_mr_status = mlx5_ib_check_mr_status,
+ .create_ah = mlx5_ib_create_ah,
+ .create_counters = mlx5_ib_create_counters,
+ .create_cq = mlx5_ib_create_cq,
+ .create_flow = mlx5_ib_create_flow,
+ .create_qp = mlx5_ib_create_qp,
+ .create_srq = mlx5_ib_create_srq,
+ .dealloc_pd = mlx5_ib_dealloc_pd,
+ .dealloc_ucontext = mlx5_ib_dealloc_ucontext,
+ .del_gid = mlx5_ib_del_gid,
+ .dereg_mr = mlx5_ib_dereg_mr,
+ .destroy_ah = mlx5_ib_destroy_ah,
+ .destroy_counters = mlx5_ib_destroy_counters,
+ .destroy_cq = mlx5_ib_destroy_cq,
+ .destroy_flow = mlx5_ib_destroy_flow,
+ .destroy_flow_action = mlx5_ib_destroy_flow_action,
+ .destroy_qp = mlx5_ib_destroy_qp,
+ .destroy_srq = mlx5_ib_destroy_srq,
+ .detach_mcast = mlx5_ib_mcg_detach,
+ .disassociate_ucontext = mlx5_ib_disassociate_ucontext,
+ .drain_rq = mlx5_ib_drain_rq,
+ .drain_sq = mlx5_ib_drain_sq,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = mlx5_ib_get_dma_mr,
+ .get_link_layer = mlx5_ib_port_link_layer,
+ .map_mr_sg = mlx5_ib_map_mr_sg,
+ .mmap = mlx5_ib_mmap,
+ .modify_cq = mlx5_ib_modify_cq,
+ .modify_device = mlx5_ib_modify_device,
+ .modify_port = mlx5_ib_modify_port,
+ .modify_qp = mlx5_ib_modify_qp,
+ .modify_srq = mlx5_ib_modify_srq,
+ .poll_cq = mlx5_ib_poll_cq,
+ .post_recv = mlx5_ib_post_recv,
+ .post_send = mlx5_ib_post_send,
+ .post_srq_recv = mlx5_ib_post_srq_recv,
+ .process_mad = mlx5_ib_process_mad,
+ .query_ah = mlx5_ib_query_ah,
+ .query_device = mlx5_ib_query_device,
+ .query_gid = mlx5_ib_query_gid,
+ .query_pkey = mlx5_ib_query_pkey,
+ .query_qp = mlx5_ib_query_qp,
+ .query_srq = mlx5_ib_query_srq,
+ .read_counters = mlx5_ib_read_counters,
+ .reg_user_mr = mlx5_ib_reg_user_mr,
+ .req_notify_cq = mlx5_ib_arm_cq,
+ .rereg_user_mr = mlx5_ib_rereg_user_mr,
+ .resize_cq = mlx5_ib_resize_cq,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
+ .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
+ .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
+ .rdma_netdev_get_params = mlx5_ib_rn_get_params,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
+ .get_vf_config = mlx5_ib_get_vf_config,
+ .get_vf_stats = mlx5_ib_get_vf_stats,
+ .set_vf_guid = mlx5_ib_set_vf_guid,
+ .set_vf_link_state = mlx5_ib_set_vf_link_state,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
+ .alloc_mw = mlx5_ib_alloc_mw,
+ .dealloc_mw = mlx5_ib_dealloc_mw,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
+ .alloc_xrcd = mlx5_ib_alloc_xrcd,
+ .dealloc_xrcd = mlx5_ib_dealloc_xrcd,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
+ .alloc_dm = mlx5_ib_alloc_dm,
+ .dealloc_dm = mlx5_ib_dealloc_dm,
+ .reg_dm_mr = mlx5_ib_reg_dm_mr,
+};
+
int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -5855,104 +5960,45 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
-
- dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
- dev->ib_dev.query_gid = mlx5_ib_query_gid;
- dev->ib_dev.add_gid = mlx5_ib_add_gid;
- dev->ib_dev.del_gid = mlx5_ib_del_gid;
- dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
- dev->ib_dev.modify_device = mlx5_ib_modify_device;
- dev->ib_dev.modify_port = mlx5_ib_modify_port;
- dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
- dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
- dev->ib_dev.mmap = mlx5_ib_mmap;
- dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
- dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
- dev->ib_dev.create_ah = mlx5_ib_create_ah;
- dev->ib_dev.query_ah = mlx5_ib_query_ah;
- dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
- dev->ib_dev.create_srq = mlx5_ib_create_srq;
- dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
- dev->ib_dev.query_srq = mlx5_ib_query_srq;
- dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
- dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
- dev->ib_dev.create_qp = mlx5_ib_create_qp;
- dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
- dev->ib_dev.query_qp = mlx5_ib_query_qp;
- dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
- dev->ib_dev.drain_sq = mlx5_ib_drain_sq;
- dev->ib_dev.drain_rq = mlx5_ib_drain_rq;
- dev->ib_dev.post_send = mlx5_ib_post_send;
- dev->ib_dev.post_recv = mlx5_ib_post_recv;
- dev->ib_dev.create_cq = mlx5_ib_create_cq;
- dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
- dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
- dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
- dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
- dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
- dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
- dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
- dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
- dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
- dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
- dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
- dev->ib_dev.process_mad = mlx5_ib_process_mad;
- dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
- dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
- dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
- dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
- dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params;
+ ib_set_device_ops(&dev->ib_dev,
+ &mlx5_ib_dev_ipoib_enhanced_ops);
- if (mlx5_core_is_pf(mdev)) {
- dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
- dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
- dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
- dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
- }
-
- dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
+ if (mlx5_core_is_pf(mdev))
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops);
dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
if (MLX5_CAP_GEN(mdev, imaicl)) {
- dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
- dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops);
}
if (MLX5_CAP_GEN(mdev, xrc)) {
- dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
- dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
}
- if (MLX5_CAP_DEV_MEM(mdev, memic)) {
- dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm;
- dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm;
- dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr;
- }
+ if (MLX5_CAP_DEV_MEM(mdev, memic))
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
- dev->ib_dev.create_flow = mlx5_ib_create_flow;
- dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
- dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
- dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
- dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
- dev->ib_dev.create_counters = mlx5_ib_create_counters;
- dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
- dev->ib_dev.read_counters = mlx5_ib_read_counters;
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+ dev->ib_dev.driver_def = mlx5_ib_defs;
err = init_node_data(dev);
if (err)
@@ -5966,22 +6012,37 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return 0;
}
+static const struct ib_device_ops mlx5_ib_dev_port_ops = {
+ .get_port_immutable = mlx5_port_immutable,
+ .query_port = mlx5_ib_query_port,
+};
+
static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
{
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
- dev->ib_dev.query_port = mlx5_ib_query_port;
-
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops);
return 0;
}
+static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
+ .get_port_immutable = mlx5_port_rep_immutable,
+ .query_port = mlx5_ib_rep_query_port,
+};
+
int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
{
- dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
- dev->ib_dev.query_port = mlx5_ib_rep_query_port;
-
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops);
return 0;
}
+static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
+ .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table,
+ .create_wq = mlx5_ib_create_wq,
+ .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
+ .destroy_wq = mlx5_ib_destroy_wq,
+ .get_netdev = mlx5_ib_get_netdev,
+ .modify_wq = mlx5_ib_modify_wq,
+};
+
static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
{
u8 port_num;
@@ -5993,19 +6054,13 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
dev->roce[i].last_port_state = IB_PORT_DOWN;
}
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
-
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
@@ -6104,11 +6159,15 @@ void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
mlx5_ib_odp_cleanup_one(dev);
}
+static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
+ .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+};
+
int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
- dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops);
return mlx5_ib_alloc_counters(dev);
}
@@ -6166,11 +6225,6 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
-{
- return populate_specs_root(dev);
-}
-
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
const char *name;
@@ -6226,7 +6280,7 @@ static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
{
int uid;
- uid = mlx5_ib_devx_create(dev);
+ uid = mlx5_ib_devx_create(dev, false);
if (uid > 0)
dev->devx_whitelist_uid = uid;
@@ -6318,9 +6372,6 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
mlx5_ib_stage_devx_init,
mlx5_ib_stage_devx_cleanup),
@@ -6372,9 +6423,6 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e507b6eb7c09..b06d3b1efea8 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -41,7 +41,6 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/fs.h>
#include <linux/types.h>
#include <linux/mlx5/transobj.h>
#include <rdma/ib_user_verbs.h>
@@ -258,6 +257,7 @@ enum mlx5_ib_rq_flags {
};
struct mlx5_ib_wq {
+ struct mlx5_frag_buf_ctrl fbc;
u64 *wrid;
u32 *wr_data;
struct wr_list *w_list;
@@ -275,8 +275,7 @@ struct mlx5_ib_wq {
unsigned head;
unsigned tail;
u16 cur_post;
- u16 last_poll;
- void *qend;
+ void *cur_edge;
};
enum mlx5_ib_wq_flags {
@@ -461,6 +460,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_UNDERLAY = 1 << 10,
MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11,
MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12,
+ MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13,
};
struct mlx5_umr_wr {
@@ -524,6 +524,7 @@ struct mlx5_ib_srq {
struct mlx5_core_srq msrq;
struct mlx5_frag_buf buf;
struct mlx5_db db;
+ struct mlx5_frag_buf_ctrl fbc;
u64 *wrid;
/* protect SRQ hanlding
*/
@@ -541,7 +542,6 @@ struct mlx5_ib_srq {
struct mlx5_ib_xrcd {
struct ib_xrcd ibxrcd;
u32 xrcdn;
- u16 uid;
};
enum mlx5_ib_mtt_access_flags {
@@ -784,7 +784,6 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
- MLX5_IB_STAGE_SPECS,
MLX5_IB_STAGE_WHITELIST_UID,
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_POST_IB_REG_UMR,
@@ -895,7 +894,6 @@ struct mlx5_ib_pf_eq {
struct mlx5_ib_dev {
struct ib_device ib_dev;
- const struct uverbs_object_tree_def *driver_trees[7];
struct mlx5_core_dev *mdev;
struct notifier_block mdev_events;
struct mlx5_roce roce[MLX5_MAX_PORTS];
@@ -924,6 +922,7 @@ struct mlx5_ib_dev {
*/
struct srcu_struct mr_srcu;
u32 null_mkey;
+ struct workqueue_struct *advise_mr_wq;
#endif
struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
@@ -1043,9 +1042,9 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
+ u32 flags, struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-int mlx5_ib_destroy_ah(struct ib_ah *ah);
+int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
@@ -1071,7 +1070,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
-void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
void *buffer, u32 length,
struct mlx5_ib_qp_base *base);
@@ -1088,6 +1086,12 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
+int mlx5_ib_advise_mr(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags,
+ struct ib_sge *sg_list,
+ u32 num_sge,
+ struct uverbs_attr_bundle *attrs);
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
@@ -1185,6 +1189,10 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr, int flags);
+
+int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1200,6 +1208,13 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr,
int flags) {}
+static inline int
+mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice, u32 flags,
+ struct ib_sge *sg_list, u32 num_sge)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
/* Needed for rep profile */
@@ -1268,32 +1283,29 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
+extern const struct uapi_definition mlx5_ib_devx_defs[];
+extern const struct uapi_definition mlx5_ib_flow_defs[];
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
- int dest_id, int dest_type);
+ struct mlx5_flow_act *flow_act, u32 counter_id,
+ void *cmd_in, int inlen, int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id);
int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
#else
static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; };
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+ bool is_user) { return -EOPNOTSUPP; }
static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
-static inline const struct uverbs_object_tree_def *
-mlx5_ib_get_devx_tree(void) { return NULL; }
static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
int *dest_type)
{
return false;
}
-static inline int
-mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
-{
- return 0;
-}
static inline void
mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
{
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9b195d65a13e..1bd8c1b1dba1 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -73,7 +73,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/* Wait until all page fault handlers using the mr complete. */
- synchronize_srcu(&dev->mr_srcu);
+ if (mr->umem && mr->umem->is_odp)
+ synchronize_srcu(&dev->mr_srcu);
#endif
return err;
@@ -237,6 +238,9 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ bool odp_mkey_exist = false;
+#endif
struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
LIST_HEAD(del_list);
@@ -249,6 +253,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
break;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (mr->umem && mr->umem->is_odp)
+ odp_mkey_exist = true;
+#endif
list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
@@ -257,7 +265,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- synchronize_srcu(&dev->mr_srcu);
+ if (odp_mkey_exist)
+ synchronize_srcu(&dev->mr_srcu);
#endif
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
@@ -572,6 +581,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
+ bool odp_mkey_exist = false;
struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
LIST_HEAD(del_list);
@@ -584,6 +594,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
break;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+ if (mr->umem && mr->umem->is_odp)
+ odp_mkey_exist = true;
list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
@@ -592,7 +604,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- synchronize_srcu(&dev->mr_srcu);
+ if (odp_mkey_exist)
+ synchronize_srcu(&dev->mr_srcu);
#endif
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
@@ -1211,7 +1224,7 @@ err_1:
return ERR_PTR(err);
}
-static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
int npages, u64 length, int access_flags)
{
mr->npages = npages;
@@ -1267,7 +1280,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr,
kfree(in);
mr->umem = NULL;
- set_mr_fileds(dev, mr, 0, length, acc);
+ set_mr_fields(dev, mr, 0, length, acc);
return &mr->ibmr;
@@ -1280,6 +1293,21 @@ err_free:
return ERR_PTR(err);
}
+int mlx5_ib_advise_mr(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags,
+ struct ib_sge *sg_list,
+ u32 num_sge,
+ struct uverbs_attr_bundle *attrs)
+{
+ if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE)
+ return -EOPNOTSUPP;
+
+ return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
+ sg_list, num_sge);
+}
+
struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs)
@@ -1369,7 +1397,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
mr->umem = umem;
- set_mr_fileds(dev, mr, npages, length, access_flags);
+ set_mr_fields(dev, mr, npages, length, access_flags);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(mr);
@@ -1536,7 +1564,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
goto err;
}
- set_mr_fileds(dev, mr, npages, len, access_flags);
+ set_mr_fields(dev, mr, npages, len, access_flags);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(mr);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 7309fb6bf0d2..01e0f6200631 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -549,12 +549,17 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}
+#define MLX5_PF_FLAGS_PREFETCH BIT(0)
+#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- u64 io_virt, size_t bcnt, u32 *bytes_mapped)
+ u64 io_virt, size_t bcnt, u32 *bytes_mapped,
+ u32 flags)
{
int npages = 0, current_seq, page_shift, ret, np;
bool implicit = false;
struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
+ bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
+ bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
u64 access_mask = ODP_READ_ALLOWED_BIT;
u64 start_idx, page_mask;
struct ib_umem_odp *odp;
@@ -578,7 +583,15 @@ next_mr:
page_mask = ~(BIT(page_shift) - 1);
start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
- if (mr->umem->writable)
+ if (prefetch && !downgrade && !mr->umem->writable) {
+ /* prefetch with write-access must
+ * be supported by the MR
+ */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (mr->umem->writable && !downgrade)
access_mask |= ODP_WRITE_ALLOWED_BIT;
current_seq = READ_ONCE(odp->notifiers_seq);
@@ -683,12 +696,13 @@ struct pf_frame {
* -EFAULT when there's an error mapping the requested pages. The caller will
* abort the page fault handling.
*/
-static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
- u32 key, u64 io_virt, size_t bcnt,
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key,
+ u64 io_virt, size_t bcnt,
u32 *bytes_committed,
- u32 *bytes_mapped)
+ u32 *bytes_mapped, u32 flags)
{
int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
+ bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
struct pf_frame *head = NULL, *frame;
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mw *mw;
@@ -710,6 +724,12 @@ next_mr:
goto srcu_unlock;
}
+ if (prefetch && mmkey->type != MLX5_MKEY_MR) {
+ mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n");
+ ret = -EINVAL;
+ goto srcu_unlock;
+ }
+
switch (mmkey->type) {
case MLX5_MKEY_MR:
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
@@ -719,6 +739,11 @@ next_mr:
goto srcu_unlock;
}
+ if (prefetch && !mr->umem->is_odp) {
+ ret = -EINVAL;
+ goto srcu_unlock;
+ }
+
if (!mr->umem->is_odp) {
mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
key);
@@ -728,7 +753,7 @@ next_mr:
goto srcu_unlock;
}
- ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+ ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags);
if (ret < 0)
goto srcu_unlock;
@@ -905,7 +930,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
ret = pagefault_single_data_segment(dev, key, io_virt, bcnt,
&pfault->bytes_committed,
- bytes_mapped);
+ bytes_mapped, 0);
if (ret < 0)
break;
npages += ret;
@@ -1216,7 +1241,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
}
ret = pagefault_single_data_segment(dev, rkey, address, length,
- &pfault->bytes_committed, NULL);
+ &pfault->bytes_committed, NULL,
+ 0);
if (ret == -EAGAIN) {
/* We're racing with an invalidation, don't prefetch */
prefetch_activated = 0;
@@ -1243,7 +1269,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
ret = pagefault_single_data_segment(dev, rkey, address,
prefetch_len,
- &bytes_committed, NULL);
+ &bytes_committed, NULL,
+ 0);
if (ret < 0 && ret != -EAGAIN) {
mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
ret, pfault->token, address, prefetch_len);
@@ -1492,10 +1519,17 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
}
}
+static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
+ .advise_mr = mlx5_ib_advise_mr,
+};
+
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret = 0;
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
+
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
if (ret) {
@@ -1527,3 +1561,76 @@ int mlx5_ib_odp_init(void)
return 0;
}
+
+struct prefetch_mr_work {
+ struct work_struct work;
+ struct mlx5_ib_dev *dev;
+ u32 pf_flags;
+ u32 num_sge;
+ struct ib_sge sg_list[0];
+};
+
+static int mlx5_ib_prefetch_sg_list(struct mlx5_ib_dev *dev, u32 pf_flags,
+ struct ib_sge *sg_list, u32 num_sge)
+{
+ int i;
+
+ for (i = 0; i < num_sge; ++i) {
+ struct ib_sge *sg = &sg_list[i];
+ int bytes_committed = 0;
+ int ret;
+
+ ret = pagefault_single_data_segment(dev, sg->lkey, sg->addr,
+ sg->length,
+ &bytes_committed, NULL,
+ pf_flags);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
+{
+ struct prefetch_mr_work *w =
+ container_of(work, struct prefetch_mr_work, work);
+
+ if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED)
+ mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list,
+ w->num_sge);
+
+ kfree(w);
+}
+
+int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ u32 pf_flags = MLX5_PF_FLAGS_PREFETCH;
+ struct prefetch_mr_work *work;
+
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
+ pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
+
+ if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
+ return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list,
+ num_sge);
+
+ if (dev->ib_dev.reg_state != IB_DEV_REGISTERED)
+ return -ENODEV;
+
+ work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge));
+
+ work->dev = dev;
+ work->pf_flags = pf_flags;
+ work->num_sge = num_sge;
+
+ INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
+ schedule_work(&work->work);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a0e9ff763d42..9c94c1b9ec35 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -108,21 +108,6 @@ static int is_sqp(enum ib_qp_type qp_type)
return is_qp0(qp_type) || is_qp1(qp_type);
}
-static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
-{
- return mlx5_buf_offset(&qp->buf, offset);
-}
-
-static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
-{
- return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
-}
-
-void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
-{
- return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
-}
-
/**
* mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
*
@@ -790,6 +775,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
__be64 *pas;
void *qpc;
int err;
+ u16 uid;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err) {
@@ -851,7 +837,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_umem;
}
- MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid);
+ uid = (attr->qp_type != IB_QPT_XRC_TGT) ? to_mpd(pd)->uid : 0;
+ MLX5_SET(create_qp_in, *in, uid, uid);
pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@ -917,6 +904,30 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
}
+/* get_sq_edge - Get the next nearby edge.
+ *
+ * An 'edge' is defined as the first following address after the end
+ * of the fragment or the SQ. Accordingly, during the WQE construction
+ * which repetitively increases the pointer to write the next data, it
+ * simply should check if it gets to an edge.
+ *
+ * @sq - SQ buffer.
+ * @idx - Stride index in the SQ buffer.
+ *
+ * Return:
+ * The new edge.
+ */
+static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
+{
+ void *fragment_end;
+
+ fragment_end = mlx5_frag_buf_get_wqe
+ (&sq->fbc,
+ mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx));
+
+ return fragment_end + MLX5_SEND_WQE_BB;
+}
+
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
@@ -955,13 +966,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
- err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
+ err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size,
+ &qp->buf, dev->mdev->priv.numa_node);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
- qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
+ if (qp->rq.wqe_cnt)
+ mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift,
+ ilog2(qp->rq.wqe_cnt), &qp->rq.fbc);
+
+ if (qp->sq.wqe_cnt) {
+ int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) /
+ MLX5_SEND_WQE_BB;
+ mlx5_init_fbc_offset(qp->buf.frags +
+ (qp->sq.offset / PAGE_SIZE),
+ ilog2(MLX5_SEND_WQE_BB),
+ ilog2(qp->sq.wqe_cnt),
+ sq_strides_offset, &qp->sq.fbc);
+
+ qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
+ }
+
*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
*in = kvzalloc(*inlen, GFP_KERNEL);
@@ -983,8 +1010,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->flags |= MLX5_IB_QP_SQPN_QP1;
}
- mlx5_fill_page_array(&qp->buf,
- (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
+ mlx5_fill_page_frag_array(&qp->buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in,
+ *in, pas));
err = mlx5_db_alloc(dev->mdev, &qp->db);
if (err) {
@@ -1024,7 +1052,7 @@ err_free:
kvfree(*in);
err_buf:
- mlx5_buf_free(dev->mdev, &qp->buf);
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
return err;
}
@@ -1036,7 +1064,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
kvfree(qp->sq.wr_data);
kvfree(qp->rq.wrid);
mlx5_db_free(dev->mdev, &qp->db);
- mlx5_buf_free(dev->mdev, &qp->buf);
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
}
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
@@ -1876,7 +1904,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
}
- if (pd && pd->uobject) {
+ if (udata) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
mlx5_ib_dbg(dev, "copy failed\n");
return -EFAULT;
@@ -1889,7 +1917,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_QP_FLAG_BFREG_INDEX |
MLX5_QP_FLAG_TYPE_DCT |
MLX5_QP_FLAG_TYPE_DCI |
- MLX5_QP_FLAG_ALLOW_SCATTER_CQE))
+ MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
+ MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE))
return -EINVAL;
err = get_qp_user_index(to_mucontext(pd->uobject->context),
@@ -1925,6 +1954,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
}
+ if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) {
+ if (init_attr->qp_type != IB_QPT_RC ||
+ !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) {
+ mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+ qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT;
+ }
+
if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
if (init_attr->qp_type != IB_QPT_UD ||
(MLX5_CAP_GEN(dev->mdev, port_type) !=
@@ -1948,14 +1986,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->has_rq = qp_has_rq(init_attr);
err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
- qp, (pd && pd->uobject) ? &ucmd : NULL);
+ qp, udata ? &ucmd : NULL);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
if (pd) {
- if (pd->uobject) {
+ if (udata) {
__u32 max_wqes =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
@@ -2021,11 +2059,12 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, cd_slave_send, 1);
if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
MLX5_SET(qpc, qpc, cd_slave_receive, 1);
-
+ if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT)
+ MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
configure_responder_scat_cqe(init_attr, qpc);
configure_requester_scat_cqe(dev, init_attr,
- (pd && pd->uobject) ? &ucmd : NULL,
+ udata ? &ucmd : NULL,
qpc);
}
@@ -2465,7 +2504,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
dev = to_mdev(pd->device);
if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
- if (!pd->uobject) {
+ if (!udata) {
mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
return ERR_PTR(-EINVAL);
} else if (!to_mucontext(pd->uobject->context)->cqe_version) {
@@ -2663,7 +2702,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
if (rate == IB_RATE_PORT_CURRENT)
return 0;
- if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS)
+ if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
return -EINVAL;
while (rate != IB_RATE_PORT_CURRENT &&
@@ -3475,7 +3514,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->sq.head = 0;
qp->sq.tail = 0;
qp->sq.cur_post = 0;
- qp->sq.last_poll = 0;
+ if (qp->sq.wqe_cnt)
+ qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
}
@@ -3515,7 +3555,7 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new
return is_valid_mask(attr_mask, req, opt);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
req |= IB_QP_PATH_MTU;
- opt = IB_QP_PKEY_INDEX;
+ opt = IB_QP_PKEY_INDEX | IB_QP_AV;
return is_valid_mask(attr_mask, req, opt);
} else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
@@ -3749,6 +3789,62 @@ out:
return err;
}
+static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+ u32 wqe_sz, void **cur_edge)
+{
+ u32 idx;
+
+ idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
+ *cur_edge = get_sq_edge(sq, idx);
+
+ *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
+}
+
+/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
+ * next nearby edge and get new address translation for current WQE position.
+ * @sq - SQ buffer.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @cur_edge: Updated current edge.
+ */
+static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+ u32 wqe_sz, void **cur_edge)
+{
+ if (likely(*seg != *cur_edge))
+ return;
+
+ _handle_post_send_edge(sq, seg, wqe_sz, cur_edge);
+}
+
+/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
+ * pointers. At the end @seg is aligned to 16B regardless the copied size.
+ * @sq - SQ buffer.
+ * @cur_edge: Updated current edge.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @src: Pointer to copy from.
+ * @n: Number of bytes to copy.
+ */
+static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
+ void **seg, u32 *wqe_sz, const void *src,
+ size_t n)
+{
+ while (likely(n)) {
+ size_t leftlen = *cur_edge - *seg;
+ size_t copysz = min_t(size_t, leftlen, n);
+ size_t stride;
+
+ memcpy(*seg, src, copysz);
+
+ n -= copysz;
+ src += copysz;
+ stride = !n ? ALIGN(copysz, 16) : copysz;
+ *seg += stride;
+ *wqe_sz += stride >> 4;
+ handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
+ }
+}
+
static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
{
struct mlx5_ib_cq *cq;
@@ -3774,11 +3870,10 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
rseg->reserved = 0;
}
-static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
- const struct ib_send_wr *wr, void *qend,
- struct mlx5_ib_qp *qp, int *size)
+static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size, void **cur_edge)
{
- void *seg = eseg;
+ struct mlx5_wqe_eth_seg *eseg = *seg;
memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
@@ -3786,45 +3881,41 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
MLX5_ETH_WQE_L4_CSUM;
- seg += sizeof(struct mlx5_wqe_eth_seg);
- *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
-
if (wr->opcode == IB_WR_LSO) {
struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
- int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start);
- u64 left, leftlen, copysz;
+ size_t left, copysz;
void *pdata = ud_wr->header;
+ size_t stride;
left = ud_wr->hlen;
eseg->mss = cpu_to_be16(ud_wr->mss);
eseg->inline_hdr.sz = cpu_to_be16(left);
- /*
- * check if there is space till the end of queue, if yes,
- * copy all in one shot, otherwise copy till the end of queue,
- * rollback and than the copy the left
+ /* memcpy_send_wqe should get a 16B align address. Hence, we
+ * first copy up to the current edge and then, if needed,
+ * fall-through to memcpy_send_wqe.
*/
- leftlen = qend - (void *)eseg->inline_hdr.start;
- copysz = min_t(u64, leftlen, left);
-
- memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
-
- if (likely(copysz > size_of_inl_hdr_start)) {
- seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
- *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
- }
-
- if (unlikely(copysz < left)) { /* the last wqe in the queue */
- seg = mlx5_get_send_wqe(qp, 0);
+ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
+ left);
+ memcpy(eseg->inline_hdr.start, pdata, copysz);
+ stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
+ sizeof(eseg->inline_hdr.start) + copysz, 16);
+ *size += stride / 16;
+ *seg += stride;
+
+ if (copysz < left) {
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
left -= copysz;
pdata += copysz;
- memcpy(seg, pdata, left);
- seg += ALIGN(left, 16);
- *size += ALIGN(left, 16) / 16;
+ memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
+ left);
}
+
+ return;
}
- return seg;
+ *seg += sizeof(struct mlx5_wqe_eth_seg);
+ *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
}
static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
@@ -4083,24 +4174,6 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
}
-static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
- struct mlx5_ib_mr *mr, int mr_list_size)
-{
- void *qend = qp->sq.qend;
- void *addr = mr->descs;
- int copy;
-
- if (unlikely(seg + mr_list_size > qend)) {
- copy = qend - seg;
- memcpy(seg, addr, copy);
- addr += copy;
- mr_list_size -= copy;
- seg = mlx5_get_send_wqe(qp, 0);
- }
- memcpy(seg, addr, mr_list_size);
- seg += mr_list_size;
-}
-
static __be32 send_ieth(const struct ib_send_wr *wr)
{
switch (wr->opcode) {
@@ -4134,40 +4207,48 @@ static u8 wq_sig(void *wqe)
}
static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
- void *wqe, int *sz)
+ void **wqe, int *wqe_sz, void **cur_edge)
{
struct mlx5_wqe_inline_seg *seg;
- void *qend = qp->sq.qend;
- void *addr;
+ size_t offset;
int inl = 0;
- int copy;
- int len;
int i;
- seg = wqe;
- wqe += sizeof(*seg);
+ seg = *wqe;
+ *wqe += sizeof(*seg);
+ offset = sizeof(*seg);
+
for (i = 0; i < wr->num_sge; i++) {
- addr = (void *)(unsigned long)(wr->sg_list[i].addr);
- len = wr->sg_list[i].length;
+ size_t len = wr->sg_list[i].length;
+ void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+
inl += len;
if (unlikely(inl > qp->max_inline_data))
return -ENOMEM;
- if (unlikely(wqe + len > qend)) {
- copy = qend - wqe;
- memcpy(wqe, addr, copy);
- addr += copy;
- len -= copy;
- wqe = mlx5_get_send_wqe(qp, 0);
+ while (likely(len)) {
+ size_t leftlen;
+ size_t copysz;
+
+ handle_post_send_edge(&qp->sq, wqe,
+ *wqe_sz + (offset >> 4),
+ cur_edge);
+
+ leftlen = *cur_edge - *wqe;
+ copysz = min_t(size_t, leftlen, len);
+
+ memcpy(*wqe, addr, copysz);
+ len -= copysz;
+ addr += copysz;
+ *wqe += copysz;
+ offset += copysz;
}
- memcpy(wqe, addr, len);
- wqe += len;
}
seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
- *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
+ *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
return 0;
}
@@ -4280,7 +4361,8 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
}
static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
- struct mlx5_ib_qp *qp, void **seg, int *size)
+ struct mlx5_ib_qp *qp, void **seg,
+ int *size, void **cur_edge)
{
struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
struct ib_mr *sig_mr = wr->sig_mr;
@@ -4364,8 +4446,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
*seg += wqe_size;
*size += wqe_size / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
bsf = *seg;
ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
@@ -4374,8 +4455,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
*seg += sizeof(*bsf);
*size += sizeof(*bsf) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
return 0;
}
@@ -4413,7 +4493,8 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
- struct mlx5_ib_qp *qp, void **seg, int *size)
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
@@ -4445,16 +4526,14 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
set_sig_umr_segment(*seg, xlt_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- ret = set_sig_data_segment(wr, qp, seg, size);
+ ret = set_sig_data_segment(wr, qp, seg, size, cur_edge);
if (ret)
return ret;
@@ -4491,11 +4570,11 @@ static int set_psv_wr(struct ib_sig_domain *domain,
static int set_reg_wr(struct mlx5_ib_qp *qp,
const struct ib_reg_wr *wr,
- void **seg, int *size)
+ void **seg, int *size, void **cur_edge)
{
struct mlx5_ib_mr *mr = to_mmr(wr->mr);
struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- int mr_list_size = mr->ndescs * mr->desc_size;
+ size_t mr_list_size = mr->ndescs * mr->desc_size;
bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
@@ -4507,18 +4586,17 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
set_reg_umr_seg(*seg, mr, umr_inline);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
if (umr_inline) {
- set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size);
- *size += get_xlt_octo(mr_list_size);
+ memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
+ mr_list_size);
+ *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
} else {
set_reg_data_seg(*seg, mr, pd);
*seg += sizeof(struct mlx5_wqe_data_seg);
@@ -4527,32 +4605,31 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
return 0;
}
-static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
+static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
set_linv_umr_seg(*seg);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_linv_mkey_seg(*seg);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
}
-static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
+static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
{
__be32 *p = NULL;
- int tidx = idx;
+ u32 tidx = idx;
int i, j;
- pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
+ pr_debug("dump WQE index %u:\n", idx);
for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
if ((i & 0xf) == 0) {
- void *buf = mlx5_get_send_wqe(qp, tidx);
tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
- p = buf;
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx);
+ pr_debug("WQBB at %p:\n", (void *)p);
j = 0;
}
pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
@@ -4562,15 +4639,16 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
}
static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned *idx,
- int *size, int nreq, bool send_signaled, bool solicited)
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ const struct ib_send_wr *wr, unsigned int *idx,
+ int *size, void **cur_edge, int nreq,
+ bool send_signaled, bool solicited)
{
if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
return -ENOMEM;
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- *seg = mlx5_get_send_wqe(qp, *idx);
+ *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
*ctrl = *seg;
*(uint32_t *)(*seg + 8) = 0;
(*ctrl)->imm = send_ieth(wr);
@@ -4580,6 +4658,7 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
+ *cur_edge = qp->sq.cur_edge;
return 0;
}
@@ -4587,17 +4666,18 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl,
const struct ib_send_wr *wr, unsigned *idx,
- int *size, int nreq)
+ int *size, void **cur_edge, int nreq)
{
- return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq,
+ return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
wr->send_flags & IB_SEND_SIGNALED,
wr->send_flags & IB_SEND_SOLICITED);
}
static void finish_wqe(struct mlx5_ib_qp *qp,
struct mlx5_wqe_ctrl_seg *ctrl,
- u8 size, unsigned idx, u64 wr_id,
- int nreq, u8 fence, u32 mlx5_opcode)
+ void *seg, u8 size, void *cur_edge,
+ unsigned int idx, u64 wr_id, int nreq, u8 fence,
+ u32 mlx5_opcode)
{
u8 opmod = 0;
@@ -4613,6 +4693,15 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
qp->sq.wqe_head[idx] = qp->sq.head + nreq;
qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
qp->sq.w_list[idx].next = qp->sq.cur_post;
+
+ /* We save the edge which was possibly updated during the WQE
+ * construction, into SQ's cache.
+ */
+ seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
+ qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
+ get_sq_edge(&qp->sq, qp->sq.cur_post &
+ (qp->sq.wqe_cnt - 1)) :
+ cur_edge;
}
static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
@@ -4623,11 +4712,10 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
- struct mlx5_wqe_data_seg *dpseg;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
+ void *cur_edge;
int uninitialized_var(size);
- void *qend;
unsigned long flags;
unsigned idx;
int err = 0;
@@ -4649,7 +4737,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
qp = to_mqp(ibqp);
bf = &qp->bf;
- qend = qp->sq.qend;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -4669,7 +4756,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
+ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
+ nreq);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4719,14 +4807,15 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
case IB_WR_LOCAL_INV:
qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
- set_linv_wr(qp, &seg, &size);
+ set_linv_wr(qp, &seg, &size, &cur_edge);
num_sge = 0;
break;
case IB_WR_REG_MR:
qp->sq.wr_data[idx] = IB_WR_REG_MR;
ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
- err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
+ err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
+ &cur_edge);
if (err) {
*bad_wr = wr;
goto out;
@@ -4739,21 +4828,24 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
mr = to_mmr(sig_handover_wr(wr)->sig_mr);
ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
- err = set_sig_umr_wr(wr, qp, &seg, &size);
+ err = set_sig_umr_wr(wr, qp, &seg, &size,
+ &cur_edge);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_UMR);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+ wr->wr_id, nreq, fence,
+ MLX5_OPCODE_UMR);
/*
* SET_PSV WQEs are not signaled and solicited
* on error
*/
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, nreq, false, true);
+ &size, &cur_edge, nreq, false,
+ true);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4770,10 +4862,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+ wr->wr_id, nreq, fence,
+ MLX5_OPCODE_SET_PSV);
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, nreq, false, true);
+ &size, &cur_edge, nreq, false,
+ true);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4790,8 +4884,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+ wr->wr_id, nreq, fence,
+ MLX5_OPCODE_SET_PSV);
qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
num_sge = 0;
goto skip_psv;
@@ -4828,16 +4923,14 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
+
break;
case IB_QPT_UD:
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
-
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
/* handle qp that supports ud offload */
if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
@@ -4847,11 +4940,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
seg += sizeof(struct mlx5_wqe_eth_pad);
size += sizeof(struct mlx5_wqe_eth_pad) / 16;
-
- seg = set_eth_seg(seg, wr, qend, qp, &size);
-
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ set_eth_seg(wr, qp, &seg, &size, &cur_edge);
+ handle_post_send_edge(&qp->sq, &seg, size,
+ &cur_edge);
}
break;
case MLX5_IB_QPT_REG_UMR:
@@ -4867,13 +4958,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
set_reg_mkey_segment(seg, wr);
seg += sizeof(struct mlx5_mkey_seg);
size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
break;
default:
@@ -4881,33 +4970,29 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
if (wr->send_flags & IB_SEND_INLINE && num_sge) {
- int uninitialized_var(sz);
-
- err = set_data_inl_seg(qp, wr, seg, &sz);
+ err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
if (unlikely(err)) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
- size += sz;
} else {
- dpseg = seg;
for (i = 0; i < num_sge; i++) {
- if (unlikely(dpseg == qend)) {
- seg = mlx5_get_send_wqe(qp, 0);
- dpseg = seg;
- }
+ handle_post_send_edge(&qp->sq, &seg, size,
+ &cur_edge);
if (likely(wr->sg_list[i].length)) {
- set_data_ptr_seg(dpseg, wr->sg_list + i);
+ set_data_ptr_seg
+ ((struct mlx5_wqe_data_seg *)seg,
+ wr->sg_list + i);
size += sizeof(struct mlx5_wqe_data_seg) / 16;
- dpseg++;
+ seg += sizeof(struct mlx5_wqe_data_seg);
}
}
}
qp->next_fence = next_fence;
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence,
- mlx5_ib_opcode[wr->opcode]);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
+ fence, mlx5_ib_opcode[wr->opcode]);
skip_psv:
if (0)
dump_wqe(qp, idx, size);
@@ -4993,7 +5078,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
goto out;
}
- scat = get_recv_wqe(qp, ind);
+ scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
if (qp->wq_sig)
scat++;
@@ -5441,7 +5526,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_xrcd *xrcd;
int err;
- u16 uid;
if (!MLX5_CAP_GEN(dev->mdev, xrc))
return ERR_PTR(-ENOSYS);
@@ -5450,14 +5534,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
if (!xrcd)
return ERR_PTR(-ENOMEM);
- uid = context ? to_mucontext(context)->devx_uid : 0;
- err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
+ err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
if (err) {
kfree(xrcd);
return ERR_PTR(-ENOMEM);
}
- xrcd->uid = uid;
return &xrcd->ibxrcd;
}
@@ -5465,10 +5547,9 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- u16 uid = to_mxrcd(xrcd)->uid;
int err;
- err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);
+ err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
if (err)
mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 91dcd3918d96..4e8d18009f58 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -13,7 +13,7 @@
static void *get_wqe(struct mlx5_ib_srq *srq, int n)
{
- return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
+ return mlx5_frag_buf_get_wqe(&srq->fbc, n);
}
static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
@@ -113,7 +113,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->page_offset = offset;
- in->uid = to_mpd(pd)->uid;
+ in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type != IB_SRQT_BASIC)
in->user_index = uidx;
@@ -142,12 +142,16 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
return err;
}
- if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) {
+ if (mlx5_frag_buf_alloc_node(dev->mdev, buf_size, &srq->buf,
+ dev->mdev->priv.numa_node)) {
mlx5_ib_dbg(dev, "buf alloc failed\n");
err = -ENOMEM;
goto err_db;
}
+ mlx5_init_fbc(srq->buf.frags, srq->msrq.wqe_shift, ilog2(srq->msrq.max),
+ &srq->fbc);
+
srq->head = 0;
srq->tail = srq->msrq.max - 1;
srq->wqe_ctr = 0;
@@ -164,7 +168,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
err = -ENOMEM;
goto err_buf;
}
- mlx5_fill_page_array(&srq->buf, in->pas);
+ mlx5_fill_page_frag_array(&srq->buf, in->pas);
srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
@@ -184,7 +188,7 @@ err_in:
kvfree(in->pas);
err_buf:
- mlx5_buf_free(dev->mdev, &srq->buf);
+ mlx5_frag_buf_free(dev->mdev, &srq->buf);
err_db:
mlx5_db_free(dev->mdev, &srq->db);
@@ -201,7 +205,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
{
kvfree(srq->wrid);
- mlx5_buf_free(dev->mdev, &srq->buf);
+ mlx5_frag_buf_free(dev->mdev, &srq->buf);
mlx5_db_free(dev->mdev, &srq->db);
}
@@ -256,14 +260,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
}
in.type = init_attr->srq_type;
- if (pd->uobject)
+ if (udata)
err = create_srq_user(pd, srq, &in, udata, buf_size);
else
err = create_srq_kernel(dev, srq, &in, buf_size);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
- pd->uobject ? "user" : "kernel", err);
+ udata ? "user" : "kernel", err);
goto err_srq;
}
@@ -308,7 +312,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
srq->msrq.event = mlx5_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject)
+ if (udata)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
mlx5_ib_dbg(dev, "copy to user failed\n");
err = -EFAULT;
@@ -323,7 +327,7 @@ err_core:
mlx5_cmd_destroy_srq(dev, &srq->msrq);
err_usr_kern_srq:
- if (pd->uobject)
+ if (udata)
destroy_srq_user(pd, srq);
else
destroy_srq_kernel(dev, srq);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 220a3e4717a3..bfd4eebc1182 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -510,7 +510,8 @@ int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
- struct ib_srq_attr *attr, struct mthca_srq *srq);
+ struct ib_srq_attr *attr, struct mthca_srq *srq,
+ struct ib_udata *udata);
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
@@ -547,7 +548,8 @@ int mthca_alloc_qp(struct mthca_dev *dev,
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
- struct mthca_qp *qp);
+ struct mthca_qp *qp,
+ struct ib_udata *udata);
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
@@ -556,7 +558,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp);
+ struct mthca_sqp *sqp,
+ struct ib_udata *udata);
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 2e5dc0a67cfc..7ad517da4917 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -89,13 +89,13 @@ static void update_sm_ah(struct mthca_dev *dev,
rdma_ah_set_port_num(&ah_attr, port_num);
new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ &ah_attr, 0);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- rdma_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1], 0);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -347,6 +347,7 @@ void mthca_free_agents(struct mthca_dev *dev)
}
if (dev->sm_ah[p])
- rdma_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p],
+ RDMA_DESTROY_AH_SLEEPABLE);
}
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 691c6f048938..82cb6b71ac7c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -412,6 +412,7 @@ static int mthca_dealloc_pd(struct ib_pd *pd)
static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
@@ -431,7 +432,7 @@ static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
return &ah->ibah;
}
-static int mthca_ah_destroy(struct ib_ah *ah)
+static int mthca_ah_destroy(struct ib_ah *ah, u32 flags)
{
mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
kfree(ah);
@@ -455,7 +456,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
if (!srq)
return ERR_PTR(-ENOMEM);
- if (pd->uobject) {
+ if (udata) {
context = to_mucontext(pd->uobject->context);
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
@@ -475,9 +476,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
}
err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
- &init_attr->attr, srq);
+ &init_attr->attr, srq, udata);
- if (err && pd->uobject)
+ if (err && udata)
mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
context->db_tab, ucmd.db_index);
@@ -537,7 +538,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
if (!qp)
return ERR_PTR(-ENOMEM);
- if (pd->uobject) {
+ if (udata) {
context = to_mucontext(pd->uobject->context);
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
@@ -574,9 +575,9 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->qp_type, init_attr->sq_sig_type,
- &init_attr->cap, qp);
+ &init_attr->cap, qp, udata);
- if (err && pd->uobject) {
+ if (err && udata) {
context = to_mucontext(pd->uobject->context);
mthca_unmap_user_db(to_mdev(pd->device),
@@ -596,7 +597,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
case IB_QPT_GSI:
{
/* Don't allow userspace to create special QPs */
- if (pd->uobject)
+ if (udata)
return ERR_PTR(-EINVAL);
qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
@@ -610,7 +611,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
to_mcq(init_attr->recv_cq),
init_attr->sq_sig_type, &init_attr->cap,
qp->ibqp.qp_num, init_attr->port_num,
- to_msqp(qp));
+ to_msqp(qp), udata);
break;
}
default:
@@ -1193,6 +1194,81 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
(int) dev->fw_ver & 0xffff);
}
+static const struct ib_device_ops mthca_dev_ops = {
+ .alloc_pd = mthca_alloc_pd,
+ .alloc_ucontext = mthca_alloc_ucontext,
+ .attach_mcast = mthca_multicast_attach,
+ .create_ah = mthca_ah_create,
+ .create_cq = mthca_create_cq,
+ .create_qp = mthca_create_qp,
+ .dealloc_pd = mthca_dealloc_pd,
+ .dealloc_ucontext = mthca_dealloc_ucontext,
+ .dereg_mr = mthca_dereg_mr,
+ .destroy_ah = mthca_ah_destroy,
+ .destroy_cq = mthca_destroy_cq,
+ .destroy_qp = mthca_destroy_qp,
+ .detach_mcast = mthca_multicast_detach,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = mthca_get_dma_mr,
+ .get_port_immutable = mthca_port_immutable,
+ .mmap = mthca_mmap_uar,
+ .modify_device = mthca_modify_device,
+ .modify_port = mthca_modify_port,
+ .modify_qp = mthca_modify_qp,
+ .poll_cq = mthca_poll_cq,
+ .process_mad = mthca_process_mad,
+ .query_ah = mthca_ah_query,
+ .query_device = mthca_query_device,
+ .query_gid = mthca_query_gid,
+ .query_pkey = mthca_query_pkey,
+ .query_port = mthca_query_port,
+ .query_qp = mthca_query_qp,
+ .reg_user_mr = mthca_reg_user_mr,
+ .resize_cq = mthca_resize_cq,
+};
+
+static const struct ib_device_ops mthca_dev_arbel_srq_ops = {
+ .create_srq = mthca_create_srq,
+ .destroy_srq = mthca_destroy_srq,
+ .modify_srq = mthca_modify_srq,
+ .post_srq_recv = mthca_arbel_post_srq_recv,
+ .query_srq = mthca_query_srq,
+};
+
+static const struct ib_device_ops mthca_dev_tavor_srq_ops = {
+ .create_srq = mthca_create_srq,
+ .destroy_srq = mthca_destroy_srq,
+ .modify_srq = mthca_modify_srq,
+ .post_srq_recv = mthca_tavor_post_srq_recv,
+ .query_srq = mthca_query_srq,
+};
+
+static const struct ib_device_ops mthca_dev_arbel_fmr_ops = {
+ .alloc_fmr = mthca_alloc_fmr,
+ .dealloc_fmr = mthca_dealloc_fmr,
+ .map_phys_fmr = mthca_arbel_map_phys_fmr,
+ .unmap_fmr = mthca_unmap_fmr,
+};
+
+static const struct ib_device_ops mthca_dev_tavor_fmr_ops = {
+ .alloc_fmr = mthca_alloc_fmr,
+ .dealloc_fmr = mthca_dealloc_fmr,
+ .map_phys_fmr = mthca_tavor_map_phys_fmr,
+ .unmap_fmr = mthca_unmap_fmr,
+};
+
+static const struct ib_device_ops mthca_dev_arbel_ops = {
+ .post_recv = mthca_arbel_post_receive,
+ .post_send = mthca_arbel_post_send,
+ .req_notify_cq = mthca_arbel_arm_cq,
+};
+
+static const struct ib_device_ops mthca_dev_tavor_ops = {
+ .post_recv = mthca_tavor_post_receive,
+ .post_send = mthca_tavor_post_send,
+ .req_notify_cq = mthca_tavor_arm_cq,
+};
+
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
@@ -1226,26 +1302,8 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.num_comp_vectors = 1;
dev->ib_dev.dev.parent = &dev->pdev->dev;
- dev->ib_dev.query_device = mthca_query_device;
- dev->ib_dev.query_port = mthca_query_port;
- dev->ib_dev.modify_device = mthca_modify_device;
- dev->ib_dev.modify_port = mthca_modify_port;
- dev->ib_dev.query_pkey = mthca_query_pkey;
- dev->ib_dev.query_gid = mthca_query_gid;
- dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
- dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
- dev->ib_dev.mmap = mthca_mmap_uar;
- dev->ib_dev.alloc_pd = mthca_alloc_pd;
- dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
- dev->ib_dev.create_ah = mthca_ah_create;
- dev->ib_dev.query_ah = mthca_ah_query;
- dev->ib_dev.destroy_ah = mthca_ah_destroy;
if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
- dev->ib_dev.create_srq = mthca_create_srq;
- dev->ib_dev.modify_srq = mthca_modify_srq;
- dev->ib_dev.query_srq = mthca_query_srq;
- dev->ib_dev.destroy_srq = mthca_destroy_srq;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
@@ -1253,48 +1311,28 @@ int mthca_register_device(struct mthca_dev *dev)
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
if (mthca_is_memfree(dev))
- dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_arbel_srq_ops);
else
- dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_tavor_srq_ops);
}
- dev->ib_dev.create_qp = mthca_create_qp;
- dev->ib_dev.modify_qp = mthca_modify_qp;
- dev->ib_dev.query_qp = mthca_query_qp;
- dev->ib_dev.destroy_qp = mthca_destroy_qp;
- dev->ib_dev.create_cq = mthca_create_cq;
- dev->ib_dev.resize_cq = mthca_resize_cq;
- dev->ib_dev.destroy_cq = mthca_destroy_cq;
- dev->ib_dev.poll_cq = mthca_poll_cq;
- dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
- dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
- dev->ib_dev.dereg_mr = mthca_dereg_mr;
- dev->ib_dev.get_port_immutable = mthca_port_immutable;
- dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
-
if (dev->mthca_flags & MTHCA_FLAG_FMR) {
- dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
- dev->ib_dev.unmap_fmr = mthca_unmap_fmr;
- dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr;
if (mthca_is_memfree(dev))
- dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_arbel_fmr_ops);
else
- dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_tavor_fmr_ops);
}
- dev->ib_dev.attach_mcast = mthca_multicast_attach;
- dev->ib_dev.detach_mcast = mthca_multicast_detach;
- dev->ib_dev.process_mad = mthca_process_mad;
+ ib_set_device_ops(&dev->ib_dev, &mthca_dev_ops);
- if (mthca_is_memfree(dev)) {
- dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
- dev->ib_dev.post_send = mthca_arbel_post_send;
- dev->ib_dev.post_recv = mthca_arbel_post_receive;
- } else {
- dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
- dev->ib_dev.post_send = mthca_tavor_post_send;
- dev->ib_dev.post_recv = mthca_tavor_post_receive;
- }
+ if (mthca_is_memfree(dev))
+ ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_ops);
+ else
+ ib_set_device_ops(&dev->ib_dev, &mthca_dev_tavor_ops);
mutex_init(&dev->cap_mask_mutex);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 9d178ee3c96a..4e5b5cc17f1d 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -981,7 +981,8 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev,
*/
static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int size;
int err = -ENOMEM;
@@ -1048,7 +1049,7 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
* allocate anything. All we need is to calculate the WQE
* sizes and the send_wqe_offset, so we're done now.
*/
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
size = PAGE_ALIGN(qp->send_wqe_offset +
@@ -1155,7 +1156,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int ret;
int i;
@@ -1178,7 +1180,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
if (ret)
return ret;
- ret = mthca_alloc_wqe_buf(dev, pd, qp);
+ ret = mthca_alloc_wqe_buf(dev, pd, qp, udata);
if (ret) {
mthca_unmap_memfree(dev, qp);
return ret;
@@ -1191,7 +1193,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
* will be allocated and buffers will be initialized in
* userspace.
*/
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
ret = mthca_alloc_memfree(dev, qp);
@@ -1285,7 +1287,8 @@ int mthca_alloc_qp(struct mthca_dev *dev,
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int err;
@@ -1308,7 +1311,7 @@ int mthca_alloc_qp(struct mthca_dev *dev,
qp->port = 0;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, qp);
+ send_policy, qp, udata);
if (err) {
mthca_free(&dev->qp_table.alloc, qp->qpn);
return err;
@@ -1360,7 +1363,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp)
+ struct mthca_sqp *sqp,
+ struct ib_udata *udata)
{
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
int err;
@@ -1391,7 +1395,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
sqp->qp.transport = MLX;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, &sqp->qp);
+ send_policy, &sqp->qp, udata);
if (err)
goto err_out_free;
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 9a3fc6fb0d7e..b8333c79e3fa 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -95,7 +95,8 @@ static inline int *wqe_to_link(void *wqe)
static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
- struct mthca_tavor_srq_context *context)
+ struct mthca_tavor_srq_context *context,
+ bool is_user)
{
memset(context, 0, sizeof *context);
@@ -103,7 +104,7 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
context->state_pd = cpu_to_be32(pd->pd_num);
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
- if (pd->ibpd.uobject)
+ if (is_user)
context->uar =
cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
else
@@ -113,7 +114,8 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
- struct mthca_arbel_srq_context *context)
+ struct mthca_arbel_srq_context *context,
+ bool is_user)
{
int logsize, max;
@@ -129,7 +131,7 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
context->db_index = cpu_to_be32(srq->db_index);
context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
- if (pd->ibpd.uobject)
+ if (is_user)
context->logstride_usrpage |=
cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
else
@@ -145,14 +147,14 @@ static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq)
}
static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
- struct mthca_srq *srq)
+ struct mthca_srq *srq, struct ib_udata *udata)
{
struct mthca_data_seg *scatter;
void *wqe;
int err;
int i;
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
srq->wrid = kmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
@@ -197,7 +199,8 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
}
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
- struct ib_srq_attr *attr, struct mthca_srq *srq)
+ struct ib_srq_attr *attr, struct mthca_srq *srq,
+ struct ib_udata *udata)
{
struct mthca_mailbox *mailbox;
int ds;
@@ -235,7 +238,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
if (err)
goto err_out;
- if (!pd->ibpd.uobject) {
+ if (!udata) {
srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
srq->srqn, &srq->db);
if (srq->db_index < 0) {
@@ -251,7 +254,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
goto err_out_db;
}
- err = mthca_alloc_srq_buf(dev, pd, srq);
+ err = mthca_alloc_srq_buf(dev, pd, srq, udata);
if (err)
goto err_out_mailbox;
@@ -261,9 +264,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
mutex_init(&srq->mutex);
if (mthca_is_memfree(dev))
- mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
+ mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata);
else
- mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);
+ mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata);
err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn);
@@ -297,14 +300,14 @@ err_out_free_srq:
mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
err_out_free_buf:
- if (!pd->ibpd.uobject)
+ if (!udata)
mthca_free_srq_buf(dev, srq);
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
err_out_db:
- if (!pd->ibpd.uobject && mthca_is_memfree(dev))
+ if (!udata && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
err_out_icm:
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 2b67ace5b614..032883180f65 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -3033,7 +3033,7 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
/* Need to free the Last Streaming Mode Message */
if (nesqp->ietf_frame) {
if (nesqp->lsmm_mr)
- nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr);
+ nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr);
pci_free_consistent(nesdev->pcidev,
nesqp->private_data_len + nesqp->ietf_frame_size,
nesqp->ietf_frame, nesqp->ietf_frame_pbase);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 92d1cadd4cfd..4e7f08ee1907 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1066,7 +1066,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
}
if (req.user_qp_buffer)
nesqp->nesuqp_addr = req.user_qp_buffer;
- if ((ibpd->uobject) && (ibpd->uobject->context)) {
+ if (udata && (ibpd->uobject->context)) {
nesqp->user_mode = 1;
nes_ucontext = to_nesucontext(ibpd->uobject->context);
if (virt_wqs) {
@@ -1257,7 +1257,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nes_put_cqp_request(nesdev, cqp_request);
- if (ibpd->uobject) {
+ if (udata) {
uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
uresp.mmap_rq_db_index = 0;
uresp.actual_sq_size = sq_size;
@@ -3627,6 +3627,39 @@ static void get_dev_fw_str(struct ib_device *dev, char *str)
(nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
}
+static const struct ib_device_ops nes_dev_ops = {
+ .alloc_mr = nes_alloc_mr,
+ .alloc_mw = nes_alloc_mw,
+ .alloc_pd = nes_alloc_pd,
+ .alloc_ucontext = nes_alloc_ucontext,
+ .create_cq = nes_create_cq,
+ .create_qp = nes_create_qp,
+ .dealloc_mw = nes_dealloc_mw,
+ .dealloc_pd = nes_dealloc_pd,
+ .dealloc_ucontext = nes_dealloc_ucontext,
+ .dereg_mr = nes_dereg_mr,
+ .destroy_cq = nes_destroy_cq,
+ .destroy_qp = nes_destroy_qp,
+ .drain_rq = nes_drain_rq,
+ .drain_sq = nes_drain_sq,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = nes_get_dma_mr,
+ .get_port_immutable = nes_port_immutable,
+ .map_mr_sg = nes_map_mr_sg,
+ .mmap = nes_mmap,
+ .modify_qp = nes_modify_qp,
+ .poll_cq = nes_poll_cq,
+ .post_recv = nes_post_recv,
+ .post_send = nes_post_send,
+ .query_device = nes_query_device,
+ .query_gid = nes_query_gid,
+ .query_pkey = nes_query_pkey,
+ .query_port = nes_query_port,
+ .query_qp = nes_query_qp,
+ .reg_user_mr = nes_reg_user_mr,
+ .req_notify_cq = nes_req_notify_cq,
+};
+
/**
* nes_init_ofa_device
*/
@@ -3673,36 +3706,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.phys_port_cnt = 1;
nesibdev->ibdev.num_comp_vectors = 1;
nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
- nesibdev->ibdev.query_device = nes_query_device;
- nesibdev->ibdev.query_port = nes_query_port;
- nesibdev->ibdev.query_pkey = nes_query_pkey;
- nesibdev->ibdev.query_gid = nes_query_gid;
- nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext;
- nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext;
- nesibdev->ibdev.mmap = nes_mmap;
- nesibdev->ibdev.alloc_pd = nes_alloc_pd;
- nesibdev->ibdev.dealloc_pd = nes_dealloc_pd;
- nesibdev->ibdev.create_qp = nes_create_qp;
- nesibdev->ibdev.modify_qp = nes_modify_qp;
- nesibdev->ibdev.query_qp = nes_query_qp;
- nesibdev->ibdev.destroy_qp = nes_destroy_qp;
- nesibdev->ibdev.create_cq = nes_create_cq;
- nesibdev->ibdev.destroy_cq = nes_destroy_cq;
- nesibdev->ibdev.poll_cq = nes_poll_cq;
- nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
- nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
- nesibdev->ibdev.dereg_mr = nes_dereg_mr;
- nesibdev->ibdev.alloc_mw = nes_alloc_mw;
- nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
-
- nesibdev->ibdev.alloc_mr = nes_alloc_mr;
- nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
-
- nesibdev->ibdev.req_notify_cq = nes_req_notify_cq;
- nesibdev->ibdev.post_send = nes_post_send;
- nesibdev->ibdev.post_recv = nes_post_recv;
- nesibdev->ibdev.drain_sq = nes_drain_sq;
- nesibdev->ibdev.drain_rq = nes_drain_rq;
nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL);
if (nesibdev->ibdev.iwcm == NULL) {
@@ -3717,8 +3720,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.iwcm->reject = nes_reject;
nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
- nesibdev->ibdev.get_port_immutable = nes_port_immutable;
- nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str;
+
+ ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops);
memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(nesibdev->ibdev.iwcm->ifname));
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 58188fe5aed2..a7295322efbc 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -157,7 +157,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
}
struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
u32 *ahid_addr;
int status;
@@ -219,7 +219,7 @@ av_err:
return ERR_PTR(status);
}
-int ocrdma_destroy_ah(struct ib_ah *ibah)
+int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index c0c32c9b80ae..eb996e14b520 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -52,8 +52,8 @@ enum {
};
struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
-int ocrdma_destroy_ah(struct ib_ah *ah);
+ u32 flags, struct ib_udata *udata);
+int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int ocrdma_process_mad(struct ib_device *,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 873cc7f6fe61..1f393842453a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -143,6 +143,50 @@ static const struct attribute_group ocrdma_attr_group = {
.attrs = ocrdma_attributes,
};
+static const struct ib_device_ops ocrdma_dev_ops = {
+ .alloc_mr = ocrdma_alloc_mr,
+ .alloc_pd = ocrdma_alloc_pd,
+ .alloc_ucontext = ocrdma_alloc_ucontext,
+ .create_ah = ocrdma_create_ah,
+ .create_cq = ocrdma_create_cq,
+ .create_qp = ocrdma_create_qp,
+ .dealloc_pd = ocrdma_dealloc_pd,
+ .dealloc_ucontext = ocrdma_dealloc_ucontext,
+ .dereg_mr = ocrdma_dereg_mr,
+ .destroy_ah = ocrdma_destroy_ah,
+ .destroy_cq = ocrdma_destroy_cq,
+ .destroy_qp = ocrdma_destroy_qp,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = ocrdma_get_dma_mr,
+ .get_link_layer = ocrdma_link_layer,
+ .get_netdev = ocrdma_get_netdev,
+ .get_port_immutable = ocrdma_port_immutable,
+ .map_mr_sg = ocrdma_map_mr_sg,
+ .mmap = ocrdma_mmap,
+ .modify_port = ocrdma_modify_port,
+ .modify_qp = ocrdma_modify_qp,
+ .poll_cq = ocrdma_poll_cq,
+ .post_recv = ocrdma_post_recv,
+ .post_send = ocrdma_post_send,
+ .process_mad = ocrdma_process_mad,
+ .query_ah = ocrdma_query_ah,
+ .query_device = ocrdma_query_device,
+ .query_pkey = ocrdma_query_pkey,
+ .query_port = ocrdma_query_port,
+ .query_qp = ocrdma_query_qp,
+ .reg_user_mr = ocrdma_reg_user_mr,
+ .req_notify_cq = ocrdma_arm_cq,
+ .resize_cq = ocrdma_resize_cq,
+};
+
+static const struct ib_device_ops ocrdma_dev_srq_ops = {
+ .create_srq = ocrdma_create_srq,
+ .destroy_srq = ocrdma_destroy_srq,
+ .modify_srq = ocrdma_modify_srq,
+ .post_srq_recv = ocrdma_post_srq_recv,
+ .query_srq = ocrdma_query_srq,
+};
+
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
@@ -182,50 +226,10 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = dev->eq_cnt;
- /* mandatory verbs. */
- dev->ibdev.query_device = ocrdma_query_device;
- dev->ibdev.query_port = ocrdma_query_port;
- dev->ibdev.modify_port = ocrdma_modify_port;
- dev->ibdev.get_netdev = ocrdma_get_netdev;
- dev->ibdev.get_link_layer = ocrdma_link_layer;
- dev->ibdev.alloc_pd = ocrdma_alloc_pd;
- dev->ibdev.dealloc_pd = ocrdma_dealloc_pd;
-
- dev->ibdev.create_cq = ocrdma_create_cq;
- dev->ibdev.destroy_cq = ocrdma_destroy_cq;
- dev->ibdev.resize_cq = ocrdma_resize_cq;
-
- dev->ibdev.create_qp = ocrdma_create_qp;
- dev->ibdev.modify_qp = ocrdma_modify_qp;
- dev->ibdev.query_qp = ocrdma_query_qp;
- dev->ibdev.destroy_qp = ocrdma_destroy_qp;
-
- dev->ibdev.query_pkey = ocrdma_query_pkey;
- dev->ibdev.create_ah = ocrdma_create_ah;
- dev->ibdev.destroy_ah = ocrdma_destroy_ah;
- dev->ibdev.query_ah = ocrdma_query_ah;
-
- dev->ibdev.poll_cq = ocrdma_poll_cq;
- dev->ibdev.post_send = ocrdma_post_send;
- dev->ibdev.post_recv = ocrdma_post_recv;
- dev->ibdev.req_notify_cq = ocrdma_arm_cq;
-
- dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
- dev->ibdev.dereg_mr = ocrdma_dereg_mr;
- dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
-
- dev->ibdev.alloc_mr = ocrdma_alloc_mr;
- dev->ibdev.map_mr_sg = ocrdma_map_mr_sg;
-
/* mandatory to support user space verbs consumer. */
- dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
- dev->ibdev.mmap = ocrdma_mmap;
dev->ibdev.dev.parent = &dev->nic_info.pdev->dev;
- dev->ibdev.process_mad = ocrdma_process_mad;
- dev->ibdev.get_port_immutable = ocrdma_port_immutable;
- dev->ibdev.get_dev_fw_str = get_dev_fw_str;
+ ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops);
if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
dev->ibdev.uverbs_cmd_mask |=
@@ -235,11 +239,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
OCRDMA_UVERBS(DESTROY_SRQ) |
OCRDMA_UVERBS(POST_SRQ_RECV);
- dev->ibdev.create_srq = ocrdma_create_srq;
- dev->ibdev.modify_srq = ocrdma_modify_srq;
- dev->ibdev.query_srq = ocrdma_query_srq;
- dev->ibdev.destroy_srq = ocrdma_destroy_srq;
- dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
+ ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops);
}
rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 290d776edf48..dd15474b19b7 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -760,12 +760,13 @@ static const struct file_operations ocrdma_dbg_ops = {
void ocrdma_add_port_stats(struct ocrdma_dev *dev)
{
+ const struct pci_dev *pdev = dev->nic_info.pdev;
+
if (!ocrdma_dbgfs_dir)
return;
/* Create post stats base dir */
- dev->dir =
- debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir);
+ dev->dir = debugfs_create_dir(pci_name(pdev), ocrdma_dbgfs_dir);
if (!dev->dir)
goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 06d2a7f3304c..c46bed0c5513 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -177,11 +177,6 @@ int ocrdma_query_port(struct ib_device *ibdev,
/* props being zeroed by the caller, avoid zeroing it here */
dev = get_ocrdma_dev(ibdev);
- if (port > 1) {
- pr_err("%s(%d) invalid_port=0x%x\n", __func__,
- dev->id, port);
- return -EINVAL;
- }
netdev = dev->nic_info.netdev;
if (netif_running(netdev) && netif_oper_up(netdev)) {
port_state = IB_PORT_ACTIVE;
@@ -215,13 +210,6 @@ int ocrdma_query_port(struct ib_device *ibdev,
int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
- struct ocrdma_dev *dev;
-
- dev = get_ocrdma_dev(ibdev);
- if (port > 1) {
- pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
- return -EINVAL;
- }
return 0;
}
@@ -1169,7 +1157,8 @@ static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
}
static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
- struct ib_qp_init_attr *attrs)
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
{
if ((attrs->qp_type != IB_QPT_GSI) &&
(attrs->qp_type != IB_QPT_RC) &&
@@ -1217,7 +1206,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
return -EINVAL;
}
/* unprivileged user space cannot create special QP */
- if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
+ if (udata && attrs->qp_type == IB_QPT_GSI) {
pr_err
("%s(%d) Userspace can't create special QPs of type=0x%x\n",
__func__, dev->id, attrs->qp_type);
@@ -1374,7 +1363,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
struct ocrdma_create_qp_ureq ureq;
u16 dpp_credit_lmt, dpp_offset;
- status = ocrdma_check_qp_params(ibpd, dev, attrs);
+ status = ocrdma_check_qp_params(ibpd, dev, attrs, udata);
if (status)
goto gen_err;
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 8d6ff9df49fe..75940e2a8791 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -160,12 +160,16 @@ static const struct attribute_group qedr_attr_group = {
.attrs = qedr_attributes,
};
+static const struct ib_device_ops qedr_iw_dev_ops = {
+ .get_port_immutable = qedr_iw_port_immutable,
+ .query_gid = qedr_iw_query_gid,
+};
+
static int qedr_iw_register_device(struct qedr_dev *dev)
{
dev->ibdev.node_type = RDMA_NODE_RNIC;
- dev->ibdev.query_gid = qedr_iw_query_gid;
- dev->ibdev.get_port_immutable = qedr_iw_port_immutable;
+ ib_set_device_ops(&dev->ibdev, &qedr_iw_dev_ops);
dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
if (!dev->ibdev.iwcm)
@@ -186,13 +190,56 @@ static int qedr_iw_register_device(struct qedr_dev *dev)
return 0;
}
+static const struct ib_device_ops qedr_roce_dev_ops = {
+ .get_port_immutable = qedr_roce_port_immutable,
+};
+
static void qedr_roce_register_device(struct qedr_dev *dev)
{
dev->ibdev.node_type = RDMA_NODE_IB_CA;
- dev->ibdev.get_port_immutable = qedr_roce_port_immutable;
+ ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops);
}
+static const struct ib_device_ops qedr_dev_ops = {
+ .alloc_mr = qedr_alloc_mr,
+ .alloc_pd = qedr_alloc_pd,
+ .alloc_ucontext = qedr_alloc_ucontext,
+ .create_ah = qedr_create_ah,
+ .create_cq = qedr_create_cq,
+ .create_qp = qedr_create_qp,
+ .create_srq = qedr_create_srq,
+ .dealloc_pd = qedr_dealloc_pd,
+ .dealloc_ucontext = qedr_dealloc_ucontext,
+ .dereg_mr = qedr_dereg_mr,
+ .destroy_ah = qedr_destroy_ah,
+ .destroy_cq = qedr_destroy_cq,
+ .destroy_qp = qedr_destroy_qp,
+ .destroy_srq = qedr_destroy_srq,
+ .get_dev_fw_str = qedr_get_dev_fw_str,
+ .get_dma_mr = qedr_get_dma_mr,
+ .get_link_layer = qedr_link_layer,
+ .get_netdev = qedr_get_netdev,
+ .map_mr_sg = qedr_map_mr_sg,
+ .mmap = qedr_mmap,
+ .modify_port = qedr_modify_port,
+ .modify_qp = qedr_modify_qp,
+ .modify_srq = qedr_modify_srq,
+ .poll_cq = qedr_poll_cq,
+ .post_recv = qedr_post_recv,
+ .post_send = qedr_post_send,
+ .post_srq_recv = qedr_post_srq_recv,
+ .process_mad = qedr_process_mad,
+ .query_device = qedr_query_device,
+ .query_pkey = qedr_query_pkey,
+ .query_port = qedr_query_port,
+ .query_qp = qedr_query_qp,
+ .query_srq = qedr_query_srq,
+ .reg_user_mr = qedr_reg_user_mr,
+ .req_notify_cq = qedr_arm_cq,
+ .resize_cq = qedr_resize_cq,
+};
+
static int qedr_register_device(struct qedr_dev *dev)
{
int rc;
@@ -237,57 +284,11 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = dev->num_cnq;
-
- dev->ibdev.query_device = qedr_query_device;
- dev->ibdev.query_port = qedr_query_port;
- dev->ibdev.modify_port = qedr_modify_port;
-
- dev->ibdev.alloc_ucontext = qedr_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext;
- dev->ibdev.mmap = qedr_mmap;
-
- dev->ibdev.alloc_pd = qedr_alloc_pd;
- dev->ibdev.dealloc_pd = qedr_dealloc_pd;
-
- dev->ibdev.create_cq = qedr_create_cq;
- dev->ibdev.destroy_cq = qedr_destroy_cq;
- dev->ibdev.resize_cq = qedr_resize_cq;
- dev->ibdev.req_notify_cq = qedr_arm_cq;
-
- dev->ibdev.create_qp = qedr_create_qp;
- dev->ibdev.modify_qp = qedr_modify_qp;
- dev->ibdev.query_qp = qedr_query_qp;
- dev->ibdev.destroy_qp = qedr_destroy_qp;
-
- dev->ibdev.create_srq = qedr_create_srq;
- dev->ibdev.destroy_srq = qedr_destroy_srq;
- dev->ibdev.modify_srq = qedr_modify_srq;
- dev->ibdev.query_srq = qedr_query_srq;
- dev->ibdev.post_srq_recv = qedr_post_srq_recv;
- dev->ibdev.query_pkey = qedr_query_pkey;
-
- dev->ibdev.create_ah = qedr_create_ah;
- dev->ibdev.destroy_ah = qedr_destroy_ah;
-
- dev->ibdev.get_dma_mr = qedr_get_dma_mr;
- dev->ibdev.dereg_mr = qedr_dereg_mr;
- dev->ibdev.reg_user_mr = qedr_reg_user_mr;
- dev->ibdev.alloc_mr = qedr_alloc_mr;
- dev->ibdev.map_mr_sg = qedr_map_mr_sg;
-
- dev->ibdev.poll_cq = qedr_poll_cq;
- dev->ibdev.post_send = qedr_post_send;
- dev->ibdev.post_recv = qedr_post_recv;
-
- dev->ibdev.process_mad = qedr_process_mad;
-
- dev->ibdev.get_netdev = qedr_get_netdev;
-
dev->ibdev.dev.parent = &dev->pdev->dev;
- dev->ibdev.get_link_layer = qedr_link_layer;
- dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group);
+ ib_set_device_ops(&dev->ibdev, &qedr_dev_ops);
+
dev->ibdev.driver_id = RDMA_DRIVER_QEDR;
return ib_register_device(&dev->ibdev, "qedr%d", NULL);
}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 82ee4b4a7084..b342a70e2814 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -216,10 +216,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
struct qed_rdma_port *rdma_port;
dev = get_qedr_dev(ibdev);
- if (port > 1) {
- DP_ERR(dev, "invalid_port=0x%x\n", port);
- return -EINVAL;
- }
if (!dev->rdma_ctx) {
DP_ERR(dev, "rdma_ctx is NULL\n");
@@ -263,14 +259,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
- struct qedr_dev *dev;
-
- dev = get_qedr_dev(ibdev);
- if (port > 1) {
- DP_ERR(dev, "invalid_port=0x%x\n", port);
- return -EINVAL;
- }
-
return 0;
}
@@ -1148,7 +1136,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
}
static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
- struct ib_qp_init_attr *attrs)
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
{
struct qedr_device_attr *qattr = &dev->attr;
@@ -1189,7 +1178,7 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
}
/* Unprivileged user space cannot create special QP */
- if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
+ if (udata && attrs->qp_type == IB_QPT_GSI) {
DP_ERR(dev,
"create qp: userspace can't create special QPs of type=0x%x\n",
attrs->qp_type);
@@ -1552,7 +1541,7 @@ int qedr_destroy_srq(struct ib_srq *ibsrq)
in_params.srq_id = srq->srq_id;
dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
- if (ibsrq->pd->uobject)
+ if (ibsrq->uobject)
qedr_free_srq_user_params(srq);
else
qedr_free_srq_kernel_params(srq);
@@ -2005,7 +1994,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
udata ? "user library" : "kernel", pd);
- rc = qedr_check_qp_attrs(ibpd, dev, attrs);
+ rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
if (rc)
return ERR_PTR(rc);
@@ -2626,7 +2615,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
}
struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
struct qedr_ah *ah;
@@ -2639,7 +2628,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
return &ah->ibah;
}
-int qedr_destroy_ah(struct ib_ah *ibah)
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct qedr_ah *ah = get_qedr_ah(ibah);
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 0b7d0124b16c..1852b7012bf4 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -76,8 +76,8 @@ int qedr_destroy_srq(struct ib_srq *ibsrq);
int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_recv_wr);
struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
- struct ib_udata *udata);
-int qedr_destroy_ah(struct ib_ah *ibah);
+ u32 flags, struct ib_udata *udata);
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
int qedr_dereg_mr(struct ib_mr *);
struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc);
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index fb1ff59f40bd..cdbf707fa267 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3237,7 +3237,6 @@ static int init_6120_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
ret = ib_mtu_enum_to_int(qib_ibmtu);
dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_6120_tidtemplate(dd);
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 163a57a88742..9fde45538f6e 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4043,7 +4043,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
ret = ib_mtu_enum_to_int(qib_ibmtu);
dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_7220_tidtemplate(dd);
@@ -4252,7 +4251,6 @@ static int init_sdma_7220_regs(struct qib_pportdata *ppd)
unsigned word = i / 64;
unsigned bit = i & 63;
- BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index bf5e222eed8e..17d6b24b3473 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1382,7 +1382,6 @@ static void err_decode(char *msg, size_t len, u64 errs,
*msg++ = ',';
len--;
}
- BUG_ON(!msp->sz);
/* msp->sz counts the nul */
took = min_t(size_t, msp->sz - (size_t)1, len);
memcpy(msg, msp->msg, took);
@@ -6599,7 +6598,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
dd->rcvegrbufsize = max(mtu, 2048);
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_7322_tidtemplate(dd);
@@ -6904,7 +6902,6 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd)
unsigned word = erstbuf / BITS_PER_LONG;
unsigned bit = erstbuf & (BITS_PER_LONG - 1);
- BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]);
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index d7cdc77d6306..9fd69903ca57 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -209,7 +209,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
rcd->rcvegrbufs_perchunk - 1) /
rcd->rcvegrbufs_perchunk;
- BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
rcd->rcvegrbufs_perchunk_shift =
ilog2(rcd->rcvegrbufs_perchunk);
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 4845d000c22f..f92faf5ec369 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2494,5 +2494,6 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
if (dd->pport[port_idx].ibport_data.smi_ah)
- rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
+ rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah,
+ RDMA_DESTROY_AH_SLEEPABLE);
}
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 30595b358d8f..864f2af171f7 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -387,7 +387,7 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
static int qib_pcie_coalesce;
module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
-MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets");
+MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets");
/*
* Enable PCIe completion and data coalescing, on Intel 5x00 and 7300
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 757d4c9d713d..3d64081c4819 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -572,12 +572,13 @@ retry:
len = sge->length;
if (len > sge->sge_length)
len = sge->sge_length;
- BUG_ON(len == 0);
dw = (len + 3) >> 2;
addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr,
dw << 2, DMA_TO_DEVICE);
- if (dma_mapping_error(&ppd->dd->pcidev->dev, addr))
+ if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) {
+ ret = -ENOMEM;
goto unmap;
+ }
sdmadesc[0] = 0;
make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset);
/* SDmaUseLargeBuf has to be set in every descriptor */
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 4d4c31ea4e2d..868da0ece7ba 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -178,7 +178,6 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
- BUG_ON(len == 0);
rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 926f3c8eba69..31c523b2a9f5 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -237,7 +237,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
sdma_rb_node);
- BUG_ON(ret == 0);
}
pq->sdma_rb_node = sdma_rb_node;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 4b0f5761a646..276304f611ab 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -150,7 +150,6 @@ static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
len = length;
if (len > sge.sge_length)
len = sge.sge_length;
- BUG_ON(len == 0);
if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
(len != length && (len & (sizeof(u32) - 1)))) {
ndesc = 0;
@@ -193,7 +192,6 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
- BUG_ON(len == 0);
memcpy(data, sge->vaddr, len);
sge->vaddr += len;
sge->length -= len;
@@ -449,7 +447,6 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
len = length;
if (len > ss->sge.sge_length)
len = ss->sge.sge_length;
- BUG_ON(len == 0);
/* If the source address is not aligned, try to align it. */
off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
if (off) {
@@ -1365,7 +1362,7 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = rdma_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
rcu_read_unlock();
return ah;
}
@@ -1496,6 +1493,11 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode;
}
+static const struct ib_device_ops qib_dev_ops = {
+ .modify_device = qib_modify_device,
+ .process_mad = qib_process_mad,
+};
+
/**
* qib_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@@ -1558,8 +1560,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->node_guid = ppd->guid;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
- ibdev->modify_device = qib_modify_device;
- ibdev->process_mad = qib_process_mad;
snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
"Intel Infiniband HCA %s", init_utsname()->nodename);
@@ -1627,6 +1627,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
}
rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group);
+ ib_set_device_ops(ibdev, &qib_dev_ops);
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB);
if (ret)
goto err_tx;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 73bd00f8d2c8..b2323a52a0dd 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -330,6 +330,37 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str)
snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
}
+static const struct ib_device_ops usnic_dev_ops = {
+ .alloc_pd = usnic_ib_alloc_pd,
+ .alloc_ucontext = usnic_ib_alloc_ucontext,
+ .create_ah = usnic_ib_create_ah,
+ .create_cq = usnic_ib_create_cq,
+ .create_qp = usnic_ib_create_qp,
+ .dealloc_pd = usnic_ib_dealloc_pd,
+ .dealloc_ucontext = usnic_ib_dealloc_ucontext,
+ .dereg_mr = usnic_ib_dereg_mr,
+ .destroy_ah = usnic_ib_destroy_ah,
+ .destroy_cq = usnic_ib_destroy_cq,
+ .destroy_qp = usnic_ib_destroy_qp,
+ .get_dev_fw_str = usnic_get_dev_fw_str,
+ .get_dma_mr = usnic_ib_get_dma_mr,
+ .get_link_layer = usnic_ib_port_link_layer,
+ .get_netdev = usnic_get_netdev,
+ .get_port_immutable = usnic_port_immutable,
+ .mmap = usnic_ib_mmap,
+ .modify_qp = usnic_ib_modify_qp,
+ .poll_cq = usnic_ib_poll_cq,
+ .post_recv = usnic_ib_post_recv,
+ .post_send = usnic_ib_post_send,
+ .query_device = usnic_ib_query_device,
+ .query_gid = usnic_ib_query_gid,
+ .query_pkey = usnic_ib_query_pkey,
+ .query_port = usnic_ib_query_port,
+ .query_qp = usnic_ib_query_qp,
+ .reg_user_mr = usnic_ib_reg_mr,
+ .req_notify_cq = usnic_ib_req_notify_cq,
+};
+
/* Start of PF discovery section */
static void *usnic_ib_device_add(struct pci_dev *dev)
{
@@ -386,35 +417,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
- us_ibdev->ib_dev.query_device = usnic_ib_query_device;
- us_ibdev->ib_dev.query_port = usnic_ib_query_port;
- us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey;
- us_ibdev->ib_dev.query_gid = usnic_ib_query_gid;
- us_ibdev->ib_dev.get_netdev = usnic_get_netdev;
- us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer;
- us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd;
- us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd;
- us_ibdev->ib_dev.create_qp = usnic_ib_create_qp;
- us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp;
- us_ibdev->ib_dev.query_qp = usnic_ib_query_qp;
- us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp;
- us_ibdev->ib_dev.create_cq = usnic_ib_create_cq;
- us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq;
- us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr;
- us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr;
- us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext;
- us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext;
- us_ibdev->ib_dev.mmap = usnic_ib_mmap;
- us_ibdev->ib_dev.create_ah = usnic_ib_create_ah;
- us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah;
- us_ibdev->ib_dev.post_send = usnic_ib_post_send;
- us_ibdev->ib_dev.post_recv = usnic_ib_post_recv;
- us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq;
- us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
- us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
- us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable;
- us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str;
-
+ ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops);
us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
@@ -649,7 +652,7 @@ static int __init usnic_ib_init(void)
err = usnic_uiom_init(DRV_NAME);
if (err) {
- usnic_err("Unable to initalize umem with err %d\n", err);
+ usnic_err("Unable to initialize umem with err %d\n", err);
return err;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index bf5136533d49..0cdb156e165e 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -681,7 +681,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
res_spec);
if (err) {
- usnic_err("Spec does not meet miniumum req for transport %d\n",
+ usnic_err("Spec does not meet minimum req for transport %d\n",
transport);
log_spec(res_spec);
return ERR_PTR(err);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 0b91ff36768a..1d4abef17e38 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -336,13 +336,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
usnic_dbg("\n");
- mutex_lock(&us_ibdev->usdev_lock);
if (ib_get_eth_speed(ibdev, port, &props->active_speed,
- &props->active_width)) {
- mutex_unlock(&us_ibdev->usdev_lock);
+ &props->active_width))
return -EINVAL;
- }
+ /*
+ * usdev_lock is acquired after (and not before) ib_get_eth_speed call
+ * because acquiring rtnl_lock in ib_get_eth_speed, while holding
+ * usdev_lock could lead to a deadlock.
+ */
+ mutex_lock(&us_ibdev->usdev_lock);
/* props being zeroed by the caller, avoid zeroing it here */
props->lid = 0;
@@ -760,6 +763,7 @@ int usnic_ib_mmap(struct ib_ucontext *context,
/* In ib callbacks section - Start of stub funcs */
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
@@ -767,7 +771,7 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
return ERR_PTR(-EPERM);
}
-int usnic_ib_destroy_ah(struct ib_ah *ah)
+int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags)
{
usnic_dbg("\n");
return -EINVAL;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 2a2c9beb715f..e33144261b9a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -77,9 +77,10 @@ int usnic_ib_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma);
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata);
-int usnic_ib_destroy_ah(struct ib_ah *ah);
+int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags);
int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 398443f43dc3..eaa109dbc96a 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -161,6 +161,49 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
return netdev;
}
+static const struct ib_device_ops pvrdma_dev_ops = {
+ .add_gid = pvrdma_add_gid,
+ .alloc_mr = pvrdma_alloc_mr,
+ .alloc_pd = pvrdma_alloc_pd,
+ .alloc_ucontext = pvrdma_alloc_ucontext,
+ .create_ah = pvrdma_create_ah,
+ .create_cq = pvrdma_create_cq,
+ .create_qp = pvrdma_create_qp,
+ .dealloc_pd = pvrdma_dealloc_pd,
+ .dealloc_ucontext = pvrdma_dealloc_ucontext,
+ .del_gid = pvrdma_del_gid,
+ .dereg_mr = pvrdma_dereg_mr,
+ .destroy_ah = pvrdma_destroy_ah,
+ .destroy_cq = pvrdma_destroy_cq,
+ .destroy_qp = pvrdma_destroy_qp,
+ .get_dev_fw_str = pvrdma_get_fw_ver_str,
+ .get_dma_mr = pvrdma_get_dma_mr,
+ .get_link_layer = pvrdma_port_link_layer,
+ .get_netdev = pvrdma_get_netdev,
+ .get_port_immutable = pvrdma_port_immutable,
+ .map_mr_sg = pvrdma_map_mr_sg,
+ .mmap = pvrdma_mmap,
+ .modify_port = pvrdma_modify_port,
+ .modify_qp = pvrdma_modify_qp,
+ .poll_cq = pvrdma_poll_cq,
+ .post_recv = pvrdma_post_recv,
+ .post_send = pvrdma_post_send,
+ .query_device = pvrdma_query_device,
+ .query_gid = pvrdma_query_gid,
+ .query_pkey = pvrdma_query_pkey,
+ .query_port = pvrdma_query_port,
+ .query_qp = pvrdma_query_qp,
+ .reg_user_mr = pvrdma_reg_user_mr,
+ .req_notify_cq = pvrdma_req_notify_cq,
+};
+
+static const struct ib_device_ops pvrdma_dev_srq_ops = {
+ .create_srq = pvrdma_create_srq,
+ .destroy_srq = pvrdma_destroy_srq,
+ .modify_srq = pvrdma_modify_srq,
+ .query_srq = pvrdma_query_srq,
+};
+
static int pvrdma_register_device(struct pvrdma_dev *dev)
{
int ret = -1;
@@ -197,39 +240,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;
- dev->ib_dev.query_device = pvrdma_query_device;
- dev->ib_dev.query_port = pvrdma_query_port;
- dev->ib_dev.query_gid = pvrdma_query_gid;
- dev->ib_dev.query_pkey = pvrdma_query_pkey;
- dev->ib_dev.modify_port = pvrdma_modify_port;
- dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext;
- dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext;
- dev->ib_dev.mmap = pvrdma_mmap;
- dev->ib_dev.alloc_pd = pvrdma_alloc_pd;
- dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd;
- dev->ib_dev.create_ah = pvrdma_create_ah;
- dev->ib_dev.destroy_ah = pvrdma_destroy_ah;
- dev->ib_dev.create_qp = pvrdma_create_qp;
- dev->ib_dev.modify_qp = pvrdma_modify_qp;
- dev->ib_dev.query_qp = pvrdma_query_qp;
- dev->ib_dev.destroy_qp = pvrdma_destroy_qp;
- dev->ib_dev.post_send = pvrdma_post_send;
- dev->ib_dev.post_recv = pvrdma_post_recv;
- dev->ib_dev.create_cq = pvrdma_create_cq;
- dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
- dev->ib_dev.poll_cq = pvrdma_poll_cq;
- dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
- dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr;
- dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr;
- dev->ib_dev.dereg_mr = pvrdma_dereg_mr;
- dev->ib_dev.alloc_mr = pvrdma_alloc_mr;
- dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg;
- dev->ib_dev.add_gid = pvrdma_add_gid;
- dev->ib_dev.del_gid = pvrdma_del_gid;
- dev->ib_dev.get_netdev = pvrdma_get_netdev;
- dev->ib_dev.get_port_immutable = pvrdma_port_immutable;
- dev->ib_dev.get_link_layer = pvrdma_port_link_layer;
- dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str;
+ ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops);
mutex_init(&dev->port_mutex);
spin_lock_init(&dev->desc_lock);
@@ -255,10 +266,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
- dev->ib_dev.create_srq = pvrdma_create_srq;
- dev->ib_dev.modify_srq = pvrdma_modify_srq;
- dev->ib_dev.query_srq = pvrdma_query_srq;
- dev->ib_dev.destroy_srq = pvrdma_destroy_srq;
+ ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops);
dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
sizeof(struct pvrdma_srq *),
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index cf22f57a9f0d..3acf74cbe266 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -249,7 +249,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
init_completion(&qp->free);
qp->state = IB_QPS_RESET;
- qp->is_kernel = !(pd->uobject && udata);
+ qp->is_kernel = !udata;
if (!qp->is_kernel) {
dev_dbg(&dev->pdev->dev,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index dc0ce877c7a3..06ba7c7a2235 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -111,7 +111,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
unsigned long flags;
int ret;
- if (!(pd->uobject && udata)) {
+ if (!udata) {
/* No support for kernel clients. */
dev_warn(&dev->pdev->dev,
"no shared receive queue support for kernel client\n");
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index b65d10b0a875..4d238d0e484b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -533,11 +533,12 @@ int pvrdma_dealloc_pd(struct ib_pd *pd)
* @pd: the protection domain
* @ah_attr: the attributes of the AH
* @udata: user data blob
+ * @flags: create address handle flags (see enum rdma_create_ah_flags)
*
* @return: the ib_ah pointer on success, otherwise errno.
*/
struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
struct pvrdma_dev *dev = to_vdev(pd->device);
struct pvrdma_ah *ah;
@@ -555,7 +556,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
return ERR_PTR(-ENOMEM);
- ah = kzalloc(sizeof(*ah), GFP_KERNEL);
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah) {
atomic_dec(&dev->num_ahs);
return ERR_PTR(-ENOMEM);
@@ -581,10 +582,11 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
/**
* pvrdma_destroy_ah - destroy an address handle
* @ah: the address handle to destroyed
+ * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
*
* @return: 0 on success.
*/
-int pvrdma_destroy_ah(struct ib_ah *ah)
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
struct pvrdma_dev *dev = to_vdev(ah->device);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index b2e3ab50cb08..f7f758d60110 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -420,8 +420,8 @@ int pvrdma_destroy_cq(struct ib_cq *cq);
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
-int pvrdma_destroy_ah(struct ib_ah *ah);
+ u32 flags, struct ib_udata *udata);
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 084bb4baebb5..fc10e4e26ca7 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -91,6 +91,7 @@ EXPORT_SYMBOL(rvt_check_ah);
* rvt_create_ah - create an address handle
* @pd: the protection domain
* @ah_attr: the attributes of the AH
+ * @create_flags: create address handle flags (see enum rdma_create_ah_flags)
* @udata: pointer to user's input output buffer information.
*
* This may be called from interrupt context.
@@ -99,6 +100,7 @@ EXPORT_SYMBOL(rvt_check_ah);
*/
struct ib_ah *rvt_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 create_flags,
struct ib_udata *udata)
{
struct rvt_ah *ah;
@@ -135,10 +137,11 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd,
/**
* rvt_destory_ah - Destory an address handle
* @ibah: address handle
+ * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
*
* Return: 0 on success
*/
-int rvt_destroy_ah(struct ib_ah *ibah)
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
{
struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
struct rvt_ah *ah = ibah_to_rvtah(ibah);
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
index 25271b48a683..72431a618d5d 100644
--- a/drivers/infiniband/sw/rdmavt/ah.h
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -52,8 +52,9 @@
struct ib_ah *rvt_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 create_flags,
struct ib_udata *udata);
-int rvt_destroy_ah(struct ib_ah *ibah);
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags);
int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
index d6981dc04adb..108c71e3ac23 100644
--- a/drivers/infiniband/sw/rdmavt/mad.c
+++ b/drivers/infiniband/sw/rdmavt/mad.c
@@ -160,7 +160,8 @@ void rvt_free_mad_agents(struct rvt_dev_info *rdi)
ib_unregister_mad_agent(agent);
}
if (rvp->sm_ah) {
- rdma_destroy_ah(&rvp->sm_ah->ibah);
+ rdma_destroy_ah(&rvp->sm_ah->ibah,
+ RDMA_DESTROY_AH_SLEEPABLE);
rvp->sm_ah = NULL;
}
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 1735deb1a9d4..a1bd8cfc2c25 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016, 2017 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -1094,6 +1094,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->ibqp.qp_num = err;
qp->port_num = init_attr->port_num;
rvt_init_qp(rdi, qp, init_attr->qp_type);
+ if (rdi->driver_f.qp_priv_init) {
+ err = rdi->driver_f.qp_priv_init(rdi, qp, init_attr);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_rq_wq;
+ }
+ }
break;
default:
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 723d3daf2eba..aef3aa3fe667 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -392,16 +392,51 @@ enum {
_VERB_IDX_MAX /* Must always be last! */
};
-static inline int check_driver_override(struct rvt_dev_info *rdi,
- size_t offset, void *func)
-{
- if (!*(void **)((void *)&rdi->ibdev + offset)) {
- *(void **)((void *)&rdi->ibdev + offset) = func;
- return 0;
- }
-
- return 1;
-}
+static const struct ib_device_ops rvt_dev_ops = {
+ .alloc_fmr = rvt_alloc_fmr,
+ .alloc_mr = rvt_alloc_mr,
+ .alloc_pd = rvt_alloc_pd,
+ .alloc_ucontext = rvt_alloc_ucontext,
+ .attach_mcast = rvt_attach_mcast,
+ .create_ah = rvt_create_ah,
+ .create_cq = rvt_create_cq,
+ .create_qp = rvt_create_qp,
+ .create_srq = rvt_create_srq,
+ .dealloc_fmr = rvt_dealloc_fmr,
+ .dealloc_pd = rvt_dealloc_pd,
+ .dealloc_ucontext = rvt_dealloc_ucontext,
+ .dereg_mr = rvt_dereg_mr,
+ .destroy_ah = rvt_destroy_ah,
+ .destroy_cq = rvt_destroy_cq,
+ .destroy_qp = rvt_destroy_qp,
+ .destroy_srq = rvt_destroy_srq,
+ .detach_mcast = rvt_detach_mcast,
+ .get_dma_mr = rvt_get_dma_mr,
+ .get_port_immutable = rvt_get_port_immutable,
+ .map_mr_sg = rvt_map_mr_sg,
+ .map_phys_fmr = rvt_map_phys_fmr,
+ .mmap = rvt_mmap,
+ .modify_ah = rvt_modify_ah,
+ .modify_device = rvt_modify_device,
+ .modify_port = rvt_modify_port,
+ .modify_qp = rvt_modify_qp,
+ .modify_srq = rvt_modify_srq,
+ .poll_cq = rvt_poll_cq,
+ .post_recv = rvt_post_recv,
+ .post_send = rvt_post_send,
+ .post_srq_recv = rvt_post_srq_recv,
+ .query_ah = rvt_query_ah,
+ .query_device = rvt_query_device,
+ .query_gid = rvt_query_gid,
+ .query_pkey = rvt_query_pkey,
+ .query_port = rvt_query_port,
+ .query_qp = rvt_query_qp,
+ .query_srq = rvt_query_srq,
+ .reg_user_mr = rvt_reg_user_mr,
+ .req_notify_cq = rvt_req_notify_cq,
+ .resize_cq = rvt_resize_cq,
+ .unmap_fmr = rvt_unmap_fmr,
+};
static noinline int check_support(struct rvt_dev_info *rdi, int verb)
{
@@ -416,76 +451,36 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
return -EINVAL;
break;
- case QUERY_DEVICE:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_device),
- rvt_query_device);
- break;
-
case MODIFY_DEVICE:
/*
* rdmavt does not support modify device currently drivers must
* provide.
*/
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- modify_device),
- rvt_modify_device))
+ if (!rdi->ibdev.ops.modify_device)
return -EOPNOTSUPP;
break;
case QUERY_PORT:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- query_port),
- rvt_query_port))
+ if (!rdi->ibdev.ops.query_port)
if (!rdi->driver_f.query_port_state)
return -EINVAL;
break;
case MODIFY_PORT:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- modify_port),
- rvt_modify_port))
+ if (!rdi->ibdev.ops.modify_port)
if (!rdi->driver_f.cap_mask_chg ||
!rdi->driver_f.shut_down_port)
return -EINVAL;
break;
- case QUERY_PKEY:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_pkey),
- rvt_query_pkey);
- break;
-
case QUERY_GID:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- query_gid),
- rvt_query_gid))
+ if (!rdi->ibdev.ops.query_gid)
if (!rdi->driver_f.get_guid_be)
return -EINVAL;
break;
- case ALLOC_UCONTEXT:
- check_driver_override(rdi, offsetof(struct ib_device,
- alloc_ucontext),
- rvt_alloc_ucontext);
- break;
-
- case DEALLOC_UCONTEXT:
- check_driver_override(rdi, offsetof(struct ib_device,
- dealloc_ucontext),
- rvt_dealloc_ucontext);
- break;
-
- case GET_PORT_IMMUTABLE:
- check_driver_override(rdi, offsetof(struct ib_device,
- get_port_immutable),
- rvt_get_port_immutable);
- break;
-
case CREATE_QP:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- create_qp),
- rvt_create_qp))
+ if (!rdi->ibdev.ops.create_qp)
if (!rdi->driver_f.qp_priv_alloc ||
!rdi->driver_f.qp_priv_free ||
!rdi->driver_f.notify_qp_reset ||
@@ -496,9 +491,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
break;
case MODIFY_QP:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- modify_qp),
- rvt_modify_qp))
+ if (!rdi->ibdev.ops.modify_qp)
if (!rdi->driver_f.notify_qp_reset ||
!rdi->driver_f.schedule_send ||
!rdi->driver_f.get_pmtu_from_attr ||
@@ -512,9 +505,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
break;
case DESTROY_QP:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- destroy_qp),
- rvt_destroy_qp))
+ if (!rdi->ibdev.ops.destroy_qp)
if (!rdi->driver_f.qp_priv_free ||
!rdi->driver_f.notify_qp_reset ||
!rdi->driver_f.flush_qp_waiters ||
@@ -523,197 +514,14 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
return -EINVAL;
break;
- case QUERY_QP:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_qp),
- rvt_query_qp);
- break;
-
case POST_SEND:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- post_send),
- rvt_post_send))
+ if (!rdi->ibdev.ops.post_send)
if (!rdi->driver_f.schedule_send ||
!rdi->driver_f.do_send ||
!rdi->post_parms)
return -EINVAL;
break;
- case POST_RECV:
- check_driver_override(rdi, offsetof(struct ib_device,
- post_recv),
- rvt_post_recv);
- break;
- case POST_SRQ_RECV:
- check_driver_override(rdi, offsetof(struct ib_device,
- post_srq_recv),
- rvt_post_srq_recv);
- break;
-
- case CREATE_AH:
- check_driver_override(rdi, offsetof(struct ib_device,
- create_ah),
- rvt_create_ah);
- break;
-
- case DESTROY_AH:
- check_driver_override(rdi, offsetof(struct ib_device,
- destroy_ah),
- rvt_destroy_ah);
- break;
-
- case MODIFY_AH:
- check_driver_override(rdi, offsetof(struct ib_device,
- modify_ah),
- rvt_modify_ah);
- break;
-
- case QUERY_AH:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_ah),
- rvt_query_ah);
- break;
-
- case CREATE_SRQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- create_srq),
- rvt_create_srq);
- break;
-
- case MODIFY_SRQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- modify_srq),
- rvt_modify_srq);
- break;
-
- case DESTROY_SRQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- destroy_srq),
- rvt_destroy_srq);
- break;
-
- case QUERY_SRQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_srq),
- rvt_query_srq);
- break;
-
- case ATTACH_MCAST:
- check_driver_override(rdi, offsetof(struct ib_device,
- attach_mcast),
- rvt_attach_mcast);
- break;
-
- case DETACH_MCAST:
- check_driver_override(rdi, offsetof(struct ib_device,
- detach_mcast),
- rvt_detach_mcast);
- break;
-
- case GET_DMA_MR:
- check_driver_override(rdi, offsetof(struct ib_device,
- get_dma_mr),
- rvt_get_dma_mr);
- break;
-
- case REG_USER_MR:
- check_driver_override(rdi, offsetof(struct ib_device,
- reg_user_mr),
- rvt_reg_user_mr);
- break;
-
- case DEREG_MR:
- check_driver_override(rdi, offsetof(struct ib_device,
- dereg_mr),
- rvt_dereg_mr);
- break;
-
- case ALLOC_FMR:
- check_driver_override(rdi, offsetof(struct ib_device,
- alloc_fmr),
- rvt_alloc_fmr);
- break;
-
- case ALLOC_MR:
- check_driver_override(rdi, offsetof(struct ib_device,
- alloc_mr),
- rvt_alloc_mr);
- break;
-
- case MAP_MR_SG:
- check_driver_override(rdi, offsetof(struct ib_device,
- map_mr_sg),
- rvt_map_mr_sg);
- break;
-
- case MAP_PHYS_FMR:
- check_driver_override(rdi, offsetof(struct ib_device,
- map_phys_fmr),
- rvt_map_phys_fmr);
- break;
-
- case UNMAP_FMR:
- check_driver_override(rdi, offsetof(struct ib_device,
- unmap_fmr),
- rvt_unmap_fmr);
- break;
-
- case DEALLOC_FMR:
- check_driver_override(rdi, offsetof(struct ib_device,
- dealloc_fmr),
- rvt_dealloc_fmr);
- break;
-
- case MMAP:
- check_driver_override(rdi, offsetof(struct ib_device,
- mmap),
- rvt_mmap);
- break;
-
- case CREATE_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- create_cq),
- rvt_create_cq);
- break;
-
- case DESTROY_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- destroy_cq),
- rvt_destroy_cq);
- break;
-
- case POLL_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- poll_cq),
- rvt_poll_cq);
- break;
-
- case REQ_NOTFIY_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- req_notify_cq),
- rvt_req_notify_cq);
- break;
-
- case RESIZE_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- resize_cq),
- rvt_resize_cq);
- break;
-
- case ALLOC_PD:
- check_driver_override(rdi, offsetof(struct ib_device,
- alloc_pd),
- rvt_alloc_pd);
- break;
-
- case DEALLOC_PD:
- check_driver_override(rdi, offsetof(struct ib_device,
- dealloc_pd),
- rvt_dealloc_pd);
- break;
-
- default:
- return -EINVAL;
}
return 0;
@@ -745,6 +553,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
return -EINVAL;
}
+ ib_set_device_ops(&rdi->ibdev, &rvt_dev_ops);
/* Once we get past here we can use rvt_pr macros and tracepoints */
trace_rvt_dbg(rdi, "Driver attempting registration");
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index d9ec2de68738..5bde2ad964d2 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -65,8 +65,9 @@
*/
#define RXE_UVERBS_ABI_VERSION 2
-#define IB_PHYS_STATE_LINK_UP (5)
-#define IB_PHYS_STATE_LINK_DOWN (3)
+#define RDMA_LINK_PHYS_STATE_LINK_UP (5)
+#define RDMA_LINK_PHYS_STATE_DISABLED (3)
+#define RDMA_LINK_PHYS_STATE_POLLING (2)
#define RXE_ROCE_V2_SPORT (0xc000)
@@ -109,5 +110,6 @@ struct rxe_dev *get_rxe_by_name(const char *name);
void rxe_port_up(struct rxe_dev *rxe);
void rxe_port_down(struct rxe_dev *rxe);
+void rxe_set_port_state(struct rxe_dev *rxe);
#endif /* RXE_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index ea089cb091ad..e996da67a851 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -439,6 +439,7 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
*/
static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_cqe cqe;
if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
@@ -451,6 +452,11 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
advance_consumer(qp->sq.queue);
}
+ if (wqe->wr.opcode == IB_WR_SEND ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_IMM ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_INV)
+ rxe_counter_inc(rxe, RXE_CNT_RDMA_SEND);
+
/*
* we completed something so let req run again
* if it is trying to fence
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index 6aeb7a165e46..636edb5f4cf4 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -37,15 +37,18 @@ static const char * const rxe_counter_name[] = {
[RXE_CNT_SENT_PKTS] = "sent_pkts",
[RXE_CNT_RCVD_PKTS] = "rcvd_pkts",
[RXE_CNT_DUP_REQ] = "duplicate_request",
- [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_sequence",
+ [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request",
[RXE_CNT_RCV_RNR] = "rcvd_rnr_err",
[RXE_CNT_SND_RNR] = "send_rnr_err",
[RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err",
- [RXE_CNT_COMPLETER_SCHED] = "ack_deffered",
+ [RXE_CNT_COMPLETER_SCHED] = "ack_deferred",
[RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err",
[RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err",
[RXE_CNT_COMP_RETRY] = "completer_retry_err",
[RXE_CNT_SEND_ERR] = "send_err",
+ [RXE_CNT_LINK_DOWNED] = "link_downed",
+ [RXE_CNT_RDMA_SEND] = "rdma_sends",
+ [RXE_CNT_RDMA_RECV] = "rdma_recvs",
};
int rxe_ib_get_hw_stats(struct ib_device *ibdev,
@@ -59,7 +62,7 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev,
return -EINVAL;
for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++)
- stats->value[cnt] = dev->stats_counters[cnt];
+ stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]);
return ARRAY_SIZE(rxe_counter_name);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
index f44df1b76742..72c0d63c79e0 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -50,6 +50,9 @@ enum rxe_counters {
RXE_CNT_RNR_RETRY_EXCEEDED,
RXE_CNT_COMP_RETRY,
RXE_CNT_SEND_ERR,
+ RXE_CNT_LINK_DOWNED,
+ RXE_CNT_RDMA_SEND,
+ RXE_CNT_RDMA_RECV,
RXE_NUM_OF_COUNTERS
};
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index afd53f57a62b..01b74597b36a 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -157,7 +157,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct ib_qp_init_attr *init,
struct rxe_create_qp_resp __user *uresp,
- struct ib_pd *ibpd);
+ struct ib_pd *ibpd, struct ib_udata *udata);
int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
@@ -250,11 +250,12 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}
-static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
- struct rxe_pkt_info *pkt, struct sk_buff *skb)
+static inline int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
{
int err;
int is_request = pkt->mask & RXE_REQ_MASK;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
if ((is_request && (qp->req.state != QP_STATE_READY)) ||
(!is_request && (qp->resp.state != QP_STATE_READY))) {
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 40e82e0f6c2d..8fd03ae20efc 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -607,7 +607,6 @@ void rxe_port_up(struct rxe_dev *rxe)
port = &rxe->port;
port->attr.state = IB_PORT_ACTIVE;
- port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
dev_info(&rxe->ib_dev.dev, "set active\n");
@@ -620,12 +619,20 @@ void rxe_port_down(struct rxe_dev *rxe)
port = &rxe->port;
port->attr.state = IB_PORT_DOWN;
- port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
rxe_port_event(rxe, IB_EVENT_PORT_ERR);
+ rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
dev_info(&rxe->ib_dev.dev, "set down\n");
}
+void rxe_set_port_state(struct rxe_dev *rxe)
+{
+ if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev))
+ rxe_port_up(rxe);
+ else
+ rxe_port_down(rxe);
+}
+
static int rxe_notify(struct notifier_block *not_blk,
unsigned long event,
void *arg)
@@ -652,10 +659,7 @@ static int rxe_notify(struct notifier_block *not_blk,
rxe_set_mtu(rxe, ndev->mtu);
break;
case NETDEV_CHANGE:
- if (netif_running(ndev) && netif_carrier_ok(ndev))
- rxe_port_up(rxe);
- else
- rxe_port_down(rxe);
+ rxe_set_port_state(rxe);
break;
case NETDEV_REBOOT:
case NETDEV_GOING_DOWN:
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 36b53fb94a49..b5c91df22047 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -112,6 +112,18 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
return rxe_type_info[pool->type].cache;
}
+static void rxe_cache_clean(size_t cnt)
+{
+ int i;
+ struct rxe_type_info *type;
+
+ for (i = 0; i < cnt; i++) {
+ type = &rxe_type_info[i];
+ kmem_cache_destroy(type->cache);
+ type->cache = NULL;
+ }
+}
+
int rxe_cache_init(void)
{
int err;
@@ -136,24 +148,14 @@ int rxe_cache_init(void)
return 0;
err1:
- while (--i >= 0) {
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
+ rxe_cache_clean(i);
return err;
}
void rxe_cache_exit(void)
{
- int i;
- struct rxe_type_info *type;
-
- for (i = 0; i < RXE_NUM_TYPES; i++) {
- type = &rxe_type_info[i];
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
+ rxe_cache_clean(RXE_NUM_TYPES);
}
static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
@@ -241,7 +243,7 @@ static void rxe_pool_put(struct rxe_pool *pool)
kref_put(&pool->ref_cnt, rxe_pool_release);
}
-int rxe_pool_cleanup(struct rxe_pool *pool)
+void rxe_pool_cleanup(struct rxe_pool *pool)
{
unsigned long flags;
@@ -253,8 +255,6 @@ int rxe_pool_cleanup(struct rxe_pool *pool)
write_unlock_irqrestore(&pool->pool_lock, flags);
rxe_pool_put(pool);
-
- return 0;
}
static u32 alloc_index(struct rxe_pool *pool)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index aa4ba307097b..72968c29e01f 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -126,7 +126,7 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
enum rxe_elem_type type, u32 max_elem);
/* free resources from object pool */
-int rxe_pool_cleanup(struct rxe_pool *pool);
+void rxe_pool_cleanup(struct rxe_pool *pool);
/* allocate an object from pool */
void *rxe_alloc(struct rxe_pool *pool);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b9710907dac2..fd86fd2fbb26 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -97,7 +97,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
goto err1;
if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
- if (port_num != 1) {
+ if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
pr_warn("invalid port = %d\n", port_num);
goto err1;
}
@@ -336,13 +336,14 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct ib_qp_init_attr *init,
struct rxe_create_qp_resp __user *uresp,
- struct ib_pd *ibpd)
+ struct ib_pd *ibpd,
+ struct ib_udata *udata)
{
int err;
struct rxe_cq *rcq = to_rcq(init->recv_cq);
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
- struct ib_ucontext *context = ibpd->uobject ? ibpd->uobject->context : NULL;
+ struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
rxe_add_ref(pd);
rxe_add_ref(rcq);
@@ -433,7 +434,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
}
if (mask & IB_QP_PORT) {
- if (attr->port_num != 1) {
+ if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
pr_warn("invalid port %d\n", attr->port_num);
goto err1;
}
@@ -448,7 +449,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_ALT_PATH) {
if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
goto err1;
- if (attr->alt_port_num != 1) {
+ if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) {
pr_warn("invalid alt port %d\n", attr->alt_port_num);
goto err1;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 6c361d70d7cd..c5d9b558fa90 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -643,6 +643,7 @@ next_wqe:
rmr->access = wqe->wr.wr.reg.access;
rmr->lkey = wqe->wr.wr.reg.key;
rmr->rkey = wqe->wr.wr.reg.key;
+ rmr->iova = wqe->wr.wr.reg.mr->iova;
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
} else {
@@ -728,7 +729,7 @@ next_wqe:
save_state(wqe, qp, &rollback_wqe, &rollback_psn);
update_wqe_state(qp, wqe, &pkt);
update_wqe_psn(qp, wqe, &pkt, payload);
- ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
+ ret = rxe_xmit_packet(qp, &pkt, skb);
if (ret) {
qp->need_req_skb = 1;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c962160292f4..231528188250 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -124,12 +124,9 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
struct sk_buff *skb;
if (qp->resp.state == QP_STATE_ERROR) {
- skb = skb_dequeue(&qp->req_pkts);
- if (skb) {
- /* drain request packet queue */
+ while ((skb = skb_dequeue(&qp->req_pkts))) {
rxe_drop_ref(qp);
kfree_skb(skb);
- return RESPST_GET_REQ;
}
/* go drain recv wr queue */
@@ -660,7 +657,6 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
static enum resp_states read_reply(struct rxe_qp *qp,
struct rxe_pkt_info *req_pkt)
{
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
int mtu = qp->mtu;
@@ -739,7 +735,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
*p = ~icrc;
- err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err) {
pr_err("Failed sending RDMA reply.\n");
return RESPST_ERR_RNR;
@@ -838,18 +834,25 @@ static enum resp_states do_complete(struct rxe_qp *qp,
struct ib_wc *wc = &cqe.ibwc;
struct ib_uverbs_wc *uwc = &cqe.uibwc;
struct rxe_recv_wqe *wqe = qp->resp.wqe;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
if (unlikely(!wqe))
return RESPST_CLEANUP;
memset(&cqe, 0, sizeof(cqe));
- wc->wr_id = wqe->wr_id;
- wc->status = qp->resp.status;
- wc->qp = &qp->ibqp;
+ if (qp->rcq->is_user) {
+ uwc->status = qp->resp.status;
+ uwc->qp_num = qp->ibqp.qp_num;
+ uwc->wr_id = wqe->wr_id;
+ } else {
+ wc->status = qp->resp.status;
+ wc->qp = &qp->ibqp;
+ wc->wr_id = wqe->wr_id;
+ }
- /* fields after status are not required for errors */
if (wc->status == IB_WC_SUCCESS) {
+ rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
pkt->mask & RXE_WRITE_MASK) ?
IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
@@ -898,7 +901,6 @@ static enum resp_states do_complete(struct rxe_qp *qp,
}
if (pkt->mask & RXE_IETH_MASK) {
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_mem *rmr;
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
@@ -950,7 +952,6 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int err = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
0, psn, syndrome, NULL);
@@ -959,7 +960,7 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
goto err1;
}
- err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err)
pr_err_ratelimited("Failed sending ack\n");
@@ -973,7 +974,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int rc = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct resp_res *res;
skb = prepare_ack_packet(qp, pkt, &ack_pkt,
@@ -1001,7 +1001,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
res->last_psn = ack_pkt.psn;
res->cur_psn = ack_pkt.psn;
- rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ rc = rxe_xmit_packet(qp, &ack_pkt, skb);
if (rc) {
pr_err_ratelimited("Failed sending ack\n");
rxe_drop_ref(qp);
@@ -1131,8 +1131,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
if (res) {
skb_get(res->atomic.skb);
/* Resend the result. */
- rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
- pkt, res->atomic.skb);
+ rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
if (rc) {
pr_err("Failed resending result. This flow is not handled - skb ignored\n");
rc = RESPST_CLEANUP;
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index 73a19f808e1b..95a15892f7e6 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -53,22 +53,6 @@ static int sanitize_arg(const char *val, char *intf, int intf_len)
return len;
}
-static void rxe_set_port_state(struct net_device *ndev)
-{
- struct rxe_dev *rxe = net_to_rxe(ndev);
- bool is_up = netif_running(ndev) && netif_carrier_ok(ndev);
-
- if (!rxe)
- goto out;
-
- if (is_up)
- rxe_port_up(rxe);
- else
- rxe_port_down(rxe); /* down for unknown state */
-out:
- return;
-}
-
static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
{
int len;
@@ -104,7 +88,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
goto err;
}
- rxe_set_port_state(ndev);
+ rxe_set_port_state(rxe);
dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
err:
if (ndev)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9c19f2027511..b20e6e0415f5 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -56,12 +56,7 @@ static int rxe_query_port(struct ib_device *dev,
{
struct rxe_dev *rxe = to_rdev(dev);
struct rxe_port *port;
- int rc = -EINVAL;
-
- if (unlikely(port_num != 1)) {
- pr_warn("invalid port_number %d\n", port_num);
- goto out;
- }
+ int rc;
port = &rxe->port;
@@ -71,9 +66,16 @@ static int rxe_query_port(struct ib_device *dev,
mutex_lock(&rxe->usdev_lock);
rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
&attr->active_width);
+
+ if (attr->state == IB_PORT_ACTIVE)
+ attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP;
+ else if (dev_get_flags(rxe->ndev) & IFF_UP)
+ attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING;
+ else
+ attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED;
+
mutex_unlock(&rxe->usdev_lock);
-out:
return rc;
}
@@ -96,12 +98,6 @@ static int rxe_query_pkey(struct ib_device *device,
struct rxe_dev *rxe = to_rdev(device);
struct rxe_port *port;
- if (unlikely(port_num != 1)) {
- dev_warn(device->dev.parent, "invalid port_num = %d\n",
- port_num);
- goto err1;
- }
-
port = &rxe->port;
if (unlikely(index >= port->attr.pkey_tbl_len)) {
@@ -139,11 +135,6 @@ static int rxe_modify_port(struct ib_device *dev,
struct rxe_dev *rxe = to_rdev(dev);
struct rxe_port *port;
- if (unlikely(port_num != 1)) {
- pr_warn("invalid port_num = %d\n", port_num);
- goto err1;
- }
-
port = &rxe->port;
port->attr.port_cap_flags |= attr->set_port_cap_mask;
@@ -153,9 +144,6 @@ static int rxe_modify_port(struct ib_device *dev,
port->attr.qkey_viol_cntr = 0;
return 0;
-
-err1:
- return -EINVAL;
}
static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
@@ -231,6 +219,7 @@ static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
struct rdma_ah_attr *attr,
+ u32 flags,
struct ib_udata *udata)
{
@@ -278,7 +267,7 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
return 0;
}
-static int rxe_destroy_ah(struct ib_ah *ibah)
+static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);
@@ -498,7 +487,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
rxe_add_index(qp);
- err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd);
+ err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata);
if (err)
goto err3;
@@ -1157,6 +1146,52 @@ static const struct attribute_group rxe_attr_group = {
.attrs = rxe_dev_attributes,
};
+static const struct ib_device_ops rxe_dev_ops = {
+ .alloc_hw_stats = rxe_ib_alloc_hw_stats,
+ .alloc_mr = rxe_alloc_mr,
+ .alloc_pd = rxe_alloc_pd,
+ .alloc_ucontext = rxe_alloc_ucontext,
+ .attach_mcast = rxe_attach_mcast,
+ .create_ah = rxe_create_ah,
+ .create_cq = rxe_create_cq,
+ .create_qp = rxe_create_qp,
+ .create_srq = rxe_create_srq,
+ .dealloc_pd = rxe_dealloc_pd,
+ .dealloc_ucontext = rxe_dealloc_ucontext,
+ .dereg_mr = rxe_dereg_mr,
+ .destroy_ah = rxe_destroy_ah,
+ .destroy_cq = rxe_destroy_cq,
+ .destroy_qp = rxe_destroy_qp,
+ .destroy_srq = rxe_destroy_srq,
+ .detach_mcast = rxe_detach_mcast,
+ .get_dma_mr = rxe_get_dma_mr,
+ .get_hw_stats = rxe_ib_get_hw_stats,
+ .get_link_layer = rxe_get_link_layer,
+ .get_netdev = rxe_get_netdev,
+ .get_port_immutable = rxe_port_immutable,
+ .map_mr_sg = rxe_map_mr_sg,
+ .mmap = rxe_mmap,
+ .modify_ah = rxe_modify_ah,
+ .modify_device = rxe_modify_device,
+ .modify_port = rxe_modify_port,
+ .modify_qp = rxe_modify_qp,
+ .modify_srq = rxe_modify_srq,
+ .peek_cq = rxe_peek_cq,
+ .poll_cq = rxe_poll_cq,
+ .post_recv = rxe_post_recv,
+ .post_send = rxe_post_send,
+ .post_srq_recv = rxe_post_srq_recv,
+ .query_ah = rxe_query_ah,
+ .query_device = rxe_query_device,
+ .query_pkey = rxe_query_pkey,
+ .query_port = rxe_query_port,
+ .query_qp = rxe_query_qp,
+ .query_srq = rxe_query_srq,
+ .reg_user_mr = rxe_reg_user_mr,
+ .req_notify_cq = rxe_req_notify_cq,
+ .resize_cq = rxe_resize_cq,
+};
+
int rxe_register_device(struct rxe_dev *rxe)
{
int err;
@@ -1211,49 +1246,7 @@ int rxe_register_device(struct rxe_dev *rxe)
| BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
;
- dev->query_device = rxe_query_device;
- dev->modify_device = rxe_modify_device;
- dev->query_port = rxe_query_port;
- dev->modify_port = rxe_modify_port;
- dev->get_link_layer = rxe_get_link_layer;
- dev->get_netdev = rxe_get_netdev;
- dev->query_pkey = rxe_query_pkey;
- dev->alloc_ucontext = rxe_alloc_ucontext;
- dev->dealloc_ucontext = rxe_dealloc_ucontext;
- dev->mmap = rxe_mmap;
- dev->get_port_immutable = rxe_port_immutable;
- dev->alloc_pd = rxe_alloc_pd;
- dev->dealloc_pd = rxe_dealloc_pd;
- dev->create_ah = rxe_create_ah;
- dev->modify_ah = rxe_modify_ah;
- dev->query_ah = rxe_query_ah;
- dev->destroy_ah = rxe_destroy_ah;
- dev->create_srq = rxe_create_srq;
- dev->modify_srq = rxe_modify_srq;
- dev->query_srq = rxe_query_srq;
- dev->destroy_srq = rxe_destroy_srq;
- dev->post_srq_recv = rxe_post_srq_recv;
- dev->create_qp = rxe_create_qp;
- dev->modify_qp = rxe_modify_qp;
- dev->query_qp = rxe_query_qp;
- dev->destroy_qp = rxe_destroy_qp;
- dev->post_send = rxe_post_send;
- dev->post_recv = rxe_post_recv;
- dev->create_cq = rxe_create_cq;
- dev->destroy_cq = rxe_destroy_cq;
- dev->resize_cq = rxe_resize_cq;
- dev->poll_cq = rxe_poll_cq;
- dev->peek_cq = rxe_peek_cq;
- dev->req_notify_cq = rxe_req_notify_cq;
- dev->get_dma_mr = rxe_get_dma_mr;
- dev->reg_user_mr = rxe_reg_user_mr;
- dev->dereg_mr = rxe_dereg_mr;
- dev->alloc_mr = rxe_alloc_mr;
- dev->map_mr_sg = rxe_map_mr_sg;
- dev->attach_mcast = rxe_attach_mcast;
- dev->detach_mcast = rxe_detach_mcast;
- dev->get_hw_stats = rxe_ib_get_hw_stats;
- dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;
+ ib_set_device_ops(dev, &rxe_dev_ops);
tfm = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(tfm)) {
@@ -1279,11 +1272,9 @@ err1:
return err;
}
-int rxe_unregister_device(struct rxe_dev *rxe)
+void rxe_unregister_device(struct rxe_dev *rxe)
{
struct ib_device *dev = &rxe->ib_dev;
ib_unregister_device(dev);
-
- return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 82e670d6eeea..74e04801d34d 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -409,16 +409,16 @@ struct rxe_dev {
spinlock_t mmap_offset_lock; /* guard mmap_offset */
int mmap_offset;
- u64 stats_counters[RXE_NUM_OF_COUNTERS];
+ atomic64_t stats_counters[RXE_NUM_OF_COUNTERS];
struct rxe_port port;
struct list_head list;
struct crypto_shash *tfm;
};
-static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters cnt)
+static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index)
{
- rxe->stats_counters[cnt]++;
+ atomic64_inc(&rxe->stats_counters[index]);
}
static inline struct rxe_dev *to_rdev(struct ib_device *dev)
@@ -467,7 +467,7 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
}
int rxe_register_device(struct rxe_dev *rxe);
-int rxe_unregister_device(struct rxe_dev *rxe);
+void rxe_unregister_device(struct rxe_dev *rxe);
void rxe_mc_cleanup(struct rxe_pool_entry *arg);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 9006a13af1de..6d35570092d6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -66,7 +66,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
ah->last_send = 0;
kref_init(&ah->ref);
- vah = rdma_create_ah(pd, attr);
+ vah = rdma_create_ah(pd, attr, RDMA_CREATE_AH_SLEEPABLE);
if (IS_ERR(vah)) {
kfree(ah);
ah = (struct ipoib_ah *)vah;
@@ -678,7 +678,7 @@ static void __ipoib_reap_ah(struct net_device *dev)
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
- rdma_destroy_ah(ah->ah);
+ rdma_destroy_ah(ah->ah, 0);
kfree(ah);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 6214d8c0d546..d932f99201d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2453,8 +2453,8 @@ static struct net_device *ipoib_add_port(const char *format,
return ERR_PTR(result);
}
- if (hca->rdma_netdev_get_params) {
- int rc = hca->rdma_netdev_get_params(hca, port,
+ if (hca->ops.rdma_netdev_get_params) {
+ int rc = hca->ops.rdma_netdev_get_params(hca, port,
RDMA_NETDEV_IPOIB,
&params);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 3fecd87c9f2b..8c707accd148 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -997,7 +997,6 @@ static struct scsi_host_template iscsi_iser_sht = {
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
.target_alloc = iscsi_target_alloc,
- .use_clustering = ENABLE_CLUSTERING,
.slave_alloc = iscsi_iser_slave_alloc,
.proc_name = "iscsi_iser",
.this_id = -1,
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 009be8889d71..e9b7efc302d0 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -77,8 +77,8 @@ int iser_assign_reg_ops(struct iser_device *device)
struct ib_device *ib_dev = device->ib_device;
/* Assign function handles - based on FMR support */
- if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
- ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
+ if (ib_dev->ops.alloc_fmr && ib_dev->ops.dealloc_fmr &&
+ ib_dev->ops.map_phys_fmr && ib_dev->ops.unmap_fmr) {
iser_info("FMR supported, using FMR for registration\n");
device->reg_ops = &fmr_ops;
} else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
@@ -277,16 +277,13 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
- int ret;
if (!reg->mem_h)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
- ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
- if (ret)
- iser_err("ib_fmr_pool_unmap failed %d\n", ret);
+ ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
reg->mem_h = NULL;
}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
index 61558788b3fa..ae70cd18903e 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -330,10 +330,10 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
struct rdma_netdev *rn;
int rc;
- netdev = ibdev->alloc_rdma_netdev(ibdev, port_num,
- RDMA_NETDEV_OPA_VNIC,
- "veth%d", NET_NAME_UNKNOWN,
- ether_setup);
+ netdev = ibdev->ops.alloc_rdma_netdev(ibdev, port_num,
+ RDMA_NETDEV_OPA_VNIC,
+ "veth%d", NET_NAME_UNKNOWN,
+ ether_setup);
if (!netdev)
return ERR_PTR(-ENOMEM);
else if (IS_ERR(netdev))
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index d119d9afa845..560e4f2d466e 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -606,7 +606,7 @@ static void vema_set(struct opa_vnic_vema_port *port,
static void vema_send(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_wc)
{
- rdma_destroy_ah(mad_wc->send_buf->ah);
+ rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_wc->send_buf);
}
@@ -680,7 +680,7 @@ static void vema_recv(struct ib_mad_agent *mad_agent,
ib_free_send_mad(rsp);
err_rsp:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
free_recv_mad:
ib_free_recv_mad(mad_wc);
}
@@ -777,7 +777,7 @@ void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
}
rdma_ah_set_dlid(&ah_attr, trap_lid);
- ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr);
+ ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr, 0);
if (IS_ERR(ah)) {
c_err("%s:Couldn't create new AH = %p\n", __func__, ah);
c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__,
@@ -848,7 +848,7 @@ void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
}
err_sndbuf:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, 0);
err_exit:
v_err("Aborting trap\n");
}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index eed0eb3bb04c..31d91538bbf4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -132,6 +132,15 @@ MODULE_PARM_DESC(dev_loss_tmo,
" if fast_io_fail_tmo has not been set. \"off\" means that"
" this functionality is disabled.");
+static bool srp_use_imm_data = true;
+module_param_named(use_imm_data, srp_use_imm_data, bool, 0644);
+MODULE_PARM_DESC(use_imm_data,
+ "Whether or not to request permission to use immediate data during SRP login.");
+
+static unsigned int srp_max_imm_data = 8 * 1024;
+module_param_named(max_imm_data, srp_max_imm_data, uint, 0644);
+MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size.");
+
static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
@@ -573,7 +582,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
init_attr->cap.max_send_wr = m * target->queue_size;
init_attr->cap.max_recv_wr = target->queue_size + 1;
init_attr->cap.max_recv_sge = 1;
- init_attr->cap.max_send_sge = 1;
+ init_attr->cap.max_send_sge = SRP_MAX_SGE;
init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr->qp_type = IB_QPT_RC;
init_attr->send_cq = send_cq;
@@ -823,7 +832,8 @@ static u8 srp_get_subnet_timeout(struct srp_host *host)
return subnet_timeout;
}
-static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
+static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len,
+ bool multich)
{
struct srp_target_port *target = ch->target;
struct {
@@ -852,11 +862,15 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
req->ib_req.opcode = SRP_LOGIN_REQ;
req->ib_req.tag = 0;
- req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len);
+ req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len);
req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
SRP_BUF_FORMAT_INDIRECT);
req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
SRP_MULTICHAN_SINGLE);
+ if (srp_use_imm_data) {
+ req->ib_req.req_flags |= SRP_IMMED_REQUESTED;
+ req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET);
+ }
if (target->using_rdma_cm) {
req->rdma_param.flow_control = req->ib_param.flow_control;
@@ -873,6 +887,7 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
req->rdma_req.req_flags = req->ib_req.req_flags;
+ req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset;
ipi = req->rdma_req.initiator_port_id;
tpi = req->rdma_req.target_port_id;
@@ -1145,7 +1160,8 @@ static int srp_connected_ch(struct srp_target_port *target)
return c;
}
-static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
+static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len,
+ bool multich)
{
struct srp_target_port *target = ch->target;
int ret;
@@ -1158,7 +1174,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
while (1) {
init_completion(&ch->done);
- ret = srp_send_req(ch, multich);
+ ret = srp_send_req(ch, max_iu_len, multich);
if (ret)
goto out;
ret = wait_for_completion_interruptible(&ch->done);
@@ -1344,6 +1360,20 @@ static void srp_terminate_io(struct srp_rport *rport)
}
}
+/* Calculate maximum initiator to target information unit length. */
+static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data)
+{
+ uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
+ sizeof(struct srp_indirect_buf) +
+ cmd_sg_cnt * sizeof(struct srp_direct_buf);
+
+ if (use_imm_data)
+ max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
+ srp_max_imm_data);
+
+ return max_iu_len;
+}
+
/*
* It is up to the caller to ensure that srp_rport_reconnect() calls are
* serialized and that no concurrent srp_queuecommand(), srp_abort(),
@@ -1357,6 +1387,8 @@ static int srp_rport_reconnect(struct srp_rport *rport)
{
struct srp_target_port *target = rport->lld_data;
struct srp_rdma_ch *ch;
+ uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
+ srp_use_imm_data);
int i, j, ret = 0;
bool multich = false;
@@ -1402,7 +1434,7 @@ static int srp_rport_reconnect(struct srp_rport *rport)
ch = &target->ch[i];
if (ret)
break;
- ret = srp_connect_ch(ch, multich);
+ ret = srp_connect_ch(ch, max_iu_len, multich);
multich = true;
}
@@ -1764,25 +1796,29 @@ static void srp_check_mapping(struct srp_map_state *state,
* @req: SRP request
*
* Returns the length in bytes of the SRP_CMD IU or a negative value if
- * mapping failed.
+ * mapping failed. The size of any immediate data is not included in the
+ * return value.
*/
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
struct srp_request *req)
{
struct srp_target_port *target = ch->target;
- struct scatterlist *scat;
+ struct scatterlist *scat, *sg;
struct srp_cmd *cmd = req->cmd->buf;
- int len, nents, count, ret;
+ int i, len, nents, count, ret;
struct srp_device *dev;
struct ib_device *ibdev;
struct srp_map_state state;
struct srp_indirect_buf *indirect_hdr;
+ u64 data_len;
u32 idb_len, table_len;
__be32 idb_rkey;
u8 fmt;
+ req->cmd->num_sge = 1;
+
if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
- return sizeof (struct srp_cmd);
+ return sizeof(struct srp_cmd) + cmd->add_cdb_len;
if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
scmnd->sc_data_direction != DMA_TO_DEVICE) {
@@ -1794,6 +1830,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
nents = scsi_sg_count(scmnd);
scat = scsi_sglist(scmnd);
+ data_len = scsi_bufflen(scmnd);
dev = target->srp_host->srp_dev;
ibdev = dev->dev;
@@ -1802,8 +1839,31 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
if (unlikely(count == 0))
return -EIO;
+ if (ch->use_imm_data &&
+ count <= SRP_MAX_IMM_SGE &&
+ SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len &&
+ scmnd->sc_data_direction == DMA_TO_DEVICE) {
+ struct srp_imm_buf *buf;
+ struct ib_sge *sge = &req->cmd->sge[1];
+
+ fmt = SRP_DATA_DESC_IMM;
+ len = SRP_IMM_DATA_OFFSET;
+ req->nmdesc = 0;
+ buf = (void *)cmd->add_data + cmd->add_cdb_len;
+ buf->len = cpu_to_be32(data_len);
+ WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
+ for_each_sg(scat, sg, count, i) {
+ sge[i].addr = ib_sg_dma_address(ibdev, sg);
+ sge[i].length = ib_sg_dma_len(ibdev, sg);
+ sge[i].lkey = target->lkey;
+ }
+ req->cmd->num_sge += count;
+ goto map_complete;
+ }
+
fmt = SRP_DATA_DESC_DIRECT;
- len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
+ len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
+ sizeof(struct srp_direct_buf);
if (count == 1 && target->global_rkey) {
/*
@@ -1812,8 +1872,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
* single entry. So a direct descriptor along with
* the DMA MR suffices.
*/
- struct srp_direct_buf *buf = (void *) cmd->add_data;
+ struct srp_direct_buf *buf;
+ buf = (void *)cmd->add_data + cmd->add_cdb_len;
buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
buf->key = cpu_to_be32(target->global_rkey);
buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
@@ -1826,7 +1887,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
* We have more than one scatter/gather entry, so build our indirect
* descriptor table, trying to merge as many entries as we can.
*/
- indirect_hdr = (void *) cmd->add_data;
+ indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len;
ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
target->indirect_size, DMA_TO_DEVICE);
@@ -1861,8 +1922,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
* Memory registration collapsed the sg-list into one entry,
* so use a direct descriptor.
*/
- struct srp_direct_buf *buf = (void *) cmd->add_data;
+ struct srp_direct_buf *buf;
+ buf = (void *)cmd->add_data + cmd->add_cdb_len;
*buf = req->indirect_desc[0];
goto map_complete;
}
@@ -1880,7 +1942,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
idb_len = sizeof(struct srp_indirect_buf) + table_len;
fmt = SRP_DATA_DESC_INDIRECT;
- len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
+ len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
+ sizeof(struct srp_indirect_buf);
len += count * sizeof (struct srp_direct_buf);
memcpy(indirect_hdr->desc_list, req->indirect_desc,
@@ -2001,22 +2064,30 @@ static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
list_add(&iu->list, &ch->free_tx);
}
+/**
+ * srp_post_send() - send an SRP information unit
+ * @ch: RDMA channel over which to send the information unit.
+ * @iu: Information unit to send.
+ * @len: Length of the information unit excluding immediate data.
+ */
static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
struct srp_target_port *target = ch->target;
- struct ib_sge list;
struct ib_send_wr wr;
- list.addr = iu->dma;
- list.length = len;
- list.lkey = target->lkey;
+ if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE))
+ return -EINVAL;
+
+ iu->sge[0].addr = iu->dma;
+ iu->sge[0].length = len;
+ iu->sge[0].lkey = target->lkey;
iu->cqe.done = srp_send_done;
wr.next = NULL;
wr.wr_cqe = &iu->cqe;
- wr.sg_list = &list;
- wr.num_sge = 1;
+ wr.sg_list = &iu->sge[0];
+ wr.num_sge = iu->num_sge;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
@@ -2129,6 +2200,7 @@ static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
return 1;
}
+ iu->num_sge = 1;
ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
memcpy(iu->buf, rsp, len);
ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
@@ -2312,7 +2384,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
req = &ch->req_ring[idx];
dev = target->srp_host->srp_dev->dev;
- ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
+ ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len,
DMA_TO_DEVICE);
scmnd->host_scribble = (void *) req;
@@ -2324,6 +2396,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
int_to_scsilun(scmnd->device->lun, &cmd->lun);
cmd->tag = tag;
memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
+ if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) {
+ cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb),
+ 4);
+ if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN))
+ goto err_iu;
+ }
req->scmnd = scmnd;
req->cmd = iu;
@@ -2343,11 +2421,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
goto err_iu;
}
- ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
+ ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len,
DMA_TO_DEVICE);
if (srp_post_send(ch, iu, len)) {
shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
+ scmnd->result = DID_ERROR << 16;
goto err_unmap;
}
@@ -2410,7 +2489,7 @@ static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
for (i = 0; i < target->queue_size; ++i) {
ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
- target->max_iu_len,
+ ch->max_it_iu_len,
GFP_KERNEL, DMA_TO_DEVICE);
if (!ch->tx_ring[i])
goto err;
@@ -2476,6 +2555,15 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
if (lrsp->opcode == SRP_LOGIN_RSP) {
ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
+ ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP;
+ ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
+ ch->use_imm_data);
+ WARN_ON_ONCE(ch->max_it_iu_len >
+ be32_to_cpu(lrsp->max_it_iu_len));
+
+ if (ch->use_imm_data)
+ shost_printk(KERN_DEBUG, target->scsi_host,
+ PFX "using immediate data\n");
/*
* Reserve credits for task management so we don't
@@ -2864,6 +2952,8 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
return -1;
}
+ iu->num_sge = 1;
+
ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
tsk_mgmt = iu->buf;
@@ -3215,7 +3305,6 @@ static struct scsi_host_template srp_template = {
.can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
.this_id = -1,
.cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = srp_host_attrs,
.track_queue_depth = 1,
};
@@ -3403,6 +3492,9 @@ static const match_table_t srp_opt_tokens = {
/**
* srp_parse_in - parse an IP address and port number combination
+ * @net: [in] Network namespace.
+ * @sa: [out] Address family, IP address and port number.
+ * @addr_port_str: [in] IP address and port number.
*
* Parse the following address formats:
* - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
@@ -3720,6 +3812,7 @@ static ssize_t srp_create_target(struct device *dev,
int ret, node_idx, node, cpu, i;
unsigned int max_sectors_per_mr, mr_per_cmd = 0;
bool multich = false;
+ uint32_t max_iu_len;
target_host = scsi_host_alloc(&srp_template,
sizeof (struct srp_target_port));
@@ -3825,9 +3918,7 @@ static ssize_t srp_create_target(struct device *dev,
target->mr_per_cmd = mr_per_cmd;
target->indirect_size = target->sg_tablesize *
sizeof (struct srp_direct_buf);
- target->max_iu_len = sizeof (struct srp_cmd) +
- sizeof (struct srp_indirect_buf) +
- target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
+ max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data);
INIT_WORK(&target->tl_err_work, srp_tl_err_work);
INIT_WORK(&target->remove_work, srp_remove_work);
@@ -3882,7 +3973,7 @@ static ssize_t srp_create_target(struct device *dev,
if (ret)
goto err_disconnect;
- ret = srp_connect_ch(ch, multich);
+ ret = srp_connect_ch(ch, max_iu_len, multich);
if (ret) {
char dst[64];
@@ -4063,8 +4154,10 @@ static void srp_add_one(struct ib_device *device)
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
- srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
- device->map_phys_fmr && device->unmap_fmr);
+ srp_dev->has_fmr = (device->ops.alloc_fmr &&
+ device->ops.dealloc_fmr &&
+ device->ops.map_phys_fmr &&
+ device->ops.unmap_fmr);
srp_dev->has_fr = (attr->device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
@@ -4172,6 +4265,11 @@ static int __init srp_init_module(void)
{
int ret;
+ BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
+ BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
+ BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
+ BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);
+
if (srp_sg_tablesize) {
pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
if (!cmd_sg_entries)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index a2706086b9c7..b2861cd2087a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -67,6 +67,17 @@ enum {
SRP_TAG_TSK_MGMT = 1U << 31,
SRP_MAX_PAGES_PER_MR = 512,
+
+ SRP_MAX_ADD_CDB_LEN = 16,
+
+ SRP_MAX_IMM_SGE = 2,
+ SRP_MAX_SGE = SRP_MAX_IMM_SGE + 1,
+ /*
+ * Choose the immediate data offset such that a 32 byte CDB still fits.
+ */
+ SRP_IMM_DATA_OFFSET = sizeof(struct srp_cmd) +
+ SRP_MAX_ADD_CDB_LEN +
+ sizeof(struct srp_imm_buf),
};
enum srp_target_state {
@@ -130,6 +141,8 @@ struct srp_request {
/**
* struct srp_rdma_ch
* @comp_vector: Completion vector used by this RDMA channel.
+ * @max_it_iu_len: Maximum initiator-to-target information unit length.
+ * @max_ti_iu_len: Maximum target-to-initiator information unit length.
*/
struct srp_rdma_ch {
/* These are RW in the hot path, and commonly used together */
@@ -146,6 +159,9 @@ struct srp_rdma_ch {
struct ib_fmr_pool *fmr_pool;
struct srp_fr_pool *fr_pool;
};
+ uint32_t max_it_iu_len;
+ uint32_t max_ti_iu_len;
+ bool use_imm_data;
/* Everything above this point is used in the hot path of
* command processing. Try to keep them packed into cachelines.
@@ -169,7 +185,6 @@ struct srp_rdma_ch {
struct srp_iu **tx_ring;
struct srp_iu **rx_ring;
struct srp_request *req_ring;
- int max_ti_iu_len;
int comp_vector;
u64 tsk_mgmt_tag;
@@ -194,7 +209,6 @@ struct srp_target_port {
u32 ch_count;
u32 lkey;
enum srp_target_state state;
- unsigned int max_iu_len;
unsigned int cmd_sg_cnt;
unsigned int indirect_size;
bool allow_ext_sg;
@@ -259,6 +273,8 @@ struct srp_iu {
void *buf;
size_t size;
enum dma_data_direction direction;
+ u32 num_sge;
+ struct ib_sge sge[SRP_MAX_SGE];
struct ib_cqe cqe;
};
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 2357aa727dcf..e9c336cff8f5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -51,8 +51,6 @@
/* Name of this kernel module. */
#define DRV_NAME "ib_srpt"
-#define DRV_VERSION "2.0.0"
-#define DRV_RELDATE "2011-02-14"
#define SRPT_ID_STRING "Linux SRP target"
@@ -60,8 +58,7 @@
#define pr_fmt(fmt) DRV_NAME " " fmt
MODULE_AUTHOR("Vu Pham and Bart Van Assche");
-MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
- "v" DRV_VERSION " (" DRV_RELDATE ")");
+MODULE_DESCRIPTION("SCSI RDMA Protocol target driver");
MODULE_LICENSE("Dual BSD/GPL");
/*
@@ -89,8 +86,7 @@ static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp)
module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid,
0444);
MODULE_PARM_DESC(srpt_service_guid,
- "Using this value for ioc_guid, id_ext, and cm_listen_id"
- " instead of using the node_guid of the first HCA.");
+ "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA.");
static struct ib_client srpt_client;
/* Protects both rdma_cm_port and rdma_cm_id. */
@@ -462,7 +458,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_wc)
{
- rdma_destroy_ah(mad_wc->send_buf->ah);
+ rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_wc->send_buf);
}
@@ -529,7 +525,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
ib_free_send_mad(rsp);
err_rsp:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
err:
ib_free_recv_mad(mad_wc);
}
@@ -652,31 +648,33 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
* srpt_alloc_ioctx - allocate a SRPT I/O context structure
* @sdev: SRPT HCA pointer.
* @ioctx_size: I/O context size.
- * @dma_size: Size of I/O context DMA buffer.
+ * @buf_cache: I/O buffer cache.
* @dir: DMA data direction.
*/
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev,
- int ioctx_size, int dma_size,
+ int ioctx_size,
+ struct kmem_cache *buf_cache,
enum dma_data_direction dir)
{
struct srpt_ioctx *ioctx;
- ioctx = kmalloc(ioctx_size, GFP_KERNEL);
+ ioctx = kzalloc(ioctx_size, GFP_KERNEL);
if (!ioctx)
goto err;
- ioctx->buf = kmalloc(dma_size, GFP_KERNEL);
+ ioctx->buf = kmem_cache_alloc(buf_cache, GFP_KERNEL);
if (!ioctx->buf)
goto err_free_ioctx;
- ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, dma_size, dir);
+ ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf,
+ kmem_cache_size(buf_cache), dir);
if (ib_dma_mapping_error(sdev->device, ioctx->dma))
goto err_free_buf;
return ioctx;
err_free_buf:
- kfree(ioctx->buf);
+ kmem_cache_free(buf_cache, ioctx->buf);
err_free_ioctx:
kfree(ioctx);
err:
@@ -687,17 +685,19 @@ err:
* srpt_free_ioctx - free a SRPT I/O context structure
* @sdev: SRPT HCA pointer.
* @ioctx: I/O context pointer.
- * @dma_size: Size of I/O context DMA buffer.
+ * @buf_cache: I/O buffer cache.
* @dir: DMA data direction.
*/
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
- int dma_size, enum dma_data_direction dir)
+ struct kmem_cache *buf_cache,
+ enum dma_data_direction dir)
{
if (!ioctx)
return;
- ib_dma_unmap_single(sdev->device, ioctx->dma, dma_size, dir);
- kfree(ioctx->buf);
+ ib_dma_unmap_single(sdev->device, ioctx->dma,
+ kmem_cache_size(buf_cache), dir);
+ kmem_cache_free(buf_cache, ioctx->buf);
kfree(ioctx);
}
@@ -706,33 +706,38 @@ static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
* @sdev: Device to allocate the I/O context ring for.
* @ring_size: Number of elements in the I/O context ring.
* @ioctx_size: I/O context size.
- * @dma_size: DMA buffer size.
+ * @buf_cache: I/O buffer cache.
+ * @alignment_offset: Offset in each ring buffer at which the SRP information
+ * unit starts.
* @dir: DMA data direction.
*/
static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
int ring_size, int ioctx_size,
- int dma_size, enum dma_data_direction dir)
+ struct kmem_cache *buf_cache,
+ int alignment_offset,
+ enum dma_data_direction dir)
{
struct srpt_ioctx **ring;
int i;
- WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx)
- && ioctx_size != sizeof(struct srpt_send_ioctx));
+ WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) &&
+ ioctx_size != sizeof(struct srpt_send_ioctx));
ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL);
if (!ring)
goto out;
for (i = 0; i < ring_size; ++i) {
- ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, dma_size, dir);
+ ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, buf_cache, dir);
if (!ring[i])
goto err;
ring[i]->index = i;
+ ring[i]->offset = alignment_offset;
}
goto out;
err:
while (--i >= 0)
- srpt_free_ioctx(sdev, ring[i], dma_size, dir);
+ srpt_free_ioctx(sdev, ring[i], buf_cache, dir);
kvfree(ring);
ring = NULL;
out:
@@ -744,12 +749,13 @@ out:
* @ioctx_ring: I/O context ring to be freed.
* @sdev: SRPT HCA pointer.
* @ring_size: Number of ring elements.
- * @dma_size: Size of I/O context DMA buffer.
+ * @buf_cache: I/O buffer cache.
* @dir: DMA data direction.
*/
static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
struct srpt_device *sdev, int ring_size,
- int dma_size, enum dma_data_direction dir)
+ struct kmem_cache *buf_cache,
+ enum dma_data_direction dir)
{
int i;
@@ -757,7 +763,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
return;
for (i = 0; i < ring_size; ++i)
- srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
+ srpt_free_ioctx(sdev, ioctx_ring[i], buf_cache, dir);
kvfree(ioctx_ring);
}
@@ -819,7 +825,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
struct ib_recv_wr wr;
BUG_ON(!sdev);
- list.addr = ioctx->ioctx.dma;
+ list.addr = ioctx->ioctx.dma + ioctx->ioctx.offset;
list.length = srp_max_req_size;
list.lkey = sdev->lkey;
@@ -985,23 +991,28 @@ static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd)
/**
* srpt_get_desc_tbl - parse the data descriptors of a SRP_CMD request
- * @ioctx: Pointer to the I/O context associated with the request.
+ * @recv_ioctx: I/O context associated with the received command @srp_cmd.
+ * @ioctx: I/O context that will be used for responding to the initiator.
* @srp_cmd: Pointer to the SRP_CMD request data.
* @dir: Pointer to the variable to which the transfer direction will be
* written.
- * @sg: [out] scatterlist allocated for the parsed SRP_CMD.
+ * @sg: [out] scatterlist for the parsed SRP_CMD.
* @sg_cnt: [out] length of @sg.
* @data_len: Pointer to the variable to which the total data length of all
* descriptors in the SRP_CMD request will be written.
+ * @imm_data_offset: [in] Offset in SRP_CMD requests at which immediate data
+ * starts.
*
* This function initializes ioctx->nrbuf and ioctx->r_bufs.
*
* Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors;
* -ENOMEM when memory allocation fails and zero upon success.
*/
-static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
+static int srpt_get_desc_tbl(struct srpt_recv_ioctx *recv_ioctx,
+ struct srpt_send_ioctx *ioctx,
struct srp_cmd *srp_cmd, enum dma_data_direction *dir,
- struct scatterlist **sg, unsigned *sg_cnt, u64 *data_len)
+ struct scatterlist **sg, unsigned int *sg_cnt, u64 *data_len,
+ u16 imm_data_offset)
{
BUG_ON(!dir);
BUG_ON(!data_len);
@@ -1025,7 +1036,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
- struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd);
+ struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd);
*data_len = be32_to_cpu(db->len);
return srpt_alloc_rw_ctxs(ioctx, db, 1, sg, sg_cnt);
@@ -1037,8 +1048,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
if (nbufs >
(srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
- pr_err("received unsupported SRP_CMD request"
- " type (%u out + %u in != %u / %zu)\n",
+ pr_err("received unsupported SRP_CMD request type (%u out + %u in != %u / %zu)\n",
srp_cmd->data_out_desc_cnt,
srp_cmd->data_in_desc_cnt,
be32_to_cpu(idb->table_desc.len),
@@ -1049,6 +1059,40 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
*data_len = be32_to_cpu(idb->len);
return srpt_alloc_rw_ctxs(ioctx, idb->desc_list, nbufs,
sg, sg_cnt);
+ } else if ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_IMM) {
+ struct srp_imm_buf *imm_buf = srpt_get_desc_buf(srp_cmd);
+ void *data = (void *)srp_cmd + imm_data_offset;
+ uint32_t len = be32_to_cpu(imm_buf->len);
+ uint32_t req_size = imm_data_offset + len;
+
+ if (req_size > srp_max_req_size) {
+ pr_err("Immediate data (length %d + %d) exceeds request size %d\n",
+ imm_data_offset, len, srp_max_req_size);
+ return -EINVAL;
+ }
+ if (recv_ioctx->byte_len < req_size) {
+ pr_err("Received too few data - %d < %d\n",
+ recv_ioctx->byte_len, req_size);
+ return -EIO;
+ }
+ /*
+ * The immediate data buffer descriptor must occur before the
+ * immediate data itself.
+ */
+ if ((void *)(imm_buf + 1) > (void *)data) {
+ pr_err("Received invalid write request\n");
+ return -EINVAL;
+ }
+ *data_len = len;
+ ioctx->recv_ioctx = recv_ioctx;
+ if ((uintptr_t)data & 511) {
+ pr_warn_once("Internal error - the receive buffers are not aligned properly.\n");
+ return -EINVAL;
+ }
+ sg_init_one(&ioctx->imm_sg, data, len);
+ *sg = &ioctx->imm_sg;
+ *sg_cnt = 1;
+ return 0;
} else {
*data_len = 0;
return 0;
@@ -1191,6 +1235,7 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
BUG_ON(ioctx->ch != ch);
ioctx->state = SRPT_STATE_NEW;
+ WARN_ON_ONCE(ioctx->recv_ioctx);
ioctx->n_rdma = 0;
ioctx->n_rw_ctx = 0;
ioctx->queue_status_only = false;
@@ -1352,8 +1397,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
if (sense_data_len > max_sense_len) {
- pr_warn("truncated sense data from %d to %d"
- " bytes\n", sense_data_len, max_sense_len);
+ pr_warn("truncated sense data from %d to %d bytes\n",
+ sense_data_len, max_sense_len);
sense_data_len = max_sense_len;
}
@@ -1433,7 +1478,7 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
BUG_ON(!send_ioctx);
- srp_cmd = recv_ioctx->ioctx.buf;
+ srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset;
cmd = &send_ioctx->cmd;
cmd->tag = srp_cmd->tag;
@@ -1453,8 +1498,8 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
break;
}
- rc = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &sg, &sg_cnt,
- &data_len);
+ rc = srpt_get_desc_tbl(recv_ioctx, send_ioctx, srp_cmd, &dir,
+ &sg, &sg_cnt, &data_len, ch->imm_data_offset);
if (rc) {
if (rc != -EAGAIN) {
pr_err("0x%llx: parsing SRP descriptor table failed.\n",
@@ -1521,7 +1566,7 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
BUG_ON(!send_ioctx);
- srp_tsk = recv_ioctx->ioctx.buf;
+ srp_tsk = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset;
cmd = &send_ioctx->cmd;
pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld ch %p sess %p\n",
@@ -1564,10 +1609,11 @@ srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx)
goto push;
ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
- recv_ioctx->ioctx.dma, srp_max_req_size,
+ recv_ioctx->ioctx.dma,
+ recv_ioctx->ioctx.offset + srp_max_req_size,
DMA_FROM_DEVICE);
- srp_cmd = recv_ioctx->ioctx.buf;
+ srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset;
opcode = srp_cmd->opcode;
if (opcode == SRP_CMD || opcode == SRP_TSK_MGMT) {
send_ioctx = srpt_get_send_ioctx(ch);
@@ -1604,7 +1650,8 @@ srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx)
break;
}
- srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
+ if (!send_ioctx || !send_ioctx->recv_ioctx)
+ srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
res = true;
out:
@@ -1630,6 +1677,7 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
req_lim = atomic_dec_return(&ch->req_lim);
if (unlikely(req_lim < 0))
pr_err("req_lim = %d < 0\n", req_lim);
+ ioctx->byte_len = wc->byte_len;
srpt_handle_new_iu(ch, ioctx);
} else {
pr_info_ratelimited("receiving failed for ioctx %p with status %d\n",
@@ -1693,14 +1741,14 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail);
if (wc->status != IB_WC_SUCCESS)
- pr_info("sending response for ioctx 0x%p failed"
- " with status %d\n", ioctx, wc->status);
+ pr_info("sending response for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
if (state != SRPT_STATE_DONE) {
transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
- pr_err("IB completion has been received too late for"
- " wr_id = %u.\n", ioctx->ioctx.index);
+ pr_err("IB completion has been received too late for wr_id = %u.\n",
+ ioctx->ioctx.index);
}
srpt_process_wait_list(ch);
@@ -1754,6 +1802,8 @@ retry:
qp_init->cap.max_rdma_ctxs = sq_size / 2;
qp_init->cap.max_send_sge = min(attrs->max_send_sge,
SRPT_MAX_SG_PER_WQE);
+ qp_init->cap.max_recv_sge = min(attrs->max_recv_sge,
+ SRPT_MAX_SG_PER_WQE);
qp_init->port_num = ch->sport->port;
if (sdev->use_srq) {
qp_init->srq = sdev->srq;
@@ -2010,6 +2060,14 @@ static void srpt_free_ch(struct kref *kref)
kfree_rcu(ch, rcu);
}
+/*
+ * Shut down the SCSI target session, tell the connection manager to
+ * disconnect the associated RDMA channel, transition the QP to the error
+ * state and remove the channel from the channel list. This function is
+ * typically called from inside srpt_zerolength_write_done(). Concurrent
+ * srpt_zerolength_write() calls from inside srpt_close_ch() are possible
+ * as long as the channel is on sport->nexus_list.
+ */
static void srpt_release_channel_work(struct work_struct *w)
{
struct srpt_rdma_ch *ch;
@@ -2037,20 +2095,24 @@ static void srpt_release_channel_work(struct work_struct *w)
else
ib_destroy_cm_id(ch->ib_cm.cm_id);
+ sport = ch->sport;
+ mutex_lock(&sport->mutex);
+ list_del_rcu(&ch->list);
+ mutex_unlock(&sport->mutex);
+
srpt_destroy_ch_ib(ch);
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
ch->sport->sdev, ch->rq_size,
- ch->max_rsp_size, DMA_TO_DEVICE);
+ ch->rsp_buf_cache, DMA_TO_DEVICE);
+
+ kmem_cache_destroy(ch->rsp_buf_cache);
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
sdev, ch->rq_size,
- srp_max_req_size, DMA_FROM_DEVICE);
+ ch->req_buf_cache, DMA_FROM_DEVICE);
- sport = ch->sport;
- mutex_lock(&sport->mutex);
- list_del_rcu(&ch->list);
- mutex_unlock(&sport->mutex);
+ kmem_cache_destroy(ch->req_buf_cache);
wake_up(&sport->ch_releaseQ);
@@ -2174,14 +2236,19 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
INIT_LIST_HEAD(&ch->cmd_wait_list);
ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size;
+ ch->rsp_buf_cache = kmem_cache_create("srpt-rsp-buf", ch->max_rsp_size,
+ 512, 0, NULL);
+ if (!ch->rsp_buf_cache)
+ goto free_ch;
+
ch->ioctx_ring = (struct srpt_send_ioctx **)
srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
sizeof(*ch->ioctx_ring[0]),
- ch->max_rsp_size, DMA_TO_DEVICE);
+ ch->rsp_buf_cache, 0, DMA_TO_DEVICE);
if (!ch->ioctx_ring) {
pr_err("rejected SRP_LOGIN_REQ because creating a new QP SQ ring failed.\n");
rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- goto free_ch;
+ goto free_rsp_cache;
}
INIT_LIST_HEAD(&ch->free_list);
@@ -2190,16 +2257,39 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list);
}
if (!sdev->use_srq) {
+ u16 imm_data_offset = req->req_flags & SRP_IMMED_REQUESTED ?
+ be16_to_cpu(req->imm_data_offset) : 0;
+ u16 alignment_offset;
+ u32 req_sz;
+
+ if (req->req_flags & SRP_IMMED_REQUESTED)
+ pr_debug("imm_data_offset = %d\n",
+ be16_to_cpu(req->imm_data_offset));
+ if (imm_data_offset >= sizeof(struct srp_cmd)) {
+ ch->imm_data_offset = imm_data_offset;
+ rsp->rsp_flags |= SRP_LOGIN_RSP_IMMED_SUPP;
+ } else {
+ ch->imm_data_offset = 0;
+ }
+ alignment_offset = round_up(imm_data_offset, 512) -
+ imm_data_offset;
+ req_sz = alignment_offset + imm_data_offset + srp_max_req_size;
+ ch->req_buf_cache = kmem_cache_create("srpt-req-buf", req_sz,
+ 512, 0, NULL);
+ if (!ch->req_buf_cache)
+ goto free_rsp_ring;
+
ch->ioctx_recv_ring = (struct srpt_recv_ioctx **)
srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
sizeof(*ch->ioctx_recv_ring[0]),
- srp_max_req_size,
+ ch->req_buf_cache,
+ alignment_offset,
DMA_FROM_DEVICE);
if (!ch->ioctx_recv_ring) {
pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n");
rej->reason =
cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- goto free_ring;
+ goto free_recv_cache;
}
for (i = 0; i < ch->rq_size; i++)
INIT_LIST_HEAD(&ch->ioctx_recv_ring[i]->wait_list);
@@ -2249,17 +2339,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
struct srpt_rdma_ch *ch2;
- rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
-
list_for_each_entry(ch2, &nexus->ch_list, list) {
if (srpt_disconnect_ch(ch2) < 0)
continue;
pr_info("Relogin - closed existing channel %s\n",
ch2->sess_name);
- rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
+ rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
}
} else {
- rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
+ rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
}
list_add_tail_rcu(&ch->list, &nexus->ch_list);
@@ -2289,7 +2377,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
/* create srp_login_response */
rsp->opcode = SRP_LOGIN_RSP;
rsp->tag = req->tag;
- rsp->max_it_iu_len = req->req_it_iu_len;
+ rsp->max_it_iu_len = cpu_to_be32(srp_max_req_size);
rsp->max_ti_iu_len = req->req_it_iu_len;
ch->max_ti_iu_len = it_iu_len;
rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
@@ -2353,12 +2441,18 @@ destroy_ib:
free_recv_ring:
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
ch->sport->sdev, ch->rq_size,
- srp_max_req_size, DMA_FROM_DEVICE);
+ ch->req_buf_cache, DMA_FROM_DEVICE);
+
+free_recv_cache:
+ kmem_cache_destroy(ch->req_buf_cache);
-free_ring:
+free_rsp_ring:
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
ch->sport->sdev, ch->rq_size,
- ch->max_rsp_size, DMA_TO_DEVICE);
+ ch->rsp_buf_cache, DMA_TO_DEVICE);
+
+free_rsp_cache:
+ kmem_cache_destroy(ch->rsp_buf_cache);
free_ch:
if (rdma_cm_id)
@@ -2439,6 +2533,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id,
req.req_flags = req_rdma->req_flags;
memcpy(req.initiator_port_id, req_rdma->initiator_port_id, 16);
memcpy(req.target_port_id, req_rdma->target_port_id, 16);
+ req.imm_data_offset = req_rdma->imm_data_offset;
snprintf(src_addr, sizeof(src_addr), "%pIS",
&cm_id->route.addr.src_addr);
@@ -2629,6 +2724,12 @@ static int srpt_write_pending(struct se_cmd *se_cmd)
enum srpt_command_state new_state;
int ret, i;
+ if (ioctx->recv_ioctx) {
+ srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN);
+ target_execute_cmd(&ioctx->cmd);
+ return 0;
+ }
+
new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
WARN_ON(new_state == SRPT_STATE_DONE);
@@ -2908,7 +3009,9 @@ static void srpt_free_srq(struct srpt_device *sdev)
ib_destroy_srq(sdev->srq);
srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
- sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE);
+ sdev->srq_size, sdev->req_buf_cache,
+ DMA_FROM_DEVICE);
+ kmem_cache_destroy(sdev->req_buf_cache);
sdev->srq = NULL;
}
@@ -2935,14 +3038,17 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size,
sdev->device->attrs.max_srq_wr, dev_name(&device->dev));
+ sdev->req_buf_cache = kmem_cache_create("srpt-srq-req-buf",
+ srp_max_req_size, 0, 0, NULL);
+ if (!sdev->req_buf_cache)
+ goto free_srq;
+
sdev->ioctx_ring = (struct srpt_recv_ioctx **)
srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
sizeof(*sdev->ioctx_ring[0]),
- srp_max_req_size, DMA_FROM_DEVICE);
- if (!sdev->ioctx_ring) {
- ib_destroy_srq(srq);
- return -ENOMEM;
- }
+ sdev->req_buf_cache, 0, DMA_FROM_DEVICE);
+ if (!sdev->ioctx_ring)
+ goto free_cache;
sdev->use_srq = true;
sdev->srq = srq;
@@ -2953,6 +3059,13 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
}
return 0;
+
+free_cache:
+ kmem_cache_destroy(sdev->req_buf_cache);
+
+free_srq:
+ ib_destroy_srq(srq);
+ return -ENOMEM;
}
static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
@@ -3015,9 +3128,8 @@ static void srpt_add_one(struct ib_device *device)
}
/* print out target login information */
- pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,"
- "pkey=ffff,service_id=%016llx\n", srpt_service_guid,
- srpt_service_guid, srpt_service_guid);
+ pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n",
+ srpt_service_guid, srpt_service_guid, srpt_service_guid);
/*
* We do not have a consistent service_id (ie. also id_ext of target_id)
@@ -3147,11 +3259,6 @@ static int srpt_check_false(struct se_portal_group *se_tpg)
return 0;
}
-static char *srpt_get_fabric_name(void)
-{
- return "srpt";
-}
-
static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg)
{
return tpg->se_tpg_wwn->priv;
@@ -3182,11 +3289,18 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
struct srpt_send_ioctx *ioctx = container_of(se_cmd,
struct srpt_send_ioctx, cmd);
struct srpt_rdma_ch *ch = ioctx->ch;
+ struct srpt_recv_ioctx *recv_ioctx = ioctx->recv_ioctx;
unsigned long flags;
WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE &&
!(ioctx->cmd.transport_state & CMD_T_ABORTED));
+ if (recv_ioctx) {
+ WARN_ON_ONCE(!list_empty(&recv_ioctx->wait_list));
+ ioctx->recv_ioctx = NULL;
+ srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
+ }
+
if (ioctx->n_rw_ctx) {
srpt_free_rw_ctxs(ch, ioctx);
ioctx->n_rw_ctx = 0;
@@ -3572,7 +3686,7 @@ static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page)
struct se_portal_group *se_tpg = to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
- return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0);
+ return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled);
}
static ssize_t srpt_tpg_enable_store(struct config_item *item,
@@ -3581,7 +3695,7 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
struct se_portal_group *se_tpg = to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long tmp;
- int ret;
+ int ret;
ret = kstrtoul(page, 0, &tmp);
if (ret < 0) {
@@ -3617,7 +3731,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
const char *name)
{
struct srpt_port *sport = wwn->priv;
- static struct se_portal_group *tpg;
+ struct se_portal_group *tpg;
int res;
WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
@@ -3666,7 +3780,7 @@ static void srpt_drop_tport(struct se_wwn *wwn)
static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
{
- return scnprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION);
+ return scnprintf(buf, PAGE_SIZE, "\n");
}
CONFIGFS_ATTR_RO(srpt_wwn_, version);
@@ -3678,8 +3792,7 @@ static struct configfs_attribute *srpt_wwn_attrs[] = {
static const struct target_core_fabric_ops srpt_template = {
.module = THIS_MODULE,
- .name = "srpt",
- .get_fabric_name = srpt_get_fabric_name,
+ .fabric_name = "srpt",
.tpg_get_wwn = srpt_get_fabric_wwn,
.tpg_get_tag = srpt_get_tag,
.tpg_check_demo_mode = srpt_check_false,
@@ -3730,16 +3843,14 @@ static int __init srpt_init_module(void)
ret = -EINVAL;
if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
- pr_err("invalid value %d for kernel module parameter"
- " srp_max_req_size -- must be at least %d.\n",
+ pr_err("invalid value %d for kernel module parameter srp_max_req_size -- must be at least %d.\n",
srp_max_req_size, MIN_MAX_REQ_SIZE);
goto out;
}
if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
|| srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
- pr_err("invalid value %d for kernel module parameter"
- " srpt_srq_size -- must be in the range [%d..%d].\n",
+ pr_err("invalid value %d for kernel module parameter srpt_srq_size -- must be in the range [%d..%d].\n",
srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
goto out;
}
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 444dfd7281b5..39b3e50baf3d 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -104,10 +104,6 @@ enum {
SRP_CMD_ORDERED_Q = 0x2,
SRP_CMD_ACA = 0x4,
- SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0,
- SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1,
- SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
-
SRPT_DEF_SG_TABLESIZE = 128,
/*
* An experimentally determined value that avoids that QP creation
@@ -124,11 +120,18 @@ enum {
MAX_SRPT_RDMA_SIZE = 1U << 24,
MAX_SRPT_RSP_SIZE = 1024,
+ SRP_MAX_ADD_CDB_LEN = 16,
+ SRP_MAX_IMM_DATA_OFFSET = 80,
+ SRP_MAX_IMM_DATA = 8 * 1024,
MIN_MAX_REQ_SIZE = 996,
- DEFAULT_MAX_REQ_SIZE
- = sizeof(struct srp_cmd)/*48*/
- + sizeof(struct srp_indirect_buf)/*20*/
- + 128 * sizeof(struct srp_direct_buf)/*16*/,
+ DEFAULT_MAX_REQ_SIZE_1 = sizeof(struct srp_cmd)/*48*/ +
+ SRP_MAX_ADD_CDB_LEN +
+ sizeof(struct srp_indirect_buf)/*20*/ +
+ 128 * sizeof(struct srp_direct_buf)/*16*/,
+ DEFAULT_MAX_REQ_SIZE_2 = SRP_MAX_IMM_DATA_OFFSET +
+ sizeof(struct srp_imm_buf) + SRP_MAX_IMM_DATA,
+ DEFAULT_MAX_REQ_SIZE = DEFAULT_MAX_REQ_SIZE_1 > DEFAULT_MAX_REQ_SIZE_2 ?
+ DEFAULT_MAX_REQ_SIZE_1 : DEFAULT_MAX_REQ_SIZE_2,
MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4,
DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */
@@ -165,12 +168,14 @@ enum srpt_command_state {
* @cqe: Completion queue element.
* @buf: Pointer to the buffer.
* @dma: DMA address of the buffer.
+ * @offset: Offset of the first byte in @buf and @dma that is actually used.
* @index: Index of the I/O context in its ioctx_ring array.
*/
struct srpt_ioctx {
struct ib_cqe cqe;
void *buf;
dma_addr_t dma;
+ uint32_t offset;
uint32_t index;
};
@@ -178,12 +183,14 @@ struct srpt_ioctx {
* struct srpt_recv_ioctx - SRPT receive I/O context
* @ioctx: See above.
* @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list.
+ * @byte_len: Number of bytes in @ioctx.buf.
*/
struct srpt_recv_ioctx {
struct srpt_ioctx ioctx;
struct list_head wait_list;
+ int byte_len;
};
-
+
struct srpt_rw_ctx {
struct rdma_rw_ctx rw;
struct scatterlist *sg;
@@ -194,8 +201,11 @@ struct srpt_rw_ctx {
* struct srpt_send_ioctx - SRPT send I/O context
* @ioctx: See above.
* @ch: Channel pointer.
+ * @recv_ioctx: Receive I/O context associated with this send I/O context.
+ * Only used for processing immediate data.
* @s_rw_ctx: @rw_ctxs points here if only a single rw_ctx is needed.
* @rw_ctxs: RDMA read/write contexts.
+ * @imm_sg: Scatterlist for immediate data.
* @rdma_cqe: RDMA completion queue element.
* @free_list: Node in srpt_rdma_ch.free_list.
* @state: I/O context state.
@@ -209,10 +219,13 @@ struct srpt_rw_ctx {
struct srpt_send_ioctx {
struct srpt_ioctx ioctx;
struct srpt_rdma_ch *ch;
+ struct srpt_recv_ioctx *recv_ioctx;
struct srpt_rw_ctx s_rw_ctx;
struct srpt_rw_ctx *rw_ctxs;
+ struct scatterlist imm_sg;
+
struct ib_cqe rdma_cqe;
struct list_head free_list;
enum srpt_command_state state;
@@ -245,7 +258,10 @@ enum rdma_ch_state {
* struct srpt_rdma_ch - RDMA channel
* @nexus: I_T nexus this channel is associated with.
* @qp: IB queue pair used for communicating over this channel.
- * @cm_id: IB CM ID associated with the channel.
+ * @ib_cm: See below.
+ * @ib_cm.cm_id: IB CM ID associated with the channel.
+ * @rdma_cm: See below.
+ * @rdma_cm.cm_id: RDMA CM ID associated with the channel.
* @cq: IB completion queue for this channel.
* @zw_cqe: Zero-length write CQE.
* @rcu: RCU head.
@@ -259,12 +275,15 @@ enum rdma_ch_state {
* @req_lim: request limit: maximum number of requests that may be sent
* by the initiator without having received a response.
* @req_lim_delta: Number of credits not yet sent back to the initiator.
+ * @imm_data_offset: Offset from start of SRP_CMD for immediate data.
* @spinlock: Protects free_list and state.
* @free_list: Head of list with free send I/O contexts.
* @state: channel state. See also enum rdma_ch_state.
* @using_rdma_cm: Whether the RDMA/CM or IB/CM is used for this channel.
* @processing_wait_list: Whether or not cmd_wait_list is being processed.
+ * @rsp_buf_cache: kmem_cache for @ioctx_ring.
* @ioctx_ring: Send ring.
+ * @req_buf_cache: kmem_cache for @ioctx_recv_ring.
* @ioctx_recv_ring: Receive I/O context ring.
* @list: Node in srpt_nexus.ch_list.
* @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
@@ -297,10 +316,13 @@ struct srpt_rdma_ch {
int max_ti_iu_len;
atomic_t req_lim;
atomic_t req_lim_delta;
+ u16 imm_data_offset;
spinlock_t spinlock;
struct list_head free_list;
enum rdma_ch_state state;
+ struct kmem_cache *rsp_buf_cache;
struct srpt_send_ioctx **ioctx_ring;
+ struct kmem_cache *req_buf_cache;
struct srpt_recv_ioctx **ioctx_recv_ring;
struct list_head list;
struct list_head cmd_wait_list;
@@ -395,6 +417,7 @@ struct srpt_port {
* @srq_size: SRQ size.
* @sdev_mutex: Serializes use_srq changes.
* @use_srq: Whether or not to use SRQ.
+ * @req_buf_cache: kmem_cache for @ioctx_ring buffers.
* @ioctx_ring: Per-HCA SRQ.
* @event_handler: Per-HCA asynchronous IB event handler.
* @list: Node in srpt_dev_list.
@@ -409,6 +432,7 @@ struct srpt_device {
int srq_size;
struct mutex sdev_mutex;
bool use_srq;
+ struct kmem_cache *req_buf_cache;
struct srpt_recv_ioctx **ioctx_ring;
struct ib_event_handler event_handler;
struct list_head list;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 1167ff0416cf..567221cca13c 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -55,8 +55,6 @@
#include "amd_iommu_types.h"
#include "irq_remapping.h"
-#define AMD_IOMMU_MAPPING_ERROR 0
-
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
#define LOOP_TIMEOUT 100000
@@ -2186,7 +2184,7 @@ static int amd_iommu_add_device(struct device *dev)
dev_name(dev));
iommu_ignore_device(dev);
- dev->dma_ops = &dma_direct_ops;
+ dev->dma_ops = NULL;
goto out;
}
init_iommu_group(dev);
@@ -2339,7 +2337,7 @@ static dma_addr_t __map_single(struct device *dev,
paddr &= PAGE_MASK;
address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask);
- if (address == AMD_IOMMU_MAPPING_ERROR)
+ if (!address)
goto out;
prot = dir2prot(direction);
@@ -2376,7 +2374,7 @@ out_unmap:
dma_ops_free_iova(dma_dom, address, pages);
- return AMD_IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
/*
@@ -2427,7 +2425,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
if (PTR_ERR(domain) == -EINVAL)
return (dma_addr_t)paddr;
else if (IS_ERR(domain))
- return AMD_IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
dma_mask = *dev->dma_mask;
dma_dom = to_dma_ops_domain(domain);
@@ -2504,7 +2502,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
npages = sg_num_pages(dev, sglist, nelems);
address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
- if (address == AMD_IOMMU_MAPPING_ERROR)
+ if (address == DMA_MAPPING_ERROR)
goto out_err;
prot = dir2prot(direction);
@@ -2627,7 +2625,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
*dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
size, DMA_BIDIRECTIONAL, dma_mask);
- if (*dma_addr == AMD_IOMMU_MAPPING_ERROR)
+ if (*dma_addr == DMA_MAPPING_ERROR)
goto out_free;
return page_address(page);
@@ -2678,11 +2676,6 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
return check_device(dev);
}
-static int amd_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == AMD_IOMMU_MAPPING_ERROR;
-}
-
static const struct dma_map_ops amd_iommu_dma_ops = {
.alloc = alloc_coherent,
.free = free_coherent,
@@ -2691,7 +2684,6 @@ static const struct dma_map_ops amd_iommu_dma_ops = {
.map_sg = map_sg,
.unmap_sg = unmap_sg,
.dma_supported = amd_iommu_dma_supported,
- .mapping_error = amd_iommu_mapping_error,
};
static int init_reserved_iova_ranges(void)
@@ -2778,17 +2770,6 @@ int __init amd_iommu_init_dma_ops(void)
swiotlb = (iommu_pass_through || sme_me_mask) ? 1 : 0;
iommu_detected = 1;
- /*
- * In case we don't initialize SWIOTLB (actually the common case
- * when AMD IOMMU is enabled and SME is not active), make sure there
- * are global dma_ops set as a fall-back for devices not handled by
- * this driver (for example non-PCI devices). When SME is active,
- * make sure that swiotlb variable remains set so the global dma_ops
- * continue to be SWIOTLB.
- */
- if (!swiotlb)
- dma_ops = &dma_direct_ops;
-
if (amd_iommu_unmap_flush)
pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
else
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index d1b04753b204..60c7e9e9901e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -32,8 +32,6 @@
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>
-#define IOMMU_MAPPING_ERROR 0
-
struct iommu_dma_msi_page {
struct list_head list;
dma_addr_t iova;
@@ -523,7 +521,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
{
__iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size);
__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
- *handle = IOMMU_MAPPING_ERROR;
+ *handle = DMA_MAPPING_ERROR;
}
/**
@@ -556,7 +554,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
dma_addr_t iova;
unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
- *handle = IOMMU_MAPPING_ERROR;
+ *handle = DMA_MAPPING_ERROR;
min_size = alloc_sizes & -alloc_sizes;
if (min_size < PAGE_SIZE) {
@@ -649,11 +647,11 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
if (!iova)
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
iommu_dma_free_iova(cookie, iova, size);
- return IOMMU_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
}
return iova + iova_off;
}
@@ -694,7 +692,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
s->offset += s_iova_off;
s->length = s_length;
- sg_dma_address(s) = IOMMU_MAPPING_ERROR;
+ sg_dma_address(s) = DMA_MAPPING_ERROR;
sg_dma_len(s) = 0;
/*
@@ -737,11 +735,11 @@ static void __invalidate_sg(struct scatterlist *sg, int nents)
int i;
for_each_sg(sg, s, nents, i) {
- if (sg_dma_address(s) != IOMMU_MAPPING_ERROR)
+ if (sg_dma_address(s) != DMA_MAPPING_ERROR)
s->offset += sg_dma_address(s);
if (sg_dma_len(s))
s->length = sg_dma_len(s);
- sg_dma_address(s) = IOMMU_MAPPING_ERROR;
+ sg_dma_address(s) = DMA_MAPPING_ERROR;
sg_dma_len(s) = 0;
}
}
@@ -858,11 +856,6 @@ void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
__iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size);
}
-int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == IOMMU_MAPPING_ERROR;
-}
-
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
phys_addr_t msi_addr, struct iommu_domain *domain)
{
@@ -882,7 +875,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
return NULL;
iova = __iommu_dma_map(dev, msi_addr, size, prot, domain);
- if (iommu_dma_mapping_error(dev, iova))
+ if (iova == DMA_MAPPING_ERROR)
goto out_free_page;
INIT_LIST_HEAD(&msi_page->list);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 41a4b8808802..0ad67d65bbce 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3597,9 +3597,11 @@ static int iommu_no_mapping(struct device *dev)
return 0;
}
-static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
- size_t size, int dir, u64 dma_mask)
+static dma_addr_t __intel_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, int dir,
+ u64 dma_mask)
{
+ phys_addr_t paddr = page_to_phys(page) + offset;
struct dmar_domain *domain;
phys_addr_t start_paddr;
unsigned long iova_pfn;
@@ -3615,7 +3617,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
domain = get_valid_domain_for_dev(dev);
if (!domain)
- return 0;
+ return DMA_MAPPING_ERROR;
iommu = domain_get_iommu(domain);
size = aligned_nrpages(paddr, size);
@@ -3653,7 +3655,7 @@ error:
free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
dev_name(dev), size, (unsigned long long)paddr, dir);
- return 0;
+ return DMA_MAPPING_ERROR;
}
static dma_addr_t intel_map_page(struct device *dev, struct page *page,
@@ -3661,8 +3663,7 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
unsigned long attrs)
{
- return __intel_map_single(dev, page_to_phys(page) + offset, size,
- dir, *dev->dma_mask);
+ return __intel_map_page(dev, page, offset, size, dir, *dev->dma_mask);
}
static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
@@ -3753,10 +3754,9 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
return NULL;
memset(page_address(page), 0, size);
- *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
- DMA_BIDIRECTIONAL,
- dev->coherent_dma_mask);
- if (*dma_handle)
+ *dma_handle = __intel_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL,
+ dev->coherent_dma_mask);
+ if (*dma_handle != DMA_MAPPING_ERROR)
return page_address(page);
if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
__free_pages(page, order);
@@ -3865,11 +3865,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
return nelems;
}
-static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return !dma_addr;
-}
-
static const struct dma_map_ops intel_dma_ops = {
.alloc = intel_alloc_coherent,
.free = intel_free_coherent,
@@ -3877,7 +3872,6 @@ static const struct dma_map_ops intel_dma_ops = {
.unmap_sg = intel_unmap_sg,
.map_page = intel_map_page,
.unmap_page = intel_unmap_page,
- .mapping_error = intel_mapping_error,
.dma_supported = dma_direct_supported,
};
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index efb976a863d2..5f82036fe322 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -389,7 +389,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
goto err_dev;
}
- tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL);
+ tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
if (!tqueue) {
ret = -ENOMEM;
goto err_disk;
@@ -974,7 +974,7 @@ static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba,
struct ppa_addr ppa;
u8 *blks;
int ch, lun, nr_blks;
- int ret;
+ int ret = 0;
ppa.ppa = slba;
ppa = dev_to_generic_addr(dev, ppa);
@@ -1140,30 +1140,33 @@ EXPORT_SYMBOL(nvm_alloc_dev);
int nvm_register(struct nvm_dev *dev)
{
- int ret;
+ int ret, exp_pool_size;
if (!dev->q || !dev->ops)
return -EINVAL;
- dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
+ ret = nvm_init(dev);
+ if (ret)
+ return ret;
+
+ exp_pool_size = max_t(int, PAGE_SIZE,
+ (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos)));
+ exp_pool_size = round_up(exp_pool_size, PAGE_SIZE);
+
+ dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist",
+ exp_pool_size);
if (!dev->dma_pool) {
pr_err("nvm: could not create dma pool\n");
+ nvm_free(dev);
return -ENOMEM;
}
- ret = nvm_init(dev);
- if (ret)
- goto err_init;
-
/* register device with a supported media manager */
down_write(&nvm_lock);
list_add(&dev->devices, &nvm_devices);
up_write(&nvm_lock);
return 0;
-err_init:
- dev->ops->destroy_dma_pool(dev->dma_pool);
- return ret;
}
EXPORT_SYMBOL(nvm_register);
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 6944aac43b01..1ff165351180 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -250,8 +250,8 @@ int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
if (rqd->nr_ppas == 1)
return 0;
- rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
- rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
+ rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk);
+ rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk);
return 0;
}
@@ -376,7 +376,7 @@ void pblk_write_should_kick(struct pblk *pblk)
{
unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
- if (secs_avail >= pblk->min_write_pgs)
+ if (secs_avail >= pblk->min_write_pgs_data)
pblk_write_kick(pblk);
}
@@ -407,7 +407,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
- int vsc = le32_to_cpu(*line->vsc);
+ int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data)
+ * (pblk->min_write_pgs - pblk->min_write_pgs_data);
+ int vsc = le32_to_cpu(*line->vsc) + packed_meta;
lockdep_assert_held(&line->lock);
@@ -531,7 +533,7 @@ void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd)
if (caddr == 0)
trace_pblk_chunk_state(pblk_disk_name(pblk),
ppa, NVM_CHK_ST_OPEN);
- else if (caddr == chunk->cnlb)
+ else if (caddr == (chunk->cnlb - 1))
trace_pblk_chunk_state(pblk_disk_name(pblk),
ppa, NVM_CHK_ST_CLOSED);
}
@@ -620,12 +622,15 @@ out:
}
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
- unsigned long secs_to_flush)
+ unsigned long secs_to_flush, bool skip_meta)
{
int max = pblk->sec_per_write;
int min = pblk->min_write_pgs;
int secs_to_sync = 0;
+ if (skip_meta && pblk->min_write_pgs_data != pblk->min_write_pgs)
+ min = max = pblk->min_write_pgs_data;
+
if (secs_avail >= max)
secs_to_sync = max;
else if (secs_avail >= min)
@@ -796,10 +801,11 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
rqd.is_seq = 1;
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
- struct pblk_sec_meta *meta_list = rqd.meta_list;
+ struct pblk_sec_meta *meta = pblk_get_meta(pblk,
+ rqd.meta_list, i);
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
- meta_list[i].lba = lba_list[paddr] = addr_empty;
+ meta->lba = lba_list[paddr] = addr_empty;
}
ret = pblk_submit_io_sync_sem(pblk, &rqd);
@@ -845,13 +851,13 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
if (!meta_list)
return -ENOMEM;
- ppa_list = meta_list + pblk_dma_meta_size;
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+ ppa_list = meta_list + pblk_dma_meta_size(pblk);
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
next_rq:
memset(&rqd, 0, sizeof(struct nvm_rq));
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+ rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
rq_len = rq_ppas * geo->csecs;
bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
@@ -1276,6 +1282,7 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
return 0;
}
+/* Line allocations in the recovery path are always single threaded */
int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1295,15 +1302,22 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
ret = pblk_line_alloc_bitmaps(pblk, line);
if (ret)
- return ret;
+ goto fail;
if (!pblk_line_init_bb(pblk, line, 0)) {
- list_add(&line->list, &l_mg->free_list);
- return -EINTR;
+ ret = -EINTR;
+ goto fail;
}
pblk_rl_free_lines_dec(&pblk->rl, line, true);
return 0;
+
+fail:
+ spin_lock(&l_mg->free_lock);
+ list_add(&line->list, &l_mg->free_list);
+ spin_unlock(&l_mg->free_lock);
+
+ return ret;
}
void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
@@ -2160,3 +2174,38 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
}
spin_unlock(&pblk->trans_lock);
}
+
+void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ void *buffer;
+
+ if (pblk_is_oob_meta_supported(pblk)) {
+ /* Just use OOB metadata buffer as always */
+ buffer = rqd->meta_list;
+ } else {
+ /* We need to reuse last page of request (packed metadata)
+ * in similar way as traditional oob metadata
+ */
+ buffer = page_to_virt(
+ rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+ }
+
+ return buffer;
+}
+
+void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ void *meta_list = rqd->meta_list;
+ void *page;
+ int i = 0;
+
+ if (pblk_is_oob_meta_supported(pblk))
+ return;
+
+ page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+ /* We need to fill oob meta buffer with data from packed metadata */
+ for (; i < rqd->nr_ppas; i++)
+ memcpy(pblk_get_meta(pblk, meta_list, i),
+ page + (i * sizeof(struct pblk_sec_meta)),
+ sizeof(struct pblk_sec_meta));
+}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 13822594647c..f9a3e47b6a93 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -207,9 +207,6 @@ static int pblk_rwb_init(struct pblk *pblk)
return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs);
}
-/* Minimum pages needed within a lun */
-#define ADDR_POOL_SIZE 64
-
static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo,
struct nvm_addrf_12 *dst)
{
@@ -350,23 +347,19 @@ fail_destroy_ws:
static int pblk_get_global_caches(void)
{
- int ret;
+ int ret = 0;
mutex_lock(&pblk_caches.mutex);
- if (kref_read(&pblk_caches.kref) > 0) {
- kref_get(&pblk_caches.kref);
- mutex_unlock(&pblk_caches.mutex);
- return 0;
- }
+ if (kref_get_unless_zero(&pblk_caches.kref))
+ goto out;
ret = pblk_create_global_caches();
-
if (!ret)
- kref_get(&pblk_caches.kref);
+ kref_init(&pblk_caches.kref);
+out:
mutex_unlock(&pblk_caches.mutex);
-
return ret;
}
@@ -406,12 +399,45 @@ static int pblk_core_init(struct pblk *pblk)
pblk->nr_flush_rst = 0;
pblk->min_write_pgs = geo->ws_opt;
+ pblk->min_write_pgs_data = pblk->min_write_pgs;
max_write_ppas = pblk->min_write_pgs * geo->all_luns;
pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
pblk->max_write_pgs = min_t(int, pblk->max_write_pgs,
queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT));
pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+ pblk->oob_meta_size = geo->sos;
+ if (!pblk_is_oob_meta_supported(pblk)) {
+ /* For drives which does not have OOB metadata feature
+ * in order to support recovery feature we need to use
+ * so called packed metadata. Packed metada will store
+ * the same information as OOB metadata (l2p table mapping,
+ * but in the form of the single page at the end of
+ * every write request.
+ */
+ if (pblk->min_write_pgs
+ * sizeof(struct pblk_sec_meta) > PAGE_SIZE) {
+ /* We want to keep all the packed metadata on single
+ * page per write requests. So we need to ensure that
+ * it will fit.
+ *
+ * This is more like sanity check, since there is
+ * no device with such a big minimal write size
+ * (above 1 metabytes).
+ */
+ pblk_err(pblk, "Not supported min write size\n");
+ return -EINVAL;
+ }
+ /* For packed meta approach we do some simplification.
+ * On read path we always issue requests which size
+ * equal to max_write_pgs, with all pages filled with
+ * user payload except of last one page which will be
+ * filled with packed metadata.
+ */
+ pblk->max_write_pgs = pblk->min_write_pgs;
+ pblk->min_write_pgs_data = pblk->min_write_pgs - 1;
+ }
+
pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t),
GFP_KERNEL);
if (!pblk->pad_dist)
@@ -635,40 +661,61 @@ static unsigned int calc_emeta_len(struct pblk *pblk)
return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
-static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
+static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
struct nvm_geo *geo = &dev->geo;
sector_t provisioned;
- int sec_meta, blk_meta;
+ int sec_meta, blk_meta, clba;
+ int minimum;
if (geo->op == NVM_TARGET_DEFAULT_OP)
pblk->op = PBLK_DEFAULT_OP;
else
pblk->op = geo->op;
- provisioned = nr_free_blks;
+ minimum = pblk_get_min_chks(pblk);
+ provisioned = nr_free_chks;
provisioned *= (100 - pblk->op);
sector_div(provisioned, 100);
- pblk->op_blks = nr_free_blks - provisioned;
+ if ((nr_free_chks - provisioned) < minimum) {
+ if (geo->op != NVM_TARGET_DEFAULT_OP) {
+ pblk_err(pblk, "OP too small to create a sane instance\n");
+ return -EINTR;
+ }
+
+ /* If the user did not specify an OP value, and PBLK_DEFAULT_OP
+ * is not enough, calculate and set sane value
+ */
+
+ provisioned = nr_free_chks - minimum;
+ pblk->op = (100 * minimum) / nr_free_chks;
+ pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n",
+ pblk->op);
+ }
+
+ pblk->op_blks = nr_free_chks - provisioned;
/* Internally pblk manages all free blocks, but all calculations based
* on user capacity consider only provisioned blocks
*/
- pblk->rl.total_blocks = nr_free_blks;
- pblk->rl.nr_secs = nr_free_blks * geo->clba;
+ pblk->rl.total_blocks = nr_free_chks;
+ pblk->rl.nr_secs = nr_free_chks * geo->clba;
/* Consider sectors used for metadata */
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
- pblk->capacity = (provisioned - blk_meta) * geo->clba;
+ clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data;
+ pblk->capacity = (provisioned - blk_meta) * clba;
- atomic_set(&pblk->rl.free_blocks, nr_free_blks);
- atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
+ atomic_set(&pblk->rl.free_blocks, nr_free_chks);
+ atomic_set(&pblk->rl.free_user_blocks, nr_free_chks);
+
+ return 0;
}
static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line,
@@ -984,7 +1031,7 @@ static int pblk_lines_init(struct pblk *pblk)
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line;
void *chunk_meta;
- long nr_free_chks = 0;
+ int nr_free_chks = 0;
int i, ret;
ret = pblk_line_meta_init(pblk);
@@ -1031,7 +1078,9 @@ static int pblk_lines_init(struct pblk *pblk)
goto fail_free_lines;
}
- pblk_set_provision(pblk, nr_free_chks);
+ ret = pblk_set_provision(pblk, nr_free_chks);
+ if (ret)
+ goto fail_free_lines;
vfree(chunk_meta);
return 0;
@@ -1041,7 +1090,7 @@ fail_free_lines:
pblk_line_meta_free(l_mg, &pblk->lines[i]);
kfree(pblk->lines);
fail_free_chunk_meta:
- kfree(chunk_meta);
+ vfree(chunk_meta);
fail_free_luns:
kfree(pblk->luns);
fail_free_meta:
@@ -1154,6 +1203,12 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
return ERR_PTR(-EINVAL);
}
+ if (geo->ext) {
+ pblk_err(pblk, "extended metadata not supported\n");
+ kfree(pblk);
+ return ERR_PTR(-EINVAL);
+ }
+
spin_lock_init(&pblk->resubmit_lock);
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 6dcbd44e3acb..79df583ea709 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -22,7 +22,7 @@
static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
struct ppa_addr *ppa_list,
unsigned long *lun_bitmap,
- struct pblk_sec_meta *meta_list,
+ void *meta_list,
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
@@ -33,6 +33,9 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
int nr_secs = pblk->min_write_pgs;
int i;
+ if (!line)
+ return -ENOSPC;
+
if (pblk_line_is_full(line)) {
struct pblk_line *prev_line = line;
@@ -42,8 +45,11 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
line = pblk_line_replace_data(pblk);
pblk_line_close_meta(pblk, prev_line);
- if (!line)
- return -EINTR;
+ if (!line) {
+ pblk_pipeline_stop(pblk);
+ return -ENOSPC;
+ }
+
}
emeta = line->emeta;
@@ -52,6 +58,7 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
paddr = pblk_alloc_page(pblk, line, nr_secs);
for (i = 0; i < nr_secs; i++, paddr++) {
+ struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
/* ppa to be sent to the device */
@@ -68,14 +75,15 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
kref_get(&line->ref);
w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
w_ctx->ppa = ppa_list[i];
- meta_list[i].lba = cpu_to_le64(w_ctx->lba);
+ meta->lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
if (lba_list[paddr] != addr_empty)
line->nr_valid_lbas++;
else
atomic64_inc(&pblk->pad_wa);
} else {
- lba_list[paddr] = meta_list[i].lba = addr_empty;
+ lba_list[paddr] = addr_empty;
+ meta->lba = addr_empty;
__pblk_map_invalidate(pblk, line, paddr);
}
}
@@ -84,50 +92,57 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
return 0;
}
-void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
+int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
unsigned long *lun_bitmap, unsigned int valid_secs,
unsigned int off)
{
- struct pblk_sec_meta *meta_list = rqd->meta_list;
+ void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
+ void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i;
+ int ret;
for (i = off; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
- if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
- lun_bitmap, &meta_list[i], map_secs)) {
- bio_put(rqd->bio);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE);
- pblk_pipeline_stop(pblk);
- }
+ meta_buffer = pblk_get_meta(pblk, meta_list, i);
+
+ ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
+ lun_bitmap, meta_buffer, map_secs);
+ if (ret)
+ return ret;
}
+
+ return 0;
}
/* only if erase_ppa is set, acquire erase semaphore */
-void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
+int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
unsigned int sentry, unsigned long *lun_bitmap,
unsigned int valid_secs, struct ppa_addr *erase_ppa)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_sec_meta *meta_list = rqd->meta_list;
+ void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
+ void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
+ int ret;
+
for (i = 0; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
- if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
- lun_bitmap, &meta_list[i], map_secs)) {
- bio_put(rqd->bio);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE);
- pblk_pipeline_stop(pblk);
- }
+ meta_buffer = pblk_get_meta(pblk, meta_list, i);
+
+ ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
+ lun_bitmap, meta_buffer, map_secs);
+ if (ret)
+ return ret;
erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]);
@@ -163,7 +178,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
*/
e_line = pblk_line_get_erase(pblk);
if (!e_line)
- return;
+ return -ENOSPC;
/* Erase blocks that are bad in this line but might not be in next */
if (unlikely(pblk_ppa_empty(*erase_ppa)) &&
@@ -174,7 +189,7 @@ retry:
bit = find_next_bit(d_line->blk_bitmap,
lm->blk_per_line, bit + 1);
if (bit >= lm->blk_per_line)
- return;
+ return 0;
spin_lock(&e_line->lock);
if (test_bit(bit, e_line->erase_bitmap)) {
@@ -188,4 +203,6 @@ retry:
*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->a.blk = e_line->id;
}
+
+ return 0;
}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index b1f4b51783f4..d4ca8c64ee0f 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -147,7 +147,7 @@ int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
/*
* Initialize rate-limiter, which controls access to the write buffer
- * but user and GC I/O
+ * by user and GC I/O
*/
pblk_rl_init(&pblk->rl, rb->nr_entries);
@@ -552,6 +552,9 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
to_read = count;
}
+ /* Add space for packed metadata if in use*/
+ pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);
+
c_ctx->sentry = pos;
c_ctx->nr_valid = to_read;
c_ctx->nr_padded = pad;
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 9fba614adeeb..3789185144da 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -43,7 +43,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct bio *bio, sector_t blba,
unsigned long *read_bitmap)
{
- struct pblk_sec_meta *meta_list = rqd->meta_list;
+ void *meta_list = rqd->meta_list;
struct ppa_addr ppas[NVM_MAX_VLBA];
int nr_secs = rqd->nr_ppas;
bool advanced_bio = false;
@@ -53,12 +53,15 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
for (i = 0; i < nr_secs; i++) {
struct ppa_addr p = ppas[i];
+ struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
sector_t lba = blba + i;
retry:
if (pblk_ppa_empty(p)) {
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
WARN_ON(test_and_set_bit(i, read_bitmap));
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
+ meta->lba = addr_empty;
if (unlikely(!advanced_bio)) {
bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
@@ -78,7 +81,7 @@ retry:
goto retry;
}
WARN_ON(test_and_set_bit(i, read_bitmap));
- meta_list[i].lba = cpu_to_le64(lba);
+ meta->lba = cpu_to_le64(lba);
advanced_bio = true;
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_inc(&pblk->cache_reads);
@@ -105,12 +108,16 @@ next:
static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
sector_t blba)
{
- struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+ void *meta_list = rqd->meta_list;
int nr_lbas = rqd->nr_ppas;
int i;
+ if (!pblk_is_oob_meta_supported(pblk))
+ return;
+
for (i = 0; i < nr_lbas; i++) {
- u64 lba = le64_to_cpu(meta_lba_list[i].lba);
+ struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
+ u64 lba = le64_to_cpu(meta->lba);
if (lba == ADDR_EMPTY)
continue;
@@ -134,17 +141,22 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
u64 *lba_list, int nr_lbas)
{
- struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+ void *meta_lba_list = rqd->meta_list;
int i, j;
+ if (!pblk_is_oob_meta_supported(pblk))
+ return;
+
for (i = 0, j = 0; i < nr_lbas; i++) {
+ struct pblk_sec_meta *meta = pblk_get_meta(pblk,
+ meta_lba_list, j);
u64 lba = lba_list[i];
u64 meta_lba;
if (lba == ADDR_EMPTY)
continue;
- meta_lba = le64_to_cpu(meta_lba_list[j].lba);
+ meta_lba = le64_to_cpu(meta->lba);
if (lba != meta_lba) {
#ifdef CONFIG_NVM_PBLK_DEBUG
@@ -216,15 +228,15 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
struct pblk *pblk = rqd->private;
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct pblk_pr_ctx *pr_ctx = r_ctx->private;
+ struct pblk_sec_meta *meta;
struct bio *new_bio = rqd->bio;
struct bio *bio = pr_ctx->orig_bio;
struct bio_vec src_bv, dst_bv;
- struct pblk_sec_meta *meta_list = rqd->meta_list;
+ void *meta_list = rqd->meta_list;
int bio_init_idx = pr_ctx->bio_init_idx;
unsigned long *read_bitmap = pr_ctx->bitmap;
int nr_secs = pr_ctx->orig_nr_secs;
int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
- __le64 *lba_list_mem, *lba_list_media;
void *src_p, *dst_p;
int hole, i;
@@ -237,13 +249,10 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
rqd->ppa_list[0] = ppa;
}
- /* Re-use allocated memory for intermediate lbas */
- lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
- lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size);
-
for (i = 0; i < nr_secs; i++) {
- lba_list_media[i] = meta_list[i].lba;
- meta_list[i].lba = lba_list_mem[i];
+ meta = pblk_get_meta(pblk, meta_list, i);
+ pr_ctx->lba_list_media[i] = le64_to_cpu(meta->lba);
+ meta->lba = cpu_to_le64(pr_ctx->lba_list_mem[i]);
}
/* Fill the holes in the original bio */
@@ -255,7 +264,8 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
kref_put(&line->ref, pblk_line_put);
- meta_list[hole].lba = lba_list_media[i];
+ meta = pblk_get_meta(pblk, meta_list, hole);
+ meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
src_bv = new_bio->bi_io_vec[i++];
dst_bv = bio->bi_io_vec[bio_init_idx + hole];
@@ -291,17 +301,13 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
unsigned long *read_bitmap,
int nr_holes)
{
- struct pblk_sec_meta *meta_list = rqd->meta_list;
+ void *meta_list = rqd->meta_list;
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct pblk_pr_ctx *pr_ctx;
struct bio *new_bio, *bio = r_ctx->private;
- __le64 *lba_list_mem;
int nr_secs = rqd->nr_ppas;
int i;
- /* Re-use allocated memory for intermediate lbas */
- lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
-
new_bio = bio_alloc(GFP_KERNEL, nr_holes);
if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
@@ -312,12 +318,15 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
goto fail_free_pages;
}
- pr_ctx = kmalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
+ pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
if (!pr_ctx)
goto fail_free_pages;
- for (i = 0; i < nr_secs; i++)
- lba_list_mem[i] = meta_list[i].lba;
+ for (i = 0; i < nr_secs; i++) {
+ struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
+
+ pr_ctx->lba_list_mem[i] = le64_to_cpu(meta->lba);
+ }
new_bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
@@ -325,7 +334,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
rqd->bio = new_bio;
rqd->nr_ppas = nr_holes;
- pr_ctx->ppa_ptr = NULL;
pr_ctx->orig_bio = bio;
bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA);
pr_ctx->bio_init_idx = bio_init_idx;
@@ -383,7 +391,7 @@ err:
static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
sector_t lba, unsigned long *read_bitmap)
{
- struct pblk_sec_meta *meta_list = rqd->meta_list;
+ struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
struct ppa_addr ppa;
pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
@@ -394,8 +402,10 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
retry:
if (pblk_ppa_empty(ppa)) {
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
WARN_ON(test_and_set_bit(0, read_bitmap));
- meta_list[0].lba = cpu_to_le64(ADDR_EMPTY);
+ meta->lba = addr_empty;
return;
}
@@ -409,7 +419,7 @@ retry:
}
WARN_ON(test_and_set_bit(0, read_bitmap));
- meta_list[0].lba = cpu_to_le64(lba);
+ meta->lba = cpu_to_le64(lba);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_inc(&pblk->cache_reads);
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 5740b7509bd8..3fcf062d752c 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -13,6 +13,9 @@
* General Public License for more details.
*
* pblk-recovery.c - pblk's recovery path
+ *
+ * The L2P recovery path is single threaded as the L2P table is updated in order
+ * following the line sequence ID.
*/
#include "pblk.h"
@@ -124,7 +127,7 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
struct pblk_recov_alloc {
struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
+ void *meta_list;
struct nvm_rq *rqd;
void *data;
dma_addr_t dma_ppa_list;
@@ -158,7 +161,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct pblk_sec_meta *meta_list;
+ void *meta_list;
struct pblk_pad_rq *pad_rq;
struct nvm_rq *rqd;
struct bio *bio;
@@ -188,7 +191,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
kref_init(&pad_rq->ref);
next_pad_rq:
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+ rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
if (rq_ppas < pblk->min_write_pgs) {
pblk_err(pblk, "corrupted pad line %d\n", line->id);
goto fail_free_pad;
@@ -237,12 +240,15 @@ next_pad_rq:
for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
struct ppa_addr dev_ppa;
+ struct pblk_sec_meta *meta;
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
pblk_map_invalidate(pblk, dev_ppa);
- lba_list[w_ptr] = meta_list[i].lba = addr_empty;
+ lba_list[w_ptr] = addr_empty;
+ meta = pblk_get_meta(pblk, meta_list, i);
+ meta->lba = addr_empty;
rqd->ppa_list[i] = dev_ppa;
}
}
@@ -334,20 +340,21 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
struct pblk_recov_alloc p)
{
struct nvm_tgt_dev *dev = pblk->dev;
+ struct pblk_line_meta *lm = &pblk->lm;
struct nvm_geo *geo = &dev->geo;
struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
+ void *meta_list;
struct nvm_rq *rqd;
struct bio *bio;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
__le64 *lba_list;
- u64 paddr = 0;
+ u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
bool padded = false;
int rq_ppas, rq_len;
int i, j;
int ret;
- u64 left_ppas = pblk_sec_in_open_line(pblk, line);
+ u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
if (pblk_line_wp_is_unbalanced(pblk, line))
pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
@@ -364,17 +371,19 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
next_rq:
memset(rqd, 0, pblk_g_rq_size);
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+ rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
if (!rq_ppas)
rq_ppas = pblk->min_write_pgs;
rq_len = rq_ppas * geo->csecs;
+retry_rq:
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
if (IS_ERR(bio))
return PTR_ERR(bio);
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio_get(bio);
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
@@ -387,7 +396,6 @@ next_rq:
if (pblk_io_aligned(pblk, rq_ppas))
rqd->is_seq = 1;
-retry_rq:
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -410,6 +418,7 @@ retry_rq:
if (ret) {
pblk_err(pblk, "I/O submission failed: %d\n", ret);
bio_put(bio);
+ bio_put(bio);
return ret;
}
@@ -421,20 +430,28 @@ retry_rq:
if (padded) {
pblk_log_read_err(pblk, rqd);
+ bio_put(bio);
return -EINTR;
}
pad_distance = pblk_pad_distance(pblk, line);
ret = pblk_recov_pad_line(pblk, line, pad_distance);
- if (ret)
+ if (ret) {
+ bio_put(bio);
return ret;
+ }
padded = true;
+ bio_put(bio);
goto retry_rq;
}
+ pblk_get_packed_meta(pblk, rqd);
+ bio_put(bio);
+
for (i = 0; i < rqd->nr_ppas; i++) {
- u64 lba = le64_to_cpu(meta_list[i].lba);
+ struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
+ u64 lba = le64_to_cpu(meta->lba);
lba_list[paddr++] = cpu_to_le64(lba);
@@ -463,7 +480,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
struct nvm_geo *geo = &dev->geo;
struct nvm_rq *rqd;
struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
+ void *meta_list;
struct pblk_recov_alloc p;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
@@ -473,8 +490,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
if (!meta_list)
return -ENOMEM;
- ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+ ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
if (!data) {
@@ -804,7 +821,6 @@ next:
WARN_ON_ONCE(!test_and_clear_bit(meta_line,
&l_mg->meta_bitmap));
spin_unlock(&l_mg->free_lock);
- pblk_line_replace_data(pblk);
} else {
spin_lock(&l_mg->free_lock);
/* Allocate next line for preparation */
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index db55a1c89997..76116d5f78e4 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -214,11 +214,10 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
struct nvm_geo *geo = &dev->geo;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
int sec_meta, blk_meta;
-
unsigned int rb_windows;
+
/* Consider sectors used for metadata */
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
@@ -226,7 +225,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
rl->high_pw = get_count_order(rl->high);
- rl->rsv_blocks = min_blocks;
+ rl->rsv_blocks = pblk_get_min_chks(pblk);
/* This will always be a power-of-2 */
rb_windows = budget / NVM_MAX_VLBA;
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index 2d2818155aa8..7d8958df9472 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -479,6 +479,13 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
if (kstrtouint(page, 0, &sec_per_write))
return -EINVAL;
+ if (!pblk_is_oob_meta_supported(pblk)) {
+ /* For packed metadata case it is
+ * not allowed to change sec_per_write.
+ */
+ return -EINVAL;
+ }
+
if (sec_per_write < pblk->min_write_pgs
|| sec_per_write > pblk->max_write_pgs
|| sec_per_write % pblk->min_write_pgs != 0)
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index fa8726493b39..06d56deb645d 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -105,14 +105,20 @@ retry:
}
/* Map remaining sectors in chunk, starting from ppa */
-static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa)
+static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa,
+ int rqd_ppas)
{
struct pblk_line *line;
struct ppa_addr map_ppa = *ppa;
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+ __le64 *lba_list;
u64 paddr;
int done = 0;
+ int n = 0;
line = pblk_ppa_to_line(pblk, *ppa);
+ lba_list = emeta_to_lbas(pblk, line->emeta->buf);
+
spin_lock(&line->lock);
while (!done) {
@@ -121,10 +127,17 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa)
if (!test_and_set_bit(paddr, line->map_bitmap))
line->left_msecs--;
+ if (n < rqd_ppas && lba_list[paddr] != addr_empty)
+ line->nr_valid_lbas--;
+
+ lba_list[paddr] = addr_empty;
+
if (!test_and_set_bit(paddr, line->invalid_bitmap))
le32_add_cpu(line->vsc, -1);
done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa);
+
+ n++;
}
line->w_err_gc->has_write_err = 1;
@@ -148,9 +161,11 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
w_ctx = &entry->w_ctx;
/* Check if the lba has been overwritten */
- ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
- if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
- w_ctx->lba = ADDR_EMPTY;
+ if (w_ctx->lba != ADDR_EMPTY) {
+ ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
+ if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
+ w_ctx->lba = ADDR_EMPTY;
+ }
/* Mark up the entry as submittable again */
flags = READ_ONCE(w_ctx->flags);
@@ -200,7 +215,7 @@ static void pblk_submit_rec(struct work_struct *work)
pblk_log_write_err(pblk, rqd);
- pblk_map_remaining(pblk, ppa_list);
+ pblk_map_remaining(pblk, ppa_list, rqd->nr_ppas);
pblk_queue_resubmit(pblk, c_ctx);
pblk_up_rq(pblk, c_ctx->lun_bitmap);
@@ -319,12 +334,13 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
}
if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
- pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
+ ret = pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
+ valid, 0);
else
- pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
+ ret = pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
valid, erase_ppa);
- return 0;
+ return ret;
}
static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
@@ -332,7 +348,7 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
{
int secs_to_sync;
- secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush);
+ secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true);
#ifdef CONFIG_NVM_PBLK_DEBUG
if ((!secs_to_sync && secs_to_flush)
@@ -548,15 +564,17 @@ static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
c_ctx->nr_padded);
}
-static int pblk_submit_write(struct pblk *pblk)
+static int pblk_submit_write(struct pblk *pblk, int *secs_left)
{
struct bio *bio;
struct nvm_rq *rqd;
unsigned int secs_avail, secs_to_sync, secs_to_com;
- unsigned int secs_to_flush;
+ unsigned int secs_to_flush, packed_meta_pgs;
unsigned long pos;
unsigned int resubmit;
+ *secs_left = 0;
+
spin_lock(&pblk->resubmit_lock);
resubmit = !list_empty(&pblk->resubmit_list);
spin_unlock(&pblk->resubmit_lock);
@@ -586,17 +604,17 @@ static int pblk_submit_write(struct pblk *pblk)
*/
secs_avail = pblk_rb_read_count(&pblk->rwb);
if (!secs_avail)
- return 1;
+ return 0;
secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
- if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
- return 1;
+ if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data)
+ return 0;
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
secs_to_flush);
if (secs_to_sync > pblk->max_write_pgs) {
pblk_err(pblk, "bad buffer sync calculation\n");
- return 1;
+ return 0;
}
secs_to_com = (secs_to_sync > secs_avail) ?
@@ -604,7 +622,8 @@ static int pblk_submit_write(struct pblk *pblk)
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
}
- bio = bio_alloc(GFP_KERNEL, secs_to_sync);
+ packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data);
+ bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs);
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -625,6 +644,7 @@ static int pblk_submit_write(struct pblk *pblk)
atomic_long_add(secs_to_sync, &pblk->sub_writes);
#endif
+ *secs_left = 1;
return 0;
fail_free_bio:
@@ -633,16 +653,22 @@ fail_put_bio:
bio_put(bio);
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
- return 1;
+ return -EINTR;
}
int pblk_write_ts(void *data)
{
struct pblk *pblk = data;
+ int secs_left;
+ int write_failure = 0;
while (!kthread_should_stop()) {
- if (!pblk_submit_write(pblk))
- continue;
+ if (!write_failure) {
+ write_failure = pblk_submit_write(pblk, &secs_left);
+
+ if (secs_left)
+ continue;
+ }
set_current_state(TASK_INTERRUPTIBLE);
io_schedule();
}
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 02bb2e98f8a9..85e38ed62f85 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -104,7 +104,6 @@ enum {
PBLK_RL_LOW = 4
};
-#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA)
#define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA)
/* write buffer completion context */
@@ -132,6 +131,8 @@ struct pblk_pr_ctx {
unsigned int bio_init_idx;
void *ppa_ptr;
dma_addr_t dma_ppa_list;
+ __le64 lba_list_mem[NVM_MAX_VLBA];
+ __le64 lba_list_media[NVM_MAX_VLBA];
};
/* Pad context */
@@ -631,7 +632,9 @@ struct pblk {
int state; /* pblk line state */
int min_write_pgs; /* Minimum amount of pages required by controller */
+ int min_write_pgs_data; /* Minimum amount of payload pages */
int max_write_pgs; /* Maximum amount of pages supported by controller */
+ int oob_meta_size; /* Size of OOB sector metadata */
sector_t capacity; /* Device capacity when bad blocks are subtracted */
@@ -836,7 +839,7 @@ void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
- unsigned long secs_to_flush);
+ unsigned long secs_to_flush, bool skip_meta);
void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
unsigned long *lun_bitmap);
void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa);
@@ -860,6 +863,8 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
u64 *lba_list, int nr_secs);
void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
sector_t blba, int nr_secs);
+void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd);
+void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
/*
* pblk user I/O write path
@@ -871,10 +876,10 @@ int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
/*
* pblk map
*/
-void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
+int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
unsigned int sentry, unsigned long *lun_bitmap,
unsigned int valid_secs, struct ppa_addr *erase_ppa);
-void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
+int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
unsigned long *lun_bitmap, unsigned int valid_secs,
unsigned int off);
@@ -905,7 +910,6 @@ int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta);
#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
#define PBLK_GC_RQ_QD 128 /* Queue depth for inflight GC requests */
#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
-#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
int pblk_gc_init(struct pblk *pblk);
void pblk_gc_exit(struct pblk *pblk, bool graceful);
@@ -1370,4 +1374,33 @@ static inline char *pblk_disk_name(struct pblk *pblk)
return disk->disk_name;
}
+
+static inline unsigned int pblk_get_min_chks(struct pblk *pblk)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ /* In a worst-case scenario every line will have OP invalid sectors.
+ * We will then need a minimum of 1/OP lines to free up a single line
+ */
+
+ return DIV_ROUND_UP(100, pblk->op) * lm->blk_per_line;
+}
+
+static inline struct pblk_sec_meta *pblk_get_meta(struct pblk *pblk,
+ void *meta, int index)
+{
+ return meta +
+ max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size)
+ * index;
+}
+
+static inline int pblk_dma_meta_size(struct pblk *pblk)
+{
+ return max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size)
+ * NVM_MAX_VLBA;
+}
+
+static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
+{
+ return pblk->oob_meta_size >= sizeof(struct pblk_sec_meta);
+}
#endif /* PBLK_H_ */
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b61b83bbcfff..fdf75352e16a 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -627,6 +627,20 @@ struct cache_set {
struct bkey gc_done;
/*
+ * For automatical garbage collection after writeback completed, this
+ * varialbe is used as bit fields,
+ * - 0000 0001b (BCH_ENABLE_AUTO_GC): enable gc after writeback
+ * - 0000 0010b (BCH_DO_AUTO_GC): do gc after writeback
+ * This is an optimization for following write request after writeback
+ * finished, but read hit rate dropped due to clean data on cache is
+ * discarded. Unless user explicitly sets it via sysfs, it won't be
+ * enabled.
+ */
+#define BCH_ENABLE_AUTO_GC 1
+#define BCH_DO_AUTO_GC 2
+ uint8_t gc_after_writeback;
+
+ /*
* The allocation code needs gc_mark in struct bucket to be correct, but
* it's not while a gc is in progress. Protected by bucket_lock.
*/
@@ -658,7 +672,11 @@ struct cache_set {
/*
* A btree node on disk could have too many bsets for an iterator to fit
- * on the stack - have to dynamically allocate them
+ * on the stack - have to dynamically allocate them.
+ * bch_cache_set_alloc() will make sure the pool can allocate iterators
+ * equipped with enough room that can host
+ * (sb.bucket_size / sb.block_size)
+ * btree_iter_sets, which is more than static MAX_BSETS.
*/
mempool_t fill_iter;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 3f4211b5cd33..23cb1dc7296b 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -207,6 +207,11 @@ void bch_btree_node_read_done(struct btree *b)
struct bset *i = btree_bset_first(b);
struct btree_iter *iter;
+ /*
+ * c->fill_iter can allocate an iterator with more memory space
+ * than static MAX_BSETS.
+ * See the comment arount cache_set->fill_iter.
+ */
iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
iter->used = 0;
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index a68d6c55783b..d1c72ef64edf 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -266,6 +266,24 @@ static inline void wake_up_gc(struct cache_set *c)
wake_up(&c->gc_wait);
}
+static inline void force_wake_up_gc(struct cache_set *c)
+{
+ /*
+ * Garbage collection thread only works when sectors_to_gc < 0,
+ * calling wake_up_gc() won't start gc thread if sectors_to_gc is
+ * not a nagetive value.
+ * Therefore sectors_to_gc is set to -1 here, before waking up
+ * gc thread by calling wake_up_gc(). Then gc_should_run() will
+ * give a chance to permit gc thread to run. "Give a chance" means
+ * before going into gc_should_run(), there is still possibility
+ * that c->sectors_to_gc being set to other positive value. So
+ * this routine won't 100% make sure gc thread will be woken up
+ * to run.
+ */
+ atomic_set(&c->sectors_to_gc, -1);
+ wake_up_gc(c);
+}
+
#define MAP_DONE 0
#define MAP_CONTINUE 1
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 8f448b9c96a1..8b123be05254 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -249,8 +249,7 @@ void bch_debug_init_cache_set(struct cache_set *c)
void bch_debug_exit(void)
{
- if (!IS_ERR_OR_NULL(bcache_debug))
- debugfs_remove_recursive(bcache_debug);
+ debugfs_remove_recursive(bcache_debug);
}
void __init bch_debug_init(void)
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 522c7426f3a0..b2fd412715b1 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -663,7 +663,7 @@ static void journal_write_unlocked(struct closure *cl)
REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
bch_bio_map(bio, w->data);
- trace_bcache_journal_write(bio);
+ trace_bcache_journal_write(bio, w->data->keys);
bio_list_add(&list, bio);
SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + sectors);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 3bf35914bb57..15070412a32e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -311,11 +311,11 @@ err:
* data is written it calls bch_journal, and after the keys have been added to
* the next journal write they're inserted into the btree.
*
- * It inserts the data in s->cache_bio; bi_sector is used for the key offset,
+ * It inserts the data in op->bio; bi_sector is used for the key offset,
* and op->inode is used for the key inode.
*
- * If s->bypass is true, instead of inserting the data it invalidates the
- * region of the cache represented by s->cache_bio and op->inode.
+ * If op->bypass is true, instead of inserting the data it invalidates the
+ * region of the cache represented by op->bio and op->inode.
*/
void bch_data_insert(struct closure *cl)
{
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 7bbd670a5a84..4dee119c3664 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -25,8 +25,8 @@
#include <linux/reboot.h>
#include <linux/sysfs.h>
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
+unsigned int bch_cutoff_writeback;
+unsigned int bch_cutoff_writeback_sync;
static const char bcache_magic[] = {
0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
@@ -1510,8 +1510,7 @@ static void cache_set_free(struct closure *cl)
struct cache *ca;
unsigned int i;
- if (!IS_ERR_OR_NULL(c->debug))
- debugfs_remove(c->debug);
+ debugfs_remove(c->debug);
bch_open_buckets_free(c);
bch_btree_cache_free(c);
@@ -2424,6 +2423,32 @@ static void bcache_exit(void)
mutex_destroy(&bch_register_lock);
}
+/* Check and fixup module parameters */
+static void check_module_parameters(void)
+{
+ if (bch_cutoff_writeback_sync == 0)
+ bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC;
+ else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) {
+ pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u",
+ bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX);
+ bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX;
+ }
+
+ if (bch_cutoff_writeback == 0)
+ bch_cutoff_writeback = CUTOFF_WRITEBACK;
+ else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) {
+ pr_warn("set bch_cutoff_writeback (%u) to max value %u",
+ bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX);
+ bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX;
+ }
+
+ if (bch_cutoff_writeback > bch_cutoff_writeback_sync) {
+ pr_warn("set bch_cutoff_writeback (%u) to %u",
+ bch_cutoff_writeback, bch_cutoff_writeback_sync);
+ bch_cutoff_writeback = bch_cutoff_writeback_sync;
+ }
+}
+
static int __init bcache_init(void)
{
static const struct attribute *files[] = {
@@ -2432,6 +2457,8 @@ static int __init bcache_init(void)
NULL
};
+ check_module_parameters();
+
mutex_init(&bch_register_lock);
init_waitqueue_head(&unregister_wait);
register_reboot_notifier(&reboot);
@@ -2468,5 +2495,18 @@ err:
return -ENOMEM;
}
+/*
+ * Module hooks
+ */
module_exit(bcache_exit);
module_init(bcache_init);
+
+module_param(bch_cutoff_writeback, uint, 0);
+MODULE_PARM_DESC(bch_cutoff_writeback, "threshold to cutoff writeback");
+
+module_param(bch_cutoff_writeback_sync, uint, 0);
+MODULE_PARM_DESC(bch_cutoff_writeback_sync, "hard threshold to cutoff writeback");
+
+MODULE_DESCRIPTION("Bcache: a Linux block layer cache");
+MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 26f035a0c5b9..557a8a3270a1 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -16,7 +16,7 @@
#include <linux/sort.h>
#include <linux/sched/clock.h>
-/* Default is -1; we skip past it for struct cached_dev's cache mode */
+/* Default is 0 ("writethrough") */
static const char * const bch_cache_modes[] = {
"writethrough",
"writeback",
@@ -25,7 +25,7 @@ static const char * const bch_cache_modes[] = {
NULL
};
-/* Default is -1; we skip past it for stop_when_cache_set_failed */
+/* Default is 0 ("auto") */
static const char * const bch_stop_on_failure_modes[] = {
"auto",
"always",
@@ -88,6 +88,8 @@ read_attribute(writeback_keys_done);
read_attribute(writeback_keys_failed);
read_attribute(io_errors);
read_attribute(congested);
+read_attribute(cutoff_writeback);
+read_attribute(cutoff_writeback_sync);
rw_attribute(congested_read_threshold_us);
rw_attribute(congested_write_threshold_us);
@@ -128,6 +130,7 @@ rw_attribute(expensive_debug_checks);
rw_attribute(cache_replacement_policy);
rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
+rw_attribute(gc_after_writeback);
rw_attribute(size);
static ssize_t bch_snprint_string_list(char *buf,
@@ -264,7 +267,8 @@ STORE(__cached_dev)
d_strtoul(writeback_running);
d_strtoul(writeback_delay);
- sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
+ sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent,
+ 0, bch_cutoff_writeback);
if (attr == &sysfs_writeback_rate) {
ssize_t ret;
@@ -384,8 +388,25 @@ STORE(bch_cached_dev)
mutex_lock(&bch_register_lock);
size = __cached_dev_store(kobj, attr, buf, size);
- if (attr == &sysfs_writeback_running)
- bch_writeback_queue(dc);
+ if (attr == &sysfs_writeback_running) {
+ /* dc->writeback_running changed in __cached_dev_store() */
+ if (IS_ERR_OR_NULL(dc->writeback_thread)) {
+ /*
+ * reject setting it to 1 via sysfs if writeback
+ * kthread is not created yet.
+ */
+ if (dc->writeback_running) {
+ dc->writeback_running = false;
+ pr_err("%s: failed to run non-existent writeback thread",
+ dc->disk.disk->disk_name);
+ }
+ } else
+ /*
+ * writeback kthread will check if dc->writeback_running
+ * is true or false.
+ */
+ bch_writeback_queue(dc);
+ }
if (attr == &sysfs_writeback_percent)
if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
@@ -668,6 +689,9 @@ SHOW(__bch_cache_set)
sysfs_print(congested_write_threshold_us,
c->congested_write_threshold_us);
+ sysfs_print(cutoff_writeback, bch_cutoff_writeback);
+ sysfs_print(cutoff_writeback_sync, bch_cutoff_writeback_sync);
+
sysfs_print(active_journal_entries, fifo_used(&c->journal.pin));
sysfs_printf(verify, "%i", c->verify);
sysfs_printf(key_merging_disabled, "%i", c->key_merging_disabled);
@@ -676,6 +700,7 @@ SHOW(__bch_cache_set)
sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
+ sysfs_printf(gc_after_writeback, "%i", c->gc_after_writeback);
sysfs_printf(io_disable, "%i",
test_bit(CACHE_SET_IO_DISABLE, &c->flags));
@@ -725,21 +750,8 @@ STORE(__bch_cache_set)
bch_cache_accounting_clear(&c->accounting);
}
- if (attr == &sysfs_trigger_gc) {
- /*
- * Garbage collection thread only works when sectors_to_gc < 0,
- * when users write to sysfs entry trigger_gc, most of time
- * they want to forcibly triger gargage collection. Here -1 is
- * set to c->sectors_to_gc, to make gc_should_run() give a
- * chance to permit gc thread to run. "give a chance" means
- * before going into gc_should_run(), there is still chance
- * that c->sectors_to_gc being set to other positive value. So
- * writing sysfs entry trigger_gc won't always make sure gc
- * thread takes effect.
- */
- atomic_set(&c->sectors_to_gc, -1);
- wake_up_gc(c);
- }
+ if (attr == &sysfs_trigger_gc)
+ force_wake_up_gc(c);
if (attr == &sysfs_prune_cache) {
struct shrink_control sc;
@@ -789,6 +801,12 @@ STORE(__bch_cache_set)
sysfs_strtoul(gc_always_rewrite, c->gc_always_rewrite);
sysfs_strtoul(btree_shrinker_disabled, c->shrinker_disabled);
sysfs_strtoul(copy_gc_enabled, c->copy_gc_enabled);
+ /*
+ * write gc_after_writeback here may overwrite an already set
+ * BCH_DO_AUTO_GC, it doesn't matter because this flag will be
+ * set in next chance.
+ */
+ sysfs_strtoul_clamp(gc_after_writeback, c->gc_after_writeback, 0, 1);
return size;
}
@@ -869,7 +887,10 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_gc_always_rewrite,
&sysfs_btree_shrinker_disabled,
&sysfs_copy_gc_enabled,
+ &sysfs_gc_after_writeback,
&sysfs_io_disable,
+ &sysfs_cutoff_writeback,
+ &sysfs_cutoff_writeback_sync,
NULL
};
KTYPE(bch_cache_set_internal);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 08c3a9f9676c..73f0efac2b9f 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -17,6 +17,15 @@
#include <linux/sched/clock.h>
#include <trace/events/bcache.h>
+static void update_gc_after_writeback(struct cache_set *c)
+{
+ if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) ||
+ c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD)
+ return;
+
+ c->gc_after_writeback |= BCH_DO_AUTO_GC;
+}
+
/* Rate limiting */
static uint64_t __calc_target_rate(struct cached_dev *dc)
{
@@ -191,6 +200,7 @@ static void update_writeback_rate(struct work_struct *work)
if (!set_at_max_writeback_rate(c, dc)) {
down_read(&dc->writeback_lock);
__update_writeback_rate(dc);
+ update_gc_after_writeback(c);
up_read(&dc->writeback_lock);
}
}
@@ -689,6 +699,23 @@ static int bch_writeback_thread(void *arg)
up_write(&dc->writeback_lock);
break;
}
+
+ /*
+ * When dirty data rate is high (e.g. 50%+), there might
+ * be heavy buckets fragmentation after writeback
+ * finished, which hurts following write performance.
+ * If users really care about write performance they
+ * may set BCH_ENABLE_AUTO_GC via sysfs, then when
+ * BCH_DO_AUTO_GC is set, garbage collection thread
+ * will be wake up here. After moving gc, the shrunk
+ * btree and discarded free buckets SSD space may be
+ * helpful for following write requests.
+ */
+ if (c->gc_after_writeback ==
+ (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) {
+ c->gc_after_writeback &= ~BCH_DO_AUTO_GC;
+ force_wake_up_gc(c);
+ }
}
up_write(&dc->writeback_lock);
@@ -777,7 +804,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
bch_keybuf_init(&dc->writeback_keys);
dc->writeback_metadata = true;
- dc->writeback_running = true;
+ dc->writeback_running = false;
dc->writeback_percent = 10;
dc->writeback_delay = 30;
atomic_long_set(&dc->writeback_rate.rate, 1024);
@@ -805,6 +832,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
cached_dev_put(dc);
return PTR_ERR(dc->writeback_thread);
}
+ dc->writeback_running = true;
WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
schedule_delayed_work(&dc->writeback_rate_update,
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index d2b9fdbc8994..6a743d3bb338 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -5,12 +5,17 @@
#define CUTOFF_WRITEBACK 40
#define CUTOFF_WRITEBACK_SYNC 70
+#define CUTOFF_WRITEBACK_MAX 70
+#define CUTOFF_WRITEBACK_SYNC_MAX 90
+
#define MAX_WRITEBACKS_IN_PASS 5
#define MAX_WRITESIZE_IN_PASS 5000 /* *512b */
#define WRITEBACK_RATE_UPDATE_SECS_MAX 60
#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT 5
+#define BCH_AUTO_GC_DIRTY_THRESHOLD 50
+
/*
* 14 (16384ths) is chosen here as something that each backing device
* should be a reasonable fraction of the share, and not to blow up
@@ -53,6 +58,9 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
}
}
+extern unsigned int bch_cutoff_writeback;
+extern unsigned int bch_cutoff_writeback_sync;
+
static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
unsigned int cache_mode, bool would_skip)
{
@@ -60,7 +68,7 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
if (cache_mode != CACHE_MODE_WRITEBACK ||
test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
- in_use > CUTOFF_WRITEBACK_SYNC)
+ in_use > bch_cutoff_writeback_sync)
return false;
if (dc->partial_stripes_expensive &&
@@ -73,7 +81,7 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
return (op_is_sync(bio->bi_opf) ||
bio->bi_opf & (REQ_META|REQ_PRIO) ||
- in_use <= CUTOFF_WRITEBACK);
+ in_use <= bch_cutoff_writeback);
}
static inline void bch_writeback_queue(struct cached_dev *dc)
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 8b0b628e5d1c..1ecef76225a1 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -65,7 +65,7 @@
/*
* Linking of buffers:
- * All buffers are linked to cache_hash with their hash_list field.
+ * All buffers are linked to buffer_tree with their node field.
*
* Clean buffers that are not being written (B_WRITING not set)
* are linked to lru[LIST_CLEAN] with their lru_list field.
@@ -457,7 +457,7 @@ static void free_buffer(struct dm_buffer *b)
}
/*
- * Link buffer to the hash list and clean or dirty queue.
+ * Link buffer to the buffer tree and clean or dirty queue.
*/
static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
{
@@ -472,7 +472,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
}
/*
- * Unlink buffer from the hash list and dirty or clean queue.
+ * Unlink buffer from the buffer tree and dirty or clean queue.
*/
static void __unlink_buffer(struct dm_buffer *b)
{
@@ -993,7 +993,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
/*
* We've had a period where the mutex was unlocked, so need to
- * recheck the hash table.
+ * recheck the buffer tree.
*/
b = __find(c, block);
if (b) {
@@ -1327,7 +1327,7 @@ again:
EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
/*
- * Use dm-io to send and empty barrier flush the device.
+ * Use dm-io to send an empty barrier to flush the device.
*/
int dm_bufio_issue_flush(struct dm_bufio_client *c)
{
@@ -1356,7 +1356,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
* Then, we write the buffer to the original location if it was dirty.
*
* Then, if we are the only one who is holding the buffer, relink the buffer
- * in the hash queue for the new location.
+ * in the buffer tree for the new location.
*
* If there was someone else holding the buffer, we write it to the new
* location but not relink it, because that other user needs to have the buffer
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 224d44503a06..95c6d86ab5e8 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -65,7 +65,6 @@ struct mapped_device {
*/
struct work_struct work;
wait_queue_head_t wait;
- atomic_t pending[2];
spinlock_t deferred_lock;
struct bio_list deferred;
@@ -107,9 +106,6 @@ struct mapped_device {
struct block_device *bdev;
- /* zero-length flush that will be cloned and submitted to targets */
- struct bio flush_bio;
-
struct dm_stats stats;
/* for blk-mq request-based DM support */
@@ -119,7 +115,6 @@ struct mapped_device {
struct srcu_struct io_barrier;
};
-int md_in_flight(struct mapped_device *md);
void disable_write_same(struct mapped_device *md);
void disable_write_zeroes(struct mapped_device *md);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index a8c32de29e3f..0ff22159a0ca 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -49,7 +49,7 @@ struct convert_context {
struct bio *bio_out;
struct bvec_iter iter_in;
struct bvec_iter iter_out;
- sector_t cc_sector;
+ u64 cc_sector;
atomic_t cc_pending;
union {
struct skcipher_request *req;
@@ -81,7 +81,7 @@ struct dm_crypt_request {
struct convert_context *ctx;
struct scatterlist sg_in[4];
struct scatterlist sg_out[4];
- sector_t iv_sector;
+ u64 iv_sector;
};
struct crypt_config;
@@ -160,7 +160,7 @@ struct crypt_config {
struct iv_lmk_private lmk;
struct iv_tcw_private tcw;
} iv_gen_private;
- sector_t iv_offset;
+ u64 iv_offset;
unsigned int iv_size;
unsigned short int sector_size;
unsigned char sector_shift;
@@ -1885,6 +1885,13 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode)
}
}
+ /*
+ * dm-crypt performance can vary greatly depending on which crypto
+ * algorithm implementation is used. Help people debug performance
+ * problems by logging the ->cra_driver_name.
+ */
+ DMINFO("%s using implementation \"%s\"", ciphermode,
+ crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name);
return 0;
}
@@ -1903,6 +1910,8 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode)
return err;
}
+ DMINFO("%s using implementation \"%s\"", ciphermode,
+ crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name);
return 0;
}
@@ -2781,7 +2790,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ret = -EINVAL;
- if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) {
+ if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
ti->error = "Invalid device sector";
goto bad;
}
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 2fb7bb4304ad..fddffe251bf6 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -141,7 +141,7 @@ static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **a
unsigned long long tmpll;
char dummy;
- if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) {
+ if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
ti->error = "Invalid device sector";
return -EINVAL;
}
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 3cb97fa4c11d..a9bc518156f2 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -213,7 +213,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
devname = dm_shift_arg(&as);
r = -EINVAL;
- if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) {
+ if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
ti->error = "Invalid device sector";
goto bad;
}
@@ -287,20 +287,31 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
{
- unsigned bio_bytes = bio_cur_bytes(bio);
- char *data = bio_data(bio);
+ unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1;
+
+ struct bvec_iter iter;
+ struct bio_vec bvec;
+
+ if (!bio_has_data(bio))
+ return;
/*
- * Overwrite the Nth byte of the data returned.
+ * Overwrite the Nth byte of the bio's data, on whichever page
+ * it falls.
*/
- if (data && bio_bytes >= fc->corrupt_bio_byte) {
- data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value;
-
- DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
- "(rw=%c bi_opf=%u bi_sector=%llu cur_bytes=%u)\n",
- bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
- (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf,
- (unsigned long long)bio->bi_iter.bi_sector, bio_bytes);
+ bio_for_each_segment(bvec, bio, iter) {
+ if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
+ char *segment = (page_address(bio_iter_page(bio, iter))
+ + bio_iter_offset(bio, iter));
+ segment[corrupt_bio_byte] = fc->corrupt_bio_value;
+ DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
+ "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
+ bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
+ (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf,
+ (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size);
+ break;
+ }
+ corrupt_bio_byte -= bio_iter_len(bio, iter);
}
}
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 62baa3214cc7..457200ca6287 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -3460,7 +3460,7 @@ try_smaller_buffer:
ti->error = "Recalculate is only valid with internal hash";
goto bad;
}
- ic->recalc_wq = alloc_workqueue("dm-intergrity-recalc", WQ_MEM_RECLAIM, 1);
+ ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
if (!ic->recalc_wq ) {
ti->error = "Cannot allocate workqueue";
r = -ENOMEM;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 2fc4213e02b5..671c24332802 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -56,15 +56,17 @@ struct dm_kcopyd_client {
atomic_t nr_jobs;
/*
- * We maintain three lists of jobs:
+ * We maintain four lists of jobs:
*
* i) jobs waiting for pages
* ii) jobs that have pages, and are waiting for the io to be issued.
- * iii) jobs that have completed.
+ * iii) jobs that don't need to do any IO and just run a callback
+ * iv) jobs that have completed.
*
- * All three of these are protected by job_lock.
+ * All four of these are protected by job_lock.
*/
spinlock_t job_lock;
+ struct list_head callback_jobs;
struct list_head complete_jobs;
struct list_head io_jobs;
struct list_head pages_jobs;
@@ -625,6 +627,7 @@ static void do_work(struct work_struct *work)
struct dm_kcopyd_client *kc = container_of(work,
struct dm_kcopyd_client, kcopyd_work);
struct blk_plug plug;
+ unsigned long flags;
/*
* The order that these are called is *very* important.
@@ -633,6 +636,10 @@ static void do_work(struct work_struct *work)
* list. io jobs call wake when they complete and it all
* starts again.
*/
+ spin_lock_irqsave(&kc->job_lock, flags);
+ list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs);
+ spin_unlock_irqrestore(&kc->job_lock, flags);
+
blk_start_plug(&plug);
process_jobs(&kc->complete_jobs, kc, run_complete_job);
process_jobs(&kc->pages_jobs, kc, run_pages_job);
@@ -650,7 +657,7 @@ static void dispatch_job(struct kcopyd_job *job)
struct dm_kcopyd_client *kc = job->kc;
atomic_inc(&kc->nr_jobs);
if (unlikely(!job->source.count))
- push(&kc->complete_jobs, job);
+ push(&kc->callback_jobs, job);
else if (job->pages == &zero_page_list)
push(&kc->io_jobs, job);
else
@@ -858,7 +865,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
job->read_err = read_err;
job->write_err = write_err;
- push(&kc->complete_jobs, job);
+ push(&kc->callback_jobs, job);
wake(kc);
}
EXPORT_SYMBOL(dm_kcopyd_do_callback);
@@ -888,6 +895,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro
return ERR_PTR(-ENOMEM);
spin_lock_init(&kc->job_lock);
+ INIT_LIST_HEAD(&kc->callback_jobs);
INIT_LIST_HEAD(&kc->complete_jobs);
INIT_LIST_HEAD(&kc->io_jobs);
INIT_LIST_HEAD(&kc->pages_jobs);
@@ -939,6 +947,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
/* Wait for completion of all jobs submitted by this client. */
wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
+ BUG_ON(!list_empty(&kc->callback_jobs));
BUG_ON(!list_empty(&kc->complete_jobs));
BUG_ON(!list_empty(&kc->io_jobs));
BUG_ON(!list_empty(&kc->pages_jobs));
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 8d7ddee6ac4d..ad980a38fb1e 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -45,7 +45,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ret = -EINVAL;
- if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) {
+ if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) {
ti->error = "Invalid device sector";
goto bad;
}
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d6a66921daf4..2ee5e357a0a7 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1211,14 +1211,16 @@ static void flush_multipath_work(struct multipath *m)
set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
smp_mb__after_atomic();
- flush_workqueue(kmpath_handlerd);
+ if (atomic_read(&m->pg_init_in_progress))
+ flush_workqueue(kmpath_handlerd);
multipath_wait_for_pg_init_completion(m);
clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
smp_mb__after_atomic();
}
- flush_workqueue(kmultipathd);
+ if (m->queue_mode == DM_TYPE_BIO_BASED)
+ flush_work(&m->process_queued_bios);
flush_work(&m->trigger_event);
}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index e1dd1622a290..adcfe8ae10aa 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3690,8 +3690,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
}
- } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+ } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
return -EBUSY;
else if (!strcasecmp(argv[0], "resync"))
; /* MD_RECOVERY_NEEDED set below */
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 79eab1071ec2..5a51151f680d 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -943,7 +943,8 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
char dummy;
int ret;
- if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) {
+ if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1 ||
+ offset != (sector_t)offset) {
ti->error = "Invalid offset";
return -EINVAL;
}
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 7cd36e4d1310..4eb5f8c56535 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -43,7 +43,7 @@ static unsigned dm_get_blk_mq_queue_depth(void)
int dm_request_based(struct mapped_device *md)
{
- return queue_is_rq_based(md->queue);
+ return queue_is_mq(md->queue);
}
void dm_start_queue(struct request_queue *q)
@@ -128,12 +128,10 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
* the md may be freed in dm_put() at the end of this function.
* Or do dm_get() before calling this function and dm_put() later.
*/
-static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
+static void rq_completed(struct mapped_device *md)
{
- atomic_dec(&md->pending[rw]);
-
/* nudge anyone waiting on suspend queue */
- if (!md_in_flight(md))
+ if (unlikely(waitqueue_active(&md->wait)))
wake_up(&md->wait);
/*
@@ -149,7 +147,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
*/
static void dm_end_request(struct request *clone, blk_status_t error)
{
- int rw = rq_data_dir(clone);
struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md;
struct request *rq = tio->orig;
@@ -159,7 +156,7 @@ static void dm_end_request(struct request *clone, blk_status_t error)
rq_end_stats(md, rq);
blk_mq_end_request(rq, error);
- rq_completed(md, rw, true);
+ rq_completed(md);
}
static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
@@ -183,7 +180,6 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
{
struct mapped_device *md = tio->md;
struct request *rq = tio->orig;
- int rw = rq_data_dir(rq);
unsigned long delay_ms = delay_requeue ? 100 : 0;
rq_end_stats(md, rq);
@@ -193,7 +189,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
}
dm_mq_delay_requeue_request(rq, delay_ms);
- rq_completed(md, rw, false);
+ rq_completed(md);
}
static void dm_done(struct request *clone, blk_status_t error, bool mapped)
@@ -248,15 +244,13 @@ static void dm_softirq_done(struct request *rq)
bool mapped = true;
struct dm_rq_target_io *tio = tio_from_request(rq);
struct request *clone = tio->clone;
- int rw;
if (!clone) {
struct mapped_device *md = tio->md;
rq_end_stats(md, rq);
- rw = rq_data_dir(rq);
blk_mq_end_request(rq, tio->error);
- rq_completed(md, rw, false);
+ rq_completed(md);
return;
}
@@ -378,7 +372,6 @@ static int map_request(struct dm_rq_target_io *tio)
blk_status_t ret;
r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
-check_again:
switch (r) {
case DM_MAPIO_SUBMITTED:
/* The target has taken the I/O to submit by itself later */
@@ -398,8 +391,7 @@ check_again:
blk_rq_unprep_clone(clone);
tio->ti->type->release_clone_rq(clone);
tio->clone = NULL;
- r = DM_MAPIO_REQUEUE;
- goto check_again;
+ return DM_MAPIO_REQUEUE;
}
break;
case DM_MAPIO_REQUEUE:
@@ -436,7 +428,6 @@ ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
blk_mq_start_request(orig);
- atomic_inc(&md->pending[rq_data_dir(orig)]);
if (unlikely(dm_stats_used(&md->stats))) {
struct dm_rq_target_io *tio = tio_from_request(orig);
@@ -510,7 +501,7 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
if (map_request(tio) == DM_MAPIO_REQUEUE) {
/* Undo dm_start_request() before requeuing */
rq_end_stats(md, rq);
- rq_completed(md, rq_data_dir(rq), false);
+ rq_completed(md);
return BLK_STS_RESOURCE;
}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ae4b33d10924..36805b12661e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -19,6 +19,7 @@
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
+#include <linux/semaphore.h>
#include "dm.h"
@@ -105,6 +106,9 @@ struct dm_snapshot {
/* The on disk metadata handler */
struct dm_exception_store *store;
+ /* Maximum number of in-flight COW jobs. */
+ struct semaphore cow_count;
+
struct dm_kcopyd_client *kcopyd_client;
/* Wait for events based on state_bits */
@@ -145,6 +149,19 @@ struct dm_snapshot {
#define RUNNING_MERGE 0
#define SHUTDOWN_MERGE 1
+/*
+ * Maximum number of chunks being copied on write.
+ *
+ * The value was decided experimentally as a trade-off between memory
+ * consumption, stalling the kernel's workqueues and maintaining a high enough
+ * throughput.
+ */
+#define DEFAULT_COW_THRESHOLD 2048
+
+static int cow_threshold = DEFAULT_COW_THRESHOLD;
+module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
+MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
+
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
"A percentage of time allocated for copy on write");
@@ -1190,6 +1207,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_hash_tables;
}
+ sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
+
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(s->kcopyd_client)) {
r = PTR_ERR(s->kcopyd_client);
@@ -1575,6 +1594,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
rb_link_node(&pe->out_of_order_node, parent, p);
rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
}
+ up(&s->cow_count);
}
/*
@@ -1598,6 +1618,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dest.count = src.count;
/* Hand over to kcopyd */
+ down(&s->cow_count);
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
}
@@ -1617,6 +1638,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
pe->full_bio = bio;
pe->full_bio_end_io = bio->bi_end_io;
+ down(&s->cow_count);
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
copy_callback, pe);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 9038c302d5c2..4b1be754cc41 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -919,12 +919,12 @@ static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev,
struct request_queue *q = bdev_get_queue(dev->bdev);
struct verify_rq_based_data *v = data;
- if (q->mq_ops)
+ if (queue_is_mq(q))
v->mq_count++;
else
v->sq_count++;
- return queue_is_rq_based(q);
+ return queue_is_mq(q);
}
static int dm_table_determine_type(struct dm_table *t)
@@ -1927,6 +1927,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
*/
if (blk_queue_is_zoned(q))
blk_revalidate_disk_zones(t->md->disk);
+
+ /* Allow reads to exceed readahead limits */
+ q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
}
unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
index 954b7ab4e684..e673dacf6418 100644
--- a/drivers/md/dm-unstripe.c
+++ b/drivers/md/dm-unstripe.c
@@ -78,7 +78,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto err;
}
- if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1) {
+ if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) {
ti->error = "Invalid striped device offset";
goto err;
}
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index fc65f0dedf7f..f4c31ffaa88e 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -1040,6 +1040,15 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
v->tfm = NULL;
goto bad;
}
+
+ /*
+ * dm-verity performance can vary greatly depending on which hash
+ * algorithm implementation is used. Help people debug performance
+ * problems by logging the ->cra_driver_name.
+ */
+ DMINFO("%s using implementation \"%s\"", v->alg_name,
+ crypto_hash_alg_common(v->tfm)->base.cra_driver_name);
+
v->digest_size = crypto_ahash_digestsize(v->tfm);
if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
ti->error = "Digest size too big";
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2d50eec94cd7..2b8cee35e4d5 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -2061,7 +2061,7 @@ invalid_optional:
if (IS_ERR(wc->flush_thread)) {
r = PTR_ERR(wc->flush_thread);
wc->flush_thread = NULL;
- ti->error = "Couldn't spawn endio thread";
+ ti->error = "Couldn't spawn flush thread";
goto bad;
}
wake_up_process(wc->flush_thread);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 63a7c416b224..d67c95ef8d7e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -646,26 +646,38 @@ static void free_tio(struct dm_target_io *tio)
bio_put(&tio->clone);
}
-int md_in_flight(struct mapped_device *md)
+static bool md_in_flight_bios(struct mapped_device *md)
{
- return atomic_read(&md->pending[READ]) +
- atomic_read(&md->pending[WRITE]);
+ int cpu;
+ struct hd_struct *part = &dm_disk(md)->part0;
+ long sum = 0;
+
+ for_each_possible_cpu(cpu) {
+ sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
+ sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
+ }
+
+ return sum != 0;
+}
+
+static bool md_in_flight(struct mapped_device *md)
+{
+ if (queue_is_mq(md->queue))
+ return blk_mq_queue_inflight(md->queue);
+ else
+ return md_in_flight_bios(md);
}
static void start_io_acct(struct dm_io *io)
{
struct mapped_device *md = io->md;
struct bio *bio = io->orig_bio;
- int rw = bio_data_dir(bio);
io->start_time = jiffies;
generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
&dm_disk(md)->part0);
- atomic_set(&dm_disk(md)->part0.in_flight[rw],
- atomic_inc_return(&md->pending[rw]));
-
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio),
@@ -677,8 +689,6 @@ static void end_io_acct(struct dm_io *io)
struct mapped_device *md = io->md;
struct bio *bio = io->orig_bio;
unsigned long duration = jiffies - io->start_time;
- int pending;
- int rw = bio_data_dir(bio);
generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
io->start_time);
@@ -688,16 +698,8 @@ static void end_io_acct(struct dm_io *io)
bio->bi_iter.bi_sector, bio_sectors(bio),
true, duration, &io->stats_aux);
- /*
- * After this is decremented the bio must not be touched if it is
- * a flush.
- */
- pending = atomic_dec_return(&md->pending[rw]);
- atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
- pending += atomic_read(&md->pending[rw^0x1]);
-
/* nudge anyone waiting on suspend queue */
- if (!pending)
+ if (unlikely(waitqueue_active(&md->wait)))
wake_up(&md->wait);
}
@@ -1417,10 +1419,21 @@ static int __send_empty_flush(struct clone_info *ci)
unsigned target_nr = 0;
struct dm_target *ti;
+ /*
+ * Empty flush uses a statically initialized bio, as the base for
+ * cloning. However, blkg association requires that a bdev is
+ * associated with a gendisk, which doesn't happen until the bdev is
+ * opened. So, blkg association is done at issue time of the flush
+ * rather than when the device is created in alloc_dev().
+ */
+ bio_set_dev(ci->bio, ci->io->md->bdev);
+
BUG_ON(bio_has_data(ci->bio));
while ((ti = dm_table_get_target(ci->map, target_nr++)))
__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
+ bio_disassociate_blkg(ci->bio);
+
return 0;
}
@@ -1473,11 +1486,9 @@ static bool is_split_required_for_discard(struct dm_target *ti)
}
static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
- get_num_bios_fn get_num_bios,
- is_split_required_fn is_split_required)
+ unsigned num_bios, bool is_split_required)
{
unsigned len;
- unsigned num_bios;
/*
* Even though the device advertised support for this type of
@@ -1485,11 +1496,10 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
* reconfiguration might also have changed that since the
* check was performed.
*/
- num_bios = get_num_bios ? get_num_bios(ti) : 0;
if (!num_bios)
return -EOPNOTSUPP;
- if (is_split_required && !is_split_required(ti))
+ if (!is_split_required)
len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
else
len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
@@ -1504,23 +1514,23 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
static int __send_discard(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_discard_bios,
- is_split_required_for_discard);
+ return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti),
+ is_split_required_for_discard(ti));
}
static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios, NULL);
+ return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false);
}
static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL);
+ return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false);
}
static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL);
+ return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
}
static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
@@ -1598,7 +1608,16 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
- ci.bio = &ci.io->md->flush_bio;
+ struct bio flush_bio;
+
+ /*
+ * Use an on-stack bio for this, it's safe since we don't
+ * need to reference it after submit. It's just used as
+ * the basis for the clone(s).
+ */
+ bio_init(&flush_bio, NULL, 0);
+ flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+ ci.bio = &flush_bio;
ci.sector_count = 0;
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
@@ -1654,7 +1673,16 @@ static blk_qc_t __process_bio(struct mapped_device *md,
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
- ci.bio = &ci.io->md->flush_bio;
+ struct bio flush_bio;
+
+ /*
+ * Use an on-stack bio for this, it's safe since we don't
+ * need to reference it after submit. It's just used as
+ * the basis for the clone(s).
+ */
+ bio_init(&flush_bio, NULL, 0);
+ flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+ ci.bio = &flush_bio;
ci.sector_count = 0;
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
@@ -1685,10 +1713,7 @@ out:
return ret;
}
-typedef blk_qc_t (process_bio_fn)(struct mapped_device *, struct dm_table *, struct bio *);
-
-static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio,
- process_bio_fn process_bio)
+static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
{
struct mapped_device *md = q->queuedata;
blk_qc_t ret = BLK_QC_T_NONE;
@@ -1708,26 +1733,15 @@ static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio,
return ret;
}
- ret = process_bio(md, map, bio);
+ if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
+ ret = __process_bio(md, map, bio);
+ else
+ ret = __split_and_process_bio(md, map, bio);
dm_put_live_table(md, srcu_idx);
return ret;
}
-/*
- * The request function that remaps the bio to one target and
- * splits off any remainder.
- */
-static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
-{
- return __dm_make_request(q, bio, __split_and_process_bio);
-}
-
-static blk_qc_t dm_make_request_nvme(struct request_queue *q, struct bio *bio)
-{
- return __dm_make_request(q, bio, __process_bio);
-}
-
static int dm_any_congested(void *congested_data, int bdi_bits)
{
int r = bdi_bits;
@@ -1898,7 +1912,7 @@ static struct mapped_device *alloc_dev(int minor)
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
- md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL);
+ md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
if (!md->queue)
goto bad;
md->queue->queuedata = md;
@@ -1908,8 +1922,6 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->disk)
goto bad;
- atomic_set(&md->pending[0], 0);
- atomic_set(&md->pending[1], 0);
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq);
@@ -1940,10 +1952,6 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->bdev)
goto bad;
- bio_init(&md->flush_bio, NULL, 0);
- bio_set_dev(&md->flush_bio, md->bdev);
- md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
-
dm_stats_init(&md->stats);
/* Populate the mapping, nobody knows we exist yet */
@@ -2221,12 +2229,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
break;
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
- dm_init_normal_md_queue(md);
- blk_queue_make_request(md->queue, dm_make_request);
- break;
case DM_TYPE_NVME_BIO_BASED:
dm_init_normal_md_queue(md);
- blk_queue_make_request(md->queue, dm_make_request_nvme);
+ blk_queue_make_request(md->queue, dm_make_request);
break;
case DM_TYPE_NONE:
WARN_ON_ONCE(true);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fc488cb30a94..9a0a1e0934d5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -334,7 +334,6 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
const int sgrp = op_stat_group(bio_op(bio));
struct mddev *mddev = q->queuedata;
unsigned int sectors;
- int cpu;
blk_queue_split(q, &bio);
@@ -359,9 +358,9 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
md_handle_request(mddev, bio);
- cpu = part_stat_lock();
- part_stat_inc(cpu, &mddev->gendisk->part0, ios[sgrp]);
- part_stat_add(cpu, &mddev->gendisk->part0, sectors[sgrp], sectors);
+ part_stat_lock();
+ part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
+ part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
part_stat_unlock();
return BLK_QC_T_NONE;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ac1cffd2a09b..f3fb5bb8c82a 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
!discard_bio)
continue;
bio_chain(discard_bio, bio);
- bio_clone_blkcg_association(discard_bio, bio);
+ bio_clone_blkg_association(discard_bio, bio);
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(rdev->bdev),
discard_bio, disk_devt(mddev->gendisk),
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index 76382c858c35..1246d69ba187 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -18,6 +18,7 @@
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/pm_runtime.h>
#define DRIVER_NAME "memstick"
@@ -436,6 +437,7 @@ static void memstick_check(struct work_struct *work)
struct memstick_dev *card;
dev_dbg(&host->dev, "memstick_check started\n");
+ pm_runtime_get_noresume(host->dev.parent);
mutex_lock(&host->lock);
if (!host->card) {
if (memstick_power_on(host))
@@ -479,6 +481,7 @@ out_power_off:
host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_OFF);
mutex_unlock(&host->lock);
+ pm_runtime_put(host->dev.parent);
dev_dbg(&host->dev, "memstick_check finished\n");
}
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 8a02f11076f9..82daccc9ea62 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -15,7 +15,7 @@
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
#include <linux/module.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/memstick.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
@@ -1873,69 +1873,65 @@ static void msb_io_work(struct work_struct *work)
struct msb_data *msb = container_of(work, struct msb_data, io_work);
int page, error, len;
sector_t lba;
- unsigned long flags;
struct scatterlist *sg = msb->prealloc_sg;
+ struct request *req;
dbg_verbose("IO: work started");
while (1) {
- spin_lock_irqsave(&msb->q_lock, flags);
+ spin_lock_irq(&msb->q_lock);
if (msb->need_flush_cache) {
msb->need_flush_cache = false;
- spin_unlock_irqrestore(&msb->q_lock, flags);
+ spin_unlock_irq(&msb->q_lock);
msb_cache_flush(msb);
continue;
}
- if (!msb->req) {
- msb->req = blk_fetch_request(msb->queue);
- if (!msb->req) {
- dbg_verbose("IO: no more requests exiting");
- spin_unlock_irqrestore(&msb->q_lock, flags);
- return;
- }
+ req = msb->req;
+ if (!req) {
+ dbg_verbose("IO: no more requests exiting");
+ spin_unlock_irq(&msb->q_lock);
+ return;
}
- spin_unlock_irqrestore(&msb->q_lock, flags);
-
- /* If card was removed meanwhile */
- if (!msb->req)
- return;
+ spin_unlock_irq(&msb->q_lock);
/* process the request */
dbg_verbose("IO: processing new request");
- blk_rq_map_sg(msb->queue, msb->req, sg);
+ blk_rq_map_sg(msb->queue, req, sg);
- lba = blk_rq_pos(msb->req);
+ lba = blk_rq_pos(req);
sector_div(lba, msb->page_size / 512);
page = sector_div(lba, msb->pages_in_block);
if (rq_data_dir(msb->req) == READ)
error = msb_do_read_request(msb, lba, page, sg,
- blk_rq_bytes(msb->req), &len);
+ blk_rq_bytes(req), &len);
else
error = msb_do_write_request(msb, lba, page, sg,
- blk_rq_bytes(msb->req), &len);
-
- spin_lock_irqsave(&msb->q_lock, flags);
+ blk_rq_bytes(req), &len);
- if (len)
- if (!__blk_end_request(msb->req, BLK_STS_OK, len))
- msb->req = NULL;
+ if (len && !blk_update_request(req, BLK_STS_OK, len)) {
+ __blk_mq_end_request(req, BLK_STS_OK);
+ spin_lock_irq(&msb->q_lock);
+ msb->req = NULL;
+ spin_unlock_irq(&msb->q_lock);
+ }
if (error && msb->req) {
blk_status_t ret = errno_to_blk_status(error);
+
dbg_verbose("IO: ending one sector of the request with error");
- if (!__blk_end_request(msb->req, ret, msb->page_size))
- msb->req = NULL;
+ blk_mq_end_request(req, ret);
+ spin_lock_irq(&msb->q_lock);
+ msb->req = NULL;
+ spin_unlock_irq(&msb->q_lock);
}
if (msb->req)
dbg_verbose("IO: request still pending");
-
- spin_unlock_irqrestore(&msb->q_lock, flags);
}
}
@@ -2002,29 +1998,40 @@ static int msb_bd_getgeo(struct block_device *bdev,
return 0;
}
-static void msb_submit_req(struct request_queue *q)
+static blk_status_t msb_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- struct memstick_dev *card = q->queuedata;
+ struct memstick_dev *card = hctx->queue->queuedata;
struct msb_data *msb = memstick_get_drvdata(card);
- struct request *req = NULL;
+ struct request *req = bd->rq;
dbg_verbose("Submit request");
+ spin_lock_irq(&msb->q_lock);
+
if (msb->card_dead) {
dbg("Refusing requests on removed card");
WARN_ON(!msb->io_queue_stopped);
- while ((req = blk_fetch_request(q)) != NULL)
- __blk_end_request_all(req, BLK_STS_IOERR);
- return;
+ spin_unlock_irq(&msb->q_lock);
+ blk_mq_start_request(req);
+ return BLK_STS_IOERR;
}
- if (msb->req)
- return;
+ if (msb->req) {
+ spin_unlock_irq(&msb->q_lock);
+ return BLK_STS_DEV_RESOURCE;
+ }
+
+ blk_mq_start_request(req);
+ msb->req = req;
if (!msb->io_queue_stopped)
queue_work(msb->io_queue, &msb->io_work);
+
+ spin_unlock_irq(&msb->q_lock);
+ return BLK_STS_OK;
}
static int msb_check_card(struct memstick_dev *card)
@@ -2040,21 +2047,20 @@ static void msb_stop(struct memstick_dev *card)
dbg("Stopping all msblock IO");
+ blk_mq_stop_hw_queues(msb->queue);
spin_lock_irqsave(&msb->q_lock, flags);
- blk_stop_queue(msb->queue);
msb->io_queue_stopped = true;
spin_unlock_irqrestore(&msb->q_lock, flags);
del_timer_sync(&msb->cache_flush_timer);
flush_workqueue(msb->io_queue);
+ spin_lock_irqsave(&msb->q_lock, flags);
if (msb->req) {
- spin_lock_irqsave(&msb->q_lock, flags);
- blk_requeue_request(msb->queue, msb->req);
+ blk_mq_requeue_request(msb->req, false);
msb->req = NULL;
- spin_unlock_irqrestore(&msb->q_lock, flags);
}
-
+ spin_unlock_irqrestore(&msb->q_lock, flags);
}
static void msb_start(struct memstick_dev *card)
@@ -2077,9 +2083,7 @@ static void msb_start(struct memstick_dev *card)
msb->need_flush_cache = true;
msb->io_queue_stopped = false;
- spin_lock_irqsave(&msb->q_lock, flags);
- blk_start_queue(msb->queue);
- spin_unlock_irqrestore(&msb->q_lock, flags);
+ blk_mq_start_hw_queues(msb->queue);
queue_work(msb->io_queue, &msb->io_work);
@@ -2092,6 +2096,10 @@ static const struct block_device_operations msb_bdops = {
.owner = THIS_MODULE
};
+static const struct blk_mq_ops msb_mq_ops = {
+ .queue_rq = msb_queue_rq,
+};
+
/* Registers the block device */
static int msb_init_disk(struct memstick_dev *card)
{
@@ -2112,9 +2120,11 @@ static int msb_init_disk(struct memstick_dev *card)
goto out_release_id;
}
- msb->queue = blk_init_queue(msb_submit_req, &msb->q_lock);
- if (!msb->queue) {
- rc = -ENOMEM;
+ msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &msb_mq_ops, 2,
+ BLK_MQ_F_SHOULD_MERGE);
+ if (IS_ERR(msb->queue)) {
+ rc = PTR_ERR(msb->queue);
+ msb->queue = NULL;
goto out_put_disk;
}
@@ -2202,12 +2212,13 @@ static void msb_remove(struct memstick_dev *card)
/* Take care of unhandled + new requests from now on */
spin_lock_irqsave(&msb->q_lock, flags);
msb->card_dead = true;
- blk_start_queue(msb->queue);
spin_unlock_irqrestore(&msb->q_lock, flags);
+ blk_mq_start_hw_queues(msb->queue);
/* Remove the disk */
del_gendisk(msb->disk);
blk_cleanup_queue(msb->queue);
+ blk_mq_free_tag_set(&msb->tag_set);
msb->queue = NULL;
mutex_lock(&msb_disk_lock);
diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h
index 53962c3b21df..9ba84e0ced63 100644
--- a/drivers/memstick/core/ms_block.h
+++ b/drivers/memstick/core/ms_block.h
@@ -152,6 +152,7 @@ struct msb_data {
struct gendisk *disk;
struct request_queue *queue;
spinlock_t q_lock;
+ struct blk_mq_tag_set tag_set;
struct hd_geometry geometry;
struct attribute_group attr_group;
struct request *req;
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 0cd30dcb6801..aba50ec98b4d 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -12,7 +12,7 @@
*
*/
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/kthread.h>
@@ -142,6 +142,7 @@ struct mspro_block_data {
struct gendisk *disk;
struct request_queue *queue;
struct request *block_req;
+ struct blk_mq_tag_set tag_set;
spinlock_t q_lock;
unsigned short page_size;
@@ -152,7 +153,6 @@ struct mspro_block_data {
unsigned char system;
unsigned char read_only:1,
eject:1,
- has_request:1,
data_dir:1,
active:1;
unsigned char transfer_cmd;
@@ -694,13 +694,12 @@ static void h_mspro_block_setup_cmd(struct memstick_dev *card, u64 offset,
/*** Data transfer ***/
-static int mspro_block_issue_req(struct memstick_dev *card, int chunk)
+static int mspro_block_issue_req(struct memstick_dev *card, bool chunk)
{
struct mspro_block_data *msb = memstick_get_drvdata(card);
u64 t_off;
unsigned int count;
-try_again:
while (chunk) {
msb->current_page = 0;
msb->current_seg = 0;
@@ -709,9 +708,17 @@ try_again:
msb->req_sg);
if (!msb->seg_count) {
- chunk = __blk_end_request_cur(msb->block_req,
- BLK_STS_RESOURCE);
- continue;
+ unsigned int bytes = blk_rq_cur_bytes(msb->block_req);
+
+ chunk = blk_update_request(msb->block_req,
+ BLK_STS_RESOURCE,
+ bytes);
+ if (chunk)
+ continue;
+ __blk_mq_end_request(msb->block_req,
+ BLK_STS_RESOURCE);
+ msb->block_req = NULL;
+ break;
}
t_off = blk_rq_pos(msb->block_req);
@@ -729,30 +736,22 @@ try_again:
return 0;
}
- dev_dbg(&card->dev, "blk_fetch\n");
- msb->block_req = blk_fetch_request(msb->queue);
- if (!msb->block_req) {
- dev_dbg(&card->dev, "issue end\n");
- return -EAGAIN;
- }
-
- dev_dbg(&card->dev, "trying again\n");
- chunk = 1;
- goto try_again;
+ return 1;
}
static int mspro_block_complete_req(struct memstick_dev *card, int error)
{
struct mspro_block_data *msb = memstick_get_drvdata(card);
- int chunk, cnt;
+ int cnt;
+ bool chunk;
unsigned int t_len = 0;
unsigned long flags;
spin_lock_irqsave(&msb->q_lock, flags);
- dev_dbg(&card->dev, "complete %d, %d\n", msb->has_request ? 1 : 0,
+ dev_dbg(&card->dev, "complete %d, %d\n", msb->block_req ? 1 : 0,
error);
- if (msb->has_request) {
+ if (msb->block_req) {
/* Nothing to do - not really an error */
if (error == -EAGAIN)
error = 0;
@@ -777,15 +776,17 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
if (error && !t_len)
t_len = blk_rq_cur_bytes(msb->block_req);
- chunk = __blk_end_request(msb->block_req,
+ chunk = blk_update_request(msb->block_req,
errno_to_blk_status(error), t_len);
-
- error = mspro_block_issue_req(card, chunk);
-
- if (!error)
- goto out;
- else
- msb->has_request = 0;
+ if (chunk) {
+ error = mspro_block_issue_req(card, chunk);
+ if (!error)
+ goto out;
+ } else {
+ __blk_mq_end_request(msb->block_req,
+ errno_to_blk_status(error));
+ msb->block_req = NULL;
+ }
} else {
if (!error)
error = -EAGAIN;
@@ -806,8 +807,8 @@ static void mspro_block_stop(struct memstick_dev *card)
while (1) {
spin_lock_irqsave(&msb->q_lock, flags);
- if (!msb->has_request) {
- blk_stop_queue(msb->queue);
+ if (!msb->block_req) {
+ blk_mq_stop_hw_queues(msb->queue);
rc = 1;
}
spin_unlock_irqrestore(&msb->q_lock, flags);
@@ -822,32 +823,37 @@ static void mspro_block_stop(struct memstick_dev *card)
static void mspro_block_start(struct memstick_dev *card)
{
struct mspro_block_data *msb = memstick_get_drvdata(card);
- unsigned long flags;
- spin_lock_irqsave(&msb->q_lock, flags);
- blk_start_queue(msb->queue);
- spin_unlock_irqrestore(&msb->q_lock, flags);
+ blk_mq_start_hw_queues(msb->queue);
}
-static void mspro_block_submit_req(struct request_queue *q)
+static blk_status_t mspro_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- struct memstick_dev *card = q->queuedata;
+ struct memstick_dev *card = hctx->queue->queuedata;
struct mspro_block_data *msb = memstick_get_drvdata(card);
- struct request *req = NULL;
- if (msb->has_request)
- return;
+ spin_lock_irq(&msb->q_lock);
- if (msb->eject) {
- while ((req = blk_fetch_request(q)) != NULL)
- __blk_end_request_all(req, BLK_STS_IOERR);
+ if (msb->block_req) {
+ spin_unlock_irq(&msb->q_lock);
+ return BLK_STS_DEV_RESOURCE;
+ }
- return;
+ if (msb->eject) {
+ spin_unlock_irq(&msb->q_lock);
+ blk_mq_start_request(bd->rq);
+ return BLK_STS_IOERR;
}
- msb->has_request = 1;
- if (mspro_block_issue_req(card, 0))
- msb->has_request = 0;
+ msb->block_req = bd->rq;
+ blk_mq_start_request(bd->rq);
+
+ if (mspro_block_issue_req(card, true))
+ msb->block_req = NULL;
+
+ spin_unlock_irq(&msb->q_lock);
+ return BLK_STS_OK;
}
/*** Initialization ***/
@@ -1167,6 +1173,10 @@ static int mspro_block_init_card(struct memstick_dev *card)
}
+static const struct blk_mq_ops mspro_mq_ops = {
+ .queue_rq = mspro_queue_rq,
+};
+
static int mspro_block_init_disk(struct memstick_dev *card)
{
struct mspro_block_data *msb = memstick_get_drvdata(card);
@@ -1206,9 +1216,11 @@ static int mspro_block_init_disk(struct memstick_dev *card)
goto out_release_id;
}
- msb->queue = blk_init_queue(mspro_block_submit_req, &msb->q_lock);
- if (!msb->queue) {
- rc = -ENOMEM;
+ msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &mspro_mq_ops, 2,
+ BLK_MQ_F_SHOULD_MERGE);
+ if (IS_ERR(msb->queue)) {
+ rc = PTR_ERR(msb->queue);
+ msb->queue = NULL;
goto out_put_disk;
}
@@ -1318,13 +1330,14 @@ static void mspro_block_remove(struct memstick_dev *card)
spin_lock_irqsave(&msb->q_lock, flags);
msb->eject = 1;
- blk_start_queue(msb->queue);
spin_unlock_irqrestore(&msb->q_lock, flags);
+ blk_mq_start_hw_queues(msb->queue);
del_gendisk(msb->disk);
dev_dbg(&card->dev, "mspro block remove\n");
blk_cleanup_queue(msb->queue);
+ blk_mq_free_tag_set(&msb->tag_set);
msb->queue = NULL;
sysfs_remove_group(&card->dev.kobj, &msb->attr_group);
@@ -1344,8 +1357,9 @@ static int mspro_block_suspend(struct memstick_dev *card, pm_message_t state)
struct mspro_block_data *msb = memstick_get_drvdata(card);
unsigned long flags;
+ blk_mq_stop_hw_queues(msb->queue);
+
spin_lock_irqsave(&msb->q_lock, flags);
- blk_stop_queue(msb->queue);
msb->active = 0;
spin_unlock_irqrestore(&msb->q_lock, flags);
@@ -1355,7 +1369,6 @@ static int mspro_block_suspend(struct memstick_dev *card, pm_message_t state)
static int mspro_block_resume(struct memstick_dev *card)
{
struct mspro_block_data *msb = memstick_get_drvdata(card);
- unsigned long flags;
int rc = 0;
#ifdef CONFIG_MEMSTICK_UNSAFE_RESUME
@@ -1401,9 +1414,7 @@ out_unlock:
#endif /* CONFIG_MEMSTICK_UNSAFE_RESUME */
- spin_lock_irqsave(&msb->q_lock, flags);
- blk_start_queue(msb->queue);
- spin_unlock_irqrestore(&msb->q_lock, flags);
+ blk_mq_start_hw_queues(msb->queue);
return rc;
}
diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c
index 4f64563df7de..97308dc28ccf 100644
--- a/drivers/memstick/host/rtsx_usb_ms.c
+++ b/drivers/memstick/host/rtsx_usb_ms.c
@@ -40,15 +40,14 @@ struct rtsx_usb_ms {
struct mutex host_mutex;
struct work_struct handle_req;
-
- struct task_struct *detect_ms;
- struct completion detect_ms_exit;
+ struct delayed_work poll_card;
u8 ssc_depth;
unsigned int clock;
int power_mode;
unsigned char ifmode;
bool eject;
+ bool system_suspending;
};
static inline struct device *ms_dev(struct rtsx_usb_ms *host)
@@ -545,7 +544,7 @@ static void rtsx_usb_ms_handle_req(struct work_struct *work)
host->req->error);
}
} while (!rc);
- pm_runtime_put(ms_dev(host));
+ pm_runtime_put_sync(ms_dev(host));
}
}
@@ -585,14 +584,14 @@ static int rtsx_usb_ms_set_param(struct memstick_host *msh,
break;
if (value == MEMSTICK_POWER_ON) {
- pm_runtime_get_sync(ms_dev(host));
+ pm_runtime_get_noresume(ms_dev(host));
err = ms_power_on(host);
+ if (err)
+ pm_runtime_put_noidle(ms_dev(host));
} else if (value == MEMSTICK_POWER_OFF) {
err = ms_power_off(host);
- if (host->msh->card)
+ if (!err)
pm_runtime_put_noidle(ms_dev(host));
- else
- pm_runtime_put(ms_dev(host));
} else
err = -EINVAL;
if (!err)
@@ -638,12 +637,16 @@ static int rtsx_usb_ms_set_param(struct memstick_host *msh,
}
out:
mutex_unlock(&ucr->dev_mutex);
- pm_runtime_put(ms_dev(host));
+ pm_runtime_put_sync(ms_dev(host));
/* power-on delay */
- if (param == MEMSTICK_POWER && value == MEMSTICK_POWER_ON)
+ if (param == MEMSTICK_POWER && value == MEMSTICK_POWER_ON) {
usleep_range(10000, 12000);
+ if (!host->eject)
+ schedule_delayed_work(&host->poll_card, 100);
+ }
+
dev_dbg(ms_dev(host), "%s: return = %d\n", __func__, err);
return err;
}
@@ -654,9 +657,24 @@ static int rtsx_usb_ms_suspend(struct device *dev)
struct rtsx_usb_ms *host = dev_get_drvdata(dev);
struct memstick_host *msh = host->msh;
- dev_dbg(ms_dev(host), "--> %s\n", __func__);
+ /* Since we use rtsx_usb's resume callback to runtime resume its
+ * children to implement remote wakeup signaling, this causes
+ * rtsx_usb_ms' runtime resume callback runs after its suspend
+ * callback:
+ * rtsx_usb_ms_suspend()
+ * rtsx_usb_resume()
+ * -> rtsx_usb_ms_runtime_resume()
+ * -> memstick_detect_change()
+ *
+ * rtsx_usb_suspend()
+ *
+ * To avoid this, skip runtime resume/suspend if system suspend is
+ * underway.
+ */
+ host->system_suspending = true;
memstick_suspend_host(msh);
+
return 0;
}
@@ -665,58 +683,85 @@ static int rtsx_usb_ms_resume(struct device *dev)
struct rtsx_usb_ms *host = dev_get_drvdata(dev);
struct memstick_host *msh = host->msh;
- dev_dbg(ms_dev(host), "--> %s\n", __func__);
-
memstick_resume_host(msh);
+ host->system_suspending = false;
+
return 0;
}
#endif /* CONFIG_PM_SLEEP */
-/*
- * Thread function of ms card slot detection. The thread starts right after
- * successful host addition. It stops while the driver removal function sets
- * host->eject true.
- */
-static int rtsx_usb_detect_ms_card(void *__host)
+#ifdef CONFIG_PM
+static int rtsx_usb_ms_runtime_suspend(struct device *dev)
{
- struct rtsx_usb_ms *host = (struct rtsx_usb_ms *)__host;
+ struct rtsx_usb_ms *host = dev_get_drvdata(dev);
+
+ if (host->system_suspending)
+ return 0;
+
+ if (host->msh->card || host->power_mode != MEMSTICK_POWER_OFF)
+ return -EAGAIN;
+
+ return 0;
+}
+
+static int rtsx_usb_ms_runtime_resume(struct device *dev)
+{
+ struct rtsx_usb_ms *host = dev_get_drvdata(dev);
+
+
+ if (host->system_suspending)
+ return 0;
+
+ memstick_detect_change(host->msh);
+
+ return 0;
+}
+#endif /* CONFIG_PM */
+
+static const struct dev_pm_ops rtsx_usb_ms_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(rtsx_usb_ms_suspend, rtsx_usb_ms_resume)
+ SET_RUNTIME_PM_OPS(rtsx_usb_ms_runtime_suspend, rtsx_usb_ms_runtime_resume, NULL)
+};
+
+
+static void rtsx_usb_ms_poll_card(struct work_struct *work)
+{
+ struct rtsx_usb_ms *host = container_of(work, struct rtsx_usb_ms,
+ poll_card.work);
struct rtsx_ucr *ucr = host->ucr;
- u8 val = 0;
int err;
+ u8 val;
- for (;;) {
- pm_runtime_get_sync(ms_dev(host));
- mutex_lock(&ucr->dev_mutex);
-
- /* Check pending MS card changes */
- err = rtsx_usb_read_register(ucr, CARD_INT_PEND, &val);
- if (err) {
- mutex_unlock(&ucr->dev_mutex);
- goto poll_again;
- }
+ if (host->eject || host->power_mode != MEMSTICK_POWER_ON)
+ return;
- /* Clear the pending */
- rtsx_usb_write_register(ucr, CARD_INT_PEND,
- XD_INT | MS_INT | SD_INT,
- XD_INT | MS_INT | SD_INT);
+ pm_runtime_get_sync(ms_dev(host));
+ mutex_lock(&ucr->dev_mutex);
+ /* Check pending MS card changes */
+ err = rtsx_usb_read_register(ucr, CARD_INT_PEND, &val);
+ if (err) {
mutex_unlock(&ucr->dev_mutex);
+ goto poll_again;
+ }
- if (val & MS_INT) {
- dev_dbg(ms_dev(host), "MS slot change detected\n");
- memstick_detect_change(host->msh);
- }
+ /* Clear the pending */
+ rtsx_usb_write_register(ucr, CARD_INT_PEND,
+ XD_INT | MS_INT | SD_INT,
+ XD_INT | MS_INT | SD_INT);
-poll_again:
- pm_runtime_put(ms_dev(host));
- if (host->eject)
- break;
+ mutex_unlock(&ucr->dev_mutex);
- schedule_timeout_idle(HZ);
+ if (val & MS_INT) {
+ dev_dbg(ms_dev(host), "MS slot change detected\n");
+ memstick_detect_change(host->msh);
}
- complete(&host->detect_ms_exit);
- return 0;
+poll_again:
+ pm_runtime_put_sync(ms_dev(host));
+
+ if (!host->eject && host->power_mode == MEMSTICK_POWER_ON)
+ schedule_delayed_work(&host->poll_card, 100);
}
static int rtsx_usb_ms_drv_probe(struct platform_device *pdev)
@@ -747,45 +792,42 @@ static int rtsx_usb_ms_drv_probe(struct platform_device *pdev)
mutex_init(&host->host_mutex);
INIT_WORK(&host->handle_req, rtsx_usb_ms_handle_req);
- init_completion(&host->detect_ms_exit);
- host->detect_ms = kthread_create(rtsx_usb_detect_ms_card, host,
- "rtsx_usb_ms_%d", pdev->id);
- if (IS_ERR(host->detect_ms)) {
- dev_dbg(&(pdev->dev),
- "Unable to create polling thread.\n");
- err = PTR_ERR(host->detect_ms);
- goto err_out;
- }
+ INIT_DELAYED_WORK(&host->poll_card, rtsx_usb_ms_poll_card);
msh->request = rtsx_usb_ms_request;
msh->set_param = rtsx_usb_ms_set_param;
msh->caps = MEMSTICK_CAP_PAR4;
- pm_runtime_enable(&pdev->dev);
+ pm_runtime_get_noresume(ms_dev(host));
+ pm_runtime_set_active(ms_dev(host));
+ pm_runtime_enable(ms_dev(host));
+
err = memstick_add_host(msh);
if (err)
goto err_out;
- wake_up_process(host->detect_ms);
+ pm_runtime_put(ms_dev(host));
+
return 0;
err_out:
memstick_free_host(msh);
+ pm_runtime_disable(ms_dev(host));
+ pm_runtime_put_noidle(ms_dev(host));
return err;
}
static int rtsx_usb_ms_drv_remove(struct platform_device *pdev)
{
struct rtsx_usb_ms *host = platform_get_drvdata(pdev);
- struct memstick_host *msh;
+ struct memstick_host *msh = host->msh;
int err;
- msh = host->msh;
host->eject = true;
cancel_work_sync(&host->handle_req);
mutex_lock(&host->host_mutex);
if (host->req) {
- dev_dbg(&(pdev->dev),
+ dev_dbg(ms_dev(host),
"%s: Controller removed during transfer\n",
dev_name(&msh->dev));
host->req->error = -ENOMEDIUM;
@@ -797,7 +839,6 @@ static int rtsx_usb_ms_drv_remove(struct platform_device *pdev)
}
mutex_unlock(&host->host_mutex);
- wait_for_completion(&host->detect_ms_exit);
memstick_remove_host(msh);
memstick_free_host(msh);
@@ -807,18 +848,15 @@ static int rtsx_usb_ms_drv_remove(struct platform_device *pdev)
if (pm_runtime_active(ms_dev(host)))
pm_runtime_put(ms_dev(host));
- pm_runtime_disable(&pdev->dev);
+ pm_runtime_disable(ms_dev(host));
platform_set_drvdata(pdev, NULL);
- dev_dbg(&(pdev->dev),
+ dev_dbg(ms_dev(host),
": Realtek USB Memstick controller has been removed\n");
return 0;
}
-static SIMPLE_DEV_PM_OPS(rtsx_usb_ms_pm_ops,
- rtsx_usb_ms_suspend, rtsx_usb_ms_resume);
-
static struct platform_device_id rtsx_usb_ms_ids[] = {
{
.name = "rtsx_usb_ms",
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index b15fdc626fb8..4314a3352b96 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -129,7 +129,6 @@ static struct scsi_host_template mptfc_driver_template = {
.sg_tablesize = MPT_SCSI_SG_DEPTH,
.max_sectors = 8192,
.cmd_per_lun = 7,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = mptscsih_host_attrs,
};
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 9b404fc69c90..612cb5bc1333 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1992,7 +1992,6 @@ static struct scsi_host_template mptsas_driver_template = {
.sg_tablesize = MPT_SCSI_SG_DEPTH,
.max_sectors = 8192,
.cmd_per_lun = 7,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = mptscsih_host_attrs,
.no_write_same = 1,
};
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 9a336a161d9f..7172b0b16bdd 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -848,7 +848,6 @@ static struct scsi_host_template mptspi_driver_template = {
.sg_tablesize = MPT_SCSI_SG_DEPTH,
.max_sectors = 8192,
.cmd_per_lun = 7,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = mptscsih_host_attrs,
};
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index af22bbc3d00c..fe3134cf3008 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -57,4 +57,4 @@ obj-$(CONFIG_ASPEED_LPC_CTRL) += aspeed-lpc-ctrl.o
obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o
obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o
obj-$(CONFIG_OCXL) += ocxl/
-obj-$(CONFIG_MISC_RTSX) += cardreader/
+obj-y += cardreader/
diff --git a/drivers/misc/cardreader/Kconfig b/drivers/misc/cardreader/Kconfig
index 69e815e32a8c..ed8993b5d058 100644
--- a/drivers/misc/cardreader/Kconfig
+++ b/drivers/misc/cardreader/Kconfig
@@ -1,3 +1,14 @@
+config MISC_ALCOR_PCI
+ tristate "Alcor Micro/Alcor Link PCI-E card reader"
+ depends on PCI
+ select MFD_CORE
+ help
+ This supports for Alcor Micro PCI-Express card reader including au6601,
+ au6621.
+ Alcor Micro card readers support access to many types of memory cards,
+ such as Memory Stick, Memory Stick Pro, Secure Digital and
+ MultiMediaCard.
+
config MISC_RTSX_PCI
tristate "Realtek PCI-E card reader"
depends on PCI
diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile
index 9fabfcc6fa7a..9882d2a1025c 100644
--- a/drivers/misc/cardreader/Makefile
+++ b/drivers/misc/cardreader/Makefile
@@ -1,4 +1,4 @@
-rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o
-
+obj-$(CONFIG_MISC_ALCOR_PCI) += alcor_pci.o
obj-$(CONFIG_MISC_RTSX_PCI) += rtsx_pci.o
+rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o
obj-$(CONFIG_MISC_RTSX_USB) += rtsx_usb.o
diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c
new file mode 100644
index 000000000000..bcb10fa4bc3a
--- /dev/null
+++ b/drivers/misc/cardreader/alcor_pci.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 Oleksij Rempel <linux@rempel-privat.de>
+ *
+ * Driver for Alcor Micro AU6601 and AU6621 controllers
+ */
+
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+
+#include <linux/alcor_pci.h>
+
+#define DRV_NAME_ALCOR_PCI "alcor_pci"
+
+static DEFINE_IDA(alcor_pci_idr);
+
+static struct mfd_cell alcor_pci_cells[] = {
+ [ALCOR_SD_CARD] = {
+ .name = DRV_NAME_ALCOR_PCI_SDMMC,
+ },
+ [ALCOR_MS_CARD] = {
+ .name = DRV_NAME_ALCOR_PCI_MS,
+ },
+};
+
+static const struct alcor_dev_cfg alcor_cfg = {
+ .dma = 0,
+};
+
+static const struct alcor_dev_cfg au6621_cfg = {
+ .dma = 1,
+};
+
+static const struct pci_device_id pci_ids[] = {
+ { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6601),
+ .driver_data = (kernel_ulong_t)&alcor_cfg },
+ { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6621),
+ .driver_data = (kernel_ulong_t)&au6621_cfg },
+ { },
+};
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+void alcor_write8(struct alcor_pci_priv *priv, u8 val, unsigned int addr)
+{
+ writeb(val, priv->iobase + addr);
+}
+EXPORT_SYMBOL_GPL(alcor_write8);
+
+void alcor_write16(struct alcor_pci_priv *priv, u16 val, unsigned int addr)
+{
+ writew(val, priv->iobase + addr);
+}
+EXPORT_SYMBOL_GPL(alcor_write16);
+
+void alcor_write32(struct alcor_pci_priv *priv, u32 val, unsigned int addr)
+{
+ writel(val, priv->iobase + addr);
+}
+EXPORT_SYMBOL_GPL(alcor_write32);
+
+void alcor_write32be(struct alcor_pci_priv *priv, u32 val, unsigned int addr)
+{
+ iowrite32be(val, priv->iobase + addr);
+}
+EXPORT_SYMBOL_GPL(alcor_write32be);
+
+u8 alcor_read8(struct alcor_pci_priv *priv, unsigned int addr)
+{
+ return readb(priv->iobase + addr);
+}
+EXPORT_SYMBOL_GPL(alcor_read8);
+
+u32 alcor_read32(struct alcor_pci_priv *priv, unsigned int addr)
+{
+ return readl(priv->iobase + addr);
+}
+EXPORT_SYMBOL_GPL(alcor_read32);
+
+u32 alcor_read32be(struct alcor_pci_priv *priv, unsigned int addr)
+{
+ return ioread32be(priv->iobase + addr);
+}
+EXPORT_SYMBOL_GPL(alcor_read32be);
+
+static int alcor_pci_find_cap_offset(struct alcor_pci_priv *priv,
+ struct pci_dev *pci)
+{
+ int where;
+ u8 val8;
+ u32 val32;
+
+ where = ALCOR_CAP_START_OFFSET;
+ pci_read_config_byte(pci, where, &val8);
+ if (!val8)
+ return 0;
+
+ where = (int)val8;
+ while (1) {
+ pci_read_config_dword(pci, where, &val32);
+ if (val32 == 0xffffffff) {
+ dev_dbg(priv->dev, "find_cap_offset invalid value %x.\n",
+ val32);
+ return 0;
+ }
+
+ if ((val32 & 0xff) == 0x10) {
+ dev_dbg(priv->dev, "pcie cap offset: %x\n", where);
+ return where;
+ }
+
+ if ((val32 & 0xff00) == 0x00) {
+ dev_dbg(priv->dev, "pci_find_cap_offset invalid value %x.\n",
+ val32);
+ break;
+ }
+ where = (int)((val32 >> 8) & 0xff);
+ }
+
+ return 0;
+}
+
+static void alcor_pci_init_check_aspm(struct alcor_pci_priv *priv)
+{
+ struct pci_dev *pci;
+ int where;
+ u32 val32;
+
+ priv->pdev_cap_off = alcor_pci_find_cap_offset(priv, priv->pdev);
+ priv->parent_cap_off = alcor_pci_find_cap_offset(priv,
+ priv->parent_pdev);
+
+ if ((priv->pdev_cap_off == 0) || (priv->parent_cap_off == 0)) {
+ dev_dbg(priv->dev, "pci_cap_off: %x, parent_cap_off: %x\n",
+ priv->pdev_cap_off, priv->parent_cap_off);
+ return;
+ }
+
+ /* link capability */
+ pci = priv->pdev;
+ where = priv->pdev_cap_off + ALCOR_PCIE_LINK_CAP_OFFSET;
+ pci_read_config_dword(pci, where, &val32);
+ priv->pdev_aspm_cap = (u8)(val32 >> 10) & 0x03;
+
+ pci = priv->parent_pdev;
+ where = priv->parent_cap_off + ALCOR_PCIE_LINK_CAP_OFFSET;
+ pci_read_config_dword(pci, where, &val32);
+ priv->parent_aspm_cap = (u8)(val32 >> 10) & 0x03;
+
+ if (priv->pdev_aspm_cap != priv->parent_aspm_cap) {
+ u8 aspm_cap;
+
+ dev_dbg(priv->dev, "pdev_aspm_cap: %x, parent_aspm_cap: %x\n",
+ priv->pdev_aspm_cap, priv->parent_aspm_cap);
+ aspm_cap = priv->pdev_aspm_cap & priv->parent_aspm_cap;
+ priv->pdev_aspm_cap = aspm_cap;
+ priv->parent_aspm_cap = aspm_cap;
+ }
+
+ dev_dbg(priv->dev, "ext_config_dev_aspm: %x, pdev_aspm_cap: %x\n",
+ priv->ext_config_dev_aspm, priv->pdev_aspm_cap);
+ priv->ext_config_dev_aspm &= priv->pdev_aspm_cap;
+}
+
+static void alcor_pci_aspm_ctrl(struct alcor_pci_priv *priv, u8 aspm_enable)
+{
+ struct pci_dev *pci;
+ u8 aspm_ctrl, i;
+ int where;
+ u32 val32;
+
+ if ((!priv->pdev_cap_off) || (!priv->parent_cap_off)) {
+ dev_dbg(priv->dev, "pci_cap_off: %x, parent_cap_off: %x\n",
+ priv->pdev_cap_off, priv->parent_cap_off);
+ return;
+ }
+
+ if (!priv->pdev_aspm_cap)
+ return;
+
+ aspm_ctrl = 0;
+ if (aspm_enable) {
+ aspm_ctrl = priv->ext_config_dev_aspm;
+
+ if (!aspm_ctrl) {
+ dev_dbg(priv->dev, "aspm_ctrl == 0\n");
+ return;
+ }
+ }
+
+ for (i = 0; i < 2; i++) {
+
+ if (i) {
+ pci = priv->parent_pdev;
+ where = priv->parent_cap_off
+ + ALCOR_PCIE_LINK_CTRL_OFFSET;
+ } else {
+ pci = priv->pdev;
+ where = priv->pdev_cap_off
+ + ALCOR_PCIE_LINK_CTRL_OFFSET;
+ }
+
+ pci_read_config_dword(pci, where, &val32);
+ val32 &= (~0x03);
+ val32 |= (aspm_ctrl & priv->pdev_aspm_cap);
+ pci_write_config_byte(pci, where, (u8)val32);
+ }
+
+}
+
+static inline void alcor_mask_sd_irqs(struct alcor_pci_priv *priv)
+{
+ alcor_write32(priv, 0, AU6601_REG_INT_ENABLE);
+}
+
+static inline void alcor_unmask_sd_irqs(struct alcor_pci_priv *priv)
+{
+ alcor_write32(priv, AU6601_INT_CMD_MASK | AU6601_INT_DATA_MASK |
+ AU6601_INT_CARD_INSERT | AU6601_INT_CARD_REMOVE |
+ AU6601_INT_OVER_CURRENT_ERR,
+ AU6601_REG_INT_ENABLE);
+}
+
+static inline void alcor_mask_ms_irqs(struct alcor_pci_priv *priv)
+{
+ alcor_write32(priv, 0, AU6601_MS_INT_ENABLE);
+}
+
+static inline void alcor_unmask_ms_irqs(struct alcor_pci_priv *priv)
+{
+ alcor_write32(priv, 0x3d00fa, AU6601_MS_INT_ENABLE);
+}
+
+static int alcor_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct alcor_dev_cfg *cfg;
+ struct alcor_pci_priv *priv;
+ int ret, i, bar = 0;
+
+ cfg = (void *)ent->driver_data;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ ret = ida_simple_get(&alcor_pci_idr, 0, 0, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ priv->id = ret;
+
+ priv->pdev = pdev;
+ priv->parent_pdev = pdev->bus->self;
+ priv->dev = &pdev->dev;
+ priv->cfg = cfg;
+ priv->irq = pdev->irq;
+
+ ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI);
+ if (ret) {
+ dev_err(&pdev->dev, "Cannot request region\n");
+ return -ENOMEM;
+ }
+
+ if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar);
+ ret = -ENODEV;
+ goto error_release_regions;
+ }
+
+ priv->iobase = pcim_iomap(pdev, bar, 0);
+ if (!priv->iobase) {
+ ret = -ENOMEM;
+ goto error_release_regions;
+ }
+
+ /* make sure irqs are disabled */
+ alcor_write32(priv, 0, AU6601_REG_INT_ENABLE);
+ alcor_write32(priv, 0, AU6601_MS_INT_ENABLE);
+
+ ret = dma_set_mask_and_coherent(priv->dev, AU6601_SDMA_MASK);
+ if (ret) {
+ dev_err(priv->dev, "Failed to set DMA mask\n");
+ goto error_release_regions;
+ }
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, priv);
+ alcor_pci_init_check_aspm(priv);
+
+ for (i = 0; i < ARRAY_SIZE(alcor_pci_cells); i++) {
+ alcor_pci_cells[i].platform_data = priv;
+ alcor_pci_cells[i].pdata_size = sizeof(*priv);
+ }
+ ret = mfd_add_devices(&pdev->dev, priv->id, alcor_pci_cells,
+ ARRAY_SIZE(alcor_pci_cells), NULL, 0, NULL);
+ if (ret < 0)
+ goto error_release_regions;
+
+ alcor_pci_aspm_ctrl(priv, 0);
+
+ return 0;
+
+error_release_regions:
+ pci_release_regions(pdev);
+ return ret;
+}
+
+static void alcor_pci_remove(struct pci_dev *pdev)
+{
+ struct alcor_pci_priv *priv;
+
+ priv = pci_get_drvdata(pdev);
+
+ alcor_pci_aspm_ctrl(priv, 1);
+
+ mfd_remove_devices(&pdev->dev);
+
+ ida_simple_remove(&alcor_pci_idr, priv->id);
+
+ pci_release_regions(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int alcor_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct alcor_pci_priv *priv = pci_get_drvdata(pdev);
+
+ alcor_pci_aspm_ctrl(priv, 1);
+ return 0;
+}
+
+static int alcor_resume(struct device *dev)
+{
+
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct alcor_pci_priv *priv = pci_get_drvdata(pdev);
+
+ alcor_pci_aspm_ctrl(priv, 0);
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(alcor_pci_pm_ops, alcor_suspend, alcor_resume);
+
+static struct pci_driver alcor_driver = {
+ .name = DRV_NAME_ALCOR_PCI,
+ .id_table = pci_ids,
+ .probe = alcor_pci_probe,
+ .remove = alcor_pci_remove,
+ .driver = {
+ .pm = &alcor_pci_pm_ops
+ },
+};
+
+module_pci_driver(alcor_driver);
+
+MODULE_AUTHOR("Oleksij Rempel <linux@rempel-privat.de>");
+MODULE_DESCRIPTION("PCI driver for Alcor Micro AU6601 Secure Digital Host Controller Interface");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c
index b97903ff1a72..f7a66f614085 100644
--- a/drivers/misc/cardreader/rtsx_usb.c
+++ b/drivers/misc/cardreader/rtsx_usb.c
@@ -723,8 +723,15 @@ static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message)
return 0;
}
+static int rtsx_usb_resume_child(struct device *dev, void *data)
+{
+ pm_request_resume(dev);
+ return 0;
+}
+
static int rtsx_usb_resume(struct usb_interface *intf)
{
+ device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child);
return 0;
}
@@ -734,6 +741,7 @@ static int rtsx_usb_reset_resume(struct usb_interface *intf)
(struct rtsx_ucr *)usb_get_intfdata(intf);
rtsx_usb_reset_chip(ucr);
+ device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child);
return 0;
}
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
index c327985c9523..6479435ac96b 100644
--- a/drivers/misc/mic/host/mic_boot.c
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -149,7 +149,7 @@ static void *__mic_dma_alloc(struct device *dev, size_t size,
struct scif_hw_dev *scdev = dev_get_drvdata(dev);
struct mic_device *mdev = scdev_to_mdev(scdev);
dma_addr_t tmp;
- void *va = kmalloc(size, gfp);
+ void *va = kmalloc(size, gfp | __GFP_ZERO);
if (va) {
tmp = mic_map_single(mdev, va, size);
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 111934838da2..aef1185f383d 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -100,7 +100,6 @@ static DEFINE_IDA(mmc_rpmb_ida);
* There is one mmc_blk_data per slot.
*/
struct mmc_blk_data {
- spinlock_t lock;
struct device *parent;
struct gendisk *disk;
struct mmc_queue queue;
@@ -1488,7 +1487,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
blk_mq_end_request(req, BLK_STS_OK);
}
- spin_lock_irqsave(q->queue_lock, flags);
+ spin_lock_irqsave(&mq->lock, flags);
mq->in_flight[mmc_issue_type(mq, req)] -= 1;
@@ -1496,7 +1495,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
mmc_cqe_check_busy(mq);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ spin_unlock_irqrestore(&mq->lock, flags);
if (!mq->cqe_busy)
blk_mq_run_hw_queues(q, true);
@@ -1961,7 +1960,7 @@ static void mmc_blk_urgent_bkops(struct mmc_queue *mq,
struct mmc_queue_req *mqrq)
{
if (mmc_blk_urgent_bkops_needed(mq, mqrq))
- mmc_start_bkops(mq->card, true);
+ mmc_run_bkops(mq->card);
}
void mmc_blk_mq_complete(struct request *req)
@@ -1993,17 +1992,16 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq,
static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
{
- struct request_queue *q = req->q;
unsigned long flags;
bool put_card;
- spin_lock_irqsave(q->queue_lock, flags);
+ spin_lock_irqsave(&mq->lock, flags);
mq->in_flight[mmc_issue_type(mq, req)] -= 1;
put_card = (mmc_tot_in_flight(mq) == 0);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ spin_unlock_irqrestore(&mq->lock, flags);
if (put_card)
mmc_put_card(mq->card, &mq->ctx);
@@ -2099,11 +2097,11 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
* request does not need to wait (although it does need to
* complete complete_req first).
*/
- spin_lock_irqsave(q->queue_lock, flags);
+ spin_lock_irqsave(&mq->lock, flags);
mq->complete_req = req;
mq->rw_wait = false;
waiting = mq->waiting;
- spin_unlock_irqrestore(q->queue_lock, flags);
+ spin_unlock_irqrestore(&mq->lock, flags);
/*
* If 'waiting' then the waiting task will complete this
@@ -2122,10 +2120,10 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
/* Take the recovery path for errors or urgent background operations */
if (mmc_blk_rq_error(&mqrq->brq) ||
mmc_blk_urgent_bkops_needed(mq, mqrq)) {
- spin_lock_irqsave(q->queue_lock, flags);
+ spin_lock_irqsave(&mq->lock, flags);
mq->recovery_needed = true;
mq->recovery_req = req;
- spin_unlock_irqrestore(q->queue_lock, flags);
+ spin_unlock_irqrestore(&mq->lock, flags);
wake_up(&mq->wait);
schedule_work(&mq->recovery_work);
return;
@@ -2141,7 +2139,6 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
{
- struct request_queue *q = mq->queue;
unsigned long flags;
bool done;
@@ -2149,7 +2146,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
* Wait while there is another request in progress, but not if recovery
* is needed. Also indicate whether there is a request waiting to start.
*/
- spin_lock_irqsave(q->queue_lock, flags);
+ spin_lock_irqsave(&mq->lock, flags);
if (mq->recovery_needed) {
*err = -EBUSY;
done = true;
@@ -2157,7 +2154,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
done = !mq->rw_wait;
}
mq->waiting = !done;
- spin_unlock_irqrestore(q->queue_lock, flags);
+ spin_unlock_irqrestore(&mq->lock, flags);
return done;
}
@@ -2334,12 +2331,11 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
goto err_kfree;
}
- spin_lock_init(&md->lock);
INIT_LIST_HEAD(&md->part);
INIT_LIST_HEAD(&md->rpmbs);
md->usage = 1;
- ret = mmc_init_queue(&md->queue, card, &md->lock, subname);
+ ret = mmc_init_queue(&md->queue, card);
if (ret)
goto err_putdisk;
diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
index 1170feb8f969..eef301452406 100644
--- a/drivers/mmc/core/card.h
+++ b/drivers/mmc/core/card.h
@@ -23,15 +23,13 @@
#define MMC_STATE_BLOCKADDR (1<<2) /* card uses block-addressing */
#define MMC_CARD_SDXC (1<<3) /* card is SDXC */
#define MMC_CARD_REMOVED (1<<4) /* card has been removed */
-#define MMC_STATE_DOING_BKOPS (1<<5) /* card is doing BKOPS */
-#define MMC_STATE_SUSPENDED (1<<6) /* card is suspended */
+#define MMC_STATE_SUSPENDED (1<<5) /* card is suspended */
#define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT)
#define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY)
#define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR)
#define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
#define mmc_card_removed(c) ((c) && ((c)->state & MMC_CARD_REMOVED))
-#define mmc_card_doing_bkops(c) ((c)->state & MMC_STATE_DOING_BKOPS)
#define mmc_card_suspended(c) ((c)->state & MMC_STATE_SUSPENDED)
#define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT)
@@ -39,8 +37,6 @@
#define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
#define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
#define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
-#define mmc_card_set_doing_bkops(c) ((c)->state |= MMC_STATE_DOING_BKOPS)
-#define mmc_card_clr_doing_bkops(c) ((c)->state &= ~MMC_STATE_DOING_BKOPS)
#define mmc_card_set_suspended(c) ((c)->state |= MMC_STATE_SUSPENDED)
#define mmc_card_clr_suspended(c) ((c)->state &= ~MMC_STATE_SUSPENDED)
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 50a5c340307b..5bd58b95d318 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -887,7 +887,10 @@ void mmc_release_host(struct mmc_host *host)
spin_unlock_irqrestore(&host->lock, flags);
wake_up(&host->wq);
pm_runtime_mark_last_busy(mmc_dev(host));
- pm_runtime_put_autosuspend(mmc_dev(host));
+ if (host->caps & MMC_CAP_SYNC_RUNTIME_PM)
+ pm_runtime_put_sync_suspend(mmc_dev(host));
+ else
+ pm_runtime_put_autosuspend(mmc_dev(host));
}
}
EXPORT_SYMBOL(mmc_release_host);
@@ -2413,20 +2416,6 @@ int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen)
}
EXPORT_SYMBOL(mmc_set_blocklen);
-int mmc_set_blockcount(struct mmc_card *card, unsigned int blockcount,
- bool is_rel_write)
-{
- struct mmc_command cmd = {};
-
- cmd.opcode = MMC_SET_BLOCK_COUNT;
- cmd.arg = blockcount & 0x0000FFFF;
- if (is_rel_write)
- cmd.arg |= 1 << 31;
- cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
- return mmc_wait_for_cmd(card->host, &cmd, 5);
-}
-EXPORT_SYMBOL(mmc_set_blockcount);
-
static void mmc_hw_reset_for_init(struct mmc_host *host)
{
mmc_pwrseq_reset(host);
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 087ba68b2920..8fb6bc37f808 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -118,8 +118,6 @@ int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from,
unsigned int mmc_calc_max_discard(struct mmc_card *card);
int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen);
-int mmc_set_blockcount(struct mmc_card *card, unsigned int blockcount,
- bool is_rel_write);
int __mmc_claim_host(struct mmc_host *host, struct mmc_ctx *ctx,
atomic_t *abort);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 55997cf84b39..da892a599524 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1181,6 +1181,9 @@ static int mmc_select_hs400(struct mmc_card *card)
if (err)
goto out_err;
+ if (host->ops->hs400_prepare_ddr)
+ host->ops->hs400_prepare_ddr(host);
+
/* Switch card to DDR */
err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_BUS_WIDTH,
@@ -2011,12 +2014,6 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
if (mmc_card_suspended(host->card))
goto out;
- if (mmc_card_doing_bkops(host->card)) {
- err = mmc_stop_bkops(host->card);
- if (err)
- goto out;
- }
-
err = mmc_flush_cache(host->card);
if (err)
goto out;
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 873b2aa0c155..9054329fe903 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -802,12 +802,6 @@ static int mmc_send_hpi_cmd(struct mmc_card *card, u32 *status)
unsigned int opcode;
int err;
- if (!card->ext_csd.hpi) {
- pr_warn("%s: Card didn't support HPI command\n",
- mmc_hostname(card->host));
- return -EINVAL;
- }
-
opcode = card->ext_csd.hpi_cmd;
if (opcode == MMC_STOP_TRANSMISSION)
cmd.flags = MMC_RSP_R1B | MMC_CMD_AC;
@@ -897,34 +891,6 @@ int mmc_can_ext_csd(struct mmc_card *card)
return (card && card->csd.mmca_vsn > CSD_SPEC_VER_3);
}
-/**
- * mmc_stop_bkops - stop ongoing BKOPS
- * @card: MMC card to check BKOPS
- *
- * Send HPI command to stop ongoing background operations to
- * allow rapid servicing of foreground operations, e.g. read/
- * writes. Wait until the card comes out of the programming state
- * to avoid errors in servicing read/write requests.
- */
-int mmc_stop_bkops(struct mmc_card *card)
-{
- int err = 0;
-
- err = mmc_interrupt_hpi(card);
-
- /*
- * If err is EINVAL, we can't issue an HPI.
- * It should complete the BKOPS.
- */
- if (!err || (err == -EINVAL)) {
- mmc_card_clr_doing_bkops(card);
- mmc_retune_release(card->host);
- err = 0;
- }
-
- return err;
-}
-
static int mmc_read_bkops_status(struct mmc_card *card)
{
int err;
@@ -941,22 +907,17 @@ static int mmc_read_bkops_status(struct mmc_card *card)
}
/**
- * mmc_start_bkops - start BKOPS for supported cards
- * @card: MMC card to start BKOPS
- * @from_exception: A flag to indicate if this function was
- * called due to an exception raised by the card
+ * mmc_run_bkops - Run BKOPS for supported cards
+ * @card: MMC card to run BKOPS for
*
- * Start background operations whenever requested.
- * When the urgent BKOPS bit is set in a R1 command response
- * then background operations should be started immediately.
+ * Run background operations synchronously for cards having manual BKOPS
+ * enabled and in case it reports urgent BKOPS level.
*/
-void mmc_start_bkops(struct mmc_card *card, bool from_exception)
+void mmc_run_bkops(struct mmc_card *card)
{
int err;
- int timeout;
- bool use_busy_signal;
- if (!card->ext_csd.man_bkops_en || mmc_card_doing_bkops(card))
+ if (!card->ext_csd.man_bkops_en)
return;
err = mmc_read_bkops_status(card);
@@ -966,44 +927,26 @@ void mmc_start_bkops(struct mmc_card *card, bool from_exception)
return;
}
- if (!card->ext_csd.raw_bkops_status)
- return;
-
- if (card->ext_csd.raw_bkops_status < EXT_CSD_BKOPS_LEVEL_2 &&
- from_exception)
+ if (!card->ext_csd.raw_bkops_status ||
+ card->ext_csd.raw_bkops_status < EXT_CSD_BKOPS_LEVEL_2)
return;
- if (card->ext_csd.raw_bkops_status >= EXT_CSD_BKOPS_LEVEL_2) {
- timeout = MMC_OPS_TIMEOUT_MS;
- use_busy_signal = true;
- } else {
- timeout = 0;
- use_busy_signal = false;
- }
-
mmc_retune_hold(card->host);
- err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
- EXT_CSD_BKOPS_START, 1, timeout, 0,
- use_busy_signal, true, false);
- if (err) {
+ /*
+ * For urgent BKOPS status, LEVEL_2 and higher, let's execute
+ * synchronously. Future wise, we may consider to start BKOPS, for less
+ * urgent levels by using an asynchronous background task, when idle.
+ */
+ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+ EXT_CSD_BKOPS_START, 1, MMC_OPS_TIMEOUT_MS);
+ if (err)
pr_warn("%s: Error %d starting bkops\n",
mmc_hostname(card->host), err);
- mmc_retune_release(card->host);
- return;
- }
- /*
- * For urgent bkops status (LEVEL_2 and more)
- * bkops executed synchronously, otherwise
- * the operation is in progress
- */
- if (!use_busy_signal)
- mmc_card_set_doing_bkops(card);
- else
- mmc_retune_release(card->host);
+ mmc_retune_release(card->host);
}
-EXPORT_SYMBOL(mmc_start_bkops);
+EXPORT_SYMBOL(mmc_run_bkops);
/*
* Flush the cache to the non-volatile storage.
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index a1390d486381..018a5e3f66d6 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -40,8 +40,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
bool use_busy_signal, bool send_status, bool retry_crc_err);
int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
unsigned int timeout_ms);
-int mmc_stop_bkops(struct mmc_card *card);
-void mmc_start_bkops(struct mmc_card *card, bool from_exception);
+void mmc_run_bkops(struct mmc_card *card);
int mmc_flush_cache(struct mmc_card *card);
int mmc_cmdq_enable(struct mmc_card *card);
int mmc_cmdq_disable(struct mmc_card *card);
diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c
index ef18daeaa4cc..eabb1cab1765 100644
--- a/drivers/mmc/core/mmc_test.c
+++ b/drivers/mmc/core/mmc_test.c
@@ -3145,17 +3145,7 @@ static int mtf_testlist_show(struct seq_file *sf, void *data)
return 0;
}
-static int mtf_testlist_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mtf_testlist_show, inode->i_private);
-}
-
-static const struct file_operations mmc_test_fops_testlist = {
- .open = mtf_testlist_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mtf_testlist);
static void mmc_test_free_dbgfs_file(struct mmc_card *card)
{
@@ -3216,7 +3206,7 @@ static int mmc_test_register_dbgfs_file(struct mmc_card *card)
goto err;
ret = __mmc_test_register_dbgfs_file(card, "testlist", S_IRUGO,
- &mmc_test_fops_testlist);
+ &mtf_testlist_fops);
if (ret)
goto err;
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 6edffeed9953..35cc138b096d 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -89,9 +89,9 @@ void mmc_cqe_recovery_notifier(struct mmc_request *mrq)
struct mmc_queue *mq = q->queuedata;
unsigned long flags;
- spin_lock_irqsave(q->queue_lock, flags);
+ spin_lock_irqsave(&mq->lock, flags);
__mmc_cqe_recovery_notifier(mq);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ spin_unlock_irqrestore(&mq->lock, flags);
}
static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
@@ -128,14 +128,14 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
unsigned long flags;
int ret;
- spin_lock_irqsave(q->queue_lock, flags);
+ spin_lock_irqsave(&mq->lock, flags);
if (mq->recovery_needed || !mq->use_cqe)
ret = BLK_EH_RESET_TIMER;
else
ret = mmc_cqe_timed_out(req);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ spin_unlock_irqrestore(&mq->lock, flags);
return ret;
}
@@ -157,9 +157,9 @@ static void mmc_mq_recovery_handler(struct work_struct *work)
mq->in_recovery = false;
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&mq->lock);
mq->recovery_needed = false;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&mq->lock);
mmc_put_card(mq->card, &mq->ctx);
@@ -258,10 +258,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
issue_type = mmc_issue_type(mq, req);
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&mq->lock);
if (mq->recovery_needed || mq->busy) {
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&mq->lock);
return BLK_STS_RESOURCE;
}
@@ -269,7 +269,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
case MMC_ISSUE_DCMD:
if (mmc_cqe_dcmd_busy(mq)) {
mq->cqe_busy |= MMC_CQE_DCMD_BUSY;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&mq->lock);
return BLK_STS_RESOURCE;
}
break;
@@ -294,7 +294,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
get_card = (mmc_tot_in_flight(mq) == 1);
cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1);
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&mq->lock);
if (!(req->rq_flags & RQF_DONTPREP)) {
req_to_mmc_queue_req(req)->retries = 0;
@@ -328,12 +328,12 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
if (issued != MMC_REQ_STARTED) {
bool put_card = false;
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&mq->lock);
mq->in_flight[issue_type] -= 1;
if (mmc_tot_in_flight(mq) == 0)
put_card = true;
mq->busy = false;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&mq->lock);
if (put_card)
mmc_put_card(card, &mq->ctx);
} else {
@@ -378,14 +378,37 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
init_waitqueue_head(&mq->wait);
}
-static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth,
- const struct blk_mq_ops *mq_ops, spinlock_t *lock)
+/* Set queue depth to get a reasonable value for q->nr_requests */
+#define MMC_QUEUE_DEPTH 64
+
+/**
+ * mmc_init_queue - initialise a queue structure.
+ * @mq: mmc queue
+ * @card: mmc card to attach this queue
+ *
+ * Initialise a MMC card request queue.
+ */
+int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
{
+ struct mmc_host *host = card->host;
int ret;
+ mq->card = card;
+ mq->use_cqe = host->cqe_enabled;
+
+ spin_lock_init(&mq->lock);
+
memset(&mq->tag_set, 0, sizeof(mq->tag_set));
- mq->tag_set.ops = mq_ops;
- mq->tag_set.queue_depth = q_depth;
+ mq->tag_set.ops = &mmc_mq_ops;
+ /*
+ * The queue depth for CQE must match the hardware because the request
+ * tag is used to index the hardware queue.
+ */
+ if (mq->use_cqe)
+ mq->tag_set.queue_depth =
+ min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth);
+ else
+ mq->tag_set.queue_depth = MMC_QUEUE_DEPTH;
mq->tag_set.numa_node = NUMA_NO_NODE;
mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE |
BLK_MQ_F_BLOCKING;
@@ -403,68 +426,17 @@ static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth,
goto free_tag_set;
}
- mq->queue->queue_lock = lock;
mq->queue->queuedata = mq;
+ blk_queue_rq_timeout(mq->queue, 60 * HZ);
+ mmc_setup_queue(mq, card);
return 0;
free_tag_set:
blk_mq_free_tag_set(&mq->tag_set);
-
return ret;
}
-/* Set queue depth to get a reasonable value for q->nr_requests */
-#define MMC_QUEUE_DEPTH 64
-
-static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card,
- spinlock_t *lock)
-{
- struct mmc_host *host = card->host;
- int q_depth;
- int ret;
-
- /*
- * The queue depth for CQE must match the hardware because the request
- * tag is used to index the hardware queue.
- */
- if (mq->use_cqe)
- q_depth = min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth);
- else
- q_depth = MMC_QUEUE_DEPTH;
-
- ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock);
- if (ret)
- return ret;
-
- blk_queue_rq_timeout(mq->queue, 60 * HZ);
-
- mmc_setup_queue(mq, card);
-
- return 0;
-}
-
-/**
- * mmc_init_queue - initialise a queue structure.
- * @mq: mmc queue
- * @card: mmc card to attach this queue
- * @lock: queue lock
- * @subname: partition subname
- *
- * Initialise a MMC card request queue.
- */
-int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
- spinlock_t *lock, const char *subname)
-{
- struct mmc_host *host = card->host;
-
- mq->card = card;
-
- mq->use_cqe = host->cqe_enabled;
-
- return mmc_mq_init(mq, card, lock);
-}
-
void mmc_queue_suspend(struct mmc_queue *mq)
{
blk_mq_quiesce_queue(mq->queue);
diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h
index 9bf3c9245075..fd11491ced9f 100644
--- a/drivers/mmc/core/queue.h
+++ b/drivers/mmc/core/queue.h
@@ -77,6 +77,7 @@ struct mmc_queue {
struct blk_mq_tag_set tag_set;
struct mmc_blk_data *blkdata;
struct request_queue *queue;
+ spinlock_t lock;
int in_flight[MMC_ISSUE_MAX];
unsigned int cqe_busy;
#define MMC_CQE_DCMD_BUSY BIT(0)
@@ -95,8 +96,7 @@ struct mmc_queue {
struct work_struct complete_work;
};
-extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
- const char *);
+extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *);
extern void mmc_cleanup_queue(struct mmc_queue *);
extern void mmc_queue_suspend(struct mmc_queue *);
extern void mmc_queue_resume(struct mmc_queue *);
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 86803a3a04dc..319ccd93383d 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -9,7 +9,6 @@
*/
#include <linux/err.h>
-#include <linux/gpio.h>
#include <linux/gpio/consumer.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
@@ -27,8 +26,8 @@ struct mmc_gpio {
bool override_cd_active_level;
irqreturn_t (*cd_gpio_isr)(int irq, void *dev_id);
char *ro_label;
+ char *cd_label;
u32 cd_debounce_delay_ms;
- char cd_label[];
};
static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id)
@@ -45,15 +44,19 @@ static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id)
int mmc_gpio_alloc(struct mmc_host *host)
{
- size_t len = strlen(dev_name(host->parent)) + 4;
struct mmc_gpio *ctx = devm_kzalloc(host->parent,
- sizeof(*ctx) + 2 * len, GFP_KERNEL);
+ sizeof(*ctx), GFP_KERNEL);
if (ctx) {
- ctx->ro_label = ctx->cd_label + len;
ctx->cd_debounce_delay_ms = 200;
- snprintf(ctx->cd_label, len, "%s cd", dev_name(host->parent));
- snprintf(ctx->ro_label, len, "%s ro", dev_name(host->parent));
+ ctx->cd_label = devm_kasprintf(host->parent, GFP_KERNEL,
+ "%s cd", dev_name(host->parent));
+ if (!ctx->cd_label)
+ return -ENOMEM;
+ ctx->ro_label = devm_kasprintf(host->parent, GFP_KERNEL,
+ "%s ro", dev_name(host->parent));
+ if (!ctx->ro_label)
+ return -ENOMEM;
host->slot.handler_priv = ctx;
host->slot.cd_irq = -EINVAL;
}
@@ -98,36 +101,6 @@ int mmc_gpio_get_cd(struct mmc_host *host)
}
EXPORT_SYMBOL(mmc_gpio_get_cd);
-/**
- * mmc_gpio_request_ro - request a gpio for write-protection
- * @host: mmc host
- * @gpio: gpio number requested
- *
- * As devm_* managed functions are used in mmc_gpio_request_ro(), client
- * drivers do not need to worry about freeing up memory.
- *
- * Returns zero on success, else an error.
- */
-int mmc_gpio_request_ro(struct mmc_host *host, unsigned int gpio)
-{
- struct mmc_gpio *ctx = host->slot.handler_priv;
- int ret;
-
- if (!gpio_is_valid(gpio))
- return -EINVAL;
-
- ret = devm_gpio_request_one(host->parent, gpio, GPIOF_DIR_IN,
- ctx->ro_label);
- if (ret < 0)
- return ret;
-
- ctx->override_ro_active_level = true;
- ctx->ro_gpio = gpio_to_desc(gpio);
-
- return 0;
-}
-EXPORT_SYMBOL(mmc_gpio_request_ro);
-
void mmc_gpiod_request_cd_irq(struct mmc_host *host)
{
struct mmc_gpio *ctx = host->slot.handler_priv;
@@ -197,50 +170,6 @@ void mmc_gpio_set_cd_isr(struct mmc_host *host,
EXPORT_SYMBOL(mmc_gpio_set_cd_isr);
/**
- * mmc_gpio_request_cd - request a gpio for card-detection
- * @host: mmc host
- * @gpio: gpio number requested
- * @debounce: debounce time in microseconds
- *
- * As devm_* managed functions are used in mmc_gpio_request_cd(), client
- * drivers do not need to worry about freeing up memory.
- *
- * If GPIO debouncing is desired, set the debounce parameter to a non-zero
- * value. The caller is responsible for ensuring that the GPIO driver associated
- * with the GPIO supports debouncing, otherwise an error will be returned.
- *
- * Returns zero on success, else an error.
- */
-int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio,
- unsigned int debounce)
-{
- struct mmc_gpio *ctx = host->slot.handler_priv;
- int ret;
-
- ret = devm_gpio_request_one(host->parent, gpio, GPIOF_DIR_IN,
- ctx->cd_label);
- if (ret < 0)
- /*
- * don't bother freeing memory. It might still get used by other
- * slot functions, in any case it will be freed, when the device
- * is destroyed.
- */
- return ret;
-
- if (debounce) {
- ret = gpio_set_debounce(gpio, debounce);
- if (ret < 0)
- return ret;
- }
-
- ctx->override_cd_active_level = true;
- ctx->cd_gpio = gpio_to_desc(gpio);
-
- return 0;
-}
-EXPORT_SYMBOL(mmc_gpio_request_cd);
-
-/**
* mmc_gpiod_request_cd - request a gpio descriptor for card-detection
* @host: mmc host
* @con_id: function within the GPIO consumer
@@ -250,8 +179,7 @@ EXPORT_SYMBOL(mmc_gpio_request_cd);
* @gpio_invert: will return whether the GPIO line is inverted or not, set
* to NULL to ignore
*
- * Use this function in place of mmc_gpio_request_cd() to use the GPIO
- * descriptor API. Note that it must be called prior to mmc_add_host()
+ * Note that this must be called prior to mmc_add_host()
* otherwise the caller must also call mmc_gpiod_request_cd_irq().
*
* Returns zero on success, else an error.
@@ -302,9 +230,6 @@ EXPORT_SYMBOL(mmc_can_gpio_cd);
* @gpio_invert: will return whether the GPIO line is inverted or not,
* set to NULL to ignore
*
- * Use this function in place of mmc_gpio_request_ro() to use the GPIO
- * descriptor API.
- *
* Returns zero on success, else an error.
*/
int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 1b58739d9744..e26b8145efb3 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -441,6 +441,13 @@ config MMC_WBSD
If unsure, say N.
+config MMC_ALCOR
+ tristate "Alcor Micro/Alcor Link SD/MMC controller"
+ depends on MISC_ALCOR_PCI
+ help
+ Say Y here to include driver code to support SD/MMC card interface
+ of Alcor Micro PCI-E card reader
+
config MMC_AU1X
tristate "Alchemy AU1XX0 MMC Card Interface support"
depends on MIPS_ALCHEMY
@@ -646,13 +653,14 @@ config MMC_SDHI_SYS_DMAC
config MMC_SDHI_INTERNAL_DMAC
tristate "DMA for SDHI SD/SDIO controllers using on-chip bus mastering"
- depends on ARM64 || ARCH_R8A77470 || COMPILE_TEST
+ depends on ARM64 || ARCH_R7S9210 || ARCH_R8A77470 || COMPILE_TEST
depends on MMC_SDHI
- default MMC_SDHI if (ARM64 || ARCH_R8A77470)
+ default MMC_SDHI if (ARM64 || ARCH_R7S9210 || ARCH_R8A77470)
help
This provides DMA support for SDHI SD/SDIO controllers
using on-chip bus mastering. This supports the controllers
- found in arm64 based SoCs.
+ found in arm64 based SoCs. This controller is also found in
+ some RZ family SoCs.
config MMC_UNIPHIER
tristate "UniPhier SD/eMMC Host Controller support"
@@ -969,6 +977,8 @@ config MMC_SDHCI_XENON
config MMC_SDHCI_OMAP
tristate "TI SDHCI Controller Support"
depends on MMC_SDHCI_PLTFM && OF
+ select THERMAL
+ select TI_SOC_THERMAL
help
This selects the Secure Digital Host Controller Interface (SDHCI)
support present in TI's DRA7 SOCs. The controller supports
@@ -977,3 +987,15 @@ config MMC_SDHCI_OMAP
If you have a controller with this interface, say Y or M here.
If unsure, say N.
+
+config MMC_SDHCI_AM654
+ tristate "Support for the SDHCI Controller in TI's AM654 SOCs"
+ depends on MMC_SDHCI_PLTFM && OF
+ help
+ This selects the Secure Digital Host Controller Interface (SDHCI)
+ support present in TI's AM654 SOCs. The controller supports
+ SD/MMC/SDIO devices.
+
+ If you have a controller with this interface, say Y or M here.
+
+ If unsure, say N.
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 720d37777098..73578718f119 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -22,8 +22,10 @@ obj-$(CONFIG_MMC_SDHCI_S3C) += sdhci-s3c.o
obj-$(CONFIG_MMC_SDHCI_SIRF) += sdhci-sirf.o
obj-$(CONFIG_MMC_SDHCI_F_SDH30) += sdhci_f_sdh30.o
obj-$(CONFIG_MMC_SDHCI_SPEAR) += sdhci-spear.o
+obj-$(CONFIG_MMC_SDHCI_AM654) += sdhci_am654.o
obj-$(CONFIG_MMC_WBSD) += wbsd.o
obj-$(CONFIG_MMC_AU1X) += au1xmmc.o
+obj-$(CONFIG_MMC_ALCOR) += alcor.o
obj-$(CONFIG_MMC_MTK) += mtk-sd.o
obj-$(CONFIG_MMC_OMAP) += omap.o
obj-$(CONFIG_MMC_OMAP_HS) += omap_hsmmc.o
diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c
new file mode 100644
index 000000000000..c712b7deb3a9
--- /dev/null
+++ b/drivers/mmc/host/alcor.c
@@ -0,0 +1,1162 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 Oleksij Rempel <linux@rempel-privat.de>
+ *
+ * Driver for Alcor Micro AU6601 and AU6621 controllers
+ */
+
+/* Note: this driver was created without any documentation. Based
+ * on sniffing, testing and in some cases mimic of original driver.
+ * As soon as some one with documentation or more experience in SD/MMC, or
+ * reverse engineering then me, please review this driver and question every
+ * thing what I did. 2018 Oleksij Rempel <linux@rempel-privat.de>
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/pm.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+
+#include <linux/alcor_pci.h>
+
+enum alcor_cookie {
+ COOKIE_UNMAPPED,
+ COOKIE_PRE_MAPPED,
+ COOKIE_MAPPED,
+};
+
+struct alcor_pll_conf {
+ unsigned int clk_src_freq;
+ unsigned int clk_src_reg;
+ unsigned int min_div;
+ unsigned int max_div;
+};
+
+struct alcor_sdmmc_host {
+ struct device *dev;
+ struct alcor_pci_priv *alcor_pci;
+
+ struct mmc_host *mmc;
+ struct mmc_request *mrq;
+ struct mmc_command *cmd;
+ struct mmc_data *data;
+ unsigned int dma_on:1;
+ unsigned int early_data:1;
+
+ struct mutex cmd_mutex;
+
+ struct delayed_work timeout_work;
+
+ struct sg_mapping_iter sg_miter; /* SG state for PIO */
+ struct scatterlist *sg;
+ unsigned int blocks; /* remaining PIO blocks */
+ int sg_count;
+
+ u32 irq_status_sd;
+ unsigned char cur_power_mode;
+};
+
+static const struct alcor_pll_conf alcor_pll_cfg[] = {
+ /* MHZ, CLK src, max div, min div */
+ { 31250000, AU6601_CLK_31_25_MHZ, 1, 511},
+ { 48000000, AU6601_CLK_48_MHZ, 1, 511},
+ {125000000, AU6601_CLK_125_MHZ, 1, 511},
+ {384000000, AU6601_CLK_384_MHZ, 1, 511},
+};
+
+static inline void alcor_rmw8(struct alcor_sdmmc_host *host, unsigned int addr,
+ u8 clear, u8 set)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ u32 var;
+
+ var = alcor_read8(priv, addr);
+ var &= ~clear;
+ var |= set;
+ alcor_write8(priv, var, addr);
+}
+
+/* As soon as irqs are masked, some status updates may be missed.
+ * Use this with care.
+ */
+static inline void alcor_mask_sd_irqs(struct alcor_sdmmc_host *host)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+
+ alcor_write32(priv, 0, AU6601_REG_INT_ENABLE);
+}
+
+static inline void alcor_unmask_sd_irqs(struct alcor_sdmmc_host *host)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+
+ alcor_write32(priv, AU6601_INT_CMD_MASK | AU6601_INT_DATA_MASK |
+ AU6601_INT_CARD_INSERT | AU6601_INT_CARD_REMOVE |
+ AU6601_INT_OVER_CURRENT_ERR,
+ AU6601_REG_INT_ENABLE);
+}
+
+static void alcor_reset(struct alcor_sdmmc_host *host, u8 val)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ int i;
+
+ alcor_write8(priv, val | AU6601_BUF_CTRL_RESET,
+ AU6601_REG_SW_RESET);
+ for (i = 0; i < 100; i++) {
+ if (!(alcor_read8(priv, AU6601_REG_SW_RESET) & val))
+ return;
+ udelay(50);
+ }
+ dev_err(host->dev, "%s: timeout\n", __func__);
+}
+
+static void alcor_data_set_dma(struct alcor_sdmmc_host *host)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ u32 addr;
+
+ if (!host->sg_count)
+ return;
+
+ if (!host->sg) {
+ dev_err(host->dev, "have blocks, but no SG\n");
+ return;
+ }
+
+ if (!sg_dma_len(host->sg)) {
+ dev_err(host->dev, "DMA SG len == 0\n");
+ return;
+ }
+
+
+ addr = (u32)sg_dma_address(host->sg);
+
+ alcor_write32(priv, addr, AU6601_REG_SDMA_ADDR);
+ host->sg = sg_next(host->sg);
+ host->sg_count--;
+}
+
+static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host,
+ bool early)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ struct mmc_data *data = host->data;
+ u8 ctrl = 0;
+
+ if (data->flags & MMC_DATA_WRITE)
+ ctrl |= AU6601_DATA_WRITE;
+
+ if (data->host_cookie == COOKIE_MAPPED) {
+ if (host->early_data) {
+ host->early_data = false;
+ return;
+ }
+
+ host->early_data = early;
+
+ alcor_data_set_dma(host);
+ ctrl |= AU6601_DATA_DMA_MODE;
+ host->dma_on = 1;
+ alcor_write32(priv, data->sg_count * 0x1000,
+ AU6601_REG_BLOCK_SIZE);
+ } else {
+ alcor_write32(priv, data->blksz, AU6601_REG_BLOCK_SIZE);
+ }
+
+ alcor_write8(priv, ctrl | AU6601_DATA_START_XFER,
+ AU6601_DATA_XFER_CTRL);
+}
+
+static void alcor_trf_block_pio(struct alcor_sdmmc_host *host, bool read)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ size_t blksize, len;
+ u8 *buf;
+
+ if (!host->blocks)
+ return;
+
+ if (host->dma_on) {
+ dev_err(host->dev, "configured DMA but got PIO request.\n");
+ return;
+ }
+
+ if (!!(host->data->flags & MMC_DATA_READ) != read) {
+ dev_err(host->dev, "got unexpected direction %i != %i\n",
+ !!(host->data->flags & MMC_DATA_READ), read);
+ }
+
+ if (!sg_miter_next(&host->sg_miter))
+ return;
+
+ blksize = host->data->blksz;
+ len = min(host->sg_miter.length, blksize);
+
+ dev_dbg(host->dev, "PIO, %s block size: 0x%zx\n",
+ read ? "read" : "write", blksize);
+
+ host->sg_miter.consumed = len;
+ host->blocks--;
+
+ buf = host->sg_miter.addr;
+
+ if (read)
+ ioread32_rep(priv->iobase + AU6601_REG_BUFFER, buf, len >> 2);
+ else
+ iowrite32_rep(priv->iobase + AU6601_REG_BUFFER, buf, len >> 2);
+
+ sg_miter_stop(&host->sg_miter);
+}
+
+static void alcor_prepare_sg_miter(struct alcor_sdmmc_host *host)
+{
+ unsigned int flags = SG_MITER_ATOMIC;
+ struct mmc_data *data = host->data;
+
+ if (data->flags & MMC_DATA_READ)
+ flags |= SG_MITER_TO_SG;
+ else
+ flags |= SG_MITER_FROM_SG;
+ sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
+}
+
+static void alcor_prepare_data(struct alcor_sdmmc_host *host,
+ struct mmc_command *cmd)
+{
+ struct mmc_data *data = cmd->data;
+
+ if (!data)
+ return;
+
+
+ host->data = data;
+ host->data->bytes_xfered = 0;
+ host->blocks = data->blocks;
+ host->sg = data->sg;
+ host->sg_count = data->sg_count;
+ dev_dbg(host->dev, "prepare DATA: sg %i, blocks: %i\n",
+ host->sg_count, host->blocks);
+
+ if (data->host_cookie != COOKIE_MAPPED)
+ alcor_prepare_sg_miter(host);
+
+ alcor_trigger_data_transfer(host, true);
+}
+
+static void alcor_send_cmd(struct alcor_sdmmc_host *host,
+ struct mmc_command *cmd, bool set_timeout)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ unsigned long timeout = 0;
+ u8 ctrl = 0;
+
+ host->cmd = cmd;
+ alcor_prepare_data(host, cmd);
+
+ dev_dbg(host->dev, "send CMD. opcode: 0x%02x, arg; 0x%08x\n",
+ cmd->opcode, cmd->arg);
+ alcor_write8(priv, cmd->opcode | 0x40, AU6601_REG_CMD_OPCODE);
+ alcor_write32be(priv, cmd->arg, AU6601_REG_CMD_ARG);
+
+ switch (mmc_resp_type(cmd)) {
+ case MMC_RSP_NONE:
+ ctrl = AU6601_CMD_NO_RESP;
+ break;
+ case MMC_RSP_R1:
+ ctrl = AU6601_CMD_6_BYTE_CRC;
+ break;
+ case MMC_RSP_R1B:
+ ctrl = AU6601_CMD_6_BYTE_CRC | AU6601_CMD_STOP_WAIT_RDY;
+ break;
+ case MMC_RSP_R2:
+ ctrl = AU6601_CMD_17_BYTE_CRC;
+ break;
+ case MMC_RSP_R3:
+ ctrl = AU6601_CMD_6_BYTE_WO_CRC;
+ break;
+ default:
+ dev_err(host->dev, "%s: cmd->flag (0x%02x) is not valid\n",
+ mmc_hostname(host->mmc), mmc_resp_type(cmd));
+ break;
+ }
+
+ if (set_timeout) {
+ if (!cmd->data && cmd->busy_timeout)
+ timeout = cmd->busy_timeout;
+ else
+ timeout = 10000;
+
+ schedule_delayed_work(&host->timeout_work,
+ msecs_to_jiffies(timeout));
+ }
+
+ dev_dbg(host->dev, "xfer ctrl: 0x%02x; timeout: %lu\n", ctrl, timeout);
+ alcor_write8(priv, ctrl | AU6601_CMD_START_XFER,
+ AU6601_CMD_XFER_CTRL);
+}
+
+static void alcor_request_complete(struct alcor_sdmmc_host *host,
+ bool cancel_timeout)
+{
+ struct mmc_request *mrq;
+
+ /*
+ * If this work gets rescheduled while running, it will
+ * be run again afterwards but without any active request.
+ */
+ if (!host->mrq)
+ return;
+
+ if (cancel_timeout)
+ cancel_delayed_work(&host->timeout_work);
+
+ mrq = host->mrq;
+
+ host->mrq = NULL;
+ host->cmd = NULL;
+ host->data = NULL;
+ host->dma_on = 0;
+
+ mmc_request_done(host->mmc, mrq);
+}
+
+static void alcor_finish_data(struct alcor_sdmmc_host *host)
+{
+ struct mmc_data *data;
+
+ data = host->data;
+ host->data = NULL;
+ host->dma_on = 0;
+
+ /*
+ * The specification states that the block count register must
+ * be updated, but it does not specify at what point in the
+ * data flow. That makes the register entirely useless to read
+ * back so we have to assume that nothing made it to the card
+ * in the event of an error.
+ */
+ if (data->error)
+ data->bytes_xfered = 0;
+ else
+ data->bytes_xfered = data->blksz * data->blocks;
+
+ /*
+ * Need to send CMD12 if -
+ * a) open-ended multiblock transfer (no CMD23)
+ * b) error in multiblock transfer
+ */
+ if (data->stop &&
+ (data->error ||
+ !host->mrq->sbc)) {
+
+ /*
+ * The controller needs a reset of internal state machines
+ * upon error conditions.
+ */
+ if (data->error)
+ alcor_reset(host, AU6601_RESET_CMD | AU6601_RESET_DATA);
+
+ alcor_unmask_sd_irqs(host);
+ alcor_send_cmd(host, data->stop, false);
+ return;
+ }
+
+ alcor_request_complete(host, 1);
+}
+
+static void alcor_err_irq(struct alcor_sdmmc_host *host, u32 intmask)
+{
+ dev_dbg(host->dev, "ERR IRQ %x\n", intmask);
+
+ if (host->cmd) {
+ if (intmask & AU6601_INT_CMD_TIMEOUT_ERR)
+ host->cmd->error = -ETIMEDOUT;
+ else
+ host->cmd->error = -EILSEQ;
+ }
+
+ if (host->data) {
+ if (intmask & AU6601_INT_DATA_TIMEOUT_ERR)
+ host->data->error = -ETIMEDOUT;
+ else
+ host->data->error = -EILSEQ;
+
+ host->data->bytes_xfered = 0;
+ }
+
+ alcor_reset(host, AU6601_RESET_CMD | AU6601_RESET_DATA);
+ alcor_request_complete(host, 1);
+}
+
+static int alcor_cmd_irq_done(struct alcor_sdmmc_host *host, u32 intmask)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+
+ intmask &= AU6601_INT_CMD_END;
+
+ if (!intmask)
+ return true;
+
+ /* got CMD_END but no CMD is in progress, wake thread an process the
+ * error
+ */
+ if (!host->cmd)
+ return false;
+
+ if (host->cmd->flags & MMC_RSP_PRESENT) {
+ struct mmc_command *cmd = host->cmd;
+
+ cmd->resp[0] = alcor_read32be(priv, AU6601_REG_CMD_RSP0);
+ dev_dbg(host->dev, "RSP0: 0x%04x\n", cmd->resp[0]);
+ if (host->cmd->flags & MMC_RSP_136) {
+ cmd->resp[1] =
+ alcor_read32be(priv, AU6601_REG_CMD_RSP1);
+ cmd->resp[2] =
+ alcor_read32be(priv, AU6601_REG_CMD_RSP2);
+ cmd->resp[3] =
+ alcor_read32be(priv, AU6601_REG_CMD_RSP3);
+ dev_dbg(host->dev, "RSP1,2,3: 0x%04x 0x%04x 0x%04x\n",
+ cmd->resp[1], cmd->resp[2], cmd->resp[3]);
+ }
+
+ }
+
+ host->cmd->error = 0;
+
+ /* Processed actual command. */
+ if (!host->data)
+ return false;
+
+ alcor_trigger_data_transfer(host, false);
+ host->cmd = NULL;
+ return true;
+}
+
+static void alcor_cmd_irq_thread(struct alcor_sdmmc_host *host, u32 intmask)
+{
+ intmask &= AU6601_INT_CMD_END;
+
+ if (!intmask)
+ return;
+
+ if (!host->cmd && intmask & AU6601_INT_CMD_END) {
+ dev_dbg(host->dev, "Got command interrupt 0x%08x even though no command operation was in progress.\n",
+ intmask);
+ }
+
+ /* Processed actual command. */
+ if (!host->data)
+ alcor_request_complete(host, 1);
+ else
+ alcor_trigger_data_transfer(host, false);
+ host->cmd = NULL;
+}
+
+static int alcor_data_irq_done(struct alcor_sdmmc_host *host, u32 intmask)
+{
+ u32 tmp;
+
+ intmask &= AU6601_INT_DATA_MASK;
+
+ /* nothing here to do */
+ if (!intmask)
+ return 1;
+
+ /* we was too fast and got DATA_END after it was processed?
+ * lets ignore it for now.
+ */
+ if (!host->data && intmask == AU6601_INT_DATA_END)
+ return 1;
+
+ /* looks like an error, so lets handle it. */
+ if (!host->data)
+ return 0;
+
+ tmp = intmask & (AU6601_INT_READ_BUF_RDY | AU6601_INT_WRITE_BUF_RDY
+ | AU6601_INT_DMA_END);
+ switch (tmp) {
+ case 0:
+ break;
+ case AU6601_INT_READ_BUF_RDY:
+ alcor_trf_block_pio(host, true);
+ if (!host->blocks)
+ break;
+ alcor_trigger_data_transfer(host, false);
+ return 1;
+ case AU6601_INT_WRITE_BUF_RDY:
+ alcor_trf_block_pio(host, false);
+ if (!host->blocks)
+ break;
+ alcor_trigger_data_transfer(host, false);
+ return 1;
+ case AU6601_INT_DMA_END:
+ if (!host->sg_count)
+ break;
+
+ alcor_data_set_dma(host);
+ break;
+ default:
+ dev_err(host->dev, "Got READ_BUF_RDY and WRITE_BUF_RDY at same time\n");
+ break;
+ }
+
+ if (intmask & AU6601_INT_DATA_END)
+ return 0;
+
+ return 1;
+}
+
+static void alcor_data_irq_thread(struct alcor_sdmmc_host *host, u32 intmask)
+{
+ intmask &= AU6601_INT_DATA_MASK;
+
+ if (!intmask)
+ return;
+
+ if (!host->data) {
+ dev_dbg(host->dev, "Got data interrupt 0x%08x even though no data operation was in progress.\n",
+ intmask);
+ alcor_reset(host, AU6601_RESET_DATA);
+ return;
+ }
+
+ if (alcor_data_irq_done(host, intmask))
+ return;
+
+ if ((intmask & AU6601_INT_DATA_END) || !host->blocks ||
+ (host->dma_on && !host->sg_count))
+ alcor_finish_data(host);
+}
+
+static void alcor_cd_irq(struct alcor_sdmmc_host *host, u32 intmask)
+{
+ dev_dbg(host->dev, "card %s\n",
+ intmask & AU6601_INT_CARD_REMOVE ? "removed" : "inserted");
+
+ if (host->mrq) {
+ dev_dbg(host->dev, "cancel all pending tasks.\n");
+
+ if (host->data)
+ host->data->error = -ENOMEDIUM;
+
+ if (host->cmd)
+ host->cmd->error = -ENOMEDIUM;
+ else
+ host->mrq->cmd->error = -ENOMEDIUM;
+
+ alcor_request_complete(host, 1);
+ }
+
+ mmc_detect_change(host->mmc, msecs_to_jiffies(1));
+}
+
+static irqreturn_t alcor_irq_thread(int irq, void *d)
+{
+ struct alcor_sdmmc_host *host = d;
+ irqreturn_t ret = IRQ_HANDLED;
+ u32 intmask, tmp;
+
+ mutex_lock(&host->cmd_mutex);
+
+ intmask = host->irq_status_sd;
+
+ /* some thing bad */
+ if (unlikely(!intmask || AU6601_INT_ALL_MASK == intmask)) {
+ dev_dbg(host->dev, "unexpected IRQ: 0x%04x\n", intmask);
+ ret = IRQ_NONE;
+ goto exit;
+ }
+
+ tmp = intmask & (AU6601_INT_CMD_MASK | AU6601_INT_DATA_MASK);
+ if (tmp) {
+ if (tmp & AU6601_INT_ERROR_MASK)
+ alcor_err_irq(host, tmp);
+ else {
+ alcor_cmd_irq_thread(host, tmp);
+ alcor_data_irq_thread(host, tmp);
+ }
+ intmask &= ~(AU6601_INT_CMD_MASK | AU6601_INT_DATA_MASK);
+ }
+
+ if (intmask & (AU6601_INT_CARD_INSERT | AU6601_INT_CARD_REMOVE)) {
+ alcor_cd_irq(host, intmask);
+ intmask &= ~(AU6601_INT_CARD_INSERT | AU6601_INT_CARD_REMOVE);
+ }
+
+ if (intmask & AU6601_INT_OVER_CURRENT_ERR) {
+ dev_warn(host->dev,
+ "warning: over current detected!\n");
+ intmask &= ~AU6601_INT_OVER_CURRENT_ERR;
+ }
+
+ if (intmask)
+ dev_dbg(host->dev, "got not handled IRQ: 0x%04x\n", intmask);
+
+exit:
+ mutex_unlock(&host->cmd_mutex);
+ alcor_unmask_sd_irqs(host);
+ return ret;
+}
+
+
+static irqreturn_t alcor_irq(int irq, void *d)
+{
+ struct alcor_sdmmc_host *host = d;
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ u32 status, tmp;
+ irqreturn_t ret;
+ int cmd_done, data_done;
+
+ status = alcor_read32(priv, AU6601_REG_INT_STATUS);
+ if (!status)
+ return IRQ_NONE;
+
+ alcor_write32(priv, status, AU6601_REG_INT_STATUS);
+
+ tmp = status & (AU6601_INT_READ_BUF_RDY | AU6601_INT_WRITE_BUF_RDY
+ | AU6601_INT_DATA_END | AU6601_INT_DMA_END
+ | AU6601_INT_CMD_END);
+ if (tmp == status) {
+ cmd_done = alcor_cmd_irq_done(host, tmp);
+ data_done = alcor_data_irq_done(host, tmp);
+ /* use fast path for simple tasks */
+ if (cmd_done && data_done) {
+ ret = IRQ_HANDLED;
+ goto alcor_irq_done;
+ }
+ }
+
+ host->irq_status_sd = status;
+ ret = IRQ_WAKE_THREAD;
+ alcor_mask_sd_irqs(host);
+alcor_irq_done:
+ return ret;
+}
+
+static void alcor_set_clock(struct alcor_sdmmc_host *host, unsigned int clock)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ int i, diff = 0x7fffffff, tmp_clock = 0;
+ u16 clk_src = 0;
+ u8 clk_div = 0;
+
+ if (clock == 0) {
+ alcor_write16(priv, 0, AU6601_CLK_SELECT);
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(alcor_pll_cfg); i++) {
+ unsigned int tmp_div, tmp_diff;
+ const struct alcor_pll_conf *cfg = &alcor_pll_cfg[i];
+
+ tmp_div = DIV_ROUND_UP(cfg->clk_src_freq, clock);
+ if (cfg->min_div > tmp_div || tmp_div > cfg->max_div)
+ continue;
+
+ tmp_clock = DIV_ROUND_UP(cfg->clk_src_freq, tmp_div);
+ tmp_diff = abs(clock - tmp_clock);
+
+ if (tmp_diff >= 0 && tmp_diff < diff) {
+ diff = tmp_diff;
+ clk_src = cfg->clk_src_reg;
+ clk_div = tmp_div;
+ }
+ }
+
+ clk_src |= ((clk_div - 1) << 8);
+ clk_src |= AU6601_CLK_ENABLE;
+
+ dev_dbg(host->dev, "set freq %d cal freq %d, use div %d, mod %x\n",
+ clock, tmp_clock, clk_div, clk_src);
+
+ alcor_write16(priv, clk_src, AU6601_CLK_SELECT);
+
+}
+
+static void alcor_set_timing(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+
+ if (ios->timing == MMC_TIMING_LEGACY) {
+ alcor_rmw8(host, AU6601_CLK_DELAY,
+ AU6601_CLK_POSITIVE_EDGE_ALL, 0);
+ } else {
+ alcor_rmw8(host, AU6601_CLK_DELAY,
+ 0, AU6601_CLK_POSITIVE_EDGE_ALL);
+ }
+}
+
+static void alcor_set_bus_width(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+ struct alcor_pci_priv *priv = host->alcor_pci;
+
+ if (ios->bus_width == MMC_BUS_WIDTH_1) {
+ alcor_write8(priv, 0, AU6601_REG_BUS_CTRL);
+ } else if (ios->bus_width == MMC_BUS_WIDTH_4) {
+ alcor_write8(priv, AU6601_BUS_WIDTH_4BIT,
+ AU6601_REG_BUS_CTRL);
+ } else
+ dev_err(host->dev, "Unknown BUS mode\n");
+
+}
+
+static int alcor_card_busy(struct mmc_host *mmc)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ u8 status;
+
+ /* Check whether dat[0:3] low */
+ status = alcor_read8(priv, AU6601_DATA_PIN_STATE);
+
+ return !(status & AU6601_BUS_STAT_DAT_MASK);
+}
+
+static int alcor_get_cd(struct mmc_host *mmc)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ u8 detect;
+
+ detect = alcor_read8(priv, AU6601_DETECT_STATUS)
+ & AU6601_DETECT_STATUS_M;
+ /* check if card is present then send command and data */
+ return (detect == AU6601_SD_DETECTED);
+}
+
+static int alcor_get_ro(struct mmc_host *mmc)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ u8 status;
+
+ /* get write protect pin status */
+ status = alcor_read8(priv, AU6601_INTERFACE_MODE_CTRL);
+
+ return !!(status & AU6601_SD_CARD_WP);
+}
+
+static void alcor_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+
+ mutex_lock(&host->cmd_mutex);
+
+ host->mrq = mrq;
+
+ /* check if card is present then send command and data */
+ if (alcor_get_cd(mmc))
+ alcor_send_cmd(host, mrq->cmd, true);
+ else {
+ mrq->cmd->error = -ENOMEDIUM;
+ alcor_request_complete(host, 1);
+ }
+
+ mutex_unlock(&host->cmd_mutex);
+}
+
+static void alcor_pre_req(struct mmc_host *mmc,
+ struct mmc_request *mrq)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+ struct mmc_data *data = mrq->data;
+ struct mmc_command *cmd = mrq->cmd;
+ struct scatterlist *sg;
+ unsigned int i, sg_len;
+
+ if (!data || !cmd)
+ return;
+
+ data->host_cookie = COOKIE_UNMAPPED;
+
+ /* FIXME: looks like the DMA engine works only with CMD18 */
+ if (cmd->opcode != 18)
+ return;
+ /*
+ * We don't do DMA on "complex" transfers, i.e. with
+ * non-word-aligned buffers or lengths. Also, we don't bother
+ * with all the DMA setup overhead for short transfers.
+ */
+ if (data->blocks * data->blksz < AU6601_MAX_DMA_BLOCK_SIZE)
+ return;
+
+ if (data->blksz & 3)
+ return;
+
+ for_each_sg(data->sg, sg, data->sg_len, i) {
+ if (sg->length != AU6601_MAX_DMA_BLOCK_SIZE)
+ return;
+ }
+
+ /* This data might be unmapped at this time */
+
+ sg_len = dma_map_sg(host->dev, data->sg, data->sg_len,
+ mmc_get_dma_dir(data));
+ if (sg_len)
+ data->host_cookie = COOKIE_MAPPED;
+
+ data->sg_count = sg_len;
+}
+
+static void alcor_post_req(struct mmc_host *mmc,
+ struct mmc_request *mrq,
+ int err)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+ struct mmc_data *data = mrq->data;
+
+ if (!data)
+ return;
+
+ if (data->host_cookie == COOKIE_MAPPED) {
+ dma_unmap_sg(host->dev,
+ data->sg,
+ data->sg_len,
+ mmc_get_dma_dir(data));
+ }
+
+ data->host_cookie = COOKIE_UNMAPPED;
+}
+
+static void alcor_set_power_mode(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+ struct alcor_pci_priv *priv = host->alcor_pci;
+
+ switch (ios->power_mode) {
+ case MMC_POWER_OFF:
+ alcor_set_clock(host, ios->clock);
+ /* set all pins to input */
+ alcor_write8(priv, 0, AU6601_OUTPUT_ENABLE);
+ /* turn of VDD */
+ alcor_write8(priv, 0, AU6601_POWER_CONTROL);
+ break;
+ case MMC_POWER_UP:
+ break;
+ case MMC_POWER_ON:
+ /* This is most trickiest part. The order and timings of
+ * instructions seems to play important role. Any changes may
+ * confuse internal state engine if this HW.
+ * FIXME: If we will ever get access to documentation, then this
+ * part should be reviewed again.
+ */
+
+ /* enable SD card mode */
+ alcor_write8(priv, AU6601_SD_CARD,
+ AU6601_ACTIVE_CTRL);
+ /* set signal voltage to 3.3V */
+ alcor_write8(priv, 0, AU6601_OPT);
+ /* no documentation about clk delay, for now just try to mimic
+ * original driver.
+ */
+ alcor_write8(priv, 0x20, AU6601_CLK_DELAY);
+ /* set BUS width to 1 bit */
+ alcor_write8(priv, 0, AU6601_REG_BUS_CTRL);
+ /* set CLK first time */
+ alcor_set_clock(host, ios->clock);
+ /* power on VDD */
+ alcor_write8(priv, AU6601_SD_CARD,
+ AU6601_POWER_CONTROL);
+ /* wait until the CLK will get stable */
+ mdelay(20);
+ /* set CLK again, mimic original driver. */
+ alcor_set_clock(host, ios->clock);
+
+ /* enable output */
+ alcor_write8(priv, AU6601_SD_CARD,
+ AU6601_OUTPUT_ENABLE);
+ /* The clk will not work on au6621. We need to trigger data
+ * transfer.
+ */
+ alcor_write8(priv, AU6601_DATA_WRITE,
+ AU6601_DATA_XFER_CTRL);
+ /* configure timeout. Not clear what exactly it means. */
+ alcor_write8(priv, 0x7d, AU6601_TIME_OUT_CTRL);
+ mdelay(100);
+ break;
+ default:
+ dev_err(host->dev, "Unknown power parameter\n");
+ }
+}
+
+static void alcor_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+
+ mutex_lock(&host->cmd_mutex);
+
+ dev_dbg(host->dev, "set ios. bus width: %x, power mode: %x\n",
+ ios->bus_width, ios->power_mode);
+
+ if (ios->power_mode != host->cur_power_mode) {
+ alcor_set_power_mode(mmc, ios);
+ host->cur_power_mode = ios->power_mode;
+ } else {
+ alcor_set_timing(mmc, ios);
+ alcor_set_bus_width(mmc, ios);
+ alcor_set_clock(host, ios->clock);
+ }
+
+ mutex_unlock(&host->cmd_mutex);
+}
+
+static int alcor_signal_voltage_switch(struct mmc_host *mmc,
+ struct mmc_ios *ios)
+{
+ struct alcor_sdmmc_host *host = mmc_priv(mmc);
+
+ mutex_lock(&host->cmd_mutex);
+
+ switch (ios->signal_voltage) {
+ case MMC_SIGNAL_VOLTAGE_330:
+ alcor_rmw8(host, AU6601_OPT, AU6601_OPT_SD_18V, 0);
+ break;
+ case MMC_SIGNAL_VOLTAGE_180:
+ alcor_rmw8(host, AU6601_OPT, 0, AU6601_OPT_SD_18V);
+ break;
+ default:
+ /* No signal voltage switch required */
+ break;
+ }
+
+ mutex_unlock(&host->cmd_mutex);
+ return 0;
+}
+
+static const struct mmc_host_ops alcor_sdc_ops = {
+ .card_busy = alcor_card_busy,
+ .get_cd = alcor_get_cd,
+ .get_ro = alcor_get_ro,
+ .post_req = alcor_post_req,
+ .pre_req = alcor_pre_req,
+ .request = alcor_request,
+ .set_ios = alcor_set_ios,
+ .start_signal_voltage_switch = alcor_signal_voltage_switch,
+};
+
+static void alcor_timeout_timer(struct work_struct *work)
+{
+ struct delayed_work *d = to_delayed_work(work);
+ struct alcor_sdmmc_host *host = container_of(d, struct alcor_sdmmc_host,
+ timeout_work);
+ mutex_lock(&host->cmd_mutex);
+
+ dev_dbg(host->dev, "triggered timeout\n");
+ if (host->mrq) {
+ dev_err(host->dev, "Timeout waiting for hardware interrupt.\n");
+
+ if (host->data) {
+ host->data->error = -ETIMEDOUT;
+ } else {
+ if (host->cmd)
+ host->cmd->error = -ETIMEDOUT;
+ else
+ host->mrq->cmd->error = -ETIMEDOUT;
+ }
+
+ alcor_reset(host, AU6601_RESET_CMD | AU6601_RESET_DATA);
+ alcor_request_complete(host, 0);
+ }
+
+ mmiowb();
+ mutex_unlock(&host->cmd_mutex);
+}
+
+static void alcor_hw_init(struct alcor_sdmmc_host *host)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+ struct alcor_dev_cfg *cfg = priv->cfg;
+
+ /* FIXME: This part is a mimics HW init of original driver.
+ * If we will ever get access to documentation, then this part
+ * should be reviewed again.
+ */
+
+ /* reset command state engine */
+ alcor_reset(host, AU6601_RESET_CMD);
+
+ alcor_write8(priv, 0, AU6601_DMA_BOUNDARY);
+ /* enable sd card mode */
+ alcor_write8(priv, AU6601_SD_CARD, AU6601_ACTIVE_CTRL);
+
+ /* set BUS width to 1 bit */
+ alcor_write8(priv, 0, AU6601_REG_BUS_CTRL);
+
+ /* reset data state engine */
+ alcor_reset(host, AU6601_RESET_DATA);
+ /* Not sure if a voodoo with AU6601_DMA_BOUNDARY is really needed */
+ alcor_write8(priv, 0, AU6601_DMA_BOUNDARY);
+
+ alcor_write8(priv, 0, AU6601_INTERFACE_MODE_CTRL);
+ /* not clear what we are doing here. */
+ alcor_write8(priv, 0x44, AU6601_PAD_DRIVE0);
+ alcor_write8(priv, 0x44, AU6601_PAD_DRIVE1);
+ alcor_write8(priv, 0x00, AU6601_PAD_DRIVE2);
+
+ /* for 6601 - dma_boundary; for 6621 - dma_page_cnt
+ * exact meaning of this register is not clear.
+ */
+ alcor_write8(priv, cfg->dma, AU6601_DMA_BOUNDARY);
+
+ /* make sure all pins are set to input and VDD is off */
+ alcor_write8(priv, 0, AU6601_OUTPUT_ENABLE);
+ alcor_write8(priv, 0, AU6601_POWER_CONTROL);
+
+ alcor_write8(priv, AU6601_DETECT_EN, AU6601_DETECT_STATUS);
+ /* now we should be safe to enable IRQs */
+ alcor_unmask_sd_irqs(host);
+}
+
+static void alcor_hw_uninit(struct alcor_sdmmc_host *host)
+{
+ struct alcor_pci_priv *priv = host->alcor_pci;
+
+ alcor_mask_sd_irqs(host);
+ alcor_reset(host, AU6601_RESET_CMD | AU6601_RESET_DATA);
+
+ alcor_write8(priv, 0, AU6601_DETECT_STATUS);
+
+ alcor_write8(priv, 0, AU6601_OUTPUT_ENABLE);
+ alcor_write8(priv, 0, AU6601_POWER_CONTROL);
+
+ alcor_write8(priv, 0, AU6601_OPT);
+}
+
+static void alcor_init_mmc(struct alcor_sdmmc_host *host)
+{
+ struct mmc_host *mmc = host->mmc;
+
+ mmc->f_min = AU6601_MIN_CLOCK;
+ mmc->f_max = AU6601_MAX_CLOCK;
+ mmc->ocr_avail = MMC_VDD_33_34;
+ mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SD_HIGHSPEED
+ | MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR50
+ | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50;
+ mmc->caps2 = MMC_CAP2_NO_SDIO;
+ mmc->ops = &alcor_sdc_ops;
+
+ /* Hardware cannot do scatter lists */
+ mmc->max_segs = AU6601_MAX_DMA_SEGMENTS;
+ mmc->max_seg_size = AU6601_MAX_DMA_BLOCK_SIZE;
+
+ mmc->max_blk_size = mmc->max_seg_size;
+ mmc->max_blk_count = mmc->max_segs;
+
+ mmc->max_req_size = mmc->max_seg_size * mmc->max_segs;
+}
+
+static int alcor_pci_sdmmc_drv_probe(struct platform_device *pdev)
+{
+ struct alcor_pci_priv *priv = pdev->dev.platform_data;
+ struct mmc_host *mmc;
+ struct alcor_sdmmc_host *host;
+ int ret;
+
+ mmc = mmc_alloc_host(sizeof(*host), &pdev->dev);
+ if (!mmc) {
+ dev_err(&pdev->dev, "Can't allocate MMC\n");
+ return -ENOMEM;
+ }
+
+ host = mmc_priv(mmc);
+ host->mmc = mmc;
+ host->dev = &pdev->dev;
+ host->cur_power_mode = MMC_POWER_UNDEFINED;
+ host->alcor_pci = priv;
+
+ /* make sure irqs are disabled */
+ alcor_write32(priv, 0, AU6601_REG_INT_ENABLE);
+ alcor_write32(priv, 0, AU6601_MS_INT_ENABLE);
+
+ ret = devm_request_threaded_irq(&pdev->dev, priv->irq,
+ alcor_irq, alcor_irq_thread, IRQF_SHARED,
+ DRV_NAME_ALCOR_PCI_SDMMC, host);
+
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to get irq for data line\n");
+ return ret;
+ }
+
+ mutex_init(&host->cmd_mutex);
+ INIT_DELAYED_WORK(&host->timeout_work, alcor_timeout_timer);
+
+ alcor_init_mmc(host);
+ alcor_hw_init(host);
+
+ dev_set_drvdata(&pdev->dev, host);
+ mmc_add_host(mmc);
+ return 0;
+}
+
+static int alcor_pci_sdmmc_drv_remove(struct platform_device *pdev)
+{
+ struct alcor_sdmmc_host *host = dev_get_drvdata(&pdev->dev);
+
+ if (cancel_delayed_work_sync(&host->timeout_work))
+ alcor_request_complete(host, 0);
+
+ alcor_hw_uninit(host);
+ mmc_remove_host(host->mmc);
+ mmc_free_host(host->mmc);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int alcor_pci_sdmmc_suspend(struct device *dev)
+{
+ struct alcor_sdmmc_host *host = dev_get_drvdata(dev);
+
+ if (cancel_delayed_work_sync(&host->timeout_work))
+ alcor_request_complete(host, 0);
+
+ alcor_hw_uninit(host);
+
+ return 0;
+}
+
+static int alcor_pci_sdmmc_resume(struct device *dev)
+{
+ struct alcor_sdmmc_host *host = dev_get_drvdata(dev);
+
+ alcor_hw_init(host);
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(alcor_mmc_pm_ops, alcor_pci_sdmmc_suspend,
+ alcor_pci_sdmmc_resume);
+
+static const struct platform_device_id alcor_pci_sdmmc_ids[] = {
+ {
+ .name = DRV_NAME_ALCOR_PCI_SDMMC,
+ }, {
+ /* sentinel */
+ }
+};
+MODULE_DEVICE_TABLE(platform, alcor_pci_sdmmc_ids);
+
+static struct platform_driver alcor_pci_sdmmc_driver = {
+ .probe = alcor_pci_sdmmc_drv_probe,
+ .remove = alcor_pci_sdmmc_drv_remove,
+ .id_table = alcor_pci_sdmmc_ids,
+ .driver = {
+ .name = DRV_NAME_ALCOR_PCI_SDMMC,
+ .pm = &alcor_mmc_pm_ops
+ },
+};
+module_platform_driver(alcor_pci_sdmmc_driver);
+
+MODULE_AUTHOR("Oleksij Rempel <linux@rempel-privat.de>");
+MODULE_DESCRIPTION("PCI driver for Alcor Micro AU6601 Secure Digital Host Controller Interface");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index be53044086c7..47189f9ed4e2 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -446,18 +446,7 @@ static int atmci_req_show(struct seq_file *s, void *v)
return 0;
}
-static int atmci_req_open(struct inode *inode, struct file *file)
-{
- return single_open(file, atmci_req_show, inode->i_private);
-}
-
-static const struct file_operations atmci_req_fops = {
- .owner = THIS_MODULE,
- .open = atmci_req_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(atmci_req);
static void atmci_show_status_reg(struct seq_file *s,
const char *regname, u32 value)
@@ -583,18 +572,7 @@ static int atmci_regs_show(struct seq_file *s, void *v)
return ret;
}
-static int atmci_regs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, atmci_regs_show, inode->i_private);
-}
-
-static const struct file_operations atmci_regs_fops = {
- .owner = THIS_MODULE,
- .open = atmci_regs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(atmci_regs);
static void atmci_init_debugfs(struct atmel_mci_slot *slot)
{
@@ -608,13 +586,14 @@ static void atmci_init_debugfs(struct atmel_mci_slot *slot)
return;
node = debugfs_create_file("regs", S_IRUSR, root, host,
- &atmci_regs_fops);
+ &atmci_regs_fops);
if (IS_ERR(node))
return;
if (!node)
goto err;
- node = debugfs_create_file("req", S_IRUSR, root, slot, &atmci_req_fops);
+ node = debugfs_create_file("req", S_IRUSR, root, slot,
+ &atmci_req_fops);
if (!node)
goto err;
@@ -1954,13 +1933,14 @@ static void atmci_tasklet_func(unsigned long priv)
}
atmci_request_end(host, host->mrq);
- state = STATE_IDLE;
+ goto unlock; /* atmci_request_end() sets host->state */
break;
}
} while (state != prev_state);
host->state = state;
+unlock:
spin_unlock(&host->lock);
}
diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
index 768972af8b85..50293529d6de 100644
--- a/drivers/mmc/host/bcm2835.c
+++ b/drivers/mmc/host/bcm2835.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* bcm2835 sdhost driver.
*
@@ -25,18 +26,6 @@
* sdhci-bcm2708.c by Broadcom
* sdhci-bcm2835.c by Stephen Warren and Oleksandr Tymoshenko
* sdhci.c and sdhci-pci.c by Pierre Ossman
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/clk.h>
#include <linux/delay.h>
@@ -286,6 +275,7 @@ static void bcm2835_reset(struct mmc_host *mmc)
if (host->dma_chan)
dmaengine_terminate_sync(host->dma_chan);
+ host->dma_chan = NULL;
bcm2835_reset_internal(host);
}
@@ -463,7 +453,7 @@ static void bcm2835_transfer_pio(struct bcm2835_host *host)
static
void bcm2835_prepare_dma(struct bcm2835_host *host, struct mmc_data *data)
{
- int len, dir_data, dir_slave;
+ int sg_len, dir_data, dir_slave;
struct dma_async_tx_descriptor *desc = NULL;
struct dma_chan *dma_chan;
@@ -509,23 +499,24 @@ void bcm2835_prepare_dma(struct bcm2835_host *host, struct mmc_data *data)
&host->dma_cfg_rx :
&host->dma_cfg_tx);
- len = dma_map_sg(dma_chan->device->dev, data->sg, data->sg_len,
- dir_data);
+ sg_len = dma_map_sg(dma_chan->device->dev, data->sg, data->sg_len,
+ dir_data);
+ if (!sg_len)
+ return;
- if (len > 0) {
- desc = dmaengine_prep_slave_sg(dma_chan, data->sg,
- len, dir_slave,
- DMA_PREP_INTERRUPT |
- DMA_CTRL_ACK);
- }
+ desc = dmaengine_prep_slave_sg(dma_chan, data->sg, sg_len, dir_slave,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
- if (desc) {
- desc->callback = bcm2835_dma_complete;
- desc->callback_param = host;
- host->dma_desc = desc;
- host->dma_chan = dma_chan;
- host->dma_dir = dir_data;
+ if (!desc) {
+ dma_unmap_sg(dma_chan->device->dev, data->sg, sg_len, dir_data);
+ return;
}
+
+ desc->callback = bcm2835_dma_complete;
+ desc->callback_param = host;
+ host->dma_desc = desc;
+ host->dma_chan = dma_chan;
+ host->dma_dir = dir_data;
}
static void bcm2835_start_dma(struct bcm2835_host *host)
@@ -607,7 +598,7 @@ static void bcm2835_finish_request(struct bcm2835_host *host)
struct dma_chan *terminate_chan = NULL;
struct mmc_request *mrq;
- cancel_delayed_work(&host->timeout_work);
+ cancel_delayed_work_sync(&host->timeout_work);
mrq = host->mrq;
@@ -772,6 +763,8 @@ static void bcm2835_finish_command(struct bcm2835_host *host)
if (!(sdhsts & SDHSTS_CRC7_ERROR) ||
(host->cmd->opcode != MMC_SEND_OP_COND)) {
+ u32 edm, fsm;
+
if (sdhsts & SDHSTS_CMD_TIME_OUT) {
host->cmd->error = -ETIMEDOUT;
} else {
@@ -780,6 +773,13 @@ static void bcm2835_finish_command(struct bcm2835_host *host)
bcm2835_dumpregs(host);
host->cmd->error = -EILSEQ;
}
+ edm = readl(host->ioaddr + SDEDM);
+ fsm = edm & SDEDM_FSM_MASK;
+ if (fsm == SDEDM_FSM_READWAIT ||
+ fsm == SDEDM_FSM_WRITESTART1)
+ /* Kick the FSM out of its wait */
+ writel(edm | SDEDM_FORCE_DATA_MODE,
+ host->ioaddr + SDEDM);
bcm2835_finish_request(host);
return;
}
@@ -837,6 +837,8 @@ static void bcm2835_timeout(struct work_struct *work)
dev_err(dev, "timeout waiting for hardware interrupt.\n");
bcm2835_dumpregs(host);
+ bcm2835_reset(host->mmc);
+
if (host->data) {
host->data->error = -ETIMEDOUT;
bcm2835_finish_data(host);
@@ -1052,10 +1054,12 @@ static void bcm2835_dma_complete_work(struct work_struct *work)
{
struct bcm2835_host *host =
container_of(work, struct bcm2835_host, dma_work);
- struct mmc_data *data = host->data;
+ struct mmc_data *data;
mutex_lock(&host->mutex);
+ data = host->data;
+
if (host->dma_chan) {
dma_unmap_sg(host->dma_chan->device->dev,
data->sg, data->sg_len,
@@ -1180,9 +1184,6 @@ static void bcm2835_request(struct mmc_host *mmc, struct mmc_request *mrq)
return;
}
- if (host->use_dma && mrq->data && (mrq->data->blocks > PIO_THRESHOLD))
- bcm2835_prepare_dma(host, mrq->data);
-
mutex_lock(&host->mutex);
WARN_ON(host->mrq);
@@ -1206,6 +1207,9 @@ static void bcm2835_request(struct mmc_host *mmc, struct mmc_request *mrq)
return;
}
+ if (host->use_dma && mrq->data && (mrq->data->blocks > PIO_THRESHOLD))
+ bcm2835_prepare_dma(host, mrq->data);
+
host->use_sbc = !!mrq->sbc && host->mrq->data &&
(host->mrq->data->flags & MMC_DATA_READ);
if (host->use_sbc) {
@@ -1445,6 +1449,9 @@ static int bcm2835_remove(struct platform_device *pdev)
cancel_work_sync(&host->dma_work);
cancel_delayed_work_sync(&host->timeout_work);
+ if (host->dma_chan_rxtx)
+ dma_release_channel(host->dma_chan_rxtx);
+
mmc_free_host(host->mmc);
platform_set_drvdata(pdev, NULL);
diff --git a/drivers/mmc/host/dw_mmc-bluefield.c b/drivers/mmc/host/dw_mmc-bluefield.c
index 54c3fbb4a391..ed8f2254b66a 100644
--- a/drivers/mmc/host/dw_mmc-bluefield.c
+++ b/drivers/mmc/host/dw_mmc-bluefield.c
@@ -52,16 +52,7 @@ MODULE_DEVICE_TABLE(of, dw_mci_bluefield_match);
static int dw_mci_bluefield_probe(struct platform_device *pdev)
{
- const struct dw_mci_drv_data *drv_data = NULL;
- const struct of_device_id *match;
-
- if (pdev->dev.of_node) {
- match = of_match_node(dw_mci_bluefield_match,
- pdev->dev.of_node);
- drv_data = match->data;
- }
-
- return dw_mci_pltfm_register(pdev, drv_data);
+ return dw_mci_pltfm_register(pdev, &bluefield_drv_data);
}
static struct platform_driver dw_mci_bluefield_pltfm_driver = {
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 0c1efd5100b7..33215d66afa2 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -21,7 +21,7 @@
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/irq.h>
@@ -126,9 +126,23 @@ enum jz4740_mmc_state {
JZ4740_MMC_STATE_DONE,
};
-struct jz4740_mmc_host_next {
- int sg_len;
- s32 cookie;
+/*
+ * The MMC core allows to prepare a mmc_request while another mmc_request
+ * is in-flight. This is used via the pre_req/post_req hooks.
+ * This driver uses the pre_req/post_req hooks to map/unmap the mmc_request.
+ * Following what other drivers do (sdhci, dw_mmc) we use the following cookie
+ * flags to keep track of the mmc_request mapping state.
+ *
+ * COOKIE_UNMAPPED: the request is not mapped.
+ * COOKIE_PREMAPPED: the request was mapped in pre_req,
+ * and should be unmapped in post_req.
+ * COOKIE_MAPPED: the request was mapped in the irq handler,
+ * and should be unmapped before mmc_request_done is called..
+ */
+enum jz4780_cookie {
+ COOKIE_UNMAPPED = 0,
+ COOKIE_PREMAPPED,
+ COOKIE_MAPPED,
};
struct jz4740_mmc_host {
@@ -136,6 +150,7 @@ struct jz4740_mmc_host {
struct platform_device *pdev;
struct jz4740_mmc_platform_data *pdata;
struct clk *clk;
+ struct gpio_desc *power;
enum jz4740_mmc_version version;
@@ -162,9 +177,7 @@ struct jz4740_mmc_host {
/* DMA support */
struct dma_chan *dma_rx;
struct dma_chan *dma_tx;
- struct jz4740_mmc_host_next next_data;
bool use_dma;
- int sg_len;
/* The DMA trigger level is 8 words, that is to say, the DMA read
* trigger is when data words in MSC_RXFIFO is >= 8 and the DMA write
@@ -226,9 +239,6 @@ static int jz4740_mmc_acquire_dma_channels(struct jz4740_mmc_host *host)
return PTR_ERR(host->dma_rx);
}
- /* Initialize DMA pre request cookie */
- host->next_data.cookie = 1;
-
return 0;
}
@@ -245,60 +255,44 @@ static void jz4740_mmc_dma_unmap(struct jz4740_mmc_host *host,
enum dma_data_direction dir = mmc_get_dma_dir(data);
dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir);
+ data->host_cookie = COOKIE_UNMAPPED;
}
-/* Prepares DMA data for current/next transfer, returns non-zero on failure */
+/* Prepares DMA data for current or next transfer.
+ * A request can be in-flight when this is called.
+ */
static int jz4740_mmc_prepare_dma_data(struct jz4740_mmc_host *host,
struct mmc_data *data,
- struct jz4740_mmc_host_next *next,
- struct dma_chan *chan)
+ int cookie)
{
- struct jz4740_mmc_host_next *next_data = &host->next_data;
+ struct dma_chan *chan = jz4740_mmc_get_dma_chan(host, data);
enum dma_data_direction dir = mmc_get_dma_dir(data);
- int sg_len;
-
- if (!next && data->host_cookie &&
- data->host_cookie != host->next_data.cookie) {
- dev_warn(mmc_dev(host->mmc),
- "[%s] invalid cookie: data->host_cookie %d host->next_data.cookie %d\n",
- __func__,
- data->host_cookie,
- host->next_data.cookie);
- data->host_cookie = 0;
- }
+ int sg_count;
- /* Check if next job is already prepared */
- if (next || data->host_cookie != host->next_data.cookie) {
- sg_len = dma_map_sg(chan->device->dev,
- data->sg,
- data->sg_len,
- dir);
+ if (data->host_cookie == COOKIE_PREMAPPED)
+ return data->sg_count;
- } else {
- sg_len = next_data->sg_len;
- next_data->sg_len = 0;
- }
+ sg_count = dma_map_sg(chan->device->dev,
+ data->sg,
+ data->sg_len,
+ dir);
- if (sg_len <= 0) {
+ if (sg_count <= 0) {
dev_err(mmc_dev(host->mmc),
"Failed to map scatterlist for DMA operation\n");
return -EINVAL;
}
- if (next) {
- next->sg_len = sg_len;
- data->host_cookie = ++next->cookie < 0 ? 1 : next->cookie;
- } else
- host->sg_len = sg_len;
+ data->sg_count = sg_count;
+ data->host_cookie = cookie;
- return 0;
+ return data->sg_count;
}
static int jz4740_mmc_start_dma_transfer(struct jz4740_mmc_host *host,
struct mmc_data *data)
{
- int ret;
- struct dma_chan *chan;
+ struct dma_chan *chan = jz4740_mmc_get_dma_chan(host, data);
struct dma_async_tx_descriptor *desc;
struct dma_slave_config conf = {
.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
@@ -306,29 +300,26 @@ static int jz4740_mmc_start_dma_transfer(struct jz4740_mmc_host *host,
.src_maxburst = JZ4740_MMC_FIFO_HALF_SIZE,
.dst_maxburst = JZ4740_MMC_FIFO_HALF_SIZE,
};
+ int sg_count;
if (data->flags & MMC_DATA_WRITE) {
conf.direction = DMA_MEM_TO_DEV;
conf.dst_addr = host->mem_res->start + JZ_REG_MMC_TXFIFO;
conf.slave_id = JZ4740_DMA_TYPE_MMC_TRANSMIT;
- chan = host->dma_tx;
} else {
conf.direction = DMA_DEV_TO_MEM;
conf.src_addr = host->mem_res->start + JZ_REG_MMC_RXFIFO;
conf.slave_id = JZ4740_DMA_TYPE_MMC_RECEIVE;
- chan = host->dma_rx;
}
- ret = jz4740_mmc_prepare_dma_data(host, data, NULL, chan);
- if (ret)
- return ret;
+ sg_count = jz4740_mmc_prepare_dma_data(host, data, COOKIE_MAPPED);
+ if (sg_count < 0)
+ return sg_count;
dmaengine_slave_config(chan, &conf);
- desc = dmaengine_prep_slave_sg(chan,
- data->sg,
- host->sg_len,
- conf.direction,
- DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ desc = dmaengine_prep_slave_sg(chan, data->sg, sg_count,
+ conf.direction,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc) {
dev_err(mmc_dev(host->mmc),
"Failed to allocate DMA %s descriptor",
@@ -342,7 +333,8 @@ static int jz4740_mmc_start_dma_transfer(struct jz4740_mmc_host *host,
return 0;
dma_unmap:
- jz4740_mmc_dma_unmap(host, data);
+ if (data->host_cookie == COOKIE_MAPPED)
+ jz4740_mmc_dma_unmap(host, data);
return -ENOMEM;
}
@@ -351,16 +343,13 @@ static void jz4740_mmc_pre_request(struct mmc_host *mmc,
{
struct jz4740_mmc_host *host = mmc_priv(mmc);
struct mmc_data *data = mrq->data;
- struct jz4740_mmc_host_next *next_data = &host->next_data;
- BUG_ON(data->host_cookie);
-
- if (host->use_dma) {
- struct dma_chan *chan = jz4740_mmc_get_dma_chan(host, data);
+ if (!host->use_dma)
+ return;
- if (jz4740_mmc_prepare_dma_data(host, data, next_data, chan))
- data->host_cookie = 0;
- }
+ data->host_cookie = COOKIE_UNMAPPED;
+ if (jz4740_mmc_prepare_dma_data(host, data, COOKIE_PREMAPPED) < 0)
+ data->host_cookie = COOKIE_UNMAPPED;
}
static void jz4740_mmc_post_request(struct mmc_host *mmc,
@@ -370,10 +359,8 @@ static void jz4740_mmc_post_request(struct mmc_host *mmc,
struct jz4740_mmc_host *host = mmc_priv(mmc);
struct mmc_data *data = mrq->data;
- if (host->use_dma && data->host_cookie) {
+ if (data && data->host_cookie != COOKIE_UNMAPPED)
jz4740_mmc_dma_unmap(host, data);
- data->host_cookie = 0;
- }
if (err) {
struct dma_chan *chan = jz4740_mmc_get_dma_chan(host, data);
@@ -436,10 +423,14 @@ static void jz4740_mmc_reset(struct jz4740_mmc_host *host)
static void jz4740_mmc_request_done(struct jz4740_mmc_host *host)
{
struct mmc_request *req;
+ struct mmc_data *data;
req = host->req;
+ data = req->data;
host->req = NULL;
+ if (data && data->host_cookie == COOKIE_MAPPED)
+ jz4740_mmc_dma_unmap(host, data);
mmc_request_done(host->mmc, req);
}
@@ -903,18 +894,16 @@ static void jz4740_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
switch (ios->power_mode) {
case MMC_POWER_UP:
jz4740_mmc_reset(host);
- if (host->pdata && gpio_is_valid(host->pdata->gpio_power))
- gpio_set_value(host->pdata->gpio_power,
- !host->pdata->power_active_low);
+ if (host->power)
+ gpiod_set_value(host->power, 1);
host->cmdat |= JZ_MMC_CMDAT_INIT;
clk_prepare_enable(host->clk);
break;
case MMC_POWER_ON:
break;
default:
- if (host->pdata && gpio_is_valid(host->pdata->gpio_power))
- gpio_set_value(host->pdata->gpio_power,
- host->pdata->power_active_low);
+ if (host->power)
+ gpiod_set_value(host->power, 0);
clk_disable_unprepare(host->clk);
break;
}
@@ -947,30 +936,9 @@ static const struct mmc_host_ops jz4740_mmc_ops = {
.enable_sdio_irq = jz4740_mmc_enable_sdio_irq,
};
-static int jz4740_mmc_request_gpio(struct device *dev, int gpio,
- const char *name, bool output, int value)
-{
- int ret;
-
- if (!gpio_is_valid(gpio))
- return 0;
-
- ret = gpio_request(gpio, name);
- if (ret) {
- dev_err(dev, "Failed to request %s gpio: %d\n", name, ret);
- return ret;
- }
-
- if (output)
- gpio_direction_output(gpio, value);
- else
- gpio_direction_input(gpio);
-
- return 0;
-}
-
-static int jz4740_mmc_request_gpios(struct mmc_host *mmc,
- struct platform_device *pdev)
+static int jz4740_mmc_request_gpios(struct jz4740_mmc_host *host,
+ struct mmc_host *mmc,
+ struct platform_device *pdev)
{
struct jz4740_mmc_platform_data *pdata = dev_get_platdata(&pdev->dev);
int ret = 0;
@@ -983,31 +951,21 @@ static int jz4740_mmc_request_gpios(struct mmc_host *mmc,
if (!pdata->read_only_active_low)
mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
- if (gpio_is_valid(pdata->gpio_card_detect)) {
- ret = mmc_gpio_request_cd(mmc, pdata->gpio_card_detect, 0);
- if (ret)
- return ret;
- }
-
- if (gpio_is_valid(pdata->gpio_read_only)) {
- ret = mmc_gpio_request_ro(mmc, pdata->gpio_read_only);
- if (ret)
- return ret;
- }
-
- return jz4740_mmc_request_gpio(&pdev->dev, pdata->gpio_power,
- "MMC read only", true, pdata->power_active_low);
-}
-
-static void jz4740_mmc_free_gpios(struct platform_device *pdev)
-{
- struct jz4740_mmc_platform_data *pdata = dev_get_platdata(&pdev->dev);
+ /*
+ * Get optional card detect and write protect GPIOs,
+ * only back out on probe deferral.
+ */
+ ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL);
+ if (ret == -EPROBE_DEFER)
+ return ret;
- if (!pdata)
- return;
+ ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0, NULL);
+ if (ret == -EPROBE_DEFER)
+ return ret;
- if (gpio_is_valid(pdata->gpio_power))
- gpio_free(pdata->gpio_power);
+ host->power = devm_gpiod_get_optional(&pdev->dev, "power",
+ GPIOD_OUT_HIGH);
+ return PTR_ERR_OR_ZERO(host->power);
}
static const struct of_device_id jz4740_mmc_of_match[] = {
@@ -1053,7 +1011,7 @@ static int jz4740_mmc_probe(struct platform_device* pdev)
mmc->caps |= MMC_CAP_SDIO_IRQ;
if (!(pdata && pdata->data_1bit))
mmc->caps |= MMC_CAP_4_BIT_DATA;
- ret = jz4740_mmc_request_gpios(mmc, pdev);
+ ret = jz4740_mmc_request_gpios(host, mmc, pdev);
if (ret)
goto err_free_host;
}
@@ -1104,7 +1062,7 @@ static int jz4740_mmc_probe(struct platform_device* pdev)
dev_name(&pdev->dev), host);
if (ret) {
dev_err(&pdev->dev, "Failed to request irq: %d\n", ret);
- goto err_free_gpios;
+ goto err_free_host;
}
jz4740_mmc_clock_disable(host);
@@ -1135,8 +1093,6 @@ err_release_dma:
jz4740_mmc_release_dma_channels(host);
err_free_irq:
free_irq(host->irq, host);
-err_free_gpios:
- jz4740_mmc_free_gpios(pdev);
err_free_host:
mmc_free_host(mmc);
@@ -1155,8 +1111,6 @@ static int jz4740_mmc_remove(struct platform_device *pdev)
free_irq(host->irq, host);
- jz4740_mmc_free_gpios(pdev);
-
if (host->use_dma)
jz4740_mmc_release_dma_channels(host);
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index c201c378537e..c2690c1a50ff 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -21,11 +21,11 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/ioport.h>
-#include <linux/spinlock.h>
#include <linux/dma-mapping.h>
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>
@@ -66,6 +66,9 @@
#define SD_EMMC_DELAY 0x4
#define SD_EMMC_ADJUST 0x8
+#define ADJUST_ADJ_DELAY_MASK GENMASK(21, 16)
+#define ADJUST_DS_EN BIT(15)
+#define ADJUST_ADJ_EN BIT(13)
#define SD_EMMC_DELAY1 0x4
#define SD_EMMC_DELAY2 0x8
@@ -90,9 +93,11 @@
#define CFG_CLK_ALWAYS_ON BIT(18)
#define CFG_CHK_DS BIT(20)
#define CFG_AUTO_CLK BIT(23)
+#define CFG_ERR_ABORT BIT(27)
#define SD_EMMC_STATUS 0x48
#define STATUS_BUSY BIT(31)
+#define STATUS_DESC_BUSY BIT(30)
#define STATUS_DATI GENMASK(23, 16)
#define SD_EMMC_IRQ_EN 0x4c
@@ -141,6 +146,7 @@ struct meson_mmc_data {
unsigned int tx_delay_mask;
unsigned int rx_delay_mask;
unsigned int always_on;
+ unsigned int adjust;
};
struct sd_emmc_desc {
@@ -156,7 +162,6 @@ struct meson_host {
struct mmc_host *mmc;
struct mmc_command *cmd;
- spinlock_t lock;
void __iomem *regs;
struct clk *core_clk;
struct clk *mmc_clk;
@@ -633,14 +638,8 @@ static int meson_mmc_clk_init(struct meson_host *host)
if (ret)
return ret;
- /*
- * Set phases : These values are mostly the datasheet recommended ones
- * except for the Tx phase. Datasheet recommends 180 but some cards
- * fail at initialisation with it. 270 works just fine, it fixes these
- * initialisation issues and enable eMMC DDR52 mode.
- */
clk_set_phase(host->mmc_clk, 180);
- clk_set_phase(host->tx_clk, 270);
+ clk_set_phase(host->tx_clk, 0);
clk_set_phase(host->rx_clk, 0);
return clk_prepare_enable(host->mmc_clk);
@@ -928,6 +927,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
cmd_cfg |= FIELD_PREP(CMD_CFG_CMD_INDEX_MASK, cmd->opcode);
cmd_cfg |= CMD_CFG_OWNER; /* owned by CPU */
+ cmd_cfg |= CMD_CFG_ERROR; /* stop in case of error */
meson_mmc_set_response_bits(cmd, &cmd_cfg);
@@ -1022,29 +1022,34 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
u32 irq_en, status, raw_status;
irqreturn_t ret = IRQ_NONE;
- if (WARN_ON(!host) || WARN_ON(!host->cmd))
+ irq_en = readl(host->regs + SD_EMMC_IRQ_EN);
+ raw_status = readl(host->regs + SD_EMMC_STATUS);
+ status = raw_status & irq_en;
+
+ if (!status) {
+ dev_dbg(host->dev,
+ "Unexpected IRQ! irq_en 0x%08x - status 0x%08x\n",
+ irq_en, raw_status);
return IRQ_NONE;
+ }
- spin_lock(&host->lock);
+ if (WARN_ON(!host) || WARN_ON(!host->cmd))
+ return IRQ_NONE;
cmd = host->cmd;
data = cmd->data;
- irq_en = readl(host->regs + SD_EMMC_IRQ_EN);
- raw_status = readl(host->regs + SD_EMMC_STATUS);
- status = raw_status & irq_en;
-
cmd->error = 0;
if (status & IRQ_CRC_ERR) {
dev_dbg(host->dev, "CRC Error - status 0x%08x\n", status);
cmd->error = -EILSEQ;
- ret = IRQ_HANDLED;
+ ret = IRQ_WAKE_THREAD;
goto out;
}
if (status & IRQ_TIMEOUTS) {
dev_dbg(host->dev, "Timeout - status 0x%08x\n", status);
cmd->error = -ETIMEDOUT;
- ret = IRQ_HANDLED;
+ ret = IRQ_WAKE_THREAD;
goto out;
}
@@ -1069,17 +1074,48 @@ out:
/* ack all enabled interrupts */
writel(irq_en, host->regs + SD_EMMC_STATUS);
+ if (cmd->error) {
+ /* Stop desc in case of errors */
+ u32 start = readl(host->regs + SD_EMMC_START);
+
+ start &= ~START_DESC_BUSY;
+ writel(start, host->regs + SD_EMMC_START);
+ }
+
if (ret == IRQ_HANDLED)
meson_mmc_request_done(host->mmc, cmd->mrq);
- else if (ret == IRQ_NONE)
- dev_warn(host->dev,
- "Unexpected IRQ! status=0x%08x, irq_en=0x%08x\n",
- raw_status, irq_en);
- spin_unlock(&host->lock);
return ret;
}
+static int meson_mmc_wait_desc_stop(struct meson_host *host)
+{
+ int loop;
+ u32 status;
+
+ /*
+ * It may sometimes take a while for it to actually halt. Here, we
+ * are giving it 5ms to comply
+ *
+ * If we don't confirm the descriptor is stopped, it might raise new
+ * IRQs after we have called mmc_request_done() which is bad.
+ */
+ for (loop = 50; loop; loop--) {
+ status = readl(host->regs + SD_EMMC_STATUS);
+ if (status & (STATUS_BUSY | STATUS_DESC_BUSY))
+ udelay(100);
+ else
+ break;
+ }
+
+ if (status & (STATUS_BUSY | STATUS_DESC_BUSY)) {
+ dev_err(host->dev, "Timed out waiting for host to stop\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id)
{
struct meson_host *host = dev_id;
@@ -1090,6 +1126,13 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id)
if (WARN_ON(!cmd))
return IRQ_NONE;
+ if (cmd->error) {
+ meson_mmc_wait_desc_stop(host);
+ meson_mmc_request_done(host->mmc, cmd->mrq);
+
+ return IRQ_HANDLED;
+ }
+
data = cmd->data;
if (meson_mmc_bounce_buf_read(data)) {
xfer_bytes = data->blksz * data->blocks;
@@ -1123,14 +1166,21 @@ static int meson_mmc_get_cd(struct mmc_host *mmc)
static void meson_mmc_cfg_init(struct meson_host *host)
{
- u32 cfg = 0;
+ u32 cfg = 0, adj = 0;
cfg |= FIELD_PREP(CFG_RESP_TIMEOUT_MASK,
ilog2(SD_EMMC_CFG_RESP_TIMEOUT));
cfg |= FIELD_PREP(CFG_RC_CC_MASK, ilog2(SD_EMMC_CFG_CMD_GAP));
cfg |= FIELD_PREP(CFG_BLK_LEN_MASK, ilog2(SD_EMMC_CFG_BLK_SIZE));
+ /* abort chain on R/W errors */
+ cfg |= CFG_ERR_ABORT;
+
writel(cfg, host->regs + SD_EMMC_CFG);
+
+ /* enable signal resampling w/o delay */
+ adj = ADJUST_ADJ_EN;
+ writel(adj, host->regs + host->data->adjust);
}
static int meson_mmc_card_busy(struct mmc_host *mmc)
@@ -1191,8 +1241,6 @@ static int meson_mmc_probe(struct platform_device *pdev)
host->dev = &pdev->dev;
dev_set_drvdata(&pdev->dev, host);
- spin_lock_init(&host->lock);
-
/* Get regulators and the supported OCR mask */
host->vqmmc_enabled = false;
ret = mmc_regulator_get_supply(mmc);
@@ -1356,12 +1404,14 @@ static const struct meson_mmc_data meson_gx_data = {
.tx_delay_mask = CLK_V2_TX_DELAY_MASK,
.rx_delay_mask = CLK_V2_RX_DELAY_MASK,
.always_on = CLK_V2_ALWAYS_ON,
+ .adjust = SD_EMMC_ADJUST,
};
static const struct meson_mmc_data meson_axg_data = {
.tx_delay_mask = CLK_V3_TX_DELAY_MASK,
.rx_delay_mask = CLK_V3_RX_DELAY_MASK,
.always_on = CLK_V3_ALWAYS_ON,
+ .adjust = SD_EMMC_V3_ADJUST,
};
static const struct of_device_id meson_mmc_of_match[] = {
diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c
index abe253c262a2..ec980bda071c 100644
--- a/drivers/mmc/host/meson-mx-sdio.c
+++ b/drivers/mmc/host/meson-mx-sdio.c
@@ -596,6 +596,9 @@ static int meson_mx_mmc_register_clks(struct meson_mx_mmc_host *host)
init.name = devm_kasprintf(host->controller_dev, GFP_KERNEL,
"%s#fixed_factor",
dev_name(host->controller_dev));
+ if (!init.name)
+ return -ENOMEM;
+
init.ops = &clk_fixed_factor_ops;
init.flags = 0;
init.parent_names = &clk_fixed_factor_parent;
@@ -612,6 +615,9 @@ static int meson_mx_mmc_register_clks(struct meson_mx_mmc_host *host)
clk_div_parent = __clk_get_name(host->fixed_factor_clk);
init.name = devm_kasprintf(host->controller_dev, GFP_KERNEL,
"%s#div", dev_name(host->controller_dev));
+ if (!init.name)
+ return -ENOMEM;
+
init.ops = &clk_divider_ops;
init.flags = CLK_SET_RATE_PARENT;
init.parent_names = &clk_div_parent;
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 476e53d30128..10ba46b728e8 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1434,13 +1434,16 @@ static int mmc_spi_probe(struct spi_device *spi)
if (status != 0)
goto fail_add_host;
- if (host->pdata && host->pdata->flags & MMC_SPI_USE_CD_GPIO) {
- status = mmc_gpio_request_cd(mmc, host->pdata->cd_gpio,
- host->pdata->cd_debounce);
- if (status != 0)
- goto fail_add_host;
-
- /* The platform has a CD GPIO signal that may support
+ /*
+ * Index 0 is card detect
+ * Old boardfiles were specifying 1 ms as debounce
+ */
+ status = mmc_gpiod_request_cd(mmc, NULL, 0, false, 1, NULL);
+ if (status == -EPROBE_DEFER)
+ goto fail_add_host;
+ if (!status) {
+ /*
+ * The platform has a CD GPIO signal that may support
* interrupts, so let mmc_gpiod_request_cd_irq() decide
* if polling is needed or not.
*/
@@ -1448,12 +1451,12 @@ static int mmc_spi_probe(struct spi_device *spi)
mmc_gpiod_request_cd_irq(mmc);
}
- if (host->pdata && host->pdata->flags & MMC_SPI_USE_RO_GPIO) {
+ /* Index 1 is write protect/read only */
+ status = mmc_gpiod_request_ro(mmc, NULL, 1, false, 0, NULL);
+ if (status == -EPROBE_DEFER)
+ goto fail_add_host;
+ if (!status)
has_ro = true;
- status = mmc_gpio_request_ro(mmc, host->pdata->ro_gpio);
- if (status != 0)
- goto fail_add_host;
- }
dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n",
dev_name(&mmc->class_dev),
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 82bab35fff41..e352f5ad5801 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -21,6 +21,7 @@
#include <linux/err.h>
#include <linux/highmem.h>
#include <linux/log2.h>
+#include <linux/mmc/mmc.h>
#include <linux/mmc/pm.h>
#include <linux/mmc/host.h>
#include <linux/mmc/card.h>
@@ -274,6 +275,7 @@ static struct variant_data variant_stm32_sdmmc = {
.cmdreg_lrsp_crc = MCI_CPSM_STM32_LRSP_CRC,
.cmdreg_srsp_crc = MCI_CPSM_STM32_SRSP_CRC,
.cmdreg_srsp = MCI_CPSM_STM32_SRSP,
+ .cmdreg_stop = MCI_CPSM_STM32_CMDSTOP,
.data_cmd_enable = MCI_CPSM_STM32_CMDTRANS,
.irq_pio_mask = MCI_IRQ_PIO_STM32_MASK,
.datactrl_first = true,
@@ -1100,6 +1102,10 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
mmci_reg_delay(host);
}
+ if (host->variant->cmdreg_stop &&
+ cmd->opcode == MMC_STOP_TRANSMISSION)
+ c |= host->variant->cmdreg_stop;
+
c |= cmd->opcode | host->variant->cmdreg_cpsm_enable;
if (cmd->flags & MMC_RSP_PRESENT) {
if (cmd->flags & MMC_RSP_136)
@@ -1190,11 +1196,10 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
/* The error clause is handled above, success! */
data->bytes_xfered = data->blksz * data->blocks;
- if (!data->stop || host->mrq->sbc) {
+ if (!data->stop || (host->mrq->sbc && !data->error))
mmci_request_end(host, data->mrq);
- } else {
+ else
mmci_start_command(host, data->stop, 0);
- }
}
}
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 550dd3914461..24229097d05c 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -264,6 +264,7 @@ struct mmci_host;
* @cmdreg_lrsp_crc: enable value for long response with crc
* @cmdreg_srsp_crc: enable value for short response with crc
* @cmdreg_srsp: enable value for short response without crc
+ * @cmdreg_stop: enable value for stop and abort transmission
* @datalength_bits: number of bits in the MMCIDATALENGTH register
* @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY
* is asserted (likewise for RX)
@@ -316,6 +317,7 @@ struct variant_data {
unsigned int cmdreg_lrsp_crc;
unsigned int cmdreg_srsp_crc;
unsigned int cmdreg_srsp;
+ unsigned int cmdreg_stop;
unsigned int datalength_bits;
unsigned int fifosize;
unsigned int fifohalfsize;
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 6334cc752d8b..8afeaf81ae66 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -1114,6 +1114,7 @@ static void msdc_start_command(struct msdc_host *host,
struct mmc_request *mrq, struct mmc_command *cmd)
{
u32 rawcmd;
+ unsigned long flags;
WARN_ON(host->cmd);
host->cmd = cmd;
@@ -1131,7 +1132,10 @@ static void msdc_start_command(struct msdc_host *host,
cmd->error = 0;
rawcmd = msdc_cmd_prepare_raw_cmd(host, mrq, cmd);
+ spin_lock_irqsave(&host->lock, flags);
sdr_set_bits(host->base + MSDC_INTEN, cmd_ints_mask);
+ spin_unlock_irqrestore(&host->lock, flags);
+
writel(cmd->arg, host->base + SDC_ARG);
writel(rawcmd, host->base + SDC_CMD);
}
@@ -1351,6 +1355,31 @@ static void msdc_request_timeout(struct work_struct *work)
}
}
+static void __msdc_enable_sdio_irq(struct mmc_host *mmc, int enb)
+{
+ unsigned long flags;
+ struct msdc_host *host = mmc_priv(mmc);
+
+ spin_lock_irqsave(&host->lock, flags);
+ if (enb)
+ sdr_set_bits(host->base + MSDC_INTEN, MSDC_INTEN_SDIOIRQ);
+ else
+ sdr_clr_bits(host->base + MSDC_INTEN, MSDC_INTEN_SDIOIRQ);
+ spin_unlock_irqrestore(&host->lock, flags);
+}
+
+static void msdc_enable_sdio_irq(struct mmc_host *mmc, int enb)
+{
+ struct msdc_host *host = mmc_priv(mmc);
+
+ __msdc_enable_sdio_irq(mmc, enb);
+
+ if (enb)
+ pm_runtime_get_noresume(host->dev);
+ else
+ pm_runtime_put_noidle(host->dev);
+}
+
static irqreturn_t msdc_irq(int irq, void *dev_id)
{
struct msdc_host *host = (struct msdc_host *) dev_id;
@@ -1373,7 +1402,12 @@ static irqreturn_t msdc_irq(int irq, void *dev_id)
data = host->data;
spin_unlock_irqrestore(&host->lock, flags);
- if (!(events & event_mask))
+ if ((events & event_mask) & MSDC_INT_SDIOIRQ) {
+ __msdc_enable_sdio_irq(host->mmc, 0);
+ sdio_signal_irq(host->mmc);
+ }
+
+ if (!(events & (event_mask & ~MSDC_INT_SDIOIRQ)))
break;
if (!mrq) {
@@ -1493,8 +1527,11 @@ static void msdc_init_hw(struct msdc_host *host)
*/
sdr_set_bits(host->base + SDC_CFG, SDC_CFG_SDIO);
- /* disable detect SDIO device interrupt function */
- sdr_clr_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE);
+ /* Config SDIO device detect interrupt function */
+ if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
+ sdr_set_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE);
+ else
+ sdr_clr_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE);
/* Configure to default data timeout */
sdr_set_field(host->base + SDC_CFG, SDC_CFG_DTOC, 3);
@@ -2013,6 +2050,11 @@ static void msdc_hw_reset(struct mmc_host *mmc)
sdr_clr_bits(host->base + EMMC_IOCON, 1);
}
+static void msdc_ack_sdio_irq(struct mmc_host *mmc)
+{
+ __msdc_enable_sdio_irq(mmc, 1);
+}
+
static const struct mmc_host_ops mt_msdc_ops = {
.post_req = msdc_post_req,
.pre_req = msdc_pre_req,
@@ -2020,6 +2062,8 @@ static const struct mmc_host_ops mt_msdc_ops = {
.set_ios = msdc_ops_set_ios,
.get_ro = mmc_gpio_get_ro,
.get_cd = mmc_gpio_get_cd,
+ .enable_sdio_irq = msdc_enable_sdio_irq,
+ .ack_sdio_irq = msdc_ack_sdio_irq,
.start_signal_voltage_switch = msdc_ops_switch_volt,
.card_busy = msdc_card_busy,
.execute_tuning = msdc_execute_tuning,
@@ -2147,6 +2191,9 @@ static int msdc_drv_probe(struct platform_device *pdev)
else
mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 4095);
+ if (mmc->caps & MMC_CAP_SDIO_IRQ)
+ mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
+
mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23;
/* MMC core transfer sizes tunable parameters */
mmc->max_segs = MAX_BD_NUM;
diff --git a/drivers/mmc/host/of_mmc_spi.c b/drivers/mmc/host/of_mmc_spi.c
index c9eed8436b6b..b294b221f225 100644
--- a/drivers/mmc/host/of_mmc_spi.c
+++ b/drivers/mmc/host/of_mmc_spi.c
@@ -16,9 +16,7 @@
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/irq.h>
-#include <linux/gpio.h>
#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <linux/of_irq.h>
#include <linux/spi/spi.h>
#include <linux/spi/mmc_spi.h>
@@ -32,15 +30,7 @@
MODULE_LICENSE("GPL");
-enum {
- CD_GPIO = 0,
- WP_GPIO,
- NUM_GPIOS,
-};
-
struct of_mmc_spi {
- int gpios[NUM_GPIOS];
- bool alow_gpios[NUM_GPIOS];
int detect_irq;
struct mmc_spi_platform_data pdata;
};
@@ -102,30 +92,6 @@ struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi)
oms->pdata.ocr_mask |= mask;
}
- for (i = 0; i < ARRAY_SIZE(oms->gpios); i++) {
- enum of_gpio_flags gpio_flags;
-
- oms->gpios[i] = of_get_gpio_flags(np, i, &gpio_flags);
- if (!gpio_is_valid(oms->gpios[i]))
- continue;
-
- if (gpio_flags & OF_GPIO_ACTIVE_LOW)
- oms->alow_gpios[i] = true;
- }
-
- if (gpio_is_valid(oms->gpios[CD_GPIO])) {
- oms->pdata.cd_gpio = oms->gpios[CD_GPIO];
- oms->pdata.flags |= MMC_SPI_USE_CD_GPIO;
- if (!oms->alow_gpios[CD_GPIO])
- oms->pdata.caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
- }
- if (gpio_is_valid(oms->gpios[WP_GPIO])) {
- oms->pdata.ro_gpio = oms->gpios[WP_GPIO];
- oms->pdata.flags |= MMC_SPI_USE_RO_GPIO;
- if (!oms->alow_gpios[WP_GPIO])
- oms->pdata.caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
- }
-
oms->detect_irq = irq_of_parse_and_map(np, 0);
if (oms->detect_irq != 0) {
oms->pdata.init = of_mmc_spi_init;
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 3f4ea8f624be..29a1ddaa7466 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1652,7 +1652,7 @@ static struct mmc_host_ops omap_hsmmc_ops = {
#ifdef CONFIG_DEBUG_FS
-static int omap_hsmmc_regs_show(struct seq_file *s, void *data)
+static int mmc_regs_show(struct seq_file *s, void *data)
{
struct mmc_host *mmc = s->private;
struct omap_hsmmc_host *host = mmc_priv(mmc);
@@ -1691,17 +1691,7 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data)
return 0;
}
-static int omap_hsmmc_regs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, omap_hsmmc_regs_show, inode->i_private);
-}
-
-static const struct file_operations mmc_regs_fops = {
- .open = omap_hsmmc_regs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mmc_regs);
static void omap_hsmmc_debugfs(struct mmc_host *mmc)
{
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index f7ffbf1676b1..8779bbaa6b69 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -30,10 +30,9 @@
#include <linux/mmc/slot-gpio.h>
#include <linux/io.h>
#include <linux/regulator/consumer.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/gfp.h>
#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <linux/of_device.h>
#include <asm/sizes.h>
@@ -63,6 +62,8 @@ struct pxamci_host {
unsigned int imask;
unsigned int power_mode;
unsigned long detect_delay_ms;
+ bool use_ro_gpio;
+ struct gpio_desc *power;
struct pxamci_platform_data *pdata;
struct mmc_request *mrq;
@@ -101,16 +102,13 @@ static inline int pxamci_set_power(struct pxamci_host *host,
{
struct mmc_host *mmc = host->mmc;
struct regulator *supply = mmc->supply.vmmc;
- int on;
if (!IS_ERR(supply))
return mmc_regulator_set_ocr(mmc, supply, vdd);
- if (host->pdata &&
- gpio_is_valid(host->pdata->gpio_power)) {
- on = ((1 << vdd) & host->pdata->ocr_mask);
- gpio_set_value(host->pdata->gpio_power,
- !!on ^ host->pdata->gpio_power_invert);
+ if (host->power) {
+ bool on = !!((1 << vdd) & host->pdata->ocr_mask);
+ gpiod_set_value(host->power, on);
}
if (host->pdata && host->pdata->setpower)
@@ -432,7 +430,7 @@ static int pxamci_get_ro(struct mmc_host *mmc)
{
struct pxamci_host *host = mmc_priv(mmc);
- if (host->pdata && gpio_is_valid(host->pdata->gpio_card_ro))
+ if (host->use_ro_gpio)
return mmc_gpio_get_ro(mmc);
if (host->pdata && host->pdata->get_ro)
return !!host->pdata->get_ro(mmc_dev(mmc));
@@ -730,52 +728,38 @@ static int pxamci_probe(struct platform_device *pdev)
}
if (host->pdata) {
- int gpio_cd = host->pdata->gpio_card_detect;
- int gpio_ro = host->pdata->gpio_card_ro;
- int gpio_power = host->pdata->gpio_power;
-
host->detect_delay_ms = host->pdata->detect_delay_ms;
- if (gpio_is_valid(gpio_power)) {
- ret = devm_gpio_request(dev, gpio_power,
- "mmc card power");
- if (ret) {
- dev_err(dev,
- "Failed requesting gpio_power %d\n",
- gpio_power);
- goto out;
- }
- gpio_direction_output(gpio_power,
- host->pdata->gpio_power_invert);
+ host->power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
+ if (IS_ERR(host->power)) {
+ dev_err(dev, "Failed requesting gpio_power\n");
+ goto out;
}
- if (gpio_is_valid(gpio_ro)) {
- ret = mmc_gpio_request_ro(mmc, gpio_ro);
- if (ret) {
- dev_err(dev,
- "Failed requesting gpio_ro %d\n",
- gpio_ro);
- goto out;
- } else {
- mmc->caps2 |= host->pdata->gpio_card_ro_invert ?
- 0 : MMC_CAP2_RO_ACTIVE_HIGH;
- }
+ /* FIXME: should we pass detection delay to debounce? */
+ ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL);
+ if (ret && ret != -ENOENT) {
+ dev_err(dev, "Failed requesting gpio_cd\n");
+ goto out;
}
- if (gpio_is_valid(gpio_cd))
- ret = mmc_gpio_request_cd(mmc, gpio_cd, 0);
- if (ret) {
- dev_err(dev, "Failed requesting gpio_cd %d\n",
- gpio_cd);
+ ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0, NULL);
+ if (ret && ret != -ENOENT) {
+ dev_err(dev, "Failed requesting gpio_ro\n");
goto out;
}
+ if (!ret) {
+ host->use_ro_gpio = true;
+ mmc->caps2 |= host->pdata->gpio_card_ro_invert ?
+ 0 : MMC_CAP2_RO_ACTIVE_HIGH;
+ }
if (host->pdata->init)
host->pdata->init(dev, pxamci_detect_irq, mmc);
- if (gpio_is_valid(gpio_power) && host->pdata->setpower)
+ if (host->power && host->pdata->setpower)
dev_warn(dev, "gpio_power and setpower() both defined\n");
- if (gpio_is_valid(gpio_ro) && host->pdata->get_ro)
+ if (host->use_ro_gpio && host->pdata->get_ro)
dev_warn(dev, "gpio_ro and get_ro() both defined\n");
}
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index d3ac43c3d0b6..31a351a20dc0 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -32,6 +32,7 @@
#include <linux/pinctrl/consumer.h>
#include <linux/pinctrl/pinctrl-state.h>
#include <linux/regulator/consumer.h>
+#include <linux/sys_soc.h>
#include "renesas_sdhi.h"
#include "tmio_mmc.h"
@@ -45,6 +46,11 @@
#define SDHI_VER_GEN3_SD 0xcc10
#define SDHI_VER_GEN3_SDMMC 0xcd10
+struct renesas_sdhi_quirks {
+ bool hs400_disabled;
+ bool hs400_4taps;
+};
+
static void renesas_sdhi_sdbuf_width(struct tmio_mmc_host *host, int width)
{
u32 val;
@@ -163,15 +169,6 @@ static void renesas_sdhi_set_clock(struct tmio_mmc_host *host,
if (new_clock == 0)
goto out;
- /*
- * Both HS400 and HS200/SD104 set 200MHz, but some devices need to
- * set 400MHz to distinguish the CPG settings in HS400.
- */
- if (host->mmc->ios.timing == MMC_TIMING_MMC_HS400 &&
- host->pdata->flags & TMIO_MMC_HAVE_4TAP_HS400 &&
- new_clock == 200000000)
- new_clock = 400000000;
-
clock = renesas_sdhi_clk_update(host, new_clock) / 512;
for (clk = 0x80000080; new_clock >= (clock << 1); clk >>= 1)
@@ -532,6 +529,10 @@ static void renesas_sdhi_hw_reset(struct tmio_mmc_host *host)
sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_RVSCNTL,
~SH_MOBILE_SDHI_SCC_RVSCNTL_RVSEN &
sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_RVSCNTL));
+
+ if (host->pdata->flags & TMIO_MMC_MIN_RCAR2)
+ sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK,
+ TMIO_MASK_INIT_RCAR2);
}
static int renesas_sdhi_wait_idle(struct tmio_mmc_host *host, u32 bit)
@@ -602,11 +603,31 @@ static void renesas_sdhi_enable_dma(struct tmio_mmc_host *host, bool enable)
renesas_sdhi_sdbuf_width(host, enable ? width : 16);
}
+static const struct renesas_sdhi_quirks sdhi_quirks_h3_m3w_es1 = {
+ .hs400_disabled = true,
+ .hs400_4taps = true,
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_h3_es2 = {
+ .hs400_disabled = false,
+ .hs400_4taps = true,
+};
+
+static const struct soc_device_attribute sdhi_quirks_match[] = {
+ { .soc_id = "r8a7795", .revision = "ES1.*", .data = &sdhi_quirks_h3_m3w_es1 },
+ { .soc_id = "r8a7795", .revision = "ES2.0", .data = &sdhi_quirks_h3_es2 },
+ { .soc_id = "r8a7796", .revision = "ES1.0", .data = &sdhi_quirks_h3_m3w_es1 },
+ { .soc_id = "r8a7796", .revision = "ES1.1", .data = &sdhi_quirks_h3_m3w_es1 },
+ { /* Sentinel. */ },
+};
+
int renesas_sdhi_probe(struct platform_device *pdev,
const struct tmio_mmc_dma_ops *dma_ops)
{
struct tmio_mmc_data *mmd = pdev->dev.platform_data;
+ const struct renesas_sdhi_quirks *quirks = NULL;
const struct renesas_sdhi_of_data *of_data;
+ const struct soc_device_attribute *attr;
struct tmio_mmc_data *mmc_data;
struct tmio_mmc_dma *dma_priv;
struct tmio_mmc_host *host;
@@ -616,6 +637,10 @@ int renesas_sdhi_probe(struct platform_device *pdev,
of_data = of_device_get_match_data(&pdev->dev);
+ attr = soc_device_match(sdhi_quirks_match);
+ if (attr)
+ quirks = attr->data;
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -EINVAL;
@@ -681,6 +706,12 @@ int renesas_sdhi_probe(struct platform_device *pdev,
host->multi_io_quirk = renesas_sdhi_multi_io_quirk;
host->dma_ops = dma_ops;
+ if (quirks && quirks->hs400_disabled)
+ host->mmc->caps2 &= ~(MMC_CAP2_HS400 | MMC_CAP2_HS400_ES);
+
+ if (quirks && quirks->hs400_4taps)
+ mmc_data->flags |= TMIO_MMC_HAVE_4TAP_HS400;
+
/* For some SoC, we disable internal WP. GPIO may override this */
if (mmc_can_gpio_ro(host->mmc))
mmc_data->capabilities2 &= ~MMC_CAP2_NO_WRITE_PROTECT;
@@ -691,6 +722,7 @@ int renesas_sdhi_probe(struct platform_device *pdev,
host->ops.card_busy = renesas_sdhi_card_busy;
host->ops.start_signal_voltage_switch =
renesas_sdhi_start_signal_voltage_switch;
+ host->sdcard_irq_setbit_mask = TMIO_STAT_ALWAYS_SET_27;
}
/* Orginally registers were 16 bit apart, could be 32 or 64 nowadays */
diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index b6f54102bfdd..92c9b15252da 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -34,7 +34,7 @@
#define DTRAN_MODE_CH_NUM_CH0 0 /* "downstream" = for write commands */
#define DTRAN_MODE_CH_NUM_CH1 BIT(16) /* "upstream" = for read commands */
#define DTRAN_MODE_BUS_WIDTH (BIT(5) | BIT(4))
-#define DTRAN_MODE_ADDR_MODE BIT(0) /* 1 = Increment address */
+#define DTRAN_MODE_ADDR_MODE BIT(0) /* 1 = Increment address, 0 = Fixed */
/* DM_CM_DTRAN_CTRL */
#define DTRAN_CTRL_DM_START BIT(0)
@@ -73,6 +73,9 @@ static unsigned long global_flags;
#define SDHI_INTERNAL_DMAC_ONE_RX_ONLY 0
#define SDHI_INTERNAL_DMAC_RX_IN_USE 1
+/* RZ/A2 does not have the ADRR_MODE bit */
+#define SDHI_INTERNAL_DMAC_ADDR_MODE_FIXED_ONLY 2
+
/* Definitions for sampling clocks */
static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = {
{
@@ -81,15 +84,14 @@ static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = {
},
};
-static const struct renesas_sdhi_of_data of_rcar_r8a7795_compatible = {
+static const struct renesas_sdhi_of_data of_rza2_compatible = {
.tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL |
- TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2 |
- TMIO_MMC_HAVE_4TAP_HS400,
+ TMIO_MMC_HAVE_CBSY,
+ .tmio_ocr_mask = MMC_VDD_32_33,
.capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
MMC_CAP_CMD23,
- .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT,
.bus_shift = 2,
- .scc_offset = 0x1000,
+ .scc_offset = 0 - 0x1000,
.taps = rcar_gen3_scc_taps,
.taps_num = ARRAY_SIZE(rcar_gen3_scc_taps),
/* DMAC can handle 0xffffffff blk count but only 1 segment */
@@ -113,9 +115,10 @@ static const struct renesas_sdhi_of_data of_rcar_gen3_compatible = {
};
static const struct of_device_id renesas_sdhi_internal_dmac_of_match[] = {
+ { .compatible = "renesas,sdhi-r7s9210", .data = &of_rza2_compatible, },
{ .compatible = "renesas,sdhi-mmc-r8a77470", .data = &of_rcar_gen3_compatible, },
- { .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_r8a7795_compatible, },
- { .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_r8a7795_compatible, },
+ { .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_gen3_compatible, },
+ { .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_gen3_compatible, },
{ .compatible = "renesas,rcar-gen3-sdhi", .data = &of_rcar_gen3_compatible, },
{},
};
@@ -172,7 +175,10 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
struct mmc_data *data)
{
struct scatterlist *sg = host->sg_ptr;
- u32 dtran_mode = DTRAN_MODE_BUS_WIDTH | DTRAN_MODE_ADDR_MODE;
+ u32 dtran_mode = DTRAN_MODE_BUS_WIDTH;
+
+ if (!test_bit(SDHI_INTERNAL_DMAC_ADDR_MODE_FIXED_ONLY, &global_flags))
+ dtran_mode |= DTRAN_MODE_ADDR_MODE;
if (!dma_map_sg(&host->pdev->dev, sg, host->sg_len,
mmc_get_dma_dir(data)))
@@ -292,18 +298,22 @@ static const struct tmio_mmc_dma_ops renesas_sdhi_internal_dmac_dma_ops = {
*/
static const struct soc_device_attribute soc_whitelist[] = {
/* specific ones */
+ { .soc_id = "r7s9210",
+ .data = (void *)BIT(SDHI_INTERNAL_DMAC_ADDR_MODE_FIXED_ONLY) },
{ .soc_id = "r8a7795", .revision = "ES1.*",
.data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
{ .soc_id = "r8a7796", .revision = "ES1.0",
.data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
/* generic ones */
{ .soc_id = "r8a774a1" },
+ { .soc_id = "r8a774c0" },
{ .soc_id = "r8a77470" },
{ .soc_id = "r8a7795" },
{ .soc_id = "r8a7796" },
{ .soc_id = "r8a77965" },
{ .soc_id = "r8a77970" },
{ .soc_id = "r8a77980" },
+ { .soc_id = "r8a77990" },
{ .soc_id = "r8a77995" },
{ /* sentinel */ }
};
diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
index 1a4016f635d3..8471160316e0 100644
--- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
@@ -75,19 +75,6 @@ static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = {
},
};
-static const struct renesas_sdhi_of_data of_rcar_r8a7795_compatible = {
- .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL |
- TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2 |
- TMIO_MMC_HAVE_4TAP_HS400,
- .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
- MMC_CAP_CMD23,
- .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT,
- .bus_shift = 2,
- .scc_offset = 0x1000,
- .taps = rcar_gen3_scc_taps,
- .taps_num = ARRAY_SIZE(rcar_gen3_scc_taps),
-};
-
static const struct renesas_sdhi_of_data of_rcar_gen3_compatible = {
.tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL |
TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2,
@@ -114,8 +101,8 @@ static const struct of_device_id renesas_sdhi_sys_dmac_of_match[] = {
{ .compatible = "renesas,sdhi-r8a7792", .data = &of_rcar_gen2_compatible, },
{ .compatible = "renesas,sdhi-r8a7793", .data = &of_rcar_gen2_compatible, },
{ .compatible = "renesas,sdhi-r8a7794", .data = &of_rcar_gen2_compatible, },
- { .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_r8a7795_compatible, },
- { .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_r8a7795_compatible, },
+ { .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_gen3_compatible, },
+ { .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_gen3_compatible, },
{ .compatible = "renesas,rcar-gen1-sdhi", .data = &of_rcar_gen1_compatible, },
{ .compatible = "renesas,rcar-gen2-sdhi", .data = &of_rcar_gen2_compatible, },
{ .compatible = "renesas,rcar-gen3-sdhi", .data = &of_rcar_gen3_compatible, },
@@ -493,8 +480,7 @@ static const struct soc_device_attribute gen3_soc_whitelist[] = {
static int renesas_sdhi_sys_dmac_probe(struct platform_device *pdev)
{
- if ((of_device_get_match_data(&pdev->dev) == &of_rcar_gen3_compatible ||
- of_device_get_match_data(&pdev->dev) == &of_rcar_r8a7795_compatible) &&
+ if (of_device_get_match_data(&pdev->dev) == &of_rcar_gen3_compatible &&
!soc_device_match(gen3_soc_whitelist))
return -ENODEV;
diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c
index 9a3ff22dd0fe..669c6ab021c8 100644
--- a/drivers/mmc/host/rtsx_usb_sdmmc.c
+++ b/drivers/mmc/host/rtsx_usb_sdmmc.c
@@ -28,6 +28,7 @@
#include <linux/mmc/sd.h>
#include <linux/mmc/card.h>
#include <linux/scatterlist.h>
+#include <linux/pm.h>
#include <linux/pm_runtime.h>
#include <linux/rtsx_usb.h>
@@ -1042,9 +1043,9 @@ static int sd_set_power_mode(struct rtsx_usb_sdmmc *host,
if (power_mode == MMC_POWER_OFF) {
err = sd_power_off(host);
- pm_runtime_put(sdmmc_dev(host));
+ pm_runtime_put_noidle(sdmmc_dev(host));
} else {
- pm_runtime_get_sync(sdmmc_dev(host));
+ pm_runtime_get_noresume(sdmmc_dev(host));
err = sd_power_on(host);
}
@@ -1297,16 +1298,20 @@ static void rtsx_usb_update_led(struct work_struct *work)
container_of(work, struct rtsx_usb_sdmmc, led_work);
struct rtsx_ucr *ucr = host->ucr;
- pm_runtime_get_sync(sdmmc_dev(host));
+ pm_runtime_get_noresume(sdmmc_dev(host));
mutex_lock(&ucr->dev_mutex);
+ if (host->power_mode == MMC_POWER_OFF)
+ goto out;
+
if (host->led.brightness == LED_OFF)
rtsx_usb_turn_off_led(ucr);
else
rtsx_usb_turn_on_led(ucr);
+out:
mutex_unlock(&ucr->dev_mutex);
- pm_runtime_put(sdmmc_dev(host));
+ pm_runtime_put_sync_suspend(sdmmc_dev(host));
}
#endif
@@ -1320,7 +1325,7 @@ static void rtsx_usb_init_host(struct rtsx_usb_sdmmc *host)
mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SD_HIGHSPEED |
MMC_CAP_MMC_HIGHSPEED | MMC_CAP_BUS_WIDTH_TEST |
MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR50 |
- MMC_CAP_NEEDS_POLL | MMC_CAP_ERASE;
+ MMC_CAP_ERASE | MMC_CAP_SYNC_RUNTIME_PM;
mmc->caps2 = MMC_CAP2_NO_PRESCAN_POWERUP | MMC_CAP2_FULL_PWR_CYCLE |
MMC_CAP2_NO_SDIO;
@@ -1363,8 +1368,6 @@ static int rtsx_usb_sdmmc_drv_probe(struct platform_device *pdev)
mutex_init(&host->host_mutex);
rtsx_usb_init_host(host);
- pm_runtime_use_autosuspend(&pdev->dev);
- pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
pm_runtime_enable(&pdev->dev);
#ifdef RTSX_USB_USE_LEDS_CLASS
@@ -1419,7 +1422,6 @@ static int rtsx_usb_sdmmc_drv_remove(struct platform_device *pdev)
mmc_free_host(mmc);
pm_runtime_disable(&pdev->dev);
- pm_runtime_dont_use_autosuspend(&pdev->dev);
platform_set_drvdata(pdev, NULL);
dev_dbg(&(pdev->dev),
@@ -1428,6 +1430,31 @@ static int rtsx_usb_sdmmc_drv_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_PM
+static int rtsx_usb_sdmmc_runtime_suspend(struct device *dev)
+{
+ struct rtsx_usb_sdmmc *host = dev_get_drvdata(dev);
+
+ host->mmc->caps &= ~MMC_CAP_NEEDS_POLL;
+ return 0;
+}
+
+static int rtsx_usb_sdmmc_runtime_resume(struct device *dev)
+{
+ struct rtsx_usb_sdmmc *host = dev_get_drvdata(dev);
+
+ host->mmc->caps |= MMC_CAP_NEEDS_POLL;
+ if (sdmmc_get_cd(host->mmc) == 1)
+ mmc_detect_change(host->mmc, 0);
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops rtsx_usb_sdmmc_dev_pm_ops = {
+ SET_RUNTIME_PM_OPS(rtsx_usb_sdmmc_runtime_suspend,
+ rtsx_usb_sdmmc_runtime_resume, NULL)
+};
+
static const struct platform_device_id rtsx_usb_sdmmc_ids[] = {
{
.name = "rtsx_usb_sdmmc",
@@ -1443,6 +1470,7 @@ static struct platform_driver rtsx_usb_sdmmc_driver = {
.id_table = rtsx_usb_sdmmc_ids,
.driver = {
.name = "rtsx_usb_sdmmc",
+ .pm = &rtsx_usb_sdmmc_dev_pm_ops,
},
};
module_platform_driver(rtsx_usb_sdmmc_driver);
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index f77493604312..10f5219b3b40 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -26,7 +26,6 @@
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_device.h>
-#include <linux/of_gpio.h>
#include <linux/mmc/slot-gpio.h>
#include <plat/gpio-cfg.h>
@@ -1406,18 +1405,7 @@ static int s3cmci_state_show(struct seq_file *seq, void *v)
return 0;
}
-static int s3cmci_state_open(struct inode *inode, struct file *file)
-{
- return single_open(file, s3cmci_state_show, inode->i_private);
-}
-
-static const struct file_operations s3cmci_fops_state = {
- .owner = THIS_MODULE,
- .open = s3cmci_state_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(s3cmci_state);
#define DBG_REG(_r) { .addr = S3C2410_SDI##_r, .name = #_r }
@@ -1459,18 +1447,7 @@ static int s3cmci_regs_show(struct seq_file *seq, void *v)
return 0;
}
-static int s3cmci_regs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, s3cmci_regs_show, inode->i_private);
-}
-
-static const struct file_operations s3cmci_fops_regs = {
- .owner = THIS_MODULE,
- .open = s3cmci_regs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(s3cmci_regs);
static void s3cmci_debugfs_attach(struct s3cmci_host *host)
{
@@ -1484,14 +1461,14 @@ static void s3cmci_debugfs_attach(struct s3cmci_host *host)
host->debug_state = debugfs_create_file("state", 0444,
host->debug_root, host,
- &s3cmci_fops_state);
+ &s3cmci_state_fops);
if (IS_ERR(host->debug_state))
dev_err(dev, "failed to create debug state file\n");
host->debug_regs = debugfs_create_file("regs", 0444,
host->debug_root, host,
- &s3cmci_fops_regs);
+ &s3cmci_regs_fops);
if (IS_ERR(host->debug_regs))
dev_err(dev, "failed to create debug regs file\n");
@@ -1545,25 +1522,19 @@ static int s3cmci_probe_pdata(struct s3cmci_host *host)
if (pdata->wprotect_invert)
mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
- if (pdata->detect_invert)
- mmc->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
-
- if (gpio_is_valid(pdata->gpio_detect)) {
- ret = mmc_gpio_request_cd(mmc, pdata->gpio_detect, 0);
- if (ret) {
- dev_err(&pdev->dev, "error requesting GPIO for CD %d\n",
- ret);
- return ret;
- }
+ /* If we get -ENOENT we have no card detect GPIO line */
+ ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL);
+ if (ret != -ENOENT) {
+ dev_err(&pdev->dev, "error requesting GPIO for CD %d\n",
+ ret);
+ return ret;
}
- if (gpio_is_valid(pdata->gpio_wprotect)) {
- ret = mmc_gpio_request_ro(mmc, pdata->gpio_wprotect);
- if (ret) {
- dev_err(&pdev->dev, "error requesting GPIO for WP %d\n",
- ret);
- return ret;
- }
+ ret = mmc_gpiod_request_ro(host->mmc, "wp", 0, false, 0, NULL);
+ if (ret != -ENOENT) {
+ dev_err(&pdev->dev, "error requesting GPIO for WP %d\n",
+ ret);
+ return ret;
}
return 0;
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 057e24f4a620..6669e540851d 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -437,7 +437,8 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = {
MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR |
MMC_CAP_CMD_DURING_TFR | MMC_CAP_WAIT_WHILE_BUSY,
.flags = SDHCI_ACPI_RUNTIME_PM,
- .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED,
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
SDHCI_QUIRK2_STOP_WITH_TC |
SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400,
@@ -448,6 +449,7 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = {
static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = {
.quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION |
+ SDHCI_QUIRK_NO_LED |
SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
.quirks2 = SDHCI_QUIRK2_HOST_OFF_CARD_ON,
.caps = MMC_CAP_NONREMOVABLE | MMC_CAP_POWER_OFF_CARD |
@@ -462,7 +464,8 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = {
static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sd = {
.flags = SDHCI_ACPI_SD_CD | SDHCI_ACPI_SD_CD_OVERRIDE_LEVEL |
SDHCI_ACPI_RUNTIME_PM,
- .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED,
.quirks2 = SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON |
SDHCI_QUIRK2_STOP_WITH_TC,
.caps = MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_AGGRESSIVE_PM,
diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index 7a343b87b5e5..e2412875dac5 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -14,7 +14,7 @@
*/
#include <linux/bitfield.h>
-#include <linux/bitops.h>
+#include <linux/bits.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/mmc/host.h>
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index f44e49014a44..d0d319398a54 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -12,7 +12,6 @@
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/clk.h>
-#include <linux/gpio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mmc/host.h>
@@ -21,7 +20,6 @@
#include <linux/mmc/slot-gpio.h>
#include <linux/of.h>
#include <linux/of_device.h>
-#include <linux/of_gpio.h>
#include <linux/pinctrl/consumer.h>
#include <linux/platform_data/mmc-esdhc-imx.h>
#include <linux/pm_runtime.h>
@@ -429,7 +427,7 @@ static u16 esdhc_readw_le(struct sdhci_host *host, int reg)
val = readl(host->ioaddr + ESDHC_MIX_CTRL);
else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING)
/* the std tuning bits is in ACMD12_ERR for imx6sl */
- val = readl(host->ioaddr + SDHCI_ACMD12_ERR);
+ val = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS);
}
if (val & ESDHC_MIX_CTRL_EXE_TUNE)
@@ -494,7 +492,7 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
}
writel(new_val , host->ioaddr + ESDHC_MIX_CTRL);
} else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) {
- u32 v = readl(host->ioaddr + SDHCI_ACMD12_ERR);
+ u32 v = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS);
u32 m = readl(host->ioaddr + ESDHC_MIX_CTRL);
if (val & SDHCI_CTRL_TUNED_CLK) {
v |= ESDHC_MIX_CTRL_SMPCLK_SEL;
@@ -512,7 +510,7 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
v &= ~ESDHC_MIX_CTRL_EXE_TUNE;
}
- writel(v, host->ioaddr + SDHCI_ACMD12_ERR);
+ writel(v, host->ioaddr + SDHCI_AUTO_CMD_STATUS);
writel(m, host->ioaddr + ESDHC_MIX_CTRL);
}
return;
@@ -957,9 +955,9 @@ static void esdhc_reset_tuning(struct sdhci_host *host)
writel(ctrl, host->ioaddr + ESDHC_MIX_CTRL);
writel(0, host->ioaddr + ESDHC_TUNE_CTRL_STATUS);
} else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) {
- ctrl = readl(host->ioaddr + SDHCI_ACMD12_ERR);
+ ctrl = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS);
ctrl &= ~ESDHC_MIX_CTRL_SMPCLK_SEL;
- writel(ctrl, host->ioaddr + SDHCI_ACMD12_ERR);
+ writel(ctrl, host->ioaddr + SDHCI_AUTO_CMD_STATUS);
}
}
}
@@ -1139,8 +1137,12 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
if (of_get_property(np, "fsl,wp-controller", NULL))
boarddata->wp_type = ESDHC_WP_CONTROLLER;
- boarddata->wp_gpio = of_get_named_gpio(np, "wp-gpios", 0);
- if (gpio_is_valid(boarddata->wp_gpio))
+ /*
+ * If we have this property, then activate WP check.
+ * Retrieveing and requesting the actual WP GPIO will happen
+ * in the call to mmc_of_parse().
+ */
+ if (of_property_read_bool(np, "wp-gpios"))
boarddata->wp_type = ESDHC_WP_GPIO;
of_property_read_u32(np, "fsl,tuning-step", &boarddata->tuning_step);
@@ -1198,7 +1200,7 @@ static int sdhci_esdhc_imx_probe_nondt(struct platform_device *pdev,
host->mmc->parent->platform_data);
/* write_protect */
if (boarddata->wp_type == ESDHC_WP_GPIO) {
- err = mmc_gpio_request_ro(host->mmc, boarddata->wp_gpio);
+ err = mmc_gpiod_request_ro(host->mmc, "wp", 0, false, 0, NULL);
if (err) {
dev_err(mmc_dev(host->mmc),
"failed to request write-protect gpio!\n");
@@ -1210,7 +1212,7 @@ static int sdhci_esdhc_imx_probe_nondt(struct platform_device *pdev,
/* card_detect */
switch (boarddata->cd_type) {
case ESDHC_CD_GPIO:
- err = mmc_gpio_request_cd(host->mmc, boarddata->cd_gpio, 0);
+ err = mmc_gpiod_request_cd(host->mmc, "cd", 0, false, 0, NULL);
if (err) {
dev_err(mmc_dev(host->mmc),
"failed to request card-detect gpio!\n");
@@ -1317,7 +1319,7 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
/* clear tuning bits in case ROM has set it already */
writel(0x0, host->ioaddr + ESDHC_MIX_CTRL);
- writel(0x0, host->ioaddr + SDHCI_ACMD12_ERR);
+ writel(0x0, host->ioaddr + SDHCI_AUTO_CMD_STATUS);
writel(0x0, host->ioaddr + ESDHC_TUNE_CTRL_STATUS);
}
diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index 3f16d9c90ba2..39dbbd6eaf28 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h
@@ -59,9 +59,33 @@
/* Tuning Block Control Register */
#define ESDHC_TBCTL 0x120
+#define ESDHC_HS400_WNDW_ADJUST 0x00000040
+#define ESDHC_HS400_MODE 0x00000010
#define ESDHC_TB_EN 0x00000004
#define ESDHC_TBPTR 0x128
+/* SD Clock Control Register */
+#define ESDHC_SDCLKCTL 0x144
+#define ESDHC_LPBK_CLK_SEL 0x80000000
+#define ESDHC_CMD_CLK_CTL 0x00008000
+
+/* SD Timing Control Register */
+#define ESDHC_SDTIMNGCTL 0x148
+#define ESDHC_FLW_CTL_BG 0x00008000
+
+/* DLL Config 0 Register */
+#define ESDHC_DLLCFG0 0x160
+#define ESDHC_DLL_ENABLE 0x80000000
+#define ESDHC_DLL_FREQ_SEL 0x08000000
+
+/* DLL Config 1 Register */
+#define ESDHC_DLLCFG1 0x164
+#define ESDHC_DLL_PD_PULSE_STRETCH_SEL 0x80000000
+
+/* DLL Status 0 Register */
+#define ESDHC_DLLSTAT0 0x170
+#define ESDHC_DLL_STS_SLV_LOCK 0x08000000
+
/* Control Register for DMA transfer */
#define ESDHC_DMA_SYSCTL 0x40c
#define ESDHC_PERIPHERAL_CLK_SEL 0x00080000
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 3cc8bfee6c18..d6c9ebd8d263 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -232,6 +232,7 @@ struct sdhci_msm_variant_ops {
*/
struct sdhci_msm_variant_info {
bool mci_removed;
+ bool restore_dll_config;
const struct sdhci_msm_variant_ops *var_ops;
const struct sdhci_msm_offset *offset;
};
@@ -256,8 +257,11 @@ struct sdhci_msm_host {
bool pwr_irq_flag;
u32 caps_0;
bool mci_removed;
+ bool restore_dll_config;
const struct sdhci_msm_variant_ops *var_ops;
const struct sdhci_msm_offset *offset;
+ bool use_cdr;
+ u32 transfer_mode;
};
static const struct sdhci_msm_offset *sdhci_priv_msm_offset(struct sdhci_host *host)
@@ -1025,6 +1029,69 @@ out:
return ret;
}
+static bool sdhci_msm_is_tuning_needed(struct sdhci_host *host)
+{
+ struct mmc_ios *ios = &host->mmc->ios;
+
+ /*
+ * Tuning is required for SDR104, HS200 and HS400 cards and
+ * if clock frequency is greater than 100MHz in these modes.
+ */
+ if (host->clock <= CORE_FREQ_100MHZ ||
+ !(ios->timing == MMC_TIMING_MMC_HS400 ||
+ ios->timing == MMC_TIMING_MMC_HS200 ||
+ ios->timing == MMC_TIMING_UHS_SDR104) ||
+ ios->enhanced_strobe)
+ return false;
+
+ return true;
+}
+
+static int sdhci_msm_restore_sdr_dll_config(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+ int ret;
+
+ /*
+ * SDR DLL comes into picture only for timing modes which needs
+ * tuning.
+ */
+ if (!sdhci_msm_is_tuning_needed(host))
+ return 0;
+
+ /* Reset the tuning block */
+ ret = msm_init_cm_dll(host);
+ if (ret)
+ return ret;
+
+ /* Restore the tuning block */
+ ret = msm_config_cm_dll_phase(host, msm_host->saved_tuning_phase);
+
+ return ret;
+}
+
+static void sdhci_msm_set_cdr(struct sdhci_host *host, bool enable)
+{
+ const struct sdhci_msm_offset *msm_offset = sdhci_priv_msm_offset(host);
+ u32 config, oldconfig = readl_relaxed(host->ioaddr +
+ msm_offset->core_dll_config);
+
+ config = oldconfig;
+ if (enable) {
+ config |= CORE_CDR_EN;
+ config &= ~CORE_CDR_EXT_EN;
+ } else {
+ config &= ~CORE_CDR_EN;
+ config |= CORE_CDR_EXT_EN;
+ }
+
+ if (config != oldconfig) {
+ writel_relaxed(config, host->ioaddr +
+ msm_offset->core_dll_config);
+ }
+}
+
static int sdhci_msm_execute_tuning(struct mmc_host *mmc, u32 opcode)
{
struct sdhci_host *host = mmc_priv(mmc);
@@ -1035,15 +1102,14 @@ static int sdhci_msm_execute_tuning(struct mmc_host *mmc, u32 opcode)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
- /*
- * Tuning is required for SDR104, HS200 and HS400 cards and
- * if clock frequency is greater than 100MHz in these modes.
- */
- if (host->clock <= CORE_FREQ_100MHZ ||
- !(ios.timing == MMC_TIMING_MMC_HS400 ||
- ios.timing == MMC_TIMING_MMC_HS200 ||
- ios.timing == MMC_TIMING_UHS_SDR104))
+ if (!sdhci_msm_is_tuning_needed(host)) {
+ msm_host->use_cdr = false;
+ sdhci_msm_set_cdr(host, false);
return 0;
+ }
+
+ /* Clock-Data-Recovery used to dynamically adjust RX sampling point */
+ msm_host->use_cdr = true;
/*
* For HS400 tuning in HS200 timing requires:
@@ -1069,7 +1135,6 @@ retry:
if (rc)
return rc;
- msm_host->saved_tuning_phase = phase;
rc = mmc_send_tuning(mmc, opcode, NULL);
if (!rc) {
/* Tuning is successful at this tuning point */
@@ -1094,6 +1159,7 @@ retry:
rc = msm_config_cm_dll_phase(host, phase);
if (rc)
return rc;
+ msm_host->saved_tuning_phase = phase;
dev_dbg(mmc_dev(mmc), "%s: Setting the tuning phase to %d\n",
mmc_hostname(mmc), phase);
} else {
@@ -1525,6 +1591,19 @@ static int __sdhci_msm_check_write(struct sdhci_host *host, u16 val, int reg)
case SDHCI_POWER_CONTROL:
req_type = !val ? REQ_BUS_OFF : REQ_BUS_ON;
break;
+ case SDHCI_TRANSFER_MODE:
+ msm_host->transfer_mode = val;
+ break;
+ case SDHCI_COMMAND:
+ if (!msm_host->use_cdr)
+ break;
+ if ((msm_host->transfer_mode & SDHCI_TRNS_READ) &&
+ SDHCI_GET_CMD(val) != MMC_SEND_TUNING_BLOCK_HS200 &&
+ SDHCI_GET_CMD(val) != MMC_SEND_TUNING_BLOCK)
+ sdhci_msm_set_cdr(host, true);
+ else
+ sdhci_msm_set_cdr(host, false);
+ break;
}
if (req_type) {
@@ -1616,7 +1695,6 @@ static const struct sdhci_msm_variant_ops v5_var_ops = {
};
static const struct sdhci_msm_variant_info sdhci_msm_mci_var = {
- .mci_removed = false,
.var_ops = &mci_var_ops,
.offset = &sdhci_msm_mci_offset,
};
@@ -1627,9 +1705,17 @@ static const struct sdhci_msm_variant_info sdhci_msm_v5_var = {
.offset = &sdhci_msm_v5_offset,
};
+static const struct sdhci_msm_variant_info sdm845_sdhci_var = {
+ .mci_removed = true,
+ .restore_dll_config = true,
+ .var_ops = &v5_var_ops,
+ .offset = &sdhci_msm_v5_offset,
+};
+
static const struct of_device_id sdhci_msm_dt_match[] = {
{.compatible = "qcom,sdhci-msm-v4", .data = &sdhci_msm_mci_var},
{.compatible = "qcom,sdhci-msm-v5", .data = &sdhci_msm_v5_var},
+ {.compatible = "qcom,sdm845-sdhci", .data = &sdm845_sdhci_var},
{},
};
@@ -1689,6 +1775,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
var_info = of_device_get_match_data(&pdev->dev);
msm_host->mci_removed = var_info->mci_removed;
+ msm_host->restore_dll_config = var_info->restore_dll_config;
msm_host->var_ops = var_info->var_ops;
msm_host->offset = var_info->offset;
@@ -1910,8 +1997,7 @@ static int sdhci_msm_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
-static int sdhci_msm_runtime_suspend(struct device *dev)
+static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev)
{
struct sdhci_host *host = dev_get_drvdata(dev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -1923,16 +2009,26 @@ static int sdhci_msm_runtime_suspend(struct device *dev)
return 0;
}
-static int sdhci_msm_runtime_resume(struct device *dev)
+static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev)
{
struct sdhci_host *host = dev_get_drvdata(dev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+ int ret;
- return clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks),
+ ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks),
msm_host->bulk_clks);
+ if (ret)
+ return ret;
+ /*
+ * Whenever core-clock is gated dynamically, it's needed to
+ * restore the SDR DLL settings when the clock is ungated.
+ */
+ if (msm_host->restore_dll_config && msm_host->clk_rate)
+ return sdhci_msm_restore_sdr_dll_config(host);
+
+ return 0;
}
-#endif
static const struct dev_pm_ops sdhci_msm_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 142c4b802f31..c9e3e050ccc8 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -231,25 +231,6 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock)
}
}
-static void sdhci_arasan_am654_set_clock(struct sdhci_host *host,
- unsigned int clock)
-{
- struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
- struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
-
- if (sdhci_arasan->is_phy_on) {
- phy_power_off(sdhci_arasan->phy);
- sdhci_arasan->is_phy_on = false;
- }
-
- sdhci_set_clock(host, clock);
-
- if (clock > PHY_CLK_TOO_SLOW_HZ) {
- phy_power_on(sdhci_arasan->phy);
- sdhci_arasan->is_phy_on = true;
- }
-}
-
static void sdhci_arasan_hs400_enhanced_strobe(struct mmc_host *mmc,
struct mmc_ios *ios)
{
@@ -335,29 +316,6 @@ static struct sdhci_arasan_of_data sdhci_arasan_data = {
.pdata = &sdhci_arasan_pdata,
};
-static const struct sdhci_ops sdhci_arasan_am654_ops = {
- .set_clock = sdhci_arasan_am654_set_clock,
- .get_max_clock = sdhci_pltfm_clk_get_max_clock,
- .get_timeout_clock = sdhci_pltfm_clk_get_max_clock,
- .set_bus_width = sdhci_set_bus_width,
- .reset = sdhci_arasan_reset,
- .set_uhs_signaling = sdhci_set_uhs_signaling,
-};
-
-static const struct sdhci_pltfm_data sdhci_arasan_am654_pdata = {
- .ops = &sdhci_arasan_am654_ops,
- .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
- SDHCI_QUIRK_INVERTED_WRITE_PROTECT |
- SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
- .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
- SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN |
- SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400,
-};
-
-static const struct sdhci_arasan_of_data sdhci_arasan_am654_data = {
- .pdata = &sdhci_arasan_am654_pdata,
-};
-
static u32 sdhci_arasan_cqhci_irq(struct sdhci_host *host, u32 intmask)
{
int cmd_error = 0;
@@ -520,10 +478,6 @@ static const struct of_device_id sdhci_arasan_of_match[] = {
.compatible = "rockchip,rk3399-sdhci-5.1",
.data = &sdhci_arasan_rk3399_data,
},
- {
- .compatible = "ti,am654-sdhci-5.1",
- .data = &sdhci_arasan_am654_data,
- },
/* Generic compatible below here */
{
.compatible = "arasan,sdhci-8.9a",
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 86fc9f022002..4e669b4edfc1 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -78,6 +78,8 @@ struct sdhci_esdhc {
u8 vendor_ver;
u8 spec_ver;
bool quirk_incorrect_hostver;
+ bool quirk_limited_clk_division;
+ bool quirk_unreliable_pulse_detection;
bool quirk_fixup_tuning;
unsigned int peripheral_clock;
const struct esdhc_clk_fixup *clk_fixup;
@@ -528,8 +530,12 @@ static void esdhc_clock_enable(struct sdhci_host *host, bool enable)
/* Wait max 20 ms */
timeout = ktime_add_ms(ktime_get(), 20);
val = ESDHC_CLOCK_STABLE;
- while (!(sdhci_readl(host, ESDHC_PRSSTAT) & val)) {
- if (ktime_after(ktime_get(), timeout)) {
+ while (1) {
+ bool timedout = ktime_after(ktime_get(), timeout);
+
+ if (sdhci_readl(host, ESDHC_PRSSTAT) & val)
+ break;
+ if (timedout) {
pr_err("%s: Internal clock never stabilised.\n",
mmc_hostname(host->mmc));
break;
@@ -544,6 +550,7 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
int pre_div = 1;
int div = 1;
+ int division;
ktime_t timeout;
long fixup = 0;
u32 temp;
@@ -579,6 +586,26 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
while (host->max_clk / pre_div / div > clock && div < 16)
div++;
+ if (esdhc->quirk_limited_clk_division &&
+ clock == MMC_HS200_MAX_DTR &&
+ (host->mmc->ios.timing == MMC_TIMING_MMC_HS400 ||
+ host->flags & SDHCI_HS400_TUNING)) {
+ division = pre_div * div;
+ if (division <= 4) {
+ pre_div = 4;
+ div = 1;
+ } else if (division <= 8) {
+ pre_div = 4;
+ div = 2;
+ } else if (division <= 12) {
+ pre_div = 4;
+ div = 3;
+ } else {
+ pr_warn("%s: using unsupported clock division.\n",
+ mmc_hostname(host->mmc));
+ }
+ }
+
dev_dbg(mmc_dev(host->mmc), "desired SD clock: %d, actual: %d\n",
clock, host->max_clk / pre_div / div);
host->mmc->actual_clock = host->max_clk / pre_div / div;
@@ -592,10 +619,36 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
| (pre_div << ESDHC_PREDIV_SHIFT));
sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL);
+ if (host->mmc->ios.timing == MMC_TIMING_MMC_HS400 &&
+ clock == MMC_HS200_MAX_DTR) {
+ temp = sdhci_readl(host, ESDHC_TBCTL);
+ sdhci_writel(host, temp | ESDHC_HS400_MODE, ESDHC_TBCTL);
+ temp = sdhci_readl(host, ESDHC_SDCLKCTL);
+ sdhci_writel(host, temp | ESDHC_CMD_CLK_CTL, ESDHC_SDCLKCTL);
+ esdhc_clock_enable(host, true);
+
+ temp = sdhci_readl(host, ESDHC_DLLCFG0);
+ temp |= ESDHC_DLL_ENABLE;
+ if (host->mmc->actual_clock == MMC_HS200_MAX_DTR)
+ temp |= ESDHC_DLL_FREQ_SEL;
+ sdhci_writel(host, temp, ESDHC_DLLCFG0);
+ temp = sdhci_readl(host, ESDHC_TBCTL);
+ sdhci_writel(host, temp | ESDHC_HS400_WNDW_ADJUST, ESDHC_TBCTL);
+
+ esdhc_clock_enable(host, false);
+ temp = sdhci_readl(host, ESDHC_DMA_SYSCTL);
+ temp |= ESDHC_FLUSH_ASYNC_FIFO;
+ sdhci_writel(host, temp, ESDHC_DMA_SYSCTL);
+ }
+
/* Wait max 20 ms */
timeout = ktime_add_ms(ktime_get(), 20);
- while (!(sdhci_readl(host, ESDHC_PRSSTAT) & ESDHC_CLOCK_STABLE)) {
- if (ktime_after(ktime_get(), timeout)) {
+ while (1) {
+ bool timedout = ktime_after(ktime_get(), timeout);
+
+ if (sdhci_readl(host, ESDHC_PRSSTAT) & ESDHC_CLOCK_STABLE)
+ break;
+ if (timedout) {
pr_err("%s: Internal clock never stabilised.\n",
mmc_hostname(host->mmc));
return;
@@ -603,6 +656,7 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
udelay(10);
}
+ temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL);
temp |= ESDHC_CLOCK_SDCLKEN;
sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL);
}
@@ -631,6 +685,8 @@ static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width)
static void esdhc_reset(struct sdhci_host *host, u8 mask)
{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
u32 val;
sdhci_reset(host, mask);
@@ -642,6 +698,12 @@ static void esdhc_reset(struct sdhci_host *host, u8 mask)
val = sdhci_readl(host, ESDHC_TBCTL);
val &= ~ESDHC_TB_EN;
sdhci_writel(host, val, ESDHC_TBCTL);
+
+ if (esdhc->quirk_unreliable_pulse_detection) {
+ val = sdhci_readl(host, ESDHC_DLLCFG1);
+ val &= ~ESDHC_DLL_PD_PULSE_STRETCH_SEL;
+ sdhci_writel(host, val, ESDHC_DLLCFG1);
+ }
}
}
@@ -728,25 +790,50 @@ static struct soc_device_attribute soc_fixup_tuning[] = {
{ },
};
-static int esdhc_execute_tuning(struct mmc_host *mmc, u32 opcode)
+static void esdhc_tuning_block_enable(struct sdhci_host *host, bool enable)
{
- struct sdhci_host *host = mmc_priv(mmc);
- struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
- struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
u32 val;
- /* Use tuning block for tuning procedure */
esdhc_clock_enable(host, false);
+
val = sdhci_readl(host, ESDHC_DMA_SYSCTL);
val |= ESDHC_FLUSH_ASYNC_FIFO;
sdhci_writel(host, val, ESDHC_DMA_SYSCTL);
val = sdhci_readl(host, ESDHC_TBCTL);
- val |= ESDHC_TB_EN;
+ if (enable)
+ val |= ESDHC_TB_EN;
+ else
+ val &= ~ESDHC_TB_EN;
sdhci_writel(host, val, ESDHC_TBCTL);
+
esdhc_clock_enable(host, true);
+}
+
+static int esdhc_execute_tuning(struct mmc_host *mmc, u32 opcode)
+{
+ struct sdhci_host *host = mmc_priv(mmc);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
+ bool hs400_tuning;
+ u32 val;
+ int ret;
+
+ if (esdhc->quirk_limited_clk_division &&
+ host->flags & SDHCI_HS400_TUNING)
+ esdhc_of_set_clock(host, host->clock);
+
+ esdhc_tuning_block_enable(host, true);
+
+ hs400_tuning = host->flags & SDHCI_HS400_TUNING;
+ ret = sdhci_execute_tuning(mmc, opcode);
+
+ if (hs400_tuning) {
+ val = sdhci_readl(host, ESDHC_SDTIMNGCTL);
+ val |= ESDHC_FLW_CTL_BG;
+ sdhci_writel(host, val, ESDHC_SDTIMNGCTL);
+ }
- sdhci_execute_tuning(mmc, opcode);
if (host->tuning_err == -EAGAIN && esdhc->quirk_fixup_tuning) {
/* program TBPTR[TB_WNDW_END_PTR] = 3*DIV_RATIO and
@@ -765,7 +852,16 @@ static int esdhc_execute_tuning(struct mmc_host *mmc, u32 opcode)
sdhci_writel(host, val, ESDHC_TBCTL);
sdhci_execute_tuning(mmc, opcode);
}
- return 0;
+ return ret;
+}
+
+static void esdhc_set_uhs_signaling(struct sdhci_host *host,
+ unsigned int timing)
+{
+ if (timing == MMC_TIMING_MMC_HS400)
+ esdhc_tuning_block_enable(host, true);
+ else
+ sdhci_set_uhs_signaling(host, timing);
}
#ifdef CONFIG_PM_SLEEP
@@ -814,7 +910,7 @@ static const struct sdhci_ops sdhci_esdhc_be_ops = {
.adma_workaround = esdhc_of_adma_workaround,
.set_bus_width = esdhc_pltfm_set_bus_width,
.reset = esdhc_reset,
- .set_uhs_signaling = sdhci_set_uhs_signaling,
+ .set_uhs_signaling = esdhc_set_uhs_signaling,
};
static const struct sdhci_ops sdhci_esdhc_le_ops = {
@@ -831,7 +927,7 @@ static const struct sdhci_ops sdhci_esdhc_le_ops = {
.adma_workaround = esdhc_of_adma_workaround,
.set_bus_width = esdhc_pltfm_set_bus_width,
.reset = esdhc_reset,
- .set_uhs_signaling = sdhci_set_uhs_signaling,
+ .set_uhs_signaling = esdhc_set_uhs_signaling,
};
static const struct sdhci_pltfm_data sdhci_esdhc_be_pdata = {
@@ -857,6 +953,16 @@ static struct soc_device_attribute soc_incorrect_hostver[] = {
{ },
};
+static struct soc_device_attribute soc_fixup_sdhc_clkdivs[] = {
+ { .family = "QorIQ LX2160A", .revision = "1.0", },
+ { },
+};
+
+static struct soc_device_attribute soc_unreliable_pulse_detection[] = {
+ { .family = "QorIQ LX2160A", .revision = "1.0", },
+ { },
+};
+
static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host)
{
const struct of_device_id *match;
@@ -879,6 +985,16 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host)
else
esdhc->quirk_incorrect_hostver = false;
+ if (soc_device_match(soc_fixup_sdhc_clkdivs))
+ esdhc->quirk_limited_clk_division = true;
+ else
+ esdhc->quirk_limited_clk_division = false;
+
+ if (soc_device_match(soc_unreliable_pulse_detection))
+ esdhc->quirk_unreliable_pulse_detection = true;
+ else
+ esdhc->quirk_unreliable_pulse_detection = false;
+
match = of_match_node(sdhci_esdhc_of_match, pdev->dev.of_node);
if (match)
esdhc->clk_fixup = match->data;
@@ -909,6 +1025,12 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host)
}
}
+static int esdhc_hs400_prepare_ddr(struct mmc_host *mmc)
+{
+ esdhc_tuning_block_enable(mmc_priv(mmc), false);
+ return 0;
+}
+
static int sdhci_esdhc_probe(struct platform_device *pdev)
{
struct sdhci_host *host;
@@ -932,6 +1054,7 @@ static int sdhci_esdhc_probe(struct platform_device *pdev)
host->mmc_host_ops.start_signal_voltage_switch =
esdhc_signal_voltage_switch;
host->mmc_host_ops.execute_tuning = esdhc_execute_tuning;
+ host->mmc_host_ops.hs400_prepare_ddr = esdhc_hs400_prepare_ddr;
host->tuning_delay = 1;
esdhc_init(pdev, host);
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index d264391616f9..c11c18a9aacb 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -27,6 +27,7 @@
#include <linux/regulator/consumer.h>
#include <linux/pinctrl/consumer.h>
#include <linux/sys_soc.h>
+#include <linux/thermal.h>
#include "sdhci-pltfm.h"
@@ -115,6 +116,7 @@ struct sdhci_omap_host {
struct pinctrl *pinctrl;
struct pinctrl_state **pinctrl_state;
+ bool is_tuning;
};
static void sdhci_omap_start_clock(struct sdhci_omap_host *omap_host);
@@ -220,8 +222,12 @@ static void sdhci_omap_conf_bus_power(struct sdhci_omap_host *omap_host,
/* wait 1ms */
timeout = ktime_add_ms(ktime_get(), SDHCI_OMAP_TIMEOUT);
- while (!(sdhci_omap_readl(omap_host, SDHCI_OMAP_HCTL) & HCTL_SDBP)) {
- if (WARN_ON(ktime_after(ktime_get(), timeout)))
+ while (1) {
+ bool timedout = ktime_after(ktime_get(), timeout);
+
+ if (sdhci_omap_readl(omap_host, SDHCI_OMAP_HCTL) & HCTL_SDBP)
+ break;
+ if (WARN_ON(timedout))
return;
usleep_range(5, 10);
}
@@ -285,19 +291,19 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
struct sdhci_host *host = mmc_priv(mmc);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
+ struct thermal_zone_device *thermal_dev;
struct device *dev = omap_host->dev;
struct mmc_ios *ios = &mmc->ios;
u32 start_window = 0, max_window = 0;
+ bool single_point_failure = false;
bool dcrc_was_enabled = false;
u8 cur_match, prev_match = 0;
u32 length = 0, max_len = 0;
u32 phase_delay = 0;
+ int temperature;
int ret = 0;
u32 reg;
-
- pltfm_host = sdhci_priv(host);
- omap_host = sdhci_pltfm_priv(pltfm_host);
- dev = omap_host->dev;
+ int i;
/* clock tuning is not needed for upto 52MHz */
if (ios->clock <= 52000000)
@@ -307,6 +313,16 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
if (ios->timing == MMC_TIMING_UHS_SDR50 && !(reg & CAPA2_TSDR50))
return 0;
+ thermal_dev = thermal_zone_get_zone_by_name("cpu_thermal");
+ if (IS_ERR(thermal_dev)) {
+ dev_err(dev, "Unable to get thermal zone for tuning\n");
+ return PTR_ERR(thermal_dev);
+ }
+
+ ret = thermal_zone_get_temp(thermal_dev, &temperature);
+ if (ret)
+ return ret;
+
reg = sdhci_omap_readl(omap_host, SDHCI_OMAP_DLL);
reg |= DLL_SWT;
sdhci_omap_writel(omap_host, SDHCI_OMAP_DLL, reg);
@@ -322,6 +338,13 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
dcrc_was_enabled = true;
}
+ omap_host->is_tuning = true;
+
+ /*
+ * Stage 1: Search for a maximum pass window ignoring any
+ * any single point failures. If the tuning value ends up
+ * near it, move away from it in stage 2 below
+ */
while (phase_delay <= MAX_PHASE_DELAY) {
sdhci_omap_set_dll(omap_host, phase_delay);
@@ -329,10 +352,15 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
if (cur_match) {
if (prev_match) {
length++;
+ } else if (single_point_failure) {
+ /* ignore single point failure */
+ length++;
} else {
start_window = phase_delay;
length = 1;
}
+ } else {
+ single_point_failure = prev_match;
}
if (length > max_len) {
@@ -350,18 +378,84 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
goto tuning_error;
}
+ /*
+ * Assign tuning value as a ratio of maximum pass window based
+ * on temperature
+ */
+ if (temperature < -20000)
+ phase_delay = min(max_window + 4 * max_len - 24,
+ max_window +
+ DIV_ROUND_UP(13 * max_len, 16) * 4);
+ else if (temperature < 20000)
+ phase_delay = max_window + DIV_ROUND_UP(9 * max_len, 16) * 4;
+ else if (temperature < 40000)
+ phase_delay = max_window + DIV_ROUND_UP(8 * max_len, 16) * 4;
+ else if (temperature < 70000)
+ phase_delay = max_window + DIV_ROUND_UP(7 * max_len, 16) * 4;
+ else if (temperature < 90000)
+ phase_delay = max_window + DIV_ROUND_UP(5 * max_len, 16) * 4;
+ else if (temperature < 120000)
+ phase_delay = max_window + DIV_ROUND_UP(4 * max_len, 16) * 4;
+ else
+ phase_delay = max_window + DIV_ROUND_UP(3 * max_len, 16) * 4;
+
+ /*
+ * Stage 2: Search for a single point failure near the chosen tuning
+ * value in two steps. First in the +3 to +10 range and then in the
+ * +2 to -10 range. If found, move away from it in the appropriate
+ * direction by the appropriate amount depending on the temperature.
+ */
+ for (i = 3; i <= 10; i++) {
+ sdhci_omap_set_dll(omap_host, phase_delay + i);
+
+ if (mmc_send_tuning(mmc, opcode, NULL)) {
+ if (temperature < 10000)
+ phase_delay += i + 6;
+ else if (temperature < 20000)
+ phase_delay += i - 12;
+ else if (temperature < 70000)
+ phase_delay += i - 8;
+ else
+ phase_delay += i - 6;
+
+ goto single_failure_found;
+ }
+ }
+
+ for (i = 2; i >= -10; i--) {
+ sdhci_omap_set_dll(omap_host, phase_delay + i);
+
+ if (mmc_send_tuning(mmc, opcode, NULL)) {
+ if (temperature < 10000)
+ phase_delay += i + 12;
+ else if (temperature < 20000)
+ phase_delay += i + 8;
+ else if (temperature < 70000)
+ phase_delay += i + 8;
+ else if (temperature < 90000)
+ phase_delay += i + 10;
+ else
+ phase_delay += i + 12;
+
+ goto single_failure_found;
+ }
+ }
+
+single_failure_found:
reg = sdhci_omap_readl(omap_host, SDHCI_OMAP_AC12);
if (!(reg & AC12_SCLK_SEL)) {
ret = -EIO;
goto tuning_error;
}
- phase_delay = max_window + 4 * (max_len >> 1);
sdhci_omap_set_dll(omap_host, phase_delay);
+ omap_host->is_tuning = false;
+
goto ret;
tuning_error:
+ omap_host->is_tuning = false;
dev_err(dev, "Tuning failed\n");
sdhci_omap_disable_tuning(omap_host);
@@ -653,8 +747,12 @@ static void sdhci_omap_init_74_clocks(struct sdhci_host *host, u8 power_mode)
/* wait 1ms */
timeout = ktime_add_ms(ktime_get(), SDHCI_OMAP_TIMEOUT);
- while (!(sdhci_omap_readl(omap_host, SDHCI_OMAP_STAT) & INT_CC_EN)) {
- if (WARN_ON(ktime_after(ktime_get(), timeout)))
+ while (1) {
+ bool timedout = ktime_after(ktime_get(), timeout);
+
+ if (sdhci_omap_readl(omap_host, SDHCI_OMAP_STAT) & INT_CC_EN)
+ break;
+ if (WARN_ON(timedout))
return;
usleep_range(5, 10);
}
@@ -687,6 +785,18 @@ static void sdhci_omap_set_uhs_signaling(struct sdhci_host *host,
sdhci_omap_start_clock(omap_host);
}
+void sdhci_omap_reset(struct sdhci_host *host, u8 mask)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
+
+ /* Don't reset data lines during tuning operation */
+ if (omap_host->is_tuning)
+ mask &= ~SDHCI_RESET_DATA;
+
+ sdhci_reset(host, mask);
+}
+
static struct sdhci_ops sdhci_omap_ops = {
.set_clock = sdhci_omap_set_clock,
.set_power = sdhci_omap_set_power,
@@ -695,7 +805,7 @@ static struct sdhci_ops sdhci_omap_ops = {
.get_min_clock = sdhci_omap_get_min_clock,
.set_bus_width = sdhci_omap_set_bus_width,
.platform_send_init_74_clocks = sdhci_omap_init_74_clocks,
- .reset = sdhci_reset,
+ .reset = sdhci_omap_reset,
.set_uhs_signaling = sdhci_omap_set_uhs_signaling,
};
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index c4115bae5db1..2a6eba74b94e 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -710,11 +710,15 @@ static int intel_execute_tuning(struct mmc_host *mmc, u32 opcode)
static void byt_probe_slot(struct sdhci_pci_slot *slot)
{
struct mmc_host_ops *ops = &slot->host->mmc_host_ops;
+ struct device *dev = &slot->chip->pdev->dev;
+ struct mmc_host *mmc = slot->host->mmc;
byt_read_dsm(slot);
ops->execute_tuning = intel_execute_tuning;
ops->start_signal_voltage_switch = intel_start_signal_voltage_switch;
+
+ device_property_read_u32(dev, "max-frequency", &mmc->f_max);
}
static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
@@ -937,7 +941,8 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot)
static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = {
.allow_runtime_pm = true,
.probe_slot = byt_emmc_probe_slot,
- .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED,
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 |
SDHCI_QUIRK2_STOP_WITH_TC,
@@ -957,7 +962,8 @@ static const struct sdhci_pci_fixes sdhci_intel_glk_emmc = {
.runtime_suspend = glk_runtime_suspend,
.runtime_resume = glk_runtime_resume,
#endif
- .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED,
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 |
SDHCI_QUIRK2_STOP_WITH_TC,
@@ -966,7 +972,8 @@ static const struct sdhci_pci_fixes sdhci_intel_glk_emmc = {
};
static const struct sdhci_pci_fixes sdhci_ni_byt_sdio = {
- .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED,
.quirks2 = SDHCI_QUIRK2_HOST_OFF_CARD_ON |
SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
.allow_runtime_pm = true,
@@ -976,7 +983,8 @@ static const struct sdhci_pci_fixes sdhci_ni_byt_sdio = {
};
static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
- .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED,
.quirks2 = SDHCI_QUIRK2_HOST_OFF_CARD_ON |
SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
.allow_runtime_pm = true,
@@ -986,7 +994,8 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
};
static const struct sdhci_pci_fixes sdhci_intel_byt_sd = {
- .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED,
.quirks2 = SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON |
SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
SDHCI_QUIRK2_STOP_WITH_TC,
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 5956e90380e8..5b5eb53a63d2 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -357,9 +357,13 @@ static int xenon_emmc_phy_enable_dll(struct sdhci_host *host)
/* Wait max 32 ms */
timeout = ktime_add_ms(ktime_get(), 32);
- while (!(sdhci_readw(host, XENON_SLOT_EXT_PRESENT_STATE) &
- XENON_DLL_LOCK_STATE)) {
- if (ktime_after(ktime_get(), timeout)) {
+ while (1) {
+ bool timedout = ktime_after(ktime_get(), timeout);
+
+ if (sdhci_readw(host, XENON_SLOT_EXT_PRESENT_STATE) &
+ XENON_DLL_LOCK_STATE)
+ break;
+ if (timedout) {
dev_err(mmc_dev(host->mmc), "Wait for DLL Lock time-out\n");
return -ETIMEDOUT;
}
diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c
index 4d0791f6ec23..a0b5089b3274 100644
--- a/drivers/mmc/host/sdhci-xenon.c
+++ b/drivers/mmc/host/sdhci-xenon.c
@@ -34,9 +34,13 @@ static int xenon_enable_internal_clk(struct sdhci_host *host)
sdhci_writel(host, reg, SDHCI_CLOCK_CONTROL);
/* Wait max 20 ms */
timeout = ktime_add_ms(ktime_get(), 20);
- while (!((reg = sdhci_readw(host, SDHCI_CLOCK_CONTROL))
- & SDHCI_CLOCK_INT_STABLE)) {
- if (ktime_after(ktime_get(), timeout)) {
+ while (1) {
+ bool timedout = ktime_after(ktime_get(), timeout);
+
+ reg = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+ if (reg & SDHCI_CLOCK_INT_STABLE)
+ break;
+ if (timedout) {
dev_err(mmc_dev(host->mmc), "Internal clock never stabilised.\n");
return -ETIMEDOUT;
}
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index df05352b6a4a..a22e11a65658 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -82,8 +82,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
SDHCI_DUMP("Int enab: 0x%08x | Sig enab: 0x%08x\n",
sdhci_readl(host, SDHCI_INT_ENABLE),
sdhci_readl(host, SDHCI_SIGNAL_ENABLE));
- SDHCI_DUMP("AC12 err: 0x%08x | Slot int: 0x%08x\n",
- sdhci_readw(host, SDHCI_ACMD12_ERR),
+ SDHCI_DUMP("ACmd stat: 0x%08x | Slot int: 0x%08x\n",
+ sdhci_readw(host, SDHCI_AUTO_CMD_STATUS),
sdhci_readw(host, SDHCI_SLOT_INT_STATUS));
SDHCI_DUMP("Caps: 0x%08x | Caps_1: 0x%08x\n",
sdhci_readl(host, SDHCI_CAPABILITIES),
@@ -349,6 +349,9 @@ static void __sdhci_led_activate(struct sdhci_host *host)
{
u8 ctrl;
+ if (host->quirks & SDHCI_QUIRK_NO_LED)
+ return;
+
ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
ctrl |= SDHCI_CTRL_LED;
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
@@ -358,6 +361,9 @@ static void __sdhci_led_deactivate(struct sdhci_host *host)
{
u8 ctrl;
+ if (host->quirks & SDHCI_QUIRK_NO_LED)
+ return;
+
ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
ctrl &= ~SDHCI_CTRL_LED;
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
@@ -387,6 +393,9 @@ static int sdhci_led_register(struct sdhci_host *host)
{
struct mmc_host *mmc = host->mmc;
+ if (host->quirks & SDHCI_QUIRK_NO_LED)
+ return 0;
+
snprintf(host->led_name, sizeof(host->led_name),
"%s::", mmc_hostname(mmc));
@@ -400,6 +409,9 @@ static int sdhci_led_register(struct sdhci_host *host)
static void sdhci_led_unregister(struct sdhci_host *host)
{
+ if (host->quirks & SDHCI_QUIRK_NO_LED)
+ return;
+
led_classdev_unregister(&host->led);
}
@@ -933,6 +945,11 @@ static void sdhci_set_transfer_irqs(struct sdhci_host *host)
else
host->ier = (host->ier & ~dma_irqs) | pio_irqs;
+ if (host->flags & (SDHCI_AUTO_CMD23 | SDHCI_AUTO_CMD12))
+ host->ier |= SDHCI_INT_AUTO_CMD_ERR;
+ else
+ host->ier &= ~SDHCI_INT_AUTO_CMD_ERR;
+
sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
}
@@ -1191,8 +1208,7 @@ static bool sdhci_needs_reset(struct sdhci_host *host, struct mmc_request *mrq)
return (!(host->flags & SDHCI_DEVICE_DEAD) &&
((mrq->cmd && mrq->cmd->error) ||
(mrq->sbc && mrq->sbc->error) ||
- (mrq->data && ((mrq->data->error && !mrq->data->stop) ||
- (mrq->data->stop && mrq->data->stop->error))) ||
+ (mrq->data && mrq->data->stop && mrq->data->stop->error) ||
(host->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST)));
}
@@ -1244,6 +1260,16 @@ static void sdhci_finish_data(struct sdhci_host *host)
host->data = NULL;
host->data_cmd = NULL;
+ /*
+ * The controller needs a reset of internal state machines upon error
+ * conditions.
+ */
+ if (data->error) {
+ if (!host->cmd || host->cmd == data_cmd)
+ sdhci_do_reset(host, SDHCI_RESET_CMD);
+ sdhci_do_reset(host, SDHCI_RESET_DATA);
+ }
+
if ((host->flags & (SDHCI_REQ_USE_DMA | SDHCI_USE_ADMA)) ==
(SDHCI_REQ_USE_DMA | SDHCI_USE_ADMA))
sdhci_adma_table_post(host, data);
@@ -1268,17 +1294,6 @@ static void sdhci_finish_data(struct sdhci_host *host)
if (data->stop &&
(data->error ||
!data->mrq->sbc)) {
-
- /*
- * The controller needs a reset of internal state machines
- * upon error conditions.
- */
- if (data->error) {
- if (!host->cmd || host->cmd == data_cmd)
- sdhci_do_reset(host, SDHCI_RESET_CMD);
- sdhci_do_reset(host, SDHCI_RESET_DATA);
- }
-
/*
* 'cap_cmd_during_tfr' request must not use the command line
* after mmc_command_done() has been called. It is upper layer's
@@ -2757,8 +2772,23 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
* *
\*****************************************************************************/
-static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask)
+static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
{
+ /* Handle auto-CMD12 error */
+ if (intmask & SDHCI_INT_AUTO_CMD_ERR && host->data_cmd) {
+ struct mmc_request *mrq = host->data_cmd->mrq;
+ u16 auto_cmd_status = sdhci_readw(host, SDHCI_AUTO_CMD_STATUS);
+ int data_err_bit = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
+ SDHCI_INT_DATA_TIMEOUT :
+ SDHCI_INT_DATA_CRC;
+
+ /* Treat auto-CMD12 error the same as data error */
+ if (!mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) {
+ *intmask_p |= data_err_bit;
+ return;
+ }
+ }
+
if (!host->cmd) {
/*
* SDHCI recovers from errors by resetting the cmd and data
@@ -2780,20 +2810,12 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask)
else
host->cmd->error = -EILSEQ;
- /*
- * If this command initiates a data phase and a response
- * CRC error is signalled, the card can start transferring
- * data - the card may have received the command without
- * error. We must not terminate the mmc_request early.
- *
- * If the card did not receive the command or returned an
- * error which prevented it sending data, the data phase
- * will time out.
- */
+ /* Treat data command CRC error the same as data CRC error */
if (host->cmd->data &&
(intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
SDHCI_INT_CRC) {
host->cmd = NULL;
+ *intmask_p |= SDHCI_INT_DATA_CRC;
return;
}
@@ -2801,6 +2823,21 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask)
return;
}
+ /* Handle auto-CMD23 error */
+ if (intmask & SDHCI_INT_AUTO_CMD_ERR) {
+ struct mmc_request *mrq = host->cmd->mrq;
+ u16 auto_cmd_status = sdhci_readw(host, SDHCI_AUTO_CMD_STATUS);
+ int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
+ -ETIMEDOUT :
+ -EILSEQ;
+
+ if (mrq->sbc && (host->flags & SDHCI_AUTO_CMD23)) {
+ mrq->sbc->error = err;
+ sdhci_finish_mrq(host, mrq);
+ return;
+ }
+ }
+
if (intmask & SDHCI_INT_RESPONSE)
sdhci_finish_command(host);
}
@@ -3021,7 +3058,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
}
if (intmask & SDHCI_INT_CMD_MASK)
- sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK);
+ sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK, &intmask);
if (intmask & SDHCI_INT_DATA_MASK)
sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK);
@@ -3541,7 +3578,7 @@ void __sdhci_read_caps(struct sdhci_host *host, u16 *ver, u32 *caps, u32 *caps1)
}
EXPORT_SYMBOL_GPL(__sdhci_read_caps);
-static int sdhci_allocate_bounce_buffer(struct sdhci_host *host)
+static void sdhci_allocate_bounce_buffer(struct sdhci_host *host)
{
struct mmc_host *mmc = host->mmc;
unsigned int max_blocks;
@@ -3579,7 +3616,7 @@ static int sdhci_allocate_bounce_buffer(struct sdhci_host *host)
* Exiting with zero here makes sure we proceed with
* mmc->max_segs == 1.
*/
- return 0;
+ return;
}
host->bounce_addr = dma_map_single(mmc->parent,
@@ -3589,7 +3626,7 @@ static int sdhci_allocate_bounce_buffer(struct sdhci_host *host)
ret = dma_mapping_error(mmc->parent, host->bounce_addr);
if (ret)
/* Again fall back to max_segs == 1 */
- return 0;
+ return;
host->bounce_buffer_size = bounce_size;
/* Lie about this since we're bouncing */
@@ -3599,8 +3636,6 @@ static int sdhci_allocate_bounce_buffer(struct sdhci_host *host)
pr_info("%s bounce up to %u segments into one, max segment size %u bytes\n",
mmc_hostname(mmc), max_blocks, bounce_size);
-
- return 0;
}
static inline bool sdhci_can_64bit_dma(struct sdhci_host *host)
@@ -4134,12 +4169,9 @@ int sdhci_setup_host(struct sdhci_host *host)
*/
mmc->max_blk_count = (host->quirks & SDHCI_QUIRK_NO_MULTIBLOCK) ? 1 : 65535;
- if (mmc->max_segs == 1) {
+ if (mmc->max_segs == 1)
/* This may alter mmc->*_blk_* parameters */
- ret = sdhci_allocate_bounce_buffer(host);
- if (ret)
- return ret;
- }
+ sdhci_allocate_bounce_buffer(host);
return 0;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index b001cf4d3d7e..6cc9a3c2ac66 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -146,14 +146,15 @@
#define SDHCI_INT_DATA_CRC 0x00200000
#define SDHCI_INT_DATA_END_BIT 0x00400000
#define SDHCI_INT_BUS_POWER 0x00800000
-#define SDHCI_INT_ACMD12ERR 0x01000000
+#define SDHCI_INT_AUTO_CMD_ERR 0x01000000
#define SDHCI_INT_ADMA_ERROR 0x02000000
#define SDHCI_INT_NORMAL_MASK 0x00007FFF
#define SDHCI_INT_ERROR_MASK 0xFFFF8000
#define SDHCI_INT_CMD_MASK (SDHCI_INT_RESPONSE | SDHCI_INT_TIMEOUT | \
- SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX)
+ SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX | \
+ SDHCI_INT_AUTO_CMD_ERR)
#define SDHCI_INT_DATA_MASK (SDHCI_INT_DATA_END | SDHCI_INT_DMA_END | \
SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | \
SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_DATA_CRC | \
@@ -168,7 +169,11 @@
#define SDHCI_CQE_INT_MASK (SDHCI_CQE_INT_ERR_MASK | SDHCI_INT_CQE)
-#define SDHCI_ACMD12_ERR 0x3C
+#define SDHCI_AUTO_CMD_STATUS 0x3C
+#define SDHCI_AUTO_CMD_TIMEOUT 0x00000002
+#define SDHCI_AUTO_CMD_CRC 0x00000004
+#define SDHCI_AUTO_CMD_END_BIT 0x00000008
+#define SDHCI_AUTO_CMD_INDEX 0x00000010
#define SDHCI_HOST_CONTROL2 0x3E
#define SDHCI_CTRL_UHS_MASK 0x0007
@@ -403,6 +408,8 @@ struct sdhci_host {
#define SDHCI_QUIRK_INVERTED_WRITE_PROTECT (1<<16)
/* Controller does not like fast PIO transfers */
#define SDHCI_QUIRK_PIO_NEEDS_DELAY (1<<18)
+/* Controller does not have a LED */
+#define SDHCI_QUIRK_NO_LED (1<<19)
/* Controller has to be forced to use block size of 2048 bytes */
#define SDHCI_QUIRK_FORCE_BLK_SZ_2048 (1<<20)
/* Controller cannot do multi-block transfers */
diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
new file mode 100644
index 000000000000..8c05879850a0
--- /dev/null
+++ b/drivers/mmc/host/sdhci_am654.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sdhci_am654.c - SDHCI driver for TI's AM654 SOCs
+ *
+ * Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com
+ *
+ */
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+
+#include "sdhci-pltfm.h"
+
+/* CTL_CFG Registers */
+#define CTL_CFG_2 0x14
+
+#define SLOTTYPE_MASK GENMASK(31, 30)
+#define SLOTTYPE_EMBEDDED BIT(30)
+
+/* PHY Registers */
+#define PHY_CTRL1 0x100
+#define PHY_CTRL2 0x104
+#define PHY_CTRL3 0x108
+#define PHY_CTRL4 0x10C
+#define PHY_CTRL5 0x110
+#define PHY_CTRL6 0x114
+#define PHY_STAT1 0x130
+#define PHY_STAT2 0x134
+
+#define IOMUX_ENABLE_SHIFT 31
+#define IOMUX_ENABLE_MASK BIT(IOMUX_ENABLE_SHIFT)
+#define OTAPDLYENA_SHIFT 20
+#define OTAPDLYENA_MASK BIT(OTAPDLYENA_SHIFT)
+#define OTAPDLYSEL_SHIFT 12
+#define OTAPDLYSEL_MASK GENMASK(15, 12)
+#define STRBSEL_SHIFT 24
+#define STRBSEL_MASK GENMASK(27, 24)
+#define SEL50_SHIFT 8
+#define SEL50_MASK BIT(SEL50_SHIFT)
+#define SEL100_SHIFT 9
+#define SEL100_MASK BIT(SEL100_SHIFT)
+#define DLL_TRIM_ICP_SHIFT 4
+#define DLL_TRIM_ICP_MASK GENMASK(7, 4)
+#define DR_TY_SHIFT 20
+#define DR_TY_MASK GENMASK(22, 20)
+#define ENDLL_SHIFT 1
+#define ENDLL_MASK BIT(ENDLL_SHIFT)
+#define DLLRDY_SHIFT 0
+#define DLLRDY_MASK BIT(DLLRDY_SHIFT)
+#define PDB_SHIFT 0
+#define PDB_MASK BIT(PDB_SHIFT)
+#define CALDONE_SHIFT 1
+#define CALDONE_MASK BIT(CALDONE_SHIFT)
+#define RETRIM_SHIFT 17
+#define RETRIM_MASK BIT(RETRIM_SHIFT)
+
+#define DRIVER_STRENGTH_50_OHM 0x0
+#define DRIVER_STRENGTH_33_OHM 0x1
+#define DRIVER_STRENGTH_66_OHM 0x2
+#define DRIVER_STRENGTH_100_OHM 0x3
+#define DRIVER_STRENGTH_40_OHM 0x4
+
+#define CLOCK_TOO_SLOW_HZ 400000
+
+static struct regmap_config sdhci_am654_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .fast_io = true,
+};
+
+struct sdhci_am654_data {
+ struct regmap *base;
+ int otap_del_sel;
+ int trm_icp;
+ int drv_strength;
+ bool dll_on;
+};
+
+static void sdhci_am654_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
+ int sel50, sel100;
+ u32 mask, val;
+ int ret;
+
+ if (sdhci_am654->dll_on) {
+ regmap_update_bits(sdhci_am654->base, PHY_CTRL1,
+ ENDLL_MASK, 0);
+
+ sdhci_am654->dll_on = false;
+ }
+
+ sdhci_set_clock(host, clock);
+
+ if (clock > CLOCK_TOO_SLOW_HZ) {
+ /* Setup DLL Output TAP delay */
+ mask = OTAPDLYENA_MASK | OTAPDLYSEL_MASK;
+ val = (1 << OTAPDLYENA_SHIFT) |
+ (sdhci_am654->otap_del_sel << OTAPDLYSEL_SHIFT);
+ regmap_update_bits(sdhci_am654->base, PHY_CTRL4,
+ mask, val);
+ switch (clock) {
+ case 200000000:
+ sel50 = 0;
+ sel100 = 0;
+ break;
+ case 100000000:
+ sel50 = 0;
+ sel100 = 1;
+ break;
+ default:
+ sel50 = 1;
+ sel100 = 0;
+ }
+
+ /* Configure PHY DLL frequency */
+ mask = SEL50_MASK | SEL100_MASK;
+ val = (sel50 << SEL50_SHIFT) | (sel100 << SEL100_SHIFT);
+ regmap_update_bits(sdhci_am654->base, PHY_CTRL5,
+ mask, val);
+ /* Configure DLL TRIM */
+ mask = DLL_TRIM_ICP_MASK;
+ val = sdhci_am654->trm_icp << DLL_TRIM_ICP_SHIFT;
+
+ /* Configure DLL driver strength */
+ mask |= DR_TY_MASK;
+ val |= sdhci_am654->drv_strength << DR_TY_SHIFT;
+ regmap_update_bits(sdhci_am654->base, PHY_CTRL1,
+ mask, val);
+ /* Enable DLL */
+ regmap_update_bits(sdhci_am654->base, PHY_CTRL1,
+ ENDLL_MASK, 0x1 << ENDLL_SHIFT);
+ /*
+ * Poll for DLL ready. Use a one second timeout.
+ * Works in all experiments done so far
+ */
+ ret = regmap_read_poll_timeout(sdhci_am654->base,
+ PHY_STAT1, val,
+ val & DLLRDY_MASK,
+ 1000, 1000000);
+
+ sdhci_am654->dll_on = true;
+ }
+}
+
+static void sdhci_am654_set_power(struct sdhci_host *host, unsigned char mode,
+ unsigned short vdd)
+{
+ if (!IS_ERR(host->mmc->supply.vmmc)) {
+ struct mmc_host *mmc = host->mmc;
+
+ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
+ }
+ sdhci_set_power_noreg(host, mode, vdd);
+}
+
+struct sdhci_ops sdhci_am654_ops = {
+ .get_max_clock = sdhci_pltfm_clk_get_max_clock,
+ .get_timeout_clock = sdhci_pltfm_clk_get_max_clock,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
+ .set_bus_width = sdhci_set_bus_width,
+ .set_power = sdhci_am654_set_power,
+ .set_clock = sdhci_am654_set_clock,
+ .reset = sdhci_reset,
+};
+
+static const struct sdhci_pltfm_data sdhci_am654_pdata = {
+ .ops = &sdhci_am654_ops,
+ .quirks = SDHCI_QUIRK_INVERTED_WRITE_PROTECT |
+ SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+};
+
+static int sdhci_am654_init(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
+ u32 ctl_cfg_2 = 0;
+ u32 mask;
+ u32 val;
+ int ret;
+
+ /* Reset OTAP to default value */
+ mask = OTAPDLYENA_MASK | OTAPDLYSEL_MASK;
+ regmap_update_bits(sdhci_am654->base, PHY_CTRL4,
+ mask, 0x0);
+
+ regmap_read(sdhci_am654->base, PHY_STAT1, &val);
+ if (~val & CALDONE_MASK) {
+ /* Calibrate IO lines */
+ regmap_update_bits(sdhci_am654->base, PHY_CTRL1,
+ PDB_MASK, PDB_MASK);
+ ret = regmap_read_poll_timeout(sdhci_am654->base, PHY_STAT1,
+ val, val & CALDONE_MASK, 1, 20);
+ if (ret)
+ return ret;
+ }
+
+ /* Enable pins by setting IO mux to 0 */
+ regmap_update_bits(sdhci_am654->base, PHY_CTRL1,
+ IOMUX_ENABLE_MASK, 0);
+
+ /* Set slot type based on SD or eMMC */
+ if (host->mmc->caps & MMC_CAP_NONREMOVABLE)
+ ctl_cfg_2 = SLOTTYPE_EMBEDDED;
+
+ regmap_update_bits(sdhci_am654->base, CTL_CFG_2,
+ ctl_cfg_2, SLOTTYPE_MASK);
+
+ return sdhci_add_host(host);
+}
+
+static int sdhci_am654_get_of_property(struct platform_device *pdev,
+ struct sdhci_am654_data *sdhci_am654)
+{
+ struct device *dev = &pdev->dev;
+ int drv_strength;
+ int ret;
+
+ ret = device_property_read_u32(dev, "ti,trm-icp",
+ &sdhci_am654->trm_icp);
+ if (ret)
+ return ret;
+
+ ret = device_property_read_u32(dev, "ti,otap-del-sel",
+ &sdhci_am654->otap_del_sel);
+ if (ret)
+ return ret;
+
+ ret = device_property_read_u32(dev, "ti,driver-strength-ohm",
+ &drv_strength);
+ if (ret)
+ return ret;
+
+ switch (drv_strength) {
+ case 50:
+ sdhci_am654->drv_strength = DRIVER_STRENGTH_50_OHM;
+ break;
+ case 33:
+ sdhci_am654->drv_strength = DRIVER_STRENGTH_33_OHM;
+ break;
+ case 66:
+ sdhci_am654->drv_strength = DRIVER_STRENGTH_66_OHM;
+ break;
+ case 100:
+ sdhci_am654->drv_strength = DRIVER_STRENGTH_100_OHM;
+ break;
+ case 40:
+ sdhci_am654->drv_strength = DRIVER_STRENGTH_40_OHM;
+ break;
+ default:
+ dev_err(dev, "Invalid driver strength\n");
+ return -EINVAL;
+ }
+
+ sdhci_get_of_property(pdev);
+
+ return 0;
+}
+
+static int sdhci_am654_probe(struct platform_device *pdev)
+{
+ struct sdhci_pltfm_host *pltfm_host;
+ struct sdhci_am654_data *sdhci_am654;
+ struct sdhci_host *host;
+ struct resource *res;
+ struct clk *clk_xin;
+ struct device *dev = &pdev->dev;
+ void __iomem *base;
+ int ret;
+
+ host = sdhci_pltfm_init(pdev, &sdhci_am654_pdata, sizeof(*sdhci_am654));
+ if (IS_ERR(host))
+ return PTR_ERR(host);
+
+ pltfm_host = sdhci_priv(host);
+ sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
+
+ clk_xin = devm_clk_get(dev, "clk_xin");
+ if (IS_ERR(clk_xin)) {
+ dev_err(dev, "clk_xin clock not found.\n");
+ ret = PTR_ERR(clk_xin);
+ goto err_pltfm_free;
+ }
+
+ pltfm_host->clk = clk_xin;
+
+ /* Clocks are enabled using pm_runtime */
+ pm_runtime_enable(dev);
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(dev);
+ goto pm_runtime_disable;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(base)) {
+ ret = PTR_ERR(base);
+ goto pm_runtime_put;
+ }
+
+ sdhci_am654->base = devm_regmap_init_mmio(dev, base,
+ &sdhci_am654_regmap_config);
+ if (IS_ERR(sdhci_am654->base)) {
+ dev_err(dev, "Failed to initialize regmap\n");
+ ret = PTR_ERR(sdhci_am654->base);
+ goto pm_runtime_put;
+ }
+
+ ret = sdhci_am654_get_of_property(pdev, sdhci_am654);
+ if (ret)
+ goto pm_runtime_put;
+
+ ret = mmc_of_parse(host->mmc);
+ if (ret) {
+ dev_err(dev, "parsing dt failed (%d)\n", ret);
+ goto pm_runtime_put;
+ }
+
+ ret = sdhci_am654_init(host);
+ if (ret)
+ goto pm_runtime_put;
+
+ return 0;
+
+pm_runtime_put:
+ pm_runtime_put_sync(dev);
+pm_runtime_disable:
+ pm_runtime_disable(dev);
+err_pltfm_free:
+ sdhci_pltfm_free(pdev);
+ return ret;
+}
+
+static int sdhci_am654_remove(struct platform_device *pdev)
+{
+ struct sdhci_host *host = platform_get_drvdata(pdev);
+ int ret;
+
+ sdhci_remove_host(host, true);
+ ret = pm_runtime_put_sync(&pdev->dev);
+ if (ret < 0)
+ return ret;
+
+ pm_runtime_disable(&pdev->dev);
+ sdhci_pltfm_free(pdev);
+
+ return 0;
+}
+
+static const struct of_device_id sdhci_am654_of_match[] = {
+ { .compatible = "ti,am654-sdhci-5.1" },
+ { /* sentinel */ }
+};
+
+static struct platform_driver sdhci_am654_driver = {
+ .driver = {
+ .name = "sdhci-am654",
+ .of_match_table = sdhci_am654_of_match,
+ },
+ .probe = sdhci_am654_probe,
+ .remove = sdhci_am654_remove,
+};
+
+module_platform_driver(sdhci_am654_driver);
+
+MODULE_DESCRIPTION("Driver for SDHCI Controller on TI's AM654 devices");
+MODULE_AUTHOR("Faiz Abbas <faiz_abbas@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 1e317027bf53..c03529e3f01a 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -70,6 +70,7 @@
#define TMIO_STAT_DAT0 BIT(23) /* only known on R-Car so far */
#define TMIO_STAT_RXRDY BIT(24)
#define TMIO_STAT_TXRQ BIT(25)
+#define TMIO_STAT_ALWAYS_SET_27 BIT(27) /* only known on R-Car 2+ so far */
#define TMIO_STAT_ILL_FUNC BIT(29) /* only when !TMIO_MMC_HAS_IDLE_WAIT */
#define TMIO_STAT_SCLKDIVEN BIT(29) /* only when TMIO_MMC_HAS_IDLE_WAIT */
#define TMIO_STAT_CMD_BUSY BIT(30)
@@ -96,6 +97,7 @@
/* Define some IRQ masks */
/* This is the mask used at reset by the chip */
+#define TMIO_MASK_INIT_RCAR2 0x8b7f031d /* Initial value for R-Car Gen2+ */
#define TMIO_MASK_ALL 0x837f031d
#define TMIO_MASK_READOP (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND)
#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND)
@@ -153,6 +155,7 @@ struct tmio_mmc_host {
u32 sdcard_irq_mask;
u32 sdio_irq_mask;
unsigned int clk_cache;
+ u32 sdcard_irq_setbit_mask;
spinlock_t lock; /* protect host private data */
unsigned long last_req_ts;
@@ -267,6 +270,9 @@ static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host,
int addr, u32 val)
{
+ if (addr == CTL_IRQ_MASK || addr == CTL_STATUS)
+ val |= host->sdcard_irq_setbit_mask;
+
iowrite16(val & 0xffff, host->ctl + (addr << host->bus_shift));
iowrite16(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
}
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 8d64f6196f33..085a0fab769c 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -171,6 +171,18 @@ static void tmio_mmc_reset(struct tmio_mmc_host *host)
}
}
+static void tmio_mmc_hw_reset(struct mmc_host *mmc)
+{
+ struct tmio_mmc_host *host = mmc_priv(mmc);
+
+ host->reset(host);
+
+ tmio_mmc_abort_dma(host);
+
+ if (host->hw_reset)
+ host->hw_reset(host);
+}
+
static void tmio_mmc_reset_work(struct work_struct *work)
{
struct tmio_mmc_host *host = container_of(work, struct tmio_mmc_host,
@@ -209,12 +221,11 @@ static void tmio_mmc_reset_work(struct work_struct *work)
spin_unlock_irqrestore(&host->lock, flags);
- host->reset(host);
+ tmio_mmc_hw_reset(host->mmc);
/* Ready for new calls */
host->mrq = NULL;
- tmio_mmc_abort_dma(host);
mmc_request_done(host->mmc, mrq);
}
@@ -696,14 +707,6 @@ static int tmio_mmc_start_data(struct tmio_mmc_host *host,
return 0;
}
-static void tmio_mmc_hw_reset(struct mmc_host *mmc)
-{
- struct tmio_mmc_host *host = mmc_priv(mmc);
-
- if (host->hw_reset)
- host->hw_reset(host);
-}
-
static int tmio_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
{
struct tmio_mmc_host *host = mmc_priv(mmc);
@@ -734,8 +737,6 @@ static int tmio_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
ret = mmc_send_tuning(mmc, opcode, NULL);
if (ret == 0)
set_bit(i, host->taps);
-
- usleep_range(1000, 1200);
}
ret = host->select_tuning(host);
@@ -1167,11 +1168,13 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host)
if (ret < 0)
return ret;
- if (pdata->flags & TMIO_MMC_USE_GPIO_CD) {
- ret = mmc_gpio_request_cd(mmc, pdata->cd_gpio, 0);
- if (ret)
- return ret;
- }
+ /*
+ * Look for a card detect GPIO, if it fails with anything
+ * else than a probe deferral, just live without it.
+ */
+ ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL);
+ if (ret == -EPROBE_DEFER)
+ return ret;
mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities;
mmc->caps2 |= pdata->capabilities2;
@@ -1228,7 +1231,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host)
_host->sdio_irq_mask = TMIO_SDIO_MASK_ALL;
_host->set_clock(_host, 0);
- _host->reset(_host);
+ tmio_mmc_hw_reset(mmc);
_host->sdcard_irq_mask = sd_ctrl_read16_and_16_as_32(_host, CTL_IRQ_MASK);
tmio_mmc_disable_mmc_irqs(_host, TMIO_MASK_ALL);
@@ -1328,8 +1331,8 @@ int tmio_mmc_host_runtime_resume(struct device *dev)
{
struct tmio_mmc_host *host = dev_get_drvdata(dev);
- host->reset(host);
tmio_mmc_clk_enable(host);
+ tmio_mmc_hw_reset(host->mmc);
if (host->clk_cache)
host->set_clock(host, host->clk_cache);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 77896c11f6f3..be81b319b0dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -669,11 +669,9 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
priv->numa_node = dev_to_node(&dev->pdev->dev);
- priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
- if (!priv->dbg_root) {
- dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n");
- return -ENOMEM;
- }
+ if (mlx5_debugfs_root)
+ priv->dbg_root =
+ debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root);
err = mlx5_pci_enable_device(dev);
if (err) {
diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 59dd50866932..5477a014e1fb 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1322,7 +1322,7 @@ static int ath6kl_cfg80211_set_default_key(struct wiphy *wiphy,
struct ath6kl_vif *vif = netdev_priv(ndev);
struct ath6kl_key *key = NULL;
u8 key_usage;
- enum crypto_type key_type = NONE_CRYPT;
+ enum ath6kl_crypto_type key_type = NONE_CRYPT;
ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index);
diff --git a/drivers/net/wireless/ath/ath6kl/common.h b/drivers/net/wireless/ath/ath6kl/common.h
index 4f82e8632d37..d6e5234f67a1 100644
--- a/drivers/net/wireless/ath/ath6kl/common.h
+++ b/drivers/net/wireless/ath/ath6kl/common.h
@@ -67,7 +67,7 @@ struct ath6kl_llc_snap_hdr {
__be16 eth_type;
} __packed;
-enum crypto_type {
+enum ath6kl_crypto_type {
NONE_CRYPT = 0x01,
WEP_CRYPT = 0x02,
TKIP_CRYPT = 0x04,
diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index 777acc564ac9..9d7ac1ab2d02 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c
@@ -1849,9 +1849,9 @@ int ath6kl_wmi_connect_cmd(struct wmi *wmi, u8 if_idx,
enum network_type nw_type,
enum dot11_auth_mode dot11_auth_mode,
enum auth_mode auth_mode,
- enum crypto_type pairwise_crypto,
+ enum ath6kl_crypto_type pairwise_crypto,
u8 pairwise_crypto_len,
- enum crypto_type group_crypto,
+ enum ath6kl_crypto_type group_crypto,
u8 group_crypto_len, int ssid_len, u8 *ssid,
u8 *bssid, u16 channel, u32 ctrl_flags,
u8 nw_subtype)
@@ -2301,7 +2301,7 @@ int ath6kl_wmi_disctimeout_cmd(struct wmi *wmi, u8 if_idx, u8 timeout)
}
int ath6kl_wmi_addkey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index,
- enum crypto_type key_type,
+ enum ath6kl_crypto_type key_type,
u8 key_usage, u8 key_len,
u8 *key_rsc, unsigned int key_rsc_len,
u8 *key_material,
diff --git a/drivers/net/wireless/ath/ath6kl/wmi.h b/drivers/net/wireless/ath/ath6kl/wmi.h
index a60bb49fe920..784940ba4c90 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.h
+++ b/drivers/net/wireless/ath/ath6kl/wmi.h
@@ -2556,9 +2556,9 @@ int ath6kl_wmi_connect_cmd(struct wmi *wmi, u8 if_idx,
enum network_type nw_type,
enum dot11_auth_mode dot11_auth_mode,
enum auth_mode auth_mode,
- enum crypto_type pairwise_crypto,
+ enum ath6kl_crypto_type pairwise_crypto,
u8 pairwise_crypto_len,
- enum crypto_type group_crypto,
+ enum ath6kl_crypto_type group_crypto,
u8 group_crypto_len, int ssid_len, u8 *ssid,
u8 *bssid, u16 channel, u32 ctrl_flags,
u8 nw_subtype);
@@ -2610,7 +2610,7 @@ int ath6kl_wmi_config_debug_module_cmd(struct wmi *wmi, u32 valid, u32 config);
int ath6kl_wmi_get_stats_cmd(struct wmi *wmi, u8 if_idx);
int ath6kl_wmi_addkey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index,
- enum crypto_type key_type,
+ enum ath6kl_crypto_type key_type,
u8 key_usage, u8 key_len,
u8 *key_rsc, unsigned int key_rsc_len,
u8 *key_material,
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 9d36473dc2a2..5e27918e4624 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -112,4 +112,9 @@ config OF_PMEM
Select Y if unsure.
+config NVDIMM_KEYS
+ def_bool y
+ depends on ENCRYPTED_KEYS
+ depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m
+
endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index e8847045dac0..6f2a088afad6 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -27,3 +27,4 @@ libnvdimm-$(CONFIG_ND_CLAIM) += claim.o
libnvdimm-$(CONFIG_BTT) += btt_devs.o
libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o
+libnvdimm-$(CONFIG_NVDIMM_KEYS) += security.o
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index f1fb39921236..dca5f7a805cb 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -331,6 +331,12 @@ struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
}
EXPORT_SYMBOL_GPL(to_nvdimm_bus);
+struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm)
+{
+ return to_nvdimm_bus(nvdimm->dev.parent);
+}
+EXPORT_SYMBOL_GPL(nvdimm_to_bus);
+
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nd_desc)
{
@@ -344,12 +350,12 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
init_waitqueue_head(&nvdimm_bus->probe_wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
- mutex_init(&nvdimm_bus->reconfig_mutex);
- badrange_init(&nvdimm_bus->badrange);
if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus);
return NULL;
}
+ mutex_init(&nvdimm_bus->reconfig_mutex);
+ badrange_init(&nvdimm_bus->badrange);
nvdimm_bus->nd_desc = nd_desc;
nvdimm_bus->dev.parent = parent;
nvdimm_bus->dev.release = nvdimm_bus_release;
@@ -387,9 +393,24 @@ static int child_unregister(struct device *dev, void *data)
* i.e. remove classless children
*/
if (dev->class)
- /* pass */;
- else
- nd_device_unregister(dev, ND_SYNC);
+ return 0;
+
+ if (is_nvdimm(dev)) {
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ bool dev_put = false;
+
+ /* We are shutting down. Make state frozen artificially. */
+ nvdimm_bus_lock(dev);
+ nvdimm->sec.state = NVDIMM_SECURITY_FROZEN;
+ if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags))
+ dev_put = true;
+ nvdimm_bus_unlock(dev);
+ cancel_delayed_work_sync(&nvdimm->dwork);
+ if (dev_put)
+ put_device(dev);
+ }
+ nd_device_unregister(dev, ND_SYNC);
+
return 0;
}
@@ -902,7 +923,7 @@ static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus,
/* ask the bus provider if it would like to block this request */
if (nd_desc->clear_to_send) {
- int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd);
+ int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd, data);
if (rc)
return rc;
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 9899c97138a3..0cf58cabc9ed 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -34,7 +34,11 @@ static int nvdimm_probe(struct device *dev)
return rc;
}
- /* reset locked, to be validated below... */
+ /*
+ * The locked status bit reflects explicit status codes from the
+ * label reading commands, revalidate it each time the driver is
+ * activated and re-reads the label area.
+ */
nvdimm_clear_locked(dev);
ndd = kzalloc(sizeof(*ndd), GFP_KERNEL);
@@ -52,6 +56,16 @@ static int nvdimm_probe(struct device *dev)
kref_init(&ndd->kref);
/*
+ * Attempt to unlock, if the DIMM supports security commands,
+ * otherwise the locked indication is determined by explicit
+ * status codes from the label reading commands.
+ */
+ rc = nvdimm_security_unlock(dev);
+ if (rc < 0)
+ dev_dbg(dev, "failed to unlock dimm: %d\n", rc);
+
+
+ /*
* EACCES failures reading the namespace label-area-properties
* are interpreted as the DIMM capacity being locked but the
* namespace labels themselves being accessible.
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 6c3de2317390..4890310df874 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -370,23 +370,172 @@ static ssize_t available_slots_show(struct device *dev,
}
static DEVICE_ATTR_RO(available_slots);
+__weak ssize_t security_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+
+ switch (nvdimm->sec.state) {
+ case NVDIMM_SECURITY_DISABLED:
+ return sprintf(buf, "disabled\n");
+ case NVDIMM_SECURITY_UNLOCKED:
+ return sprintf(buf, "unlocked\n");
+ case NVDIMM_SECURITY_LOCKED:
+ return sprintf(buf, "locked\n");
+ case NVDIMM_SECURITY_FROZEN:
+ return sprintf(buf, "frozen\n");
+ case NVDIMM_SECURITY_OVERWRITE:
+ return sprintf(buf, "overwrite\n");
+ default:
+ return -ENOTTY;
+ }
+
+ return -ENOTTY;
+}
+
+#define OPS \
+ C( OP_FREEZE, "freeze", 1), \
+ C( OP_DISABLE, "disable", 2), \
+ C( OP_UPDATE, "update", 3), \
+ C( OP_ERASE, "erase", 2), \
+ C( OP_OVERWRITE, "overwrite", 2), \
+ C( OP_MASTER_UPDATE, "master_update", 3), \
+ C( OP_MASTER_ERASE, "master_erase", 2)
+#undef C
+#define C(a, b, c) a
+enum nvdimmsec_op_ids { OPS };
+#undef C
+#define C(a, b, c) { b, c }
+static struct {
+ const char *name;
+ int args;
+} ops[] = { OPS };
+#undef C
+
+#define SEC_CMD_SIZE 32
+#define KEY_ID_SIZE 10
+
+static ssize_t __security_store(struct device *dev, const char *buf, size_t len)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ ssize_t rc;
+ char cmd[SEC_CMD_SIZE+1], keystr[KEY_ID_SIZE+1],
+ nkeystr[KEY_ID_SIZE+1];
+ unsigned int key, newkey;
+ int i;
+
+ if (atomic_read(&nvdimm->busy))
+ return -EBUSY;
+
+ rc = sscanf(buf, "%"__stringify(SEC_CMD_SIZE)"s"
+ " %"__stringify(KEY_ID_SIZE)"s"
+ " %"__stringify(KEY_ID_SIZE)"s",
+ cmd, keystr, nkeystr);
+ if (rc < 1)
+ return -EINVAL;
+ for (i = 0; i < ARRAY_SIZE(ops); i++)
+ if (sysfs_streq(cmd, ops[i].name))
+ break;
+ if (i >= ARRAY_SIZE(ops))
+ return -EINVAL;
+ if (ops[i].args > 1)
+ rc = kstrtouint(keystr, 0, &key);
+ if (rc >= 0 && ops[i].args > 2)
+ rc = kstrtouint(nkeystr, 0, &newkey);
+ if (rc < 0)
+ return rc;
+
+ if (i == OP_FREEZE) {
+ dev_dbg(dev, "freeze\n");
+ rc = nvdimm_security_freeze(nvdimm);
+ } else if (i == OP_DISABLE) {
+ dev_dbg(dev, "disable %u\n", key);
+ rc = nvdimm_security_disable(nvdimm, key);
+ } else if (i == OP_UPDATE) {
+ dev_dbg(dev, "update %u %u\n", key, newkey);
+ rc = nvdimm_security_update(nvdimm, key, newkey, NVDIMM_USER);
+ } else if (i == OP_ERASE) {
+ dev_dbg(dev, "erase %u\n", key);
+ rc = nvdimm_security_erase(nvdimm, key, NVDIMM_USER);
+ } else if (i == OP_OVERWRITE) {
+ dev_dbg(dev, "overwrite %u\n", key);
+ rc = nvdimm_security_overwrite(nvdimm, key);
+ } else if (i == OP_MASTER_UPDATE) {
+ dev_dbg(dev, "master_update %u %u\n", key, newkey);
+ rc = nvdimm_security_update(nvdimm, key, newkey,
+ NVDIMM_MASTER);
+ } else if (i == OP_MASTER_ERASE) {
+ dev_dbg(dev, "master_erase %u\n", key);
+ rc = nvdimm_security_erase(nvdimm, key,
+ NVDIMM_MASTER);
+ } else
+ return -EINVAL;
+
+ if (rc == 0)
+ rc = len;
+ return rc;
+}
+
+static ssize_t security_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+
+{
+ ssize_t rc;
+
+ /*
+ * Require all userspace triggered security management to be
+ * done while probing is idle and the DIMM is not in active use
+ * in any region.
+ */
+ device_lock(dev);
+ nvdimm_bus_lock(dev);
+ wait_nvdimm_bus_probe_idle(dev);
+ rc = __security_store(dev, buf, len);
+ nvdimm_bus_unlock(dev);
+ device_unlock(dev);
+
+ return rc;
+}
+static DEVICE_ATTR_RW(security);
+
static struct attribute *nvdimm_attributes[] = {
&dev_attr_state.attr,
&dev_attr_flags.attr,
&dev_attr_commands.attr,
&dev_attr_available_slots.attr,
+ &dev_attr_security.attr,
NULL,
};
+static umode_t nvdimm_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, typeof(*dev), kobj);
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+
+ if (a != &dev_attr_security.attr)
+ return a->mode;
+ if (nvdimm->sec.state < 0)
+ return 0;
+ /* Are there any state mutation ops? */
+ if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable
+ || nvdimm->sec.ops->change_key
+ || nvdimm->sec.ops->erase
+ || nvdimm->sec.ops->overwrite)
+ return a->mode;
+ return 0444;
+}
+
struct attribute_group nvdimm_attribute_group = {
.attrs = nvdimm_attributes,
+ .is_visible = nvdimm_visible,
};
EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
-struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
- const struct attribute_group **groups, unsigned long flags,
- unsigned long cmd_mask, int num_flush,
- struct resource *flush_wpq)
+struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
+ void *provider_data, const struct attribute_group **groups,
+ unsigned long flags, unsigned long cmd_mask, int num_flush,
+ struct resource *flush_wpq, const char *dimm_id,
+ const struct nvdimm_security_ops *sec_ops)
{
struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
struct device *dev;
@@ -399,6 +548,8 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
kfree(nvdimm);
return NULL;
}
+
+ nvdimm->dimm_id = dimm_id;
nvdimm->provider_data = provider_data;
nvdimm->flags = flags;
nvdimm->cmd_mask = cmd_mask;
@@ -411,11 +562,60 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
dev->type = &nvdimm_device_type;
dev->devt = MKDEV(nvdimm_major, nvdimm->id);
dev->groups = groups;
+ nvdimm->sec.ops = sec_ops;
+ nvdimm->sec.overwrite_tmo = 0;
+ INIT_DELAYED_WORK(&nvdimm->dwork, nvdimm_security_overwrite_query);
+ /*
+ * Security state must be initialized before device_add() for
+ * attribute visibility.
+ */
+ /* get security state and extended (master) state */
+ nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+ nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER);
nd_device_register(dev);
return nvdimm;
}
-EXPORT_SYMBOL_GPL(nvdimm_create);
+EXPORT_SYMBOL_GPL(__nvdimm_create);
+
+int nvdimm_security_setup_events(struct nvdimm *nvdimm)
+{
+ nvdimm->sec.overwrite_state = sysfs_get_dirent(nvdimm->dev.kobj.sd,
+ "security");
+ if (!nvdimm->sec.overwrite_state)
+ return -ENODEV;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_security_setup_events);
+
+int nvdimm_in_overwrite(struct nvdimm *nvdimm)
+{
+ return test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
+}
+EXPORT_SYMBOL_GPL(nvdimm_in_overwrite);
+
+int nvdimm_security_freeze(struct nvdimm *nvdimm)
+{
+ int rc;
+
+ WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev));
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->freeze)
+ return -EOPNOTSUPP;
+
+ if (nvdimm->sec.state < 0)
+ return -EIO;
+
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ dev_warn(&nvdimm->dev, "Overwrite operation in progress.\n");
+ return -EBUSY;
+ }
+
+ rc = nvdimm->sec.ops->freeze(nvdimm);
+ nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+
+ return rc;
+}
int alias_dpa_busy(struct device *dev, void *data)
{
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 750dbaa6ce82..a11bf4e6b451 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -944,8 +944,7 @@ static int __blk_label_update(struct nd_region *nd_region,
victims = 0;
if (old_num_resources) {
/* convert old local-label-map to dimm-slot victim-map */
- victim_map = kcalloc(BITS_TO_LONGS(nslot), sizeof(long),
- GFP_KERNEL);
+ victim_map = bitmap_zalloc(nslot, GFP_KERNEL);
if (!victim_map)
return -ENOMEM;
@@ -968,7 +967,7 @@ static int __blk_label_update(struct nd_region *nd_region,
/* don't allow updates that consume the last label */
if (nfree - alloc < 0 || nfree - alloc + victims < 1) {
dev_info(&nsblk->common.dev, "insufficient label space\n");
- kfree(victim_map);
+ bitmap_free(victim_map);
return -ENOSPC;
}
/* from here on we need to abort on error */
@@ -1140,7 +1139,7 @@ static int __blk_label_update(struct nd_region *nd_region,
out:
kfree(old_res_list);
- kfree(victim_map);
+ bitmap_free(victim_map);
return rc;
abort:
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 681af3a8fd62..4b077555ac70 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -270,11 +270,10 @@ static ssize_t __alt_name_store(struct device *dev, const char *buf,
if (dev->driver || to_ndns(dev)->claim)
return -EBUSY;
- input = kmemdup(buf, len + 1, GFP_KERNEL);
+ input = kstrndup(buf, len, GFP_KERNEL);
if (!input)
return -ENOMEM;
- input[len] = '\0';
pos = strim(input);
if (strlen(pos) + 1 > NSLABEL_NAME_LEN) {
rc = -EINVAL;
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index d0c621b32f72..2b2cf4e554d3 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -21,6 +21,7 @@
extern struct list_head nvdimm_bus_list;
extern struct mutex nvdimm_bus_list_mutex;
extern int nvdimm_major;
+extern struct workqueue_struct *nvdimm_wq;
struct nvdimm_bus {
struct nvdimm_bus_descriptor *nd_desc;
@@ -41,8 +42,64 @@ struct nvdimm {
atomic_t busy;
int id, num_flush;
struct resource *flush_wpq;
+ const char *dimm_id;
+ struct {
+ const struct nvdimm_security_ops *ops;
+ enum nvdimm_security_state state;
+ enum nvdimm_security_state ext_state;
+ unsigned int overwrite_tmo;
+ struct kernfs_node *overwrite_state;
+ } sec;
+ struct delayed_work dwork;
};
+static inline enum nvdimm_security_state nvdimm_security_state(
+ struct nvdimm *nvdimm, bool master)
+{
+ if (!nvdimm->sec.ops)
+ return -ENXIO;
+
+ return nvdimm->sec.ops->state(nvdimm, master);
+}
+int nvdimm_security_freeze(struct nvdimm *nvdimm);
+#if IS_ENABLED(CONFIG_NVDIMM_KEYS)
+int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid);
+int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
+ unsigned int new_keyid,
+ enum nvdimm_passphrase_type pass_type);
+int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
+ enum nvdimm_passphrase_type pass_type);
+int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid);
+void nvdimm_security_overwrite_query(struct work_struct *work);
+#else
+static inline int nvdimm_security_disable(struct nvdimm *nvdimm,
+ unsigned int keyid)
+{
+ return -EOPNOTSUPP;
+}
+static inline int nvdimm_security_update(struct nvdimm *nvdimm,
+ unsigned int keyid,
+ unsigned int new_keyid,
+ enum nvdimm_passphrase_type pass_type)
+{
+ return -EOPNOTSUPP;
+}
+static inline int nvdimm_security_erase(struct nvdimm *nvdimm,
+ unsigned int keyid,
+ enum nvdimm_passphrase_type pass_type)
+{
+ return -EOPNOTSUPP;
+}
+static inline int nvdimm_security_overwrite(struct nvdimm *nvdimm,
+ unsigned int keyid)
+{
+ return -EOPNOTSUPP;
+}
+static inline void nvdimm_security_overwrite_query(struct work_struct *work)
+{
+}
+#endif
+
/**
* struct blk_alloc_info - tracking info for BLK dpa scanning
* @nd_mapping: blk region mapping boundaries
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index e79cc8e5c114..cfde992684e7 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -250,6 +250,14 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
void nvdimm_set_aliasing(struct device *dev);
void nvdimm_set_locked(struct device *dev);
void nvdimm_clear_locked(struct device *dev);
+#if IS_ENABLED(CONFIG_NVDIMM_KEYS)
+int nvdimm_security_unlock(struct device *dev);
+#else
+static inline int nvdimm_security_unlock(struct device *dev)
+{
+ return 0;
+}
+#endif
struct nd_btt *to_nd_btt(struct device *dev);
struct nd_gen_sb {
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index d28418b05a04..bc2f700feef8 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -396,7 +396,7 @@ static int pmem_attach_disk(struct device *dev,
return -EBUSY;
}
- q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL);
+ q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
if (!q)
return -ENOMEM;
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index e7377f1028ef..e2818f94f292 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -79,6 +79,11 @@ int nd_region_activate(struct nd_region *nd_region)
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ nvdimm_bus_unlock(&nd_region->dev);
+ return -EBUSY;
+ }
+
/* at least one null hint slot per-dimm for the "no-hint" case */
flush_data_size += sizeof(void *);
num_flush = min_not_zero(num_flush, nvdimm->num_flush);
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
new file mode 100644
index 000000000000..f8bb746a549f
--- /dev/null
+++ b/drivers/nvdimm/security.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. All rights reserved. */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/ndctl.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/cred.h>
+#include <linux/key.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
+#include <keys/encrypted-type.h>
+#include "nd-core.h"
+#include "nd.h"
+
+#define NVDIMM_BASE_KEY 0
+#define NVDIMM_NEW_KEY 1
+
+static bool key_revalidate = true;
+module_param(key_revalidate, bool, 0444);
+MODULE_PARM_DESC(key_revalidate, "Require key validation at init.");
+
+static void *key_data(struct key *key)
+{
+ struct encrypted_key_payload *epayload = dereference_key_locked(key);
+
+ lockdep_assert_held_read(&key->sem);
+
+ return epayload->decrypted_data;
+}
+
+static void nvdimm_put_key(struct key *key)
+{
+ if (!key)
+ return;
+
+ up_read(&key->sem);
+ key_put(key);
+}
+
+/*
+ * Retrieve kernel key for DIMM and request from user space if
+ * necessary. Returns a key held for read and must be put by
+ * nvdimm_put_key() before the usage goes out of scope.
+ */
+static struct key *nvdimm_request_key(struct nvdimm *nvdimm)
+{
+ struct key *key = NULL;
+ static const char NVDIMM_PREFIX[] = "nvdimm:";
+ char desc[NVDIMM_KEY_DESC_LEN + sizeof(NVDIMM_PREFIX)];
+ struct device *dev = &nvdimm->dev;
+
+ sprintf(desc, "%s%s", NVDIMM_PREFIX, nvdimm->dimm_id);
+ key = request_key(&key_type_encrypted, desc, "");
+ if (IS_ERR(key)) {
+ if (PTR_ERR(key) == -ENOKEY)
+ dev_dbg(dev, "request_key() found no key\n");
+ else
+ dev_dbg(dev, "request_key() upcall failed\n");
+ key = NULL;
+ } else {
+ struct encrypted_key_payload *epayload;
+
+ down_read(&key->sem);
+ epayload = dereference_key_locked(key);
+ if (epayload->decrypted_datalen != NVDIMM_PASSPHRASE_LEN) {
+ up_read(&key->sem);
+ key_put(key);
+ key = NULL;
+ }
+ }
+
+ return key;
+}
+
+static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm,
+ key_serial_t id, int subclass)
+{
+ key_ref_t keyref;
+ struct key *key;
+ struct encrypted_key_payload *epayload;
+ struct device *dev = &nvdimm->dev;
+
+ keyref = lookup_user_key(id, 0, 0);
+ if (IS_ERR(keyref))
+ return NULL;
+
+ key = key_ref_to_ptr(keyref);
+ if (key->type != &key_type_encrypted) {
+ key_put(key);
+ return NULL;
+ }
+
+ dev_dbg(dev, "%s: key found: %#x\n", __func__, key_serial(key));
+
+ down_read_nested(&key->sem, subclass);
+ epayload = dereference_key_locked(key);
+ if (epayload->decrypted_datalen != NVDIMM_PASSPHRASE_LEN) {
+ up_read(&key->sem);
+ key_put(key);
+ key = NULL;
+ }
+ return key;
+}
+
+static struct key *nvdimm_key_revalidate(struct nvdimm *nvdimm)
+{
+ struct key *key;
+ int rc;
+
+ if (!nvdimm->sec.ops->change_key)
+ return NULL;
+
+ key = nvdimm_request_key(nvdimm);
+ if (!key)
+ return NULL;
+
+ /*
+ * Send the same key to the hardware as new and old key to
+ * verify that the key is good.
+ */
+ rc = nvdimm->sec.ops->change_key(nvdimm, key_data(key),
+ key_data(key), NVDIMM_USER);
+ if (rc < 0) {
+ nvdimm_put_key(key);
+ key = NULL;
+ }
+ return key;
+}
+
+static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key = NULL;
+ int rc;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->unlock
+ || nvdimm->sec.state < 0)
+ return -EIO;
+
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ dev_dbg(dev, "Security operation in progress.\n");
+ return -EBUSY;
+ }
+
+ /*
+ * If the pre-OS has unlocked the DIMM, attempt to send the key
+ * from request_key() to the hardware for verification. Failure
+ * to revalidate the key against the hardware results in a
+ * freeze of the security configuration. I.e. if the OS does not
+ * have the key, security is being managed pre-OS.
+ */
+ if (nvdimm->sec.state == NVDIMM_SECURITY_UNLOCKED) {
+ if (!key_revalidate)
+ return 0;
+
+ key = nvdimm_key_revalidate(nvdimm);
+ if (!key)
+ return nvdimm_security_freeze(nvdimm);
+ } else
+ key = nvdimm_request_key(nvdimm);
+
+ if (!key)
+ return -ENOKEY;
+
+ rc = nvdimm->sec.ops->unlock(nvdimm, key_data(key));
+ dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key),
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(key);
+ nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+ return rc;
+}
+
+int nvdimm_security_unlock(struct device *dev)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ int rc;
+
+ nvdimm_bus_lock(dev);
+ rc = __nvdimm_security_unlock(nvdimm);
+ nvdimm_bus_unlock(dev);
+ return rc;
+}
+
+int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key;
+ int rc;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->disable
+ || nvdimm->sec.state < 0)
+ return -EOPNOTSUPP;
+
+ if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
+ dev_dbg(dev, "Incorrect security state: %d\n",
+ nvdimm->sec.state);
+ return -EIO;
+ }
+
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ dev_dbg(dev, "Security operation in progress.\n");
+ return -EBUSY;
+ }
+
+ key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
+ if (!key)
+ return -ENOKEY;
+
+ rc = nvdimm->sec.ops->disable(nvdimm, key_data(key));
+ dev_dbg(dev, "key: %d disable: %s\n", key_serial(key),
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(key);
+ nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+ return rc;
+}
+
+int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
+ unsigned int new_keyid,
+ enum nvdimm_passphrase_type pass_type)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key, *newkey;
+ int rc;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->change_key
+ || nvdimm->sec.state < 0)
+ return -EOPNOTSUPP;
+
+ if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
+ dev_dbg(dev, "Incorrect security state: %d\n",
+ nvdimm->sec.state);
+ return -EIO;
+ }
+
+ if (keyid == 0)
+ key = NULL;
+ else {
+ key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
+ if (!key)
+ return -ENOKEY;
+ }
+
+ newkey = nvdimm_lookup_user_key(nvdimm, new_keyid, NVDIMM_NEW_KEY);
+ if (!newkey) {
+ nvdimm_put_key(key);
+ return -ENOKEY;
+ }
+
+ rc = nvdimm->sec.ops->change_key(nvdimm, key ? key_data(key) : NULL,
+ key_data(newkey), pass_type);
+ dev_dbg(dev, "key: %d %d update%s: %s\n",
+ key_serial(key), key_serial(newkey),
+ pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(newkey);
+ nvdimm_put_key(key);
+ if (pass_type == NVDIMM_MASTER)
+ nvdimm->sec.ext_state = nvdimm_security_state(nvdimm,
+ NVDIMM_MASTER);
+ else
+ nvdimm->sec.state = nvdimm_security_state(nvdimm,
+ NVDIMM_USER);
+ return rc;
+}
+
+int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
+ enum nvdimm_passphrase_type pass_type)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key;
+ int rc;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->erase
+ || nvdimm->sec.state < 0)
+ return -EOPNOTSUPP;
+
+ if (atomic_read(&nvdimm->busy)) {
+ dev_dbg(dev, "Unable to secure erase while DIMM active.\n");
+ return -EBUSY;
+ }
+
+ if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
+ dev_dbg(dev, "Incorrect security state: %d\n",
+ nvdimm->sec.state);
+ return -EIO;
+ }
+
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ dev_dbg(dev, "Security operation in progress.\n");
+ return -EBUSY;
+ }
+
+ if (nvdimm->sec.ext_state != NVDIMM_SECURITY_UNLOCKED
+ && pass_type == NVDIMM_MASTER) {
+ dev_dbg(dev,
+ "Attempt to secure erase in wrong master state.\n");
+ return -EOPNOTSUPP;
+ }
+
+ key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
+ if (!key)
+ return -ENOKEY;
+
+ rc = nvdimm->sec.ops->erase(nvdimm, key_data(key), pass_type);
+ dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key),
+ pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(key);
+ nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+ return rc;
+}
+
+int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
+{
+ struct device *dev = &nvdimm->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct key *key;
+ int rc;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->overwrite
+ || nvdimm->sec.state < 0)
+ return -EOPNOTSUPP;
+
+ if (atomic_read(&nvdimm->busy)) {
+ dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
+ return -EBUSY;
+ }
+
+ if (dev->driver == NULL) {
+ dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
+ return -EINVAL;
+ }
+
+ if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
+ dev_dbg(dev, "Incorrect security state: %d\n",
+ nvdimm->sec.state);
+ return -EIO;
+ }
+
+ if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+ dev_dbg(dev, "Security operation in progress.\n");
+ return -EBUSY;
+ }
+
+ if (keyid == 0)
+ key = NULL;
+ else {
+ key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
+ if (!key)
+ return -ENOKEY;
+ }
+
+ rc = nvdimm->sec.ops->overwrite(nvdimm, key ? key_data(key) : NULL);
+ dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key),
+ rc == 0 ? "success" : "fail");
+
+ nvdimm_put_key(key);
+ if (rc == 0) {
+ set_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
+ set_bit(NDD_WORK_PENDING, &nvdimm->flags);
+ nvdimm->sec.state = NVDIMM_SECURITY_OVERWRITE;
+ /*
+ * Make sure we don't lose device while doing overwrite
+ * query.
+ */
+ get_device(dev);
+ queue_delayed_work(system_wq, &nvdimm->dwork, 0);
+ }
+
+ return rc;
+}
+
+void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm)
+{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev);
+ int rc;
+ unsigned int tmo;
+
+ /* The bus lock should be held at the top level of the call stack */
+ lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
+
+ /*
+ * Abort and release device if we no longer have the overwrite
+ * flag set. It means the work has been canceled.
+ */
+ if (!test_bit(NDD_WORK_PENDING, &nvdimm->flags))
+ return;
+
+ tmo = nvdimm->sec.overwrite_tmo;
+
+ if (!nvdimm->sec.ops || !nvdimm->sec.ops->query_overwrite
+ || nvdimm->sec.state < 0)
+ return;
+
+ rc = nvdimm->sec.ops->query_overwrite(nvdimm);
+ if (rc == -EBUSY) {
+
+ /* setup delayed work again */
+ tmo += 10;
+ queue_delayed_work(system_wq, &nvdimm->dwork, tmo * HZ);
+ nvdimm->sec.overwrite_tmo = min(15U * 60U, tmo);
+ return;
+ }
+
+ if (rc < 0)
+ dev_dbg(&nvdimm->dev, "overwrite failed\n");
+ else
+ dev_dbg(&nvdimm->dev, "overwrite completed\n");
+
+ if (nvdimm->sec.overwrite_state)
+ sysfs_notify_dirent(nvdimm->sec.overwrite_state);
+ nvdimm->sec.overwrite_tmo = 0;
+ clear_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
+ clear_bit(NDD_WORK_PENDING, &nvdimm->flags);
+ put_device(&nvdimm->dev);
+ nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+ nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER);
+}
+
+void nvdimm_security_overwrite_query(struct work_struct *work)
+{
+ struct nvdimm *nvdimm =
+ container_of(work, typeof(*nvdimm), dwork.work);
+
+ nvdimm_bus_lock(&nvdimm->dev);
+ __nvdimm_security_overwrite_query(nvdimm);
+ nvdimm_bus_unlock(&nvdimm->dev);
+}
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 88a8b5916624..0f345e207675 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -57,3 +57,18 @@ config NVME_FC
from https://github.com/linux-nvme/nvme-cli.
If unsure, say N.
+
+config NVME_TCP
+ tristate "NVM Express over Fabrics TCP host driver"
+ depends on INET
+ depends on BLK_DEV_NVME
+ select NVME_FABRICS
+ help
+ This provides support for the NVMe over Fabrics protocol using
+ the TCP transport. This allows you to use remote block devices
+ exported using the NVMe protocol set.
+
+ To configure a NVMe over Fabrics controller use the nvme-cli tool
+ from https://github.com/linux-nvme/nvme-cli.
+
+ If unsure, say N.
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index aea459c65ae1..8a4b671c5f0c 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
obj-$(CONFIG_NVME_FC) += nvme-fc.o
+obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
nvme-core-y := core.o
nvme-core-$(CONFIG_TRACING) += trace.o
@@ -21,3 +22,5 @@ nvme-fabrics-y += fabrics.o
nvme-rdma-y += rdma.o
nvme-fc-y += fc.o
+
+nvme-tcp-y += tcp.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 962012135b62..08f2c92602f4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -97,7 +97,6 @@ static dev_t nvme_chr_devt;
static struct class *nvme_class;
static struct class *nvme_subsys_class;
-static void nvme_ns_remove(struct nvme_ns *ns);
static int nvme_revalidate_disk(struct gendisk *disk);
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
@@ -245,12 +244,31 @@ static inline bool nvme_req_needs_retry(struct request *req)
return true;
}
+static void nvme_retry_req(struct request *req)
+{
+ struct nvme_ns *ns = req->q->queuedata;
+ unsigned long delay = 0;
+ u16 crd;
+
+ /* The mask and shift result must be <= 3 */
+ crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11;
+ if (ns && crd)
+ delay = ns->ctrl->crdt[crd - 1] * 100;
+
+ nvme_req(req)->retries++;
+ blk_mq_requeue_request(req, false);
+ blk_mq_delay_kick_requeue_list(req->q, delay);
+}
+
void nvme_complete_rq(struct request *req)
{
blk_status_t status = nvme_error_status(req);
trace_nvme_complete_rq(req);
+ if (nvme_req(req)->ctrl->kas)
+ nvme_req(req)->ctrl->comp_seen = true;
+
if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
if ((req->cmd_flags & REQ_NVME_MPATH) &&
blk_path_error(status)) {
@@ -259,8 +277,7 @@ void nvme_complete_rq(struct request *req)
}
if (!blk_queue_dying(req->q)) {
- nvme_req(req)->retries++;
- blk_mq_requeue_request(req, true);
+ nvme_retry_req(req);
return;
}
}
@@ -268,14 +285,14 @@ void nvme_complete_rq(struct request *req)
}
EXPORT_SYMBOL_GPL(nvme_complete_rq);
-void nvme_cancel_request(struct request *req, void *data, bool reserved)
+bool nvme_cancel_request(struct request *req, void *data, bool reserved)
{
dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
"Cancelling I/O %d", req->tag);
nvme_req(req)->status = NVME_SC_ABORT_REQ;
blk_mq_complete_request(req);
-
+ return true;
}
EXPORT_SYMBOL_GPL(nvme_cancel_request);
@@ -536,7 +553,6 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
- memset(cmnd, 0, sizeof(*cmnd));
cmnd->common.opcode = nvme_cmd_flush;
cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
}
@@ -548,9 +564,19 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_dsm_range *range;
struct bio *bio;
- range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
- if (!range)
- return BLK_STS_RESOURCE;
+ range = kmalloc_array(segments, sizeof(*range),
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (!range) {
+ /*
+ * If we fail allocation our range, fallback to the controller
+ * discard page. If that's also busy, it's safe to return
+ * busy, as we know we can make progress once that's freed.
+ */
+ if (test_and_set_bit_lock(0, &ns->ctrl->discard_page_busy))
+ return BLK_STS_RESOURCE;
+
+ range = page_address(ns->ctrl->discard_page);
+ }
__rq_for_each_bio(bio, req) {
u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -565,11 +591,13 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
}
if (WARN_ON_ONCE(n != segments)) {
- kfree(range);
+ if (virt_to_page(range) == ns->ctrl->discard_page)
+ clear_bit_unlock(0, &ns->ctrl->discard_page_busy);
+ else
+ kfree(range);
return BLK_STS_IOERR;
}
- memset(cmnd, 0, sizeof(*cmnd));
cmnd->dsm.opcode = nvme_cmd_dsm;
cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->dsm.nr = cpu_to_le32(segments - 1);
@@ -598,7 +626,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
- memset(cmnd, 0, sizeof(*cmnd));
cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
@@ -650,8 +677,13 @@ void nvme_cleanup_cmd(struct request *req)
blk_rq_bytes(req) >> ns->lba_shift);
}
if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
- kfree(page_address(req->special_vec.bv_page) +
- req->special_vec.bv_offset);
+ struct nvme_ns *ns = req->rq_disk->private_data;
+ struct page *page = req->special_vec.bv_page;
+
+ if (page == ns->ctrl->discard_page)
+ clear_bit_unlock(0, &ns->ctrl->discard_page_busy);
+ else
+ kfree(page_address(page) + req->special_vec.bv_offset);
}
}
EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
@@ -663,6 +695,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
nvme_clear_nvme_request(req);
+ memset(cmd, 0, sizeof(*cmd));
switch (req_op(req)) {
case REQ_OP_DRV_IN:
case REQ_OP_DRV_OUT:
@@ -691,6 +724,31 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
}
EXPORT_SYMBOL_GPL(nvme_setup_cmd);
+static void nvme_end_sync_rq(struct request *rq, blk_status_t error)
+{
+ struct completion *waiting = rq->end_io_data;
+
+ rq->end_io_data = NULL;
+ complete(waiting);
+}
+
+static void nvme_execute_rq_polled(struct request_queue *q,
+ struct gendisk *bd_disk, struct request *rq, int at_head)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+
+ WARN_ON_ONCE(!test_bit(QUEUE_FLAG_POLL, &q->queue_flags));
+
+ rq->cmd_flags |= REQ_HIPRI;
+ rq->end_io_data = &wait;
+ blk_execute_rq_nowait(q, bd_disk, rq, at_head, nvme_end_sync_rq);
+
+ while (!completion_done(&wait)) {
+ blk_poll(q, request_to_qc_t(rq->mq_hctx, rq), true);
+ cond_resched();
+ }
+}
+
/*
* Returns 0 on success. If the result is negative, it's a Linux error code;
* if the result is positive, it's an NVM Express status code
@@ -698,7 +756,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
union nvme_result *result, void *buffer, unsigned bufflen,
unsigned timeout, int qid, int at_head,
- blk_mq_req_flags_t flags)
+ blk_mq_req_flags_t flags, bool poll)
{
struct request *req;
int ret;
@@ -715,7 +773,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
goto out;
}
- blk_execute_rq(req->q, NULL, req, at_head);
+ if (poll)
+ nvme_execute_rq_polled(req->q, NULL, req, at_head);
+ else
+ blk_execute_rq(req->q, NULL, req, at_head);
if (result)
*result = nvme_req(req)->result;
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
@@ -732,7 +793,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buffer, unsigned bufflen)
{
return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0, 0, false);
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
@@ -843,6 +904,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
return;
}
+ ctrl->comp_seen = false;
spin_lock_irqsave(&ctrl->lock, flags);
if (ctrl->state == NVME_CTRL_LIVE ||
ctrl->state == NVME_CTRL_CONNECTING)
@@ -873,6 +935,15 @@ static void nvme_keep_alive_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_ctrl, ka_work);
+ bool comp_seen = ctrl->comp_seen;
+
+ if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) {
+ dev_dbg(ctrl->device,
+ "reschedule traffic based keep-alive timer\n");
+ ctrl->comp_seen = false;
+ schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ return;
+ }
if (nvme_keep_alive(ctrl)) {
/* allocation failure, reset the controller */
@@ -1041,7 +1112,7 @@ static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword
c.features.dword11 = cpu_to_le32(dword11);
ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res,
- buffer, buflen, 0, NVME_QID_ANY, 0, 0);
+ buffer, buflen, 0, NVME_QID_ANY, 0, 0, false);
if (ret >= 0 && result)
*result = le32_to_cpu(res.u32);
return ret;
@@ -1240,12 +1311,12 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.nsid = cpu_to_le32(cmd.nsid);
c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
- c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
- c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
- c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
- c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
- c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
- c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
+ c.common.cdw10 = cpu_to_le32(cmd.cdw10);
+ c.common.cdw11 = cpu_to_le32(cmd.cdw11);
+ c.common.cdw12 = cpu_to_le32(cmd.cdw12);
+ c.common.cdw13 = cpu_to_le32(cmd.cdw13);
+ c.common.cdw14 = cpu_to_le32(cmd.cdw14);
+ c.common.cdw15 = cpu_to_le32(cmd.cdw15);
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
@@ -1524,8 +1595,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->noiob)
nvme_set_chunk_size(ns);
nvme_update_disk_info(disk, ns, id);
- if (ns->ndev)
- nvme_nvm_update_nvm_info(ns);
#ifdef CONFIG_NVME_MULTIPATH
if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id);
@@ -1608,7 +1677,7 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
memset(&c, 0, sizeof(c));
c.common.opcode = op;
c.common.nsid = cpu_to_le32(ns->head->ns_id);
- c.common.cdw10[0] = cpu_to_le32(cdw10);
+ c.common.cdw10 = cpu_to_le32(cdw10);
ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16);
nvme_put_ns_from_disk(head, srcu_idx);
@@ -1682,11 +1751,11 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
else
cmd.common.opcode = nvme_admin_security_recv;
cmd.common.nsid = 0;
- cmd.common.cdw10[0] = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
- cmd.common.cdw10[1] = cpu_to_le32(len);
+ cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
+ cmd.common.cdw11 = cpu_to_le32(len);
return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
- ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0);
+ ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0, false);
}
EXPORT_SYMBOL_GPL(nvme_sec_submit);
#endif /* CONFIG_BLK_SED_OPAL */
@@ -1881,6 +1950,26 @@ static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
return ret;
}
+static int nvme_configure_acre(struct nvme_ctrl *ctrl)
+{
+ struct nvme_feat_host_behavior *host;
+ int ret;
+
+ /* Don't bother enabling the feature if retry delay is not reported */
+ if (!ctrl->crdt[0])
+ return 0;
+
+ host = kzalloc(sizeof(*host), GFP_KERNEL);
+ if (!host)
+ return 0;
+
+ host->acre = NVME_ENABLE_ACRE;
+ ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
+ host, sizeof(*host), NULL);
+ kfree(host);
+ return ret;
+}
+
static int nvme_configure_apst(struct nvme_ctrl *ctrl)
{
/*
@@ -2402,6 +2491,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
}
+ ctrl->crdt[0] = le16_to_cpu(id->crdt1);
+ ctrl->crdt[1] = le16_to_cpu(id->crdt2);
+ ctrl->crdt[2] = le16_to_cpu(id->crdt3);
+
ctrl->oacs = le16_to_cpu(id->oacs);
ctrl->oncs = le16_to_cpup(&id->oncs);
ctrl->oaes = le32_to_cpu(id->oaes);
@@ -2419,6 +2512,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
ctrl->max_namespaces = le32_to_cpu(id->mnan);
+ ctrl->ctratt = le32_to_cpu(id->ctratt);
if (id->rtd3e) {
/* us -> s */
@@ -2501,6 +2595,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
if (ret < 0)
return ret;
+ ret = nvme_configure_acre(ctrl);
+ if (ret < 0)
+ return ret;
+
ctrl->identified = true;
return 0;
@@ -2776,6 +2874,7 @@ static ssize_t field##_show(struct device *dev, \
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
nvme_show_int_function(cntlid);
+nvme_show_int_function(numa_node);
static ssize_t nvme_sysfs_delete(struct device *dev,
struct device_attribute *attr, const char *buf,
@@ -2855,6 +2954,7 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_subsysnqn.attr,
&dev_attr_address.attr,
&dev_attr_state.attr,
+ &dev_attr_numa_node.attr,
NULL
};
@@ -3065,7 +3165,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
struct gendisk *disk;
struct nvme_id_ns *id;
char disk_name[DISK_NAME_LEN];
- int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
+ int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT;
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
@@ -3100,13 +3200,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_setup_streams_ns(ctrl, ns);
nvme_set_disk_name(disk_name, ns, ctrl, &flags);
- if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
- if (nvme_nvm_register(ns, disk_name, node)) {
- dev_warn(ctrl->device, "LightNVM init failure\n");
- goto out_unlink_ns;
- }
- }
-
disk = alloc_disk_node(0, node);
if (!disk)
goto out_unlink_ns;
@@ -3120,6 +3213,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
__nvme_revalidate_disk(disk, id);
+ if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
+ if (nvme_nvm_register(ns, disk_name, node)) {
+ dev_warn(ctrl->device, "LightNVM init failure\n");
+ goto out_put_disk;
+ }
+ }
+
down_write(&ctrl->namespaces_rwsem);
list_add_tail(&ns->list, &ctrl->namespaces);
up_write(&ctrl->namespaces_rwsem);
@@ -3133,6 +3233,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
kfree(id);
return;
+ out_put_disk:
+ put_disk(ns->disk);
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
@@ -3522,6 +3624,7 @@ static void nvme_free_ctrl(struct device *dev)
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
kfree(ctrl->effects);
nvme_mpath_uninit(ctrl);
+ __free_page(ctrl->discard_page);
if (subsys) {
mutex_lock(&subsys->lock);
@@ -3562,6 +3665,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
+ BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
+ PAGE_SIZE);
+ ctrl->discard_page = alloc_page(GFP_KERNEL);
+ if (!ctrl->discard_page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
if (ret < 0)
goto out;
@@ -3599,6 +3710,8 @@ out_free_name:
out_release_instance:
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
out:
+ if (ctrl->discard_page)
+ __free_page(ctrl->discard_page);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_init_ctrl);
@@ -3746,7 +3859,7 @@ out:
return result;
}
-void nvme_core_exit(void)
+void __exit nvme_core_exit(void)
{
ida_destroy(&nvme_subsystems_ida);
class_destroy(nvme_subsys_class);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index bd0969db6225..b2ab213f43de 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -159,7 +159,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
cmd.prop_get.offset = cpu_to_le32(off);
ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0, 0, false);
if (ret >= 0)
*val = le64_to_cpu(res.u64);
@@ -206,7 +206,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
cmd.prop_get.offset = cpu_to_le32(off);
ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0, 0, false);
if (ret >= 0)
*val = le64_to_cpu(res.u64);
@@ -252,7 +252,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
cmd.prop_set.value = cpu_to_le64(val);
ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, NULL, 0, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0, 0, false);
if (unlikely(ret))
dev_err(ctrl->device,
"Property Set error: %d, offset %#x\n",
@@ -392,6 +392,9 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
cmd.connect.kato = ctrl->opts->discovery_nqn ? 0 :
cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000);
+ if (ctrl->opts->disable_sqflow)
+ cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
+
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -403,7 +406,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res,
data, sizeof(*data), 0, NVME_QID_ANY, 1,
- BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+ BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, false);
if (ret) {
nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
&cmd, data);
@@ -438,7 +441,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue);
* > 0: NVMe error status code
* < 0: Linux errno error code
*/
-int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
+int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll)
{
struct nvme_command cmd;
struct nvmf_connect_data *data;
@@ -451,6 +454,9 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
cmd.connect.qid = cpu_to_le16(qid);
cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize);
+ if (ctrl->opts->disable_sqflow)
+ cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
+
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -462,7 +468,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
data, sizeof(*data), 0, qid, 1,
- BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+ BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, poll);
if (ret) {
nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
&cmd, data);
@@ -607,6 +613,11 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_HOST_TRADDR, "host_traddr=%s" },
{ NVMF_OPT_HOST_ID, "hostid=%s" },
{ NVMF_OPT_DUP_CONNECT, "duplicate_connect" },
+ { NVMF_OPT_DISABLE_SQFLOW, "disable_sqflow" },
+ { NVMF_OPT_HDR_DIGEST, "hdr_digest" },
+ { NVMF_OPT_DATA_DIGEST, "data_digest" },
+ { NVMF_OPT_NR_WRITE_QUEUES, "nr_write_queues=%d" },
+ { NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
{ NVMF_OPT_ERR, NULL }
};
@@ -626,6 +637,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
opts->kato = NVME_DEFAULT_KATO;
opts->duplicate_connect = false;
+ opts->hdr_digest = false;
+ opts->data_digest = false;
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
@@ -817,6 +830,39 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
case NVMF_OPT_DUP_CONNECT:
opts->duplicate_connect = true;
break;
+ case NVMF_OPT_DISABLE_SQFLOW:
+ opts->disable_sqflow = true;
+ break;
+ case NVMF_OPT_HDR_DIGEST:
+ opts->hdr_digest = true;
+ break;
+ case NVMF_OPT_DATA_DIGEST:
+ opts->data_digest = true;
+ break;
+ case NVMF_OPT_NR_WRITE_QUEUES:
+ if (match_int(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (token <= 0) {
+ pr_err("Invalid nr_write_queues %d\n", token);
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->nr_write_queues = token;
+ break;
+ case NVMF_OPT_NR_POLL_QUEUES:
+ if (match_int(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (token <= 0) {
+ pr_err("Invalid nr_poll_queues %d\n", token);
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->nr_poll_queues = token;
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
@@ -933,7 +979,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
- NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT)
+ NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
+ NVMF_OPT_DISABLE_SQFLOW)
static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 6ea6275f332a..478343b73e38 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -58,6 +58,11 @@ enum {
NVMF_OPT_CTRL_LOSS_TMO = 1 << 11,
NVMF_OPT_HOST_ID = 1 << 12,
NVMF_OPT_DUP_CONNECT = 1 << 13,
+ NVMF_OPT_DISABLE_SQFLOW = 1 << 14,
+ NVMF_OPT_HDR_DIGEST = 1 << 15,
+ NVMF_OPT_DATA_DIGEST = 1 << 16,
+ NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
+ NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
};
/**
@@ -85,6 +90,11 @@ enum {
* @max_reconnects: maximum number of allowed reconnect attempts before removing
* the controller, (-1) means reconnect forever, zero means remove
* immediately;
+ * @disable_sqflow: disable controller sq flow control
+ * @hdr_digest: generate/verify header digest (TCP)
+ * @data_digest: generate/verify data digest (TCP)
+ * @nr_write_queues: number of queues for write I/O
+ * @nr_poll_queues: number of queues for polling I/O
*/
struct nvmf_ctrl_options {
unsigned mask;
@@ -101,6 +111,11 @@ struct nvmf_ctrl_options {
unsigned int kato;
struct nvmf_host *host;
int max_reconnects;
+ bool disable_sqflow;
+ bool hdr_digest;
+ bool data_digest;
+ unsigned int nr_write_queues;
+ unsigned int nr_poll_queues;
};
/*
@@ -156,7 +171,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
-int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
+int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll);
int nvmf_register_transport(struct nvmf_transport_ops *ops);
void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index feb86b59170e..89accc76d71c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1975,7 +1975,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
(qsize / 5));
if (ret)
break;
- ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
+ ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false);
if (ret)
break;
@@ -2326,38 +2326,6 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
}
-static struct blk_mq_tags *
-nvme_fc_tagset(struct nvme_fc_queue *queue)
-{
- if (queue->qnum == 0)
- return queue->ctrl->admin_tag_set.tags[queue->qnum];
-
- return queue->ctrl->tag_set.tags[queue->qnum - 1];
-}
-
-static int
-nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
-
-{
- struct nvme_fc_queue *queue = hctx->driver_data;
- struct nvme_fc_ctrl *ctrl = queue->ctrl;
- struct request *req;
- struct nvme_fc_fcp_op *op;
-
- req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag);
- if (!req)
- return 0;
-
- op = blk_mq_rq_to_pdu(req);
-
- if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) &&
- (ctrl->lport->ops->poll_queue))
- ctrl->lport->ops->poll_queue(&ctrl->lport->localport,
- queue->lldd_handle);
-
- return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE));
-}
-
static void
nvme_fc_submit_async_event(struct nvme_ctrl *arg)
{
@@ -2410,7 +2378,7 @@ nvme_fc_complete_rq(struct request *rq)
* status. The done path will return the io request back to the block
* layer with an error status.
*/
-static void
+static bool
nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
{
struct nvme_ctrl *nctrl = data;
@@ -2418,6 +2386,7 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
__nvme_fc_abort_op(ctrl, op);
+ return true;
}
@@ -2427,7 +2396,6 @@ static const struct blk_mq_ops nvme_fc_mq_ops = {
.init_request = nvme_fc_init_request,
.exit_request = nvme_fc_exit_request,
.init_hctx = nvme_fc_init_hctx,
- .poll = nvme_fc_poll,
.timeout = nvme_fc_timeout,
};
@@ -2457,7 +2425,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
ctrl->tag_set.ops = &nvme_fc_mq_ops;
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
ctrl->tag_set.reserved_tags = 1; /* fabric connect */
- ctrl->tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size =
struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
@@ -3050,6 +3018,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.opts = opts;
ctrl->ctrl.nr_reconnects = 0;
+ ctrl->ctrl.numa_node = dev_to_node(lport->dev);
INIT_LIST_HEAD(&ctrl->ctrl_list);
ctrl->lport = lport;
ctrl->rport = rport;
@@ -3090,7 +3059,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
- ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->admin_tag_set.cmd_size =
struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
ctrl->lport->ops->fcprqst_priv_sz);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index a4f3b263cd6c..b759c25c89c8 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -577,7 +577,8 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
struct ppa_addr ppa;
size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
size_t log_pos, offset, len;
- int ret, i, max_len;
+ int i, max_len;
+ int ret = 0;
/*
* limit requests to maximum 256K to avoid issuing arbitrary large
@@ -731,11 +732,12 @@ static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
return ret;
}
-static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
+static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
+ int size)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
+ return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0);
}
static void nvme_nvm_destroy_dma_pool(void *pool)
@@ -935,9 +937,9 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
/* cdw11-12 */
c.ph_rw.length = cpu_to_le16(vcmd.nppas);
c.ph_rw.control = cpu_to_le16(vcmd.control);
- c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
- c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
- c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);
+ c.common.cdw13 = cpu_to_le32(vcmd.cdw13);
+ c.common.cdw14 = cpu_to_le32(vcmd.cdw14);
+ c.common.cdw15 = cpu_to_le32(vcmd.cdw15);
if (vcmd.timeout_ms)
timeout = msecs_to_jiffies(vcmd.timeout_ms);
@@ -972,22 +974,11 @@ int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
}
}
-void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
-{
- struct nvm_dev *ndev = ns->ndev;
- struct nvm_geo *geo = &ndev->geo;
-
- if (geo->version == NVM_OCSSD_SPEC_12)
- return;
-
- geo->csecs = 1 << ns->lba_shift;
- geo->sos = ns->ms;
-}
-
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
{
struct request_queue *q = ns->queue;
struct nvm_dev *dev;
+ struct nvm_geo *geo;
_nvme_nvm_check_size();
@@ -995,6 +986,12 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
if (!dev)
return -ENOMEM;
+ /* Note that csecs and sos will be overridden if it is a 1.2 drive. */
+ geo = &dev->geo;
+ geo->csecs = 1 << ns->lba_shift;
+ geo->sos = ns->ms;
+ geo->ext = ns->ext;
+
dev->q = q;
memcpy(dev->name, disk_name, DISK_NAME_LEN);
dev->ops = &nvme_nvm_dev_ops;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 9901afd804ce..183ec17ba067 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -141,7 +141,7 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
continue;
- distance = node_distance(node, dev_to_node(ns->ctrl->dev));
+ distance = node_distance(node, ns->ctrl->numa_node);
switch (ns->ana_state) {
case NVME_ANA_OPTIMIZED:
@@ -220,21 +220,6 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
return ret;
}
-static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
-{
- struct nvme_ns_head *head = q->queuedata;
- struct nvme_ns *ns;
- bool found = false;
- int srcu_idx;
-
- srcu_idx = srcu_read_lock(&head->srcu);
- ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu);
- if (likely(ns && nvme_path_is_optimized(ns)))
- found = ns->queue->poll_fn(q, qc);
- srcu_read_unlock(&head->srcu, srcu_idx);
- return found;
-}
-
static void nvme_requeue_work(struct work_struct *work)
{
struct nvme_ns_head *head =
@@ -276,12 +261,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
return 0;
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
+ q = blk_alloc_queue_node(GFP_KERNEL, ctrl->numa_node);
if (!q)
goto out;
q->queuedata = head;
blk_queue_make_request(q, nvme_ns_head_make_request);
- q->poll_fn = nvme_ns_head_poll;
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* set to a default value for 512 until disk is validated */
blk_queue_logical_block_size(q, 512);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 081cbdcce880..2b36ac922596 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -145,6 +145,7 @@ enum nvme_ctrl_state {
};
struct nvme_ctrl {
+ bool comp_seen;
enum nvme_ctrl_state state;
bool identified;
spinlock_t lock;
@@ -153,6 +154,7 @@ struct nvme_ctrl {
struct request_queue *connect_q;
struct device *dev;
int instance;
+ int numa_node;
struct blk_mq_tag_set *tagset;
struct blk_mq_tag_set *admin_tagset;
struct list_head namespaces;
@@ -179,6 +181,7 @@ struct nvme_ctrl {
u32 page_size;
u32 max_hw_sectors;
u32 max_segments;
+ u16 crdt[3];
u16 oncs;
u16 oacs;
u16 nssa;
@@ -193,6 +196,7 @@ struct nvme_ctrl {
u8 apsta;
u32 oaes;
u32 aen_result;
+ u32 ctratt;
unsigned int shutdown_timeout;
unsigned int kato;
bool subsystem;
@@ -237,6 +241,9 @@ struct nvme_ctrl {
u16 maxcmd;
int nr_reconnects;
struct nvmf_ctrl_options *opts;
+
+ struct page *discard_page;
+ unsigned long discard_page_busy;
};
struct nvme_subsystem {
@@ -364,15 +371,6 @@ static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {}
static inline void nvme_should_fail(struct request *req) {}
#endif
-static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
-{
- u32 val = 0;
-
- if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
- return false;
- return val & NVME_CSTS_RDY;
-}
-
static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
if (!ctrl->subsystem)
@@ -408,7 +406,7 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
}
void nvme_complete_rq(struct request *req);
-void nvme_cancel_request(struct request *req, void *data, bool reserved);
+bool nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
@@ -449,7 +447,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
union nvme_result *result, void *buffer, unsigned bufflen,
unsigned timeout, int qid, int at_head,
- blk_mq_req_flags_t flags);
+ blk_mq_req_flags_t flags, bool poll);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
@@ -545,13 +543,11 @@ static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
#endif /* CONFIG_NVME_MULTIPATH */
#ifdef CONFIG_NVM
-void nvme_nvm_update_nvm_info(struct nvme_ns *ns);
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
extern const struct attribute_group nvme_nvm_attr_group;
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
#else
-static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {};
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
int node)
{
@@ -572,6 +568,6 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
}
int __init nvme_core_init(void);
-void nvme_core_exit(void);
+void __exit nvme_core_exit(void);
#endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c33bb201b884..5a0bf6a24d50 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -32,6 +32,7 @@
#include <linux/sed-opal.h>
#include <linux/pci-p2pdma.h>
+#include "trace.h"
#include "nvme.h"
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
@@ -74,6 +75,22 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+static int queue_count_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops queue_count_ops = {
+ .set = queue_count_set,
+ .get = param_get_int,
+};
+
+static int write_queues;
+module_param_cb(write_queues, &queue_count_ops, &write_queues, 0644);
+MODULE_PARM_DESC(write_queues,
+ "Number of queues to use for writes. If not set, reads and writes "
+ "will share a queue set.");
+
+static int poll_queues = 0;
+module_param_cb(poll_queues, &queue_count_ops, &poll_queues, 0644);
+MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
+
struct nvme_dev;
struct nvme_queue;
@@ -92,6 +109,7 @@ struct nvme_dev {
struct dma_pool *prp_small_pool;
unsigned online_queues;
unsigned max_qid;
+ unsigned io_queues[HCTX_MAX_TYPES];
unsigned int num_vecs;
int q_depth;
u32 db_stride;
@@ -105,7 +123,6 @@ struct nvme_dev {
u32 cmbsz;
u32 cmbloc;
struct nvme_ctrl ctrl;
- struct completion ioq_wait;
mempool_t *iod_mempool;
@@ -134,6 +151,17 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
return param_set_int(val, kp);
}
+static int queue_count_set(const char *val, const struct kernel_param *kp)
+{
+ int n = 0, ret;
+
+ ret = kstrtoint(val, 10, &n);
+ if (n > num_possible_cpus())
+ n = num_possible_cpus();
+
+ return param_set_int(val, kp);
+}
+
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
{
return qid * 2 * stride;
@@ -158,8 +186,8 @@ struct nvme_queue {
struct nvme_dev *dev;
spinlock_t sq_lock;
struct nvme_command *sq_cmds;
- bool sq_cmds_is_io;
- spinlock_t cq_lock ____cacheline_aligned_in_smp;
+ /* only used for poll queues: */
+ spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
@@ -168,14 +196,20 @@ struct nvme_queue {
u16 q_depth;
s16 cq_vector;
u16 sq_tail;
+ u16 last_sq_tail;
u16 cq_head;
u16 last_cq_head;
u16 qid;
u8 cq_phase;
+ unsigned long flags;
+#define NVMEQ_ENABLED 0
+#define NVMEQ_SQ_CMB 1
+#define NVMEQ_DELETE_ERROR 2
u32 *dbbuf_sq_db;
u32 *dbbuf_cq_db;
u32 *dbbuf_sq_ei;
u32 *dbbuf_cq_ei;
+ struct completion delete_done;
};
/*
@@ -218,9 +252,20 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
}
+static unsigned int max_io_queues(void)
+{
+ return num_possible_cpus() + write_queues + poll_queues;
+}
+
+static unsigned int max_queue_count(void)
+{
+ /* IO queues + admin queue */
+ return 1 + max_io_queues();
+}
+
static inline unsigned int nvme_dbbuf_size(u32 stride)
{
- return ((num_possible_cpus() + 1) * 8 * stride);
+ return (max_queue_count() * 8 * stride);
}
static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
@@ -431,30 +476,90 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
return 0;
}
+static int queue_irq_offset(struct nvme_dev *dev)
+{
+ /* if we have more than 1 vec, admin queue offsets us by 1 */
+ if (dev->num_vecs > 1)
+ return 1;
+
+ return 0;
+}
+
static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
{
struct nvme_dev *dev = set->driver_data;
+ int i, qoff, offset;
+
+ offset = queue_irq_offset(dev);
+ for (i = 0, qoff = 0; i < set->nr_maps; i++) {
+ struct blk_mq_queue_map *map = &set->map[i];
+
+ map->nr_queues = dev->io_queues[i];
+ if (!map->nr_queues) {
+ BUG_ON(i == HCTX_TYPE_DEFAULT);
+ continue;
+ }
- return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev),
- dev->num_vecs > 1 ? 1 /* admin queue */ : 0);
+ /*
+ * The poll queue(s) doesn't have an IRQ (and hence IRQ
+ * affinity), so use the regular blk-mq cpu mapping
+ */
+ map->queue_offset = qoff;
+ if (i != HCTX_TYPE_POLL)
+ blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
+ else
+ blk_mq_map_queues(map);
+ qoff += map->nr_queues;
+ offset += map->nr_queues;
+ }
+
+ return 0;
+}
+
+/*
+ * Write sq tail if we are asked to, or if the next command would wrap.
+ */
+static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
+{
+ if (!write_sq) {
+ u16 next_tail = nvmeq->sq_tail + 1;
+
+ if (next_tail == nvmeq->q_depth)
+ next_tail = 0;
+ if (next_tail != nvmeq->last_sq_tail)
+ return;
+ }
+
+ if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
+ nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
+ writel(nvmeq->sq_tail, nvmeq->q_db);
+ nvmeq->last_sq_tail = nvmeq->sq_tail;
}
/**
* nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
* @nvmeq: The queue to use
* @cmd: The command to send
+ * @write_sq: whether to write to the SQ doorbell
*/
-static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
+ bool write_sq)
{
spin_lock(&nvmeq->sq_lock);
-
memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
-
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
- if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
- nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
- writel(nvmeq->sq_tail, nvmeq->q_db);
+ nvme_write_sq_db(nvmeq, write_sq);
+ spin_unlock(&nvmeq->sq_lock);
+}
+
+static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+ struct nvme_queue *nvmeq = hctx->driver_data;
+
+ spin_lock(&nvmeq->sq_lock);
+ if (nvmeq->sq_tail != nvmeq->last_sq_tail)
+ nvme_write_sq_db(nvmeq, true);
spin_unlock(&nvmeq->sq_lock);
}
@@ -822,7 +927,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
* We should not need to do this, but we're still using this to
* ensure we can drain requests on a dying queue.
*/
- if (unlikely(nvmeq->cq_vector < 0))
+ if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
return BLK_STS_IOERR;
ret = nvme_setup_cmd(ns, req, &cmnd);
@@ -840,7 +945,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
}
blk_mq_start_request(req);
- nvme_submit_cmd(nvmeq, &cmnd);
+ nvme_submit_cmd(nvmeq, &cmnd, bd->last);
return BLK_STS_OK;
out_cleanup_iod:
nvme_free_iod(dev, req);
@@ -899,6 +1004,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
}
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+ trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
nvme_end_request(req, cqe->status, cqe->result);
}
@@ -919,15 +1025,15 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
}
}
-static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
- u16 *end, int tag)
+static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+ u16 *end, unsigned int tag)
{
- bool found = false;
+ int found = 0;
*start = nvmeq->cq_head;
- while (!found && nvme_cqe_pending(nvmeq)) {
- if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
- found = true;
+ while (nvme_cqe_pending(nvmeq)) {
+ if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+ found++;
nvme_update_cq_head(nvmeq);
}
*end = nvmeq->cq_head;
@@ -943,12 +1049,16 @@ static irqreturn_t nvme_irq(int irq, void *data)
irqreturn_t ret = IRQ_NONE;
u16 start, end;
- spin_lock(&nvmeq->cq_lock);
+ /*
+ * The rmb/wmb pair ensures we see all updates from a previous run of
+ * the irq handler, even if that was on another CPU.
+ */
+ rmb();
if (nvmeq->cq_head != nvmeq->last_cq_head)
ret = IRQ_HANDLED;
nvme_process_cq(nvmeq, &start, &end, -1);
nvmeq->last_cq_head = nvmeq->cq_head;
- spin_unlock(&nvmeq->cq_lock);
+ wmb();
if (start != end) {
nvme_complete_cqes(nvmeq, start, end);
@@ -966,27 +1076,50 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
return IRQ_NONE;
}
-static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
+/*
+ * Poll for completions any queue, including those not dedicated to polling.
+ * Can be called from any context.
+ */
+static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag)
{
+ struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
u16 start, end;
- bool found;
+ int found;
- if (!nvme_cqe_pending(nvmeq))
- return 0;
-
- spin_lock_irq(&nvmeq->cq_lock);
- found = nvme_process_cq(nvmeq, &start, &end, tag);
- spin_unlock_irq(&nvmeq->cq_lock);
+ /*
+ * For a poll queue we need to protect against the polling thread
+ * using the CQ lock. For normal interrupt driven threads we have
+ * to disable the interrupt to avoid racing with it.
+ */
+ if (nvmeq->cq_vector == -1) {
+ spin_lock(&nvmeq->cq_poll_lock);
+ found = nvme_process_cq(nvmeq, &start, &end, tag);
+ spin_unlock(&nvmeq->cq_poll_lock);
+ } else {
+ disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
+ found = nvme_process_cq(nvmeq, &start, &end, tag);
+ enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
+ }
nvme_complete_cqes(nvmeq, start, end);
return found;
}
-static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static int nvme_poll(struct blk_mq_hw_ctx *hctx)
{
struct nvme_queue *nvmeq = hctx->driver_data;
+ u16 start, end;
+ bool found;
+
+ if (!nvme_cqe_pending(nvmeq))
+ return 0;
+
+ spin_lock(&nvmeq->cq_poll_lock);
+ found = nvme_process_cq(nvmeq, &start, &end, -1);
+ spin_unlock(&nvmeq->cq_poll_lock);
- return __nvme_poll(nvmeq, tag);
+ nvme_complete_cqes(nvmeq, start, end);
+ return found;
}
static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
@@ -998,7 +1131,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
memset(&c, 0, sizeof(c));
c.common.opcode = nvme_admin_async_event;
c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
- nvme_submit_cmd(nvmeq, &c);
+ nvme_submit_cmd(nvmeq, &c, true);
}
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1016,7 +1149,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
struct nvme_queue *nvmeq, s16 vector)
{
struct nvme_command c;
- int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
+ int flags = NVME_QUEUE_PHYS_CONTIG;
+
+ if (vector != -1)
+ flags |= NVME_CQ_IRQ_ENABLED;
/*
* Note: we (ab)use the fact that the prp fields survive if no data
@@ -1028,7 +1164,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
c.create_cq.cqid = cpu_to_le16(qid);
c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
c.create_cq.cq_flags = cpu_to_le16(flags);
- c.create_cq.irq_vector = cpu_to_le16(vector);
+ if (vector != -1)
+ c.create_cq.irq_vector = cpu_to_le16(vector);
+ else
+ c.create_cq.irq_vector = 0;
return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
@@ -1157,7 +1296,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
/*
* Did we miss an interrupt?
*/
- if (__nvme_poll(nvmeq, req->tag)) {
+ if (nvme_poll_irqdisable(nvmeq, req->tag)) {
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, completion polled\n",
req->tag, nvmeq->qid);
@@ -1237,17 +1376,15 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
{
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+ if (!nvmeq->sq_cmds)
+ return;
- if (nvmeq->sq_cmds) {
- if (nvmeq->sq_cmds_is_io)
- pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev),
- nvmeq->sq_cmds,
- SQ_SIZE(nvmeq->q_depth));
- else
- dma_free_coherent(nvmeq->q_dmadev,
- SQ_SIZE(nvmeq->q_depth),
- nvmeq->sq_cmds,
- nvmeq->sq_dma_addr);
+ if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) {
+ pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev),
+ nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth));
+ } else {
+ dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+ nvmeq->sq_cmds, nvmeq->sq_dma_addr);
}
}
@@ -1267,47 +1404,32 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
*/
static int nvme_suspend_queue(struct nvme_queue *nvmeq)
{
- int vector;
-
- spin_lock_irq(&nvmeq->cq_lock);
- if (nvmeq->cq_vector == -1) {
- spin_unlock_irq(&nvmeq->cq_lock);
+ if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags))
return 1;
- }
- vector = nvmeq->cq_vector;
- nvmeq->dev->online_queues--;
- nvmeq->cq_vector = -1;
- spin_unlock_irq(&nvmeq->cq_lock);
- /*
- * Ensure that nvme_queue_rq() sees it ->cq_vector == -1 without
- * having to grab the lock.
- */
+ /* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */
mb();
+ nvmeq->dev->online_queues--;
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
-
- pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
-
+ if (nvmeq->cq_vector == -1)
+ return 0;
+ pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
+ nvmeq->cq_vector = -1;
return 0;
}
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
struct nvme_queue *nvmeq = &dev->queues[0];
- u16 start, end;
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
else
nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
- spin_lock_irq(&nvmeq->cq_lock);
- nvme_process_cq(nvmeq, &start, &end, -1);
- spin_unlock_irq(&nvmeq->cq_lock);
-
- nvme_complete_cqes(nvmeq, start, end);
+ nvme_poll_irqdisable(nvmeq, -1);
}
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
@@ -1343,15 +1465,14 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
nvmeq->sq_cmds);
- nvmeq->sq_cmds_is_io = true;
- }
-
- if (!nvmeq->sq_cmds) {
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
- nvmeq->sq_cmds_is_io = false;
+ if (nvmeq->sq_dma_addr) {
+ set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
+ return 0;
+ }
}
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
return -ENOMEM;
return 0;
@@ -1375,7 +1496,7 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
nvmeq->q_dmadev = dev->dev;
nvmeq->dev = dev;
spin_lock_init(&nvmeq->sq_lock);
- spin_lock_init(&nvmeq->cq_lock);
+ spin_lock_init(&nvmeq->cq_poll_lock);
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
@@ -1411,28 +1532,34 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
struct nvme_dev *dev = nvmeq->dev;
- spin_lock_irq(&nvmeq->cq_lock);
nvmeq->sq_tail = 0;
+ nvmeq->last_sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
nvme_dbbuf_init(dev, nvmeq, qid);
dev->online_queues++;
- spin_unlock_irq(&nvmeq->cq_lock);
+ wmb(); /* ensure the first interrupt sees the initialization */
}
-static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
+static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
{
struct nvme_dev *dev = nvmeq->dev;
int result;
s16 vector;
+ clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);
+
/*
* A queue's vector matches the queue identifier unless the controller
* has only one vector available.
*/
- vector = dev->num_vecs == 1 ? 0 : qid;
+ if (!polled)
+ vector = dev->num_vecs == 1 ? 0 : qid;
+ else
+ vector = -1;
+
result = adapter_alloc_cq(dev, qid, nvmeq, vector);
if (result)
return result;
@@ -1443,17 +1570,16 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
else if (result)
goto release_cq;
- /*
- * Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will
- * invoke free_irq for it and cause a 'Trying to free already-free IRQ
- * xxx' warning if the create CQ/SQ command times out.
- */
nvmeq->cq_vector = vector;
nvme_init_queue(nvmeq, qid);
- result = queue_request_irq(nvmeq);
- if (result < 0)
- goto release_sq;
+ if (vector != -1) {
+ result = queue_request_irq(nvmeq);
+ if (result < 0)
+ goto release_sq;
+ }
+
+ set_bit(NVMEQ_ENABLED, &nvmeq->flags);
return result;
release_sq:
@@ -1477,6 +1603,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
static const struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
.complete = nvme_pci_complete_rq,
+ .commit_rqs = nvme_commit_rqs,
.init_hctx = nvme_init_hctx,
.init_request = nvme_init_request,
.map_queues = nvme_pci_map_queues,
@@ -1602,12 +1729,13 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
return result;
}
+ set_bit(NVMEQ_ENABLED, &nvmeq->flags);
return result;
}
static int nvme_create_io_queues(struct nvme_dev *dev)
{
- unsigned i, max;
+ unsigned i, max, rw_queues;
int ret = 0;
for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
@@ -1618,8 +1746,17 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
}
max = min(dev->max_qid, dev->ctrl.queue_count - 1);
+ if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) {
+ rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] +
+ dev->io_queues[HCTX_TYPE_READ];
+ } else {
+ rw_queues = max;
+ }
+
for (i = dev->online_queues; i <= max; i++) {
- ret = nvme_create_queue(&dev->queues[i], i);
+ bool polled = i > rw_queues;
+
+ ret = nvme_create_queue(&dev->queues[i], i, polled);
if (ret)
break;
}
@@ -1891,6 +2028,110 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
return ret;
}
+static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
+{
+ unsigned int this_w_queues = write_queues;
+
+ /*
+ * Setup read/write queue split
+ */
+ if (irq_queues == 1) {
+ dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
+ dev->io_queues[HCTX_TYPE_READ] = 0;
+ return;
+ }
+
+ /*
+ * If 'write_queues' is set, ensure it leaves room for at least
+ * one read queue
+ */
+ if (this_w_queues >= irq_queues)
+ this_w_queues = irq_queues - 1;
+
+ /*
+ * If 'write_queues' is set to zero, reads and writes will share
+ * a queue set.
+ */
+ if (!this_w_queues) {
+ dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues;
+ dev->io_queues[HCTX_TYPE_READ] = 0;
+ } else {
+ dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues;
+ dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues;
+ }
+}
+
+static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ int irq_sets[2];
+ struct irq_affinity affd = {
+ .pre_vectors = 1,
+ .nr_sets = ARRAY_SIZE(irq_sets),
+ .sets = irq_sets,
+ };
+ int result = 0;
+ unsigned int irq_queues, this_p_queues;
+
+ /*
+ * Poll queues don't need interrupts, but we need at least one IO
+ * queue left over for non-polled IO.
+ */
+ this_p_queues = poll_queues;
+ if (this_p_queues >= nr_io_queues) {
+ this_p_queues = nr_io_queues - 1;
+ irq_queues = 1;
+ } else {
+ irq_queues = nr_io_queues - this_p_queues;
+ }
+ dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
+
+ /*
+ * For irq sets, we have to ask for minvec == maxvec. This passes
+ * any reduction back to us, so we can adjust our queue counts and
+ * IRQ vector needs.
+ */
+ do {
+ nvme_calc_io_queues(dev, irq_queues);
+ irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT];
+ irq_sets[1] = dev->io_queues[HCTX_TYPE_READ];
+ if (!irq_sets[1])
+ affd.nr_sets = 1;
+
+ /*
+ * If we got a failure and we're down to asking for just
+ * 1 + 1 queues, just ask for a single vector. We'll share
+ * that between the single IO queue and the admin queue.
+ */
+ if (result >= 0 && irq_queues > 1)
+ irq_queues = irq_sets[0] + irq_sets[1] + 1;
+
+ result = pci_alloc_irq_vectors_affinity(pdev, irq_queues,
+ irq_queues,
+ PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
+
+ /*
+ * Need to reduce our vec counts. If we get ENOSPC, the
+ * platform should support mulitple vecs, we just need
+ * to decrease our ask. If we get EINVAL, the platform
+ * likely does not. Back down to ask for just one vector.
+ */
+ if (result == -ENOSPC) {
+ irq_queues--;
+ if (!irq_queues)
+ return result;
+ continue;
+ } else if (result == -EINVAL) {
+ irq_queues = 1;
+ continue;
+ } else if (result <= 0)
+ return -EIO;
+ break;
+ } while (1);
+
+ return result;
+}
+
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = &dev->queues[0];
@@ -1898,17 +2139,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
int result, nr_io_queues;
unsigned long size;
- struct irq_affinity affd = {
- .pre_vectors = 1
- };
-
- nr_io_queues = num_possible_cpus();
+ nr_io_queues = max_io_queues();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
return result;
if (nr_io_queues == 0)
return 0;
+
+ clear_bit(NVMEQ_ENABLED, &adminq->flags);
if (dev->cmb_use_sqes) {
result = nvme_cmb_qdepth(dev, nr_io_queues,
@@ -1937,12 +2176,19 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
* setting up the full range we need.
*/
pci_free_irq_vectors(pdev);
- result = pci_alloc_irq_vectors_affinity(pdev, 1, nr_io_queues + 1,
- PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
+
+ result = nvme_setup_irqs(dev, nr_io_queues);
if (result <= 0)
return -EIO;
+
dev->num_vecs = result;
- dev->max_qid = max(result - 1, 1);
+ result = max(result - 1, 1);
+ dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];
+
+ dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
+ dev->io_queues[HCTX_TYPE_DEFAULT],
+ dev->io_queues[HCTX_TYPE_READ],
+ dev->io_queues[HCTX_TYPE_POLL]);
/*
* Should investigate if there's a performance win from allocating
@@ -1956,6 +2202,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
adminq->cq_vector = -1;
return result;
}
+ set_bit(NVMEQ_ENABLED, &adminq->flags);
return nvme_create_io_queues(dev);
}
@@ -1964,23 +2211,15 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error)
struct nvme_queue *nvmeq = req->end_io_data;
blk_mq_free_request(req);
- complete(&nvmeq->dev->ioq_wait);
+ complete(&nvmeq->delete_done);
}
static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
- u16 start, end;
-
- if (!error) {
- unsigned long flags;
- spin_lock_irqsave(&nvmeq->cq_lock, flags);
- nvme_process_cq(nvmeq, &start, &end, -1);
- spin_unlock_irqrestore(&nvmeq->cq_lock, flags);
-
- nvme_complete_cqes(nvmeq, start, end);
- }
+ if (error)
+ set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);
nvme_del_queue_end(req, error);
}
@@ -2002,37 +2241,44 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
req->timeout = ADMIN_TIMEOUT;
req->end_io_data = nvmeq;
+ init_completion(&nvmeq->delete_done);
blk_execute_rq_nowait(q, NULL, req, false,
opcode == nvme_admin_delete_cq ?
nvme_del_cq_end : nvme_del_queue_end);
return 0;
}
-static void nvme_disable_io_queues(struct nvme_dev *dev)
+static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
{
- int pass, queues = dev->online_queues - 1;
+ int nr_queues = dev->online_queues - 1, sent = 0;
unsigned long timeout;
- u8 opcode = nvme_admin_delete_sq;
-
- for (pass = 0; pass < 2; pass++) {
- int sent = 0, i = queues;
- reinit_completion(&dev->ioq_wait);
retry:
- timeout = ADMIN_TIMEOUT;
- for (; i > 0; i--, sent++)
- if (nvme_delete_queue(&dev->queues[i], opcode))
- break;
-
- while (sent--) {
- timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
- if (timeout == 0)
- return;
- if (i)
- goto retry;
- }
- opcode = nvme_admin_delete_cq;
+ timeout = ADMIN_TIMEOUT;
+ while (nr_queues > 0) {
+ if (nvme_delete_queue(&dev->queues[nr_queues], opcode))
+ break;
+ nr_queues--;
+ sent++;
}
+ while (sent) {
+ struct nvme_queue *nvmeq = &dev->queues[nr_queues + sent];
+
+ timeout = wait_for_completion_io_timeout(&nvmeq->delete_done,
+ timeout);
+ if (timeout == 0)
+ return false;
+
+ /* handle any remaining CQEs */
+ if (opcode == nvme_admin_delete_cq &&
+ !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags))
+ nvme_poll_irqdisable(nvmeq, -1);
+
+ sent--;
+ if (nr_queues)
+ goto retry;
+ }
+ return true;
}
/*
@@ -2045,6 +2291,10 @@ static int nvme_dev_add(struct nvme_dev *dev)
if (!dev->ctrl.tagset) {
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
+ dev->tagset.nr_maps = 2; /* default + read */
+ if (dev->io_queues[HCTX_TYPE_POLL])
+ dev->tagset.nr_maps++;
+ dev->tagset.nr_maps = HCTX_MAX_TYPES;
dev->tagset.timeout = NVME_IO_TIMEOUT;
dev->tagset.numa_node = dev_to_node(dev->dev);
dev->tagset.queue_depth =
@@ -2187,7 +2437,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_stop_queues(&dev->ctrl);
if (!dead && dev->ctrl.queue_count > 0) {
- nvme_disable_io_queues(dev);
+ if (nvme_disable_io_queues(dev, nvme_admin_delete_sq))
+ nvme_disable_io_queues(dev, nvme_admin_delete_cq);
nvme_disable_admin_queue(dev, shutdown);
}
for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
@@ -2491,8 +2742,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!dev)
return -ENOMEM;
- dev->queues = kcalloc_node(num_possible_cpus() + 1,
- sizeof(struct nvme_queue), GFP_KERNEL, node);
+ dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue),
+ GFP_KERNEL, node);
if (!dev->queues)
goto free;
@@ -2506,7 +2757,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
mutex_init(&dev->shutdown_lock);
- init_completion(&dev->ioq_wait);
result = nvme_setup_prp_pools(dev);
if (result)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index ab6ec7295bf9..0a2fd2949ad7 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -162,6 +162,13 @@ static inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue)
return queue - queue->ctrl->queues;
}
+static bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue)
+{
+ return nvme_rdma_queue_idx(queue) >
+ queue->ctrl->ctrl.opts->nr_io_queues +
+ queue->ctrl->ctrl.opts->nr_write_queues;
+}
+
static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue)
{
return queue->cmnd_capsule_len - sizeof(struct nvme_command);
@@ -440,6 +447,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
const int send_wr_factor = 3; /* MR, SEND, INV */
const int cq_factor = send_wr_factor + 1; /* + RECV */
int comp_vector, idx = nvme_rdma_queue_idx(queue);
+ enum ib_poll_context poll_ctx;
int ret;
queue->device = nvme_rdma_find_get_device(queue->cm_id);
@@ -456,10 +464,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
*/
comp_vector = idx == 0 ? idx : idx - 1;
+ /* Polling queues need direct cq polling context */
+ if (nvme_rdma_poll_queue(queue))
+ poll_ctx = IB_POLL_DIRECT;
+ else
+ poll_ctx = IB_POLL_SOFTIRQ;
+
/* +1 for ib_stop_cq */
queue->ib_cq = ib_alloc_cq(ibdev, queue,
cq_factor * queue->queue_size + 1,
- comp_vector, IB_POLL_SOFTIRQ);
+ comp_vector, poll_ctx);
if (IS_ERR(queue->ib_cq)) {
ret = PTR_ERR(queue->ib_cq);
goto out_put_dev;
@@ -595,15 +609,17 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
{
+ struct nvme_rdma_queue *queue = &ctrl->queues[idx];
+ bool poll = nvme_rdma_poll_queue(queue);
int ret;
if (idx)
- ret = nvmf_connect_io_queue(&ctrl->ctrl, idx);
+ ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, poll);
else
ret = nvmf_connect_admin_queue(&ctrl->ctrl);
if (!ret)
- set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags);
+ set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
else
dev_info(ctrl->ctrl.device,
"failed to connect queue: %d ret=%d\n", idx, ret);
@@ -645,6 +661,9 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
nr_io_queues = min_t(unsigned int, nr_io_queues,
ibdev->num_comp_vectors);
+ nr_io_queues += min(opts->nr_write_queues, num_online_cpus());
+ nr_io_queues += min(opts->nr_poll_queues, num_online_cpus());
+
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret)
return ret;
@@ -694,7 +713,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->ops = &nvme_rdma_admin_mq_ops;
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
set->reserved_tags = 2; /* connect + keep-alive */
- set->numa_node = NUMA_NO_NODE;
+ set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
SG_CHUNK_SIZE * sizeof(struct scatterlist);
set->driver_data = ctrl;
@@ -707,13 +726,14 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->ops = &nvme_rdma_mq_ops;
set->queue_depth = nctrl->sqsize + 1;
set->reserved_tags = 1; /* fabric connect */
- set->numa_node = NUMA_NO_NODE;
+ set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
SG_CHUNK_SIZE * sizeof(struct scatterlist);
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
+ set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
}
ret = blk_mq_alloc_tag_set(set);
@@ -763,6 +783,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
return error;
ctrl->device = ctrl->queues[0].device;
+ ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
@@ -1411,12 +1432,11 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
WARN_ON_ONCE(ret);
}
-static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
- struct nvme_completion *cqe, struct ib_wc *wc, int tag)
+static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
+ struct nvme_completion *cqe, struct ib_wc *wc)
{
struct request *rq;
struct nvme_rdma_request *req;
- int ret = 0;
rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
if (!rq) {
@@ -1424,7 +1444,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
"tag 0x%x on QP %#x not found\n",
cqe->command_id, queue->qp->qp_num);
nvme_rdma_error_recovery(queue->ctrl);
- return ret;
+ return;
}
req = blk_mq_rq_to_pdu(rq);
@@ -1439,6 +1459,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
nvme_rdma_error_recovery(queue->ctrl);
}
} else if (req->mr) {
+ int ret;
+
ret = nvme_rdma_inv_rkey(queue, req);
if (unlikely(ret < 0)) {
dev_err(queue->ctrl->ctrl.device,
@@ -1447,19 +1469,14 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
nvme_rdma_error_recovery(queue->ctrl);
}
/* the local invalidation completion will end the request */
- return 0;
+ return;
}
- if (refcount_dec_and_test(&req->ref)) {
- if (rq->tag == tag)
- ret = 1;
+ if (refcount_dec_and_test(&req->ref))
nvme_end_request(rq, req->status, req->result);
- }
-
- return ret;
}
-static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
+static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvme_rdma_qe *qe =
container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
@@ -1467,11 +1484,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
struct ib_device *ibdev = queue->device->dev;
struct nvme_completion *cqe = qe->data;
const size_t len = sizeof(struct nvme_completion);
- int ret = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
nvme_rdma_wr_error(cq, wc, "RECV");
- return 0;
+ return;
}
ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE);
@@ -1486,16 +1502,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
else
- ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag);
+ nvme_rdma_process_nvme_rsp(queue, cqe, wc);
ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE);
nvme_rdma_post_recv(queue, qe);
- return ret;
-}
-
-static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
-{
- __nvme_rdma_recv_done(cq, wc, -1);
}
static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue)
@@ -1749,25 +1759,11 @@ err:
return BLK_STS_IOERR;
}
-static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
{
struct nvme_rdma_queue *queue = hctx->driver_data;
- struct ib_cq *cq = queue->ib_cq;
- struct ib_wc wc;
- int found = 0;
-
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- struct ib_cqe *cqe = wc.wr_cqe;
-
- if (cqe) {
- if (cqe->done == nvme_rdma_recv_done)
- found |= __nvme_rdma_recv_done(cq, &wc, tag);
- else
- cqe->done(cq, &wc);
- }
- }
- return found;
+ return ib_process_cq_direct(queue->ib_cq, -1);
}
static void nvme_rdma_complete_rq(struct request *rq)
@@ -1782,7 +1778,36 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
{
struct nvme_rdma_ctrl *ctrl = set->driver_data;
- return blk_mq_rdma_map_queues(set, ctrl->device->dev, 0);
+ set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+ set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues;
+ if (ctrl->ctrl.opts->nr_write_queues) {
+ /* separate read/write queues */
+ set->map[HCTX_TYPE_DEFAULT].nr_queues =
+ ctrl->ctrl.opts->nr_write_queues;
+ set->map[HCTX_TYPE_READ].queue_offset =
+ ctrl->ctrl.opts->nr_write_queues;
+ } else {
+ /* mixed read/write queues */
+ set->map[HCTX_TYPE_DEFAULT].nr_queues =
+ ctrl->ctrl.opts->nr_io_queues;
+ set->map[HCTX_TYPE_READ].queue_offset = 0;
+ }
+ blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
+ ctrl->device->dev, 0);
+ blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
+ ctrl->device->dev, 0);
+
+ if (ctrl->ctrl.opts->nr_poll_queues) {
+ set->map[HCTX_TYPE_POLL].nr_queues =
+ ctrl->ctrl.opts->nr_poll_queues;
+ set->map[HCTX_TYPE_POLL].queue_offset =
+ ctrl->ctrl.opts->nr_io_queues;
+ if (ctrl->ctrl.opts->nr_write_queues)
+ set->map[HCTX_TYPE_POLL].queue_offset +=
+ ctrl->ctrl.opts->nr_write_queues;
+ blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+ }
+ return 0;
}
static const struct blk_mq_ops nvme_rdma_mq_ops = {
@@ -1791,9 +1816,9 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
.init_request = nvme_rdma_init_request,
.exit_request = nvme_rdma_exit_request,
.init_hctx = nvme_rdma_init_hctx,
- .poll = nvme_rdma_poll,
.timeout = nvme_rdma_timeout,
.map_queues = nvme_rdma_map_queues,
+ .poll = nvme_rdma_poll,
};
static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
@@ -1938,7 +1963,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
- ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
+ ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
+ opts->nr_poll_queues + 1;
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
@@ -1989,7 +2015,8 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
.module = THIS_MODULE,
.required_opts = NVMF_OPT_TRADDR,
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
- NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO,
+ NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
+ NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES,
.create_ctrl = nvme_rdma_create_ctrl,
};
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
new file mode 100644
index 000000000000..de174912445e
--- /dev/null
+++ b/drivers/nvme/host/tcp.c
@@ -0,0 +1,2278 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe over Fabrics TCP host.
+ * Copyright (c) 2018 Lightbits Labs. All rights reserved.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/nvme-tcp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/blk-mq.h>
+#include <crypto/hash.h>
+
+#include "nvme.h"
+#include "fabrics.h"
+
+struct nvme_tcp_queue;
+
+enum nvme_tcp_send_state {
+ NVME_TCP_SEND_CMD_PDU = 0,
+ NVME_TCP_SEND_H2C_PDU,
+ NVME_TCP_SEND_DATA,
+ NVME_TCP_SEND_DDGST,
+};
+
+struct nvme_tcp_request {
+ struct nvme_request req;
+ void *pdu;
+ struct nvme_tcp_queue *queue;
+ u32 data_len;
+ u32 pdu_len;
+ u32 pdu_sent;
+ u16 ttag;
+ struct list_head entry;
+ __le32 ddgst;
+
+ struct bio *curr_bio;
+ struct iov_iter iter;
+
+ /* send state */
+ size_t offset;
+ size_t data_sent;
+ enum nvme_tcp_send_state state;
+};
+
+enum nvme_tcp_queue_flags {
+ NVME_TCP_Q_ALLOCATED = 0,
+ NVME_TCP_Q_LIVE = 1,
+};
+
+enum nvme_tcp_recv_state {
+ NVME_TCP_RECV_PDU = 0,
+ NVME_TCP_RECV_DATA,
+ NVME_TCP_RECV_DDGST,
+};
+
+struct nvme_tcp_ctrl;
+struct nvme_tcp_queue {
+ struct socket *sock;
+ struct work_struct io_work;
+ int io_cpu;
+
+ spinlock_t lock;
+ struct list_head send_list;
+
+ /* recv state */
+ void *pdu;
+ int pdu_remaining;
+ int pdu_offset;
+ size_t data_remaining;
+ size_t ddgst_remaining;
+
+ /* send state */
+ struct nvme_tcp_request *request;
+
+ int queue_size;
+ size_t cmnd_capsule_len;
+ struct nvme_tcp_ctrl *ctrl;
+ unsigned long flags;
+ bool rd_enabled;
+
+ bool hdr_digest;
+ bool data_digest;
+ struct ahash_request *rcv_hash;
+ struct ahash_request *snd_hash;
+ __le32 exp_ddgst;
+ __le32 recv_ddgst;
+
+ struct page_frag_cache pf_cache;
+
+ void (*state_change)(struct sock *);
+ void (*data_ready)(struct sock *);
+ void (*write_space)(struct sock *);
+};
+
+struct nvme_tcp_ctrl {
+ /* read only in the hot path */
+ struct nvme_tcp_queue *queues;
+ struct blk_mq_tag_set tag_set;
+
+ /* other member variables */
+ struct list_head list;
+ struct blk_mq_tag_set admin_tag_set;
+ struct sockaddr_storage addr;
+ struct sockaddr_storage src_addr;
+ struct nvme_ctrl ctrl;
+
+ struct work_struct err_work;
+ struct delayed_work connect_work;
+ struct nvme_tcp_request async_req;
+};
+
+static LIST_HEAD(nvme_tcp_ctrl_list);
+static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
+static struct workqueue_struct *nvme_tcp_wq;
+static struct blk_mq_ops nvme_tcp_mq_ops;
+static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+
+static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
+{
+ return container_of(ctrl, struct nvme_tcp_ctrl, ctrl);
+}
+
+static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
+{
+ return queue - queue->ctrl->queues;
+}
+
+static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
+{
+ u32 queue_idx = nvme_tcp_queue_id(queue);
+
+ if (queue_idx == 0)
+ return queue->ctrl->admin_tag_set.tags[queue_idx];
+ return queue->ctrl->tag_set.tags[queue_idx - 1];
+}
+
+static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
+{
+ return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
+}
+
+static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
+{
+ return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
+}
+
+static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
+{
+ return queue->cmnd_capsule_len - sizeof(struct nvme_command);
+}
+
+static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
+{
+ return req == &req->queue->ctrl->async_req;
+}
+
+static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
+{
+ struct request *rq;
+ unsigned int bytes;
+
+ if (unlikely(nvme_tcp_async_req(req)))
+ return false; /* async events don't have a request */
+
+ rq = blk_mq_rq_from_pdu(req);
+ bytes = blk_rq_payload_bytes(rq);
+
+ return rq_data_dir(rq) == WRITE && bytes &&
+ bytes <= nvme_tcp_inline_data_size(req->queue);
+}
+
+static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
+{
+ return req->iter.bvec->bv_page;
+}
+
+static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req)
+{
+ return req->iter.bvec->bv_offset + req->iter.iov_offset;
+}
+
+static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req)
+{
+ return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset,
+ req->pdu_len - req->pdu_sent);
+}
+
+static inline size_t nvme_tcp_req_offset(struct nvme_tcp_request *req)
+{
+ return req->iter.iov_offset;
+}
+
+static inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req)
+{
+ return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ?
+ req->pdu_len - req->pdu_sent : 0;
+}
+
+static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
+ int len)
+{
+ return nvme_tcp_pdu_data_left(req) <= len;
+}
+
+static void nvme_tcp_init_iter(struct nvme_tcp_request *req,
+ unsigned int dir)
+{
+ struct request *rq = blk_mq_rq_from_pdu(req);
+ struct bio_vec *vec;
+ unsigned int size;
+ int nsegs;
+ size_t offset;
+
+ if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ vec = &rq->special_vec;
+ nsegs = 1;
+ size = blk_rq_payload_bytes(rq);
+ offset = 0;
+ } else {
+ struct bio *bio = req->curr_bio;
+
+ vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
+ nsegs = bio_segments(bio);
+ size = bio->bi_iter.bi_size;
+ offset = bio->bi_iter.bi_bvec_done;
+ }
+
+ iov_iter_bvec(&req->iter, dir, vec, nsegs, size);
+ req->iter.iov_offset = offset;
+}
+
+static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
+ int len)
+{
+ req->data_sent += len;
+ req->pdu_sent += len;
+ iov_iter_advance(&req->iter, len);
+ if (!iov_iter_count(&req->iter) &&
+ req->data_sent < req->data_len) {
+ req->curr_bio = req->curr_bio->bi_next;
+ nvme_tcp_init_iter(req, WRITE);
+ }
+}
+
+static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req)
+{
+ struct nvme_tcp_queue *queue = req->queue;
+
+ spin_lock(&queue->lock);
+ list_add_tail(&req->entry, &queue->send_list);
+ spin_unlock(&queue->lock);
+
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+}
+
+static inline struct nvme_tcp_request *
+nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
+{
+ struct nvme_tcp_request *req;
+
+ spin_lock(&queue->lock);
+ req = list_first_entry_or_null(&queue->send_list,
+ struct nvme_tcp_request, entry);
+ if (req)
+ list_del(&req->entry);
+ spin_unlock(&queue->lock);
+
+ return req;
+}
+
+static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
+ __le32 *dgst)
+{
+ ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
+ crypto_ahash_final(hash);
+}
+
+static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
+ struct page *page, off_t off, size_t len)
+{
+ struct scatterlist sg;
+
+ sg_init_marker(&sg, 1);
+ sg_set_page(&sg, page, len, off);
+ ahash_request_set_crypt(hash, &sg, NULL, len);
+ crypto_ahash_update(hash);
+}
+
+static inline void nvme_tcp_hdgst(struct ahash_request *hash,
+ void *pdu, size_t len)
+{
+ struct scatterlist sg;
+
+ sg_init_one(&sg, pdu, len);
+ ahash_request_set_crypt(hash, &sg, pdu + len, len);
+ crypto_ahash_digest(hash);
+}
+
+static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
+ void *pdu, size_t pdu_len)
+{
+ struct nvme_tcp_hdr *hdr = pdu;
+ __le32 recv_digest;
+ __le32 exp_digest;
+
+ if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d: header digest flag is cleared\n",
+ nvme_tcp_queue_id(queue));
+ return -EPROTO;
+ }
+
+ recv_digest = *(__le32 *)(pdu + hdr->hlen);
+ nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
+ exp_digest = *(__le32 *)(pdu + hdr->hlen);
+ if (recv_digest != exp_digest) {
+ dev_err(queue->ctrl->ctrl.device,
+ "header digest error: recv %#x expected %#x\n",
+ le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
+{
+ struct nvme_tcp_hdr *hdr = pdu;
+ u8 digest_len = nvme_tcp_hdgst_len(queue);
+ u32 len;
+
+ len = le32_to_cpu(hdr->plen) - hdr->hlen -
+ ((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0);
+
+ if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d: data digest flag is cleared\n",
+ nvme_tcp_queue_id(queue));
+ return -EPROTO;
+ }
+ crypto_ahash_init(queue->rcv_hash);
+
+ return 0;
+}
+
+static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
+ struct request *rq, unsigned int hctx_idx)
+{
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+
+ page_frag_free(req->pdu);
+}
+
+static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
+ struct request *rq, unsigned int hctx_idx,
+ unsigned int numa_node)
+{
+ struct nvme_tcp_ctrl *ctrl = set->driver_data;
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
+ struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx];
+ u8 hdgst = nvme_tcp_hdgst_len(queue);
+
+ req->pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!req->pdu)
+ return -ENOMEM;
+
+ req->queue = queue;
+ nvme_req(rq)->ctrl = &ctrl->ctrl;
+
+ return 0;
+}
+
+static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned int hctx_idx)
+{
+ struct nvme_tcp_ctrl *ctrl = data;
+ struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];
+
+ hctx->driver_data = queue;
+ return 0;
+}
+
+static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned int hctx_idx)
+{
+ struct nvme_tcp_ctrl *ctrl = data;
+ struct nvme_tcp_queue *queue = &ctrl->queues[0];
+
+ hctx->driver_data = queue;
+ return 0;
+}
+
+static enum nvme_tcp_recv_state
+nvme_tcp_recv_state(struct nvme_tcp_queue *queue)
+{
+ return (queue->pdu_remaining) ? NVME_TCP_RECV_PDU :
+ (queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST :
+ NVME_TCP_RECV_DATA;
+}
+
+static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
+{
+ queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) +
+ nvme_tcp_hdgst_len(queue);
+ queue->pdu_offset = 0;
+ queue->data_remaining = -1;
+ queue->ddgst_remaining = 0;
+}
+
+static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
+{
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ return;
+
+ queue_work(nvme_wq, &to_tcp_ctrl(ctrl)->err_work);
+}
+
+static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
+ struct nvme_completion *cqe)
+{
+ struct request *rq;
+
+ rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
+ if (!rq) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d tag 0x%x not found\n",
+ nvme_tcp_queue_id(queue), cqe->command_id);
+ nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+ return -EINVAL;
+ }
+
+ nvme_end_request(rq, cqe->status, cqe->result);
+
+ return 0;
+}
+
+static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
+ struct nvme_tcp_data_pdu *pdu)
+{
+ struct request *rq;
+
+ rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ if (!rq) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d tag %#x not found\n",
+ nvme_tcp_queue_id(queue), pdu->command_id);
+ return -ENOENT;
+ }
+
+ if (!blk_rq_payload_bytes(rq)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d tag %#x unexpected data\n",
+ nvme_tcp_queue_id(queue), rq->tag);
+ return -EIO;
+ }
+
+ queue->data_remaining = le32_to_cpu(pdu->data_length);
+
+ return 0;
+
+}
+
+static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
+ struct nvme_tcp_rsp_pdu *pdu)
+{
+ struct nvme_completion *cqe = &pdu->cqe;
+ int ret = 0;
+
+ /*
+ * AEN requests are special as they don't time out and can
+ * survive any kind of queue freeze and often don't respond to
+ * aborts. We don't even bother to allocate a struct request
+ * for them but rather special case them here.
+ */
+ if (unlikely(nvme_tcp_queue_id(queue) == 0 &&
+ cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
+ nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
+ &cqe->result);
+ else
+ ret = nvme_tcp_process_nvme_cqe(queue, cqe);
+
+ return ret;
+}
+
+static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
+ struct nvme_tcp_r2t_pdu *pdu)
+{
+ struct nvme_tcp_data_pdu *data = req->pdu;
+ struct nvme_tcp_queue *queue = req->queue;
+ struct request *rq = blk_mq_rq_from_pdu(req);
+ u8 hdgst = nvme_tcp_hdgst_len(queue);
+ u8 ddgst = nvme_tcp_ddgst_len(queue);
+
+ req->pdu_len = le32_to_cpu(pdu->r2t_length);
+ req->pdu_sent = 0;
+
+ if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "req %d r2t len %u exceeded data len %u (%zu sent)\n",
+ rq->tag, req->pdu_len, req->data_len,
+ req->data_sent);
+ return -EPROTO;
+ }
+
+ if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "req %d unexpected r2t offset %u (expected %zu)\n",
+ rq->tag, le32_to_cpu(pdu->r2t_offset),
+ req->data_sent);
+ return -EPROTO;
+ }
+
+ memset(data, 0, sizeof(*data));
+ data->hdr.type = nvme_tcp_h2c_data;
+ data->hdr.flags = NVME_TCP_F_DATA_LAST;
+ if (queue->hdr_digest)
+ data->hdr.flags |= NVME_TCP_F_HDGST;
+ if (queue->data_digest)
+ data->hdr.flags |= NVME_TCP_F_DDGST;
+ data->hdr.hlen = sizeof(*data);
+ data->hdr.pdo = data->hdr.hlen + hdgst;
+ data->hdr.plen =
+ cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
+ data->ttag = pdu->ttag;
+ data->command_id = rq->tag;
+ data->data_offset = cpu_to_le32(req->data_sent);
+ data->data_length = cpu_to_le32(req->pdu_len);
+ return 0;
+}
+
+static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
+ struct nvme_tcp_r2t_pdu *pdu)
+{
+ struct nvme_tcp_request *req;
+ struct request *rq;
+ int ret;
+
+ rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ if (!rq) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d tag %#x not found\n",
+ nvme_tcp_queue_id(queue), pdu->command_id);
+ return -ENOENT;
+ }
+ req = blk_mq_rq_to_pdu(rq);
+
+ ret = nvme_tcp_setup_h2c_data_pdu(req, pdu);
+ if (unlikely(ret))
+ return ret;
+
+ req->state = NVME_TCP_SEND_H2C_PDU;
+ req->offset = 0;
+
+ nvme_tcp_queue_request(req);
+
+ return 0;
+}
+
+static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
+ unsigned int *offset, size_t *len)
+{
+ struct nvme_tcp_hdr *hdr;
+ char *pdu = queue->pdu;
+ size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining);
+ int ret;
+
+ ret = skb_copy_bits(skb, *offset,
+ &pdu[queue->pdu_offset], rcv_len);
+ if (unlikely(ret))
+ return ret;
+
+ queue->pdu_remaining -= rcv_len;
+ queue->pdu_offset += rcv_len;
+ *offset += rcv_len;
+ *len -= rcv_len;
+ if (queue->pdu_remaining)
+ return 0;
+
+ hdr = queue->pdu;
+ if (queue->hdr_digest) {
+ ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen);
+ if (unlikely(ret))
+ return ret;
+ }
+
+
+ if (queue->data_digest) {
+ ret = nvme_tcp_check_ddgst(queue, queue->pdu);
+ if (unlikely(ret))
+ return ret;
+ }
+
+ switch (hdr->type) {
+ case nvme_tcp_c2h_data:
+ ret = nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
+ break;
+ case nvme_tcp_rsp:
+ nvme_tcp_init_recv_ctx(queue);
+ ret = nvme_tcp_handle_comp(queue, (void *)queue->pdu);
+ break;
+ case nvme_tcp_r2t:
+ nvme_tcp_init_recv_ctx(queue);
+ ret = nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
+ break;
+ default:
+ dev_err(queue->ctrl->ctrl.device,
+ "unsupported pdu type (%d)\n", hdr->type);
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
+ unsigned int *offset, size_t *len)
+{
+ struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
+ struct nvme_tcp_request *req;
+ struct request *rq;
+
+ rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ if (!rq) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d tag %#x not found\n",
+ nvme_tcp_queue_id(queue), pdu->command_id);
+ return -ENOENT;
+ }
+ req = blk_mq_rq_to_pdu(rq);
+
+ while (true) {
+ int recv_len, ret;
+
+ recv_len = min_t(size_t, *len, queue->data_remaining);
+ if (!recv_len)
+ break;
+
+ if (!iov_iter_count(&req->iter)) {
+ req->curr_bio = req->curr_bio->bi_next;
+
+ /*
+ * If we don`t have any bios it means that controller
+ * sent more data than we requested, hence error
+ */
+ if (!req->curr_bio) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d no space in request %#x",
+ nvme_tcp_queue_id(queue), rq->tag);
+ nvme_tcp_init_recv_ctx(queue);
+ return -EIO;
+ }
+ nvme_tcp_init_iter(req, READ);
+ }
+
+ /* we can read only from what is left in this bio */
+ recv_len = min_t(size_t, recv_len,
+ iov_iter_count(&req->iter));
+
+ if (queue->data_digest)
+ ret = skb_copy_and_hash_datagram_iter(skb, *offset,
+ &req->iter, recv_len, queue->rcv_hash);
+ else
+ ret = skb_copy_datagram_iter(skb, *offset,
+ &req->iter, recv_len);
+ if (ret) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d failed to copy request %#x data",
+ nvme_tcp_queue_id(queue), rq->tag);
+ return ret;
+ }
+
+ *len -= recv_len;
+ *offset += recv_len;
+ queue->data_remaining -= recv_len;
+ }
+
+ if (!queue->data_remaining) {
+ if (queue->data_digest) {
+ nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
+ queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
+ } else {
+ nvme_tcp_init_recv_ctx(queue);
+ }
+ }
+
+ return 0;
+}
+
+static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
+ struct sk_buff *skb, unsigned int *offset, size_t *len)
+{
+ char *ddgst = (char *)&queue->recv_ddgst;
+ size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
+ off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
+ int ret;
+
+ ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
+ if (unlikely(ret))
+ return ret;
+
+ queue->ddgst_remaining -= recv_len;
+ *offset += recv_len;
+ *len -= recv_len;
+ if (queue->ddgst_remaining)
+ return 0;
+
+ if (queue->recv_ddgst != queue->exp_ddgst) {
+ dev_err(queue->ctrl->ctrl.device,
+ "data digest error: recv %#x expected %#x\n",
+ le32_to_cpu(queue->recv_ddgst),
+ le32_to_cpu(queue->exp_ddgst));
+ return -EIO;
+ }
+
+ nvme_tcp_init_recv_ctx(queue);
+ return 0;
+}
+
+static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
+ unsigned int offset, size_t len)
+{
+ struct nvme_tcp_queue *queue = desc->arg.data;
+ size_t consumed = len;
+ int result;
+
+ while (len) {
+ switch (nvme_tcp_recv_state(queue)) {
+ case NVME_TCP_RECV_PDU:
+ result = nvme_tcp_recv_pdu(queue, skb, &offset, &len);
+ break;
+ case NVME_TCP_RECV_DATA:
+ result = nvme_tcp_recv_data(queue, skb, &offset, &len);
+ break;
+ case NVME_TCP_RECV_DDGST:
+ result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len);
+ break;
+ default:
+ result = -EFAULT;
+ }
+ if (result) {
+ dev_err(queue->ctrl->ctrl.device,
+ "receive failed: %d\n", result);
+ queue->rd_enabled = false;
+ nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+ return result;
+ }
+ }
+
+ return consumed;
+}
+
+static void nvme_tcp_data_ready(struct sock *sk)
+{
+ struct nvme_tcp_queue *queue;
+
+ read_lock(&sk->sk_callback_lock);
+ queue = sk->sk_user_data;
+ if (likely(queue && queue->rd_enabled))
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ read_unlock(&sk->sk_callback_lock);
+}
+
+static void nvme_tcp_write_space(struct sock *sk)
+{
+ struct nvme_tcp_queue *queue;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ queue = sk->sk_user_data;
+ if (likely(queue && sk_stream_is_writeable(sk))) {
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void nvme_tcp_state_change(struct sock *sk)
+{
+ struct nvme_tcp_queue *queue;
+
+ read_lock(&sk->sk_callback_lock);
+ queue = sk->sk_user_data;
+ if (!queue)
+ goto done;
+
+ switch (sk->sk_state) {
+ case TCP_CLOSE:
+ case TCP_CLOSE_WAIT:
+ case TCP_LAST_ACK:
+ case TCP_FIN_WAIT1:
+ case TCP_FIN_WAIT2:
+ /* fallthrough */
+ nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+ break;
+ default:
+ dev_info(queue->ctrl->ctrl.device,
+ "queue %d socket state %d\n",
+ nvme_tcp_queue_id(queue), sk->sk_state);
+ }
+
+ queue->state_change(sk);
+done:
+ read_unlock(&sk->sk_callback_lock);
+}
+
+static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
+{
+ queue->request = NULL;
+}
+
+static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
+{
+ union nvme_result res = {};
+
+ nvme_end_request(blk_mq_rq_from_pdu(req),
+ cpu_to_le16(NVME_SC_DATA_XFER_ERROR), res);
+}
+
+static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
+{
+ struct nvme_tcp_queue *queue = req->queue;
+
+ while (true) {
+ struct page *page = nvme_tcp_req_cur_page(req);
+ size_t offset = nvme_tcp_req_cur_offset(req);
+ size_t len = nvme_tcp_req_cur_length(req);
+ bool last = nvme_tcp_pdu_last_send(req, len);
+ int ret, flags = MSG_DONTWAIT;
+
+ if (last && !queue->data_digest)
+ flags |= MSG_EOR;
+ else
+ flags |= MSG_MORE;
+
+ ret = kernel_sendpage(queue->sock, page, offset, len, flags);
+ if (ret <= 0)
+ return ret;
+
+ nvme_tcp_advance_req(req, ret);
+ if (queue->data_digest)
+ nvme_tcp_ddgst_update(queue->snd_hash, page,
+ offset, ret);
+
+ /* fully successful last write*/
+ if (last && ret == len) {
+ if (queue->data_digest) {
+ nvme_tcp_ddgst_final(queue->snd_hash,
+ &req->ddgst);
+ req->state = NVME_TCP_SEND_DDGST;
+ req->offset = 0;
+ } else {
+ nvme_tcp_done_send_req(queue);
+ }
+ return 1;
+ }
+ }
+ return -EAGAIN;
+}
+
+static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
+{
+ struct nvme_tcp_queue *queue = req->queue;
+ struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+ bool inline_data = nvme_tcp_has_inline_data(req);
+ int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR);
+ u8 hdgst = nvme_tcp_hdgst_len(queue);
+ int len = sizeof(*pdu) + hdgst - req->offset;
+ int ret;
+
+ if (queue->hdr_digest && !req->offset)
+ nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+
+ ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
+ offset_in_page(pdu) + req->offset, len, flags);
+ if (unlikely(ret <= 0))
+ return ret;
+
+ len -= ret;
+ if (!len) {
+ if (inline_data) {
+ req->state = NVME_TCP_SEND_DATA;
+ if (queue->data_digest)
+ crypto_ahash_init(queue->snd_hash);
+ nvme_tcp_init_iter(req, WRITE);
+ } else {
+ nvme_tcp_done_send_req(queue);
+ }
+ return 1;
+ }
+ req->offset += ret;
+
+ return -EAGAIN;
+}
+
+static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
+{
+ struct nvme_tcp_queue *queue = req->queue;
+ struct nvme_tcp_data_pdu *pdu = req->pdu;
+ u8 hdgst = nvme_tcp_hdgst_len(queue);
+ int len = sizeof(*pdu) - req->offset + hdgst;
+ int ret;
+
+ if (queue->hdr_digest && !req->offset)
+ nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+
+ ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
+ offset_in_page(pdu) + req->offset, len,
+ MSG_DONTWAIT | MSG_MORE);
+ if (unlikely(ret <= 0))
+ return ret;
+
+ len -= ret;
+ if (!len) {
+ req->state = NVME_TCP_SEND_DATA;
+ if (queue->data_digest)
+ crypto_ahash_init(queue->snd_hash);
+ if (!req->data_sent)
+ nvme_tcp_init_iter(req, WRITE);
+ return 1;
+ }
+ req->offset += ret;
+
+ return -EAGAIN;
+}
+
+static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
+{
+ struct nvme_tcp_queue *queue = req->queue;
+ int ret;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
+ struct kvec iov = {
+ .iov_base = &req->ddgst + req->offset,
+ .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
+ };
+
+ ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
+ if (unlikely(ret <= 0))
+ return ret;
+
+ if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) {
+ nvme_tcp_done_send_req(queue);
+ return 1;
+ }
+
+ req->offset += ret;
+ return -EAGAIN;
+}
+
+static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
+{
+ struct nvme_tcp_request *req;
+ int ret = 1;
+
+ if (!queue->request) {
+ queue->request = nvme_tcp_fetch_request(queue);
+ if (!queue->request)
+ return 0;
+ }
+ req = queue->request;
+
+ if (req->state == NVME_TCP_SEND_CMD_PDU) {
+ ret = nvme_tcp_try_send_cmd_pdu(req);
+ if (ret <= 0)
+ goto done;
+ if (!nvme_tcp_has_inline_data(req))
+ return ret;
+ }
+
+ if (req->state == NVME_TCP_SEND_H2C_PDU) {
+ ret = nvme_tcp_try_send_data_pdu(req);
+ if (ret <= 0)
+ goto done;
+ }
+
+ if (req->state == NVME_TCP_SEND_DATA) {
+ ret = nvme_tcp_try_send_data(req);
+ if (ret <= 0)
+ goto done;
+ }
+
+ if (req->state == NVME_TCP_SEND_DDGST)
+ ret = nvme_tcp_try_send_ddgst(req);
+done:
+ if (ret == -EAGAIN)
+ ret = 0;
+ return ret;
+}
+
+static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
+{
+ struct sock *sk = queue->sock->sk;
+ read_descriptor_t rd_desc;
+ int consumed;
+
+ rd_desc.arg.data = queue;
+ rd_desc.count = 1;
+ lock_sock(sk);
+ consumed = tcp_read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
+ release_sock(sk);
+ return consumed;
+}
+
+static void nvme_tcp_io_work(struct work_struct *w)
+{
+ struct nvme_tcp_queue *queue =
+ container_of(w, struct nvme_tcp_queue, io_work);
+ unsigned long start = jiffies + msecs_to_jiffies(1);
+
+ do {
+ bool pending = false;
+ int result;
+
+ result = nvme_tcp_try_send(queue);
+ if (result > 0) {
+ pending = true;
+ } else if (unlikely(result < 0)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "failed to send request %d\n", result);
+ if (result != -EPIPE)
+ nvme_tcp_fail_request(queue->request);
+ nvme_tcp_done_send_req(queue);
+ return;
+ }
+
+ result = nvme_tcp_try_recv(queue);
+ if (result > 0)
+ pending = true;
+
+ if (!pending)
+ return;
+
+ } while (time_after(jiffies, start)); /* quota is exhausted */
+
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+}
+
+static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
+
+ ahash_request_free(queue->rcv_hash);
+ ahash_request_free(queue->snd_hash);
+ crypto_free_ahash(tfm);
+}
+
+static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
+{
+ struct crypto_ahash *tfm;
+
+ tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!queue->snd_hash)
+ goto free_tfm;
+ ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
+
+ queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!queue->rcv_hash)
+ goto free_snd_hash;
+ ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
+
+ return 0;
+free_snd_hash:
+ ahash_request_free(queue->snd_hash);
+free_tfm:
+ crypto_free_ahash(tfm);
+ return -ENOMEM;
+}
+
+static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
+{
+ struct nvme_tcp_request *async = &ctrl->async_req;
+
+ page_frag_free(async->pdu);
+}
+
+static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
+{
+ struct nvme_tcp_queue *queue = &ctrl->queues[0];
+ struct nvme_tcp_request *async = &ctrl->async_req;
+ u8 hdgst = nvme_tcp_hdgst_len(queue);
+
+ async->pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!async->pdu)
+ return -ENOMEM;
+
+ async->queue = &ctrl->queues[0];
+ return 0;
+}
+
+static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
+{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[qid];
+
+ if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+ return;
+
+ if (queue->hdr_digest || queue->data_digest)
+ nvme_tcp_free_crypto(queue);
+
+ sock_release(queue->sock);
+ kfree(queue->pdu);
+}
+
+static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
+{
+ struct nvme_tcp_icreq_pdu *icreq;
+ struct nvme_tcp_icresp_pdu *icresp;
+ struct msghdr msg = {};
+ struct kvec iov;
+ bool ctrl_hdgst, ctrl_ddgst;
+ int ret;
+
+ icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
+ if (!icreq)
+ return -ENOMEM;
+
+ icresp = kzalloc(sizeof(*icresp), GFP_KERNEL);
+ if (!icresp) {
+ ret = -ENOMEM;
+ goto free_icreq;
+ }
+
+ icreq->hdr.type = nvme_tcp_icreq;
+ icreq->hdr.hlen = sizeof(*icreq);
+ icreq->hdr.pdo = 0;
+ icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen);
+ icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
+ icreq->maxr2t = 0; /* single inflight r2t supported */
+ icreq->hpda = 0; /* no alignment constraint */
+ if (queue->hdr_digest)
+ icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
+ if (queue->data_digest)
+ icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE;
+
+ iov.iov_base = icreq;
+ iov.iov_len = sizeof(*icreq);
+ ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
+ if (ret < 0)
+ goto free_icresp;
+
+ memset(&msg, 0, sizeof(msg));
+ iov.iov_base = icresp;
+ iov.iov_len = sizeof(*icresp);
+ ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
+ iov.iov_len, msg.msg_flags);
+ if (ret < 0)
+ goto free_icresp;
+
+ ret = -EINVAL;
+ if (icresp->hdr.type != nvme_tcp_icresp) {
+ pr_err("queue %d: bad type returned %d\n",
+ nvme_tcp_queue_id(queue), icresp->hdr.type);
+ goto free_icresp;
+ }
+
+ if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) {
+ pr_err("queue %d: bad pdu length returned %d\n",
+ nvme_tcp_queue_id(queue), icresp->hdr.plen);
+ goto free_icresp;
+ }
+
+ if (icresp->pfv != NVME_TCP_PFV_1_0) {
+ pr_err("queue %d: bad pfv returned %d\n",
+ nvme_tcp_queue_id(queue), icresp->pfv);
+ goto free_icresp;
+ }
+
+ ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE);
+ if ((queue->data_digest && !ctrl_ddgst) ||
+ (!queue->data_digest && ctrl_ddgst)) {
+ pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n",
+ nvme_tcp_queue_id(queue),
+ queue->data_digest ? "enabled" : "disabled",
+ ctrl_ddgst ? "enabled" : "disabled");
+ goto free_icresp;
+ }
+
+ ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE);
+ if ((queue->hdr_digest && !ctrl_hdgst) ||
+ (!queue->hdr_digest && ctrl_hdgst)) {
+ pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n",
+ nvme_tcp_queue_id(queue),
+ queue->hdr_digest ? "enabled" : "disabled",
+ ctrl_hdgst ? "enabled" : "disabled");
+ goto free_icresp;
+ }
+
+ if (icresp->cpda != 0) {
+ pr_err("queue %d: unsupported cpda returned %d\n",
+ nvme_tcp_queue_id(queue), icresp->cpda);
+ goto free_icresp;
+ }
+
+ ret = 0;
+free_icresp:
+ kfree(icresp);
+free_icreq:
+ kfree(icreq);
+ return ret;
+}
+
+static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
+ int qid, size_t queue_size)
+{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[qid];
+ struct linger sol = { .l_onoff = 1, .l_linger = 0 };
+ int ret, opt, rcv_pdu_size, n;
+
+ queue->ctrl = ctrl;
+ INIT_LIST_HEAD(&queue->send_list);
+ spin_lock_init(&queue->lock);
+ INIT_WORK(&queue->io_work, nvme_tcp_io_work);
+ queue->queue_size = queue_size;
+
+ if (qid > 0)
+ queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
+ else
+ queue->cmnd_capsule_len = sizeof(struct nvme_command) +
+ NVME_TCP_ADMIN_CCSZ;
+
+ ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &queue->sock);
+ if (ret) {
+ dev_err(ctrl->ctrl.device,
+ "failed to create socket: %d\n", ret);
+ return ret;
+ }
+
+ /* Single syn retry */
+ opt = 1;
+ ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT,
+ (char *)&opt, sizeof(opt));
+ if (ret) {
+ dev_err(ctrl->ctrl.device,
+ "failed to set TCP_SYNCNT sock opt %d\n", ret);
+ goto err_sock;
+ }
+
+ /* Set TCP no delay */
+ opt = 1;
+ ret = kernel_setsockopt(queue->sock, IPPROTO_TCP,
+ TCP_NODELAY, (char *)&opt, sizeof(opt));
+ if (ret) {
+ dev_err(ctrl->ctrl.device,
+ "failed to set TCP_NODELAY sock opt %d\n", ret);
+ goto err_sock;
+ }
+
+ /*
+ * Cleanup whatever is sitting in the TCP transmit queue on socket
+ * close. This is done to prevent stale data from being sent should
+ * the network connection be restored before TCP times out.
+ */
+ ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER,
+ (char *)&sol, sizeof(sol));
+ if (ret) {
+ dev_err(ctrl->ctrl.device,
+ "failed to set SO_LINGER sock opt %d\n", ret);
+ goto err_sock;
+ }
+
+ queue->sock->sk->sk_allocation = GFP_ATOMIC;
+ if (!qid)
+ n = 0;
+ else
+ n = (qid - 1) % num_online_cpus();
+ queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+ queue->request = NULL;
+ queue->data_remaining = 0;
+ queue->ddgst_remaining = 0;
+ queue->pdu_remaining = 0;
+ queue->pdu_offset = 0;
+ sk_set_memalloc(queue->sock->sk);
+
+ if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) {
+ ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr,
+ sizeof(ctrl->src_addr));
+ if (ret) {
+ dev_err(ctrl->ctrl.device,
+ "failed to bind queue %d socket %d\n",
+ qid, ret);
+ goto err_sock;
+ }
+ }
+
+ queue->hdr_digest = nctrl->opts->hdr_digest;
+ queue->data_digest = nctrl->opts->data_digest;
+ if (queue->hdr_digest || queue->data_digest) {
+ ret = nvme_tcp_alloc_crypto(queue);
+ if (ret) {
+ dev_err(ctrl->ctrl.device,
+ "failed to allocate queue %d crypto\n", qid);
+ goto err_sock;
+ }
+ }
+
+ rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
+ nvme_tcp_hdgst_len(queue);
+ queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
+ if (!queue->pdu) {
+ ret = -ENOMEM;
+ goto err_crypto;
+ }
+
+ dev_dbg(ctrl->ctrl.device, "connecting queue %d\n",
+ nvme_tcp_queue_id(queue));
+
+ ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr,
+ sizeof(ctrl->addr), 0);
+ if (ret) {
+ dev_err(ctrl->ctrl.device,
+ "failed to connect socket: %d\n", ret);
+ goto err_rcv_pdu;
+ }
+
+ ret = nvme_tcp_init_connection(queue);
+ if (ret)
+ goto err_init_connect;
+
+ queue->rd_enabled = true;
+ set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
+ nvme_tcp_init_recv_ctx(queue);
+
+ write_lock_bh(&queue->sock->sk->sk_callback_lock);
+ queue->sock->sk->sk_user_data = queue;
+ queue->state_change = queue->sock->sk->sk_state_change;
+ queue->data_ready = queue->sock->sk->sk_data_ready;
+ queue->write_space = queue->sock->sk->sk_write_space;
+ queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
+ queue->sock->sk->sk_state_change = nvme_tcp_state_change;
+ queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+ write_unlock_bh(&queue->sock->sk->sk_callback_lock);
+
+ return 0;
+
+err_init_connect:
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+err_rcv_pdu:
+ kfree(queue->pdu);
+err_crypto:
+ if (queue->hdr_digest || queue->data_digest)
+ nvme_tcp_free_crypto(queue);
+err_sock:
+ sock_release(queue->sock);
+ queue->sock = NULL;
+ return ret;
+}
+
+static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
+{
+ struct socket *sock = queue->sock;
+
+ write_lock_bh(&sock->sk->sk_callback_lock);
+ sock->sk->sk_user_data = NULL;
+ sock->sk->sk_data_ready = queue->data_ready;
+ sock->sk->sk_state_change = queue->state_change;
+ sock->sk->sk_write_space = queue->write_space;
+ write_unlock_bh(&sock->sk->sk_callback_lock);
+}
+
+static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
+{
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ nvme_tcp_restore_sock_calls(queue);
+ cancel_work_sync(&queue->io_work);
+}
+
+static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
+{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[qid];
+
+ if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
+ return;
+
+ __nvme_tcp_stop_queue(queue);
+}
+
+static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
+{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ int ret;
+
+ if (idx)
+ ret = nvmf_connect_io_queue(nctrl, idx, false);
+ else
+ ret = nvmf_connect_admin_queue(nctrl);
+
+ if (!ret) {
+ set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
+ } else {
+ __nvme_tcp_stop_queue(&ctrl->queues[idx]);
+ dev_err(nctrl->device,
+ "failed to connect queue: %d ret=%d\n", idx, ret);
+ }
+ return ret;
+}
+
+static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
+ bool admin)
+{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct blk_mq_tag_set *set;
+ int ret;
+
+ if (admin) {
+ set = &ctrl->admin_tag_set;
+ memset(set, 0, sizeof(*set));
+ set->ops = &nvme_tcp_admin_mq_ops;
+ set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
+ set->reserved_tags = 2; /* connect + keep-alive */
+ set->numa_node = NUMA_NO_NODE;
+ set->cmd_size = sizeof(struct nvme_tcp_request);
+ set->driver_data = ctrl;
+ set->nr_hw_queues = 1;
+ set->timeout = ADMIN_TIMEOUT;
+ } else {
+ set = &ctrl->tag_set;
+ memset(set, 0, sizeof(*set));
+ set->ops = &nvme_tcp_mq_ops;
+ set->queue_depth = nctrl->sqsize + 1;
+ set->reserved_tags = 1; /* fabric connect */
+ set->numa_node = NUMA_NO_NODE;
+ set->flags = BLK_MQ_F_SHOULD_MERGE;
+ set->cmd_size = sizeof(struct nvme_tcp_request);
+ set->driver_data = ctrl;
+ set->nr_hw_queues = nctrl->queue_count - 1;
+ set->timeout = NVME_IO_TIMEOUT;
+ set->nr_maps = 2 /* default + read */;
+ }
+
+ ret = blk_mq_alloc_tag_set(set);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return set;
+}
+
+static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
+{
+ if (to_tcp_ctrl(ctrl)->async_req.pdu) {
+ nvme_tcp_free_async_req(to_tcp_ctrl(ctrl));
+ to_tcp_ctrl(ctrl)->async_req.pdu = NULL;
+ }
+
+ nvme_tcp_free_queue(ctrl, 0);
+}
+
+static void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl)
+{
+ int i;
+
+ for (i = 1; i < ctrl->queue_count; i++)
+ nvme_tcp_free_queue(ctrl, i);
+}
+
+static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
+{
+ int i;
+
+ for (i = 1; i < ctrl->queue_count; i++)
+ nvme_tcp_stop_queue(ctrl, i);
+}
+
+static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
+{
+ int i, ret = 0;
+
+ for (i = 1; i < ctrl->queue_count; i++) {
+ ret = nvme_tcp_start_queue(ctrl, i);
+ if (ret)
+ goto out_stop_queues;
+ }
+
+ return 0;
+
+out_stop_queues:
+ for (i--; i >= 1; i--)
+ nvme_tcp_stop_queue(ctrl, i);
+ return ret;
+}
+
+static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
+ ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
+ if (ret)
+ return ret;
+
+ ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
+ if (ret)
+ goto out_free_queue;
+
+ return 0;
+
+out_free_queue:
+ nvme_tcp_free_queue(ctrl, 0);
+ return ret;
+}
+
+static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
+{
+ int i, ret;
+
+ for (i = 1; i < ctrl->queue_count; i++) {
+ ret = nvme_tcp_alloc_queue(ctrl, i,
+ ctrl->sqsize + 1);
+ if (ret)
+ goto out_free_queues;
+ }
+
+ return 0;
+
+out_free_queues:
+ for (i--; i >= 1; i--)
+ nvme_tcp_free_queue(ctrl, i);
+
+ return ret;
+}
+
+static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
+{
+ unsigned int nr_io_queues;
+
+ nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
+ nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
+
+ return nr_io_queues;
+}
+
+static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl)
+{
+ unsigned int nr_io_queues;
+ int ret;
+
+ nr_io_queues = nvme_tcp_nr_io_queues(ctrl);
+ ret = nvme_set_queue_count(ctrl, &nr_io_queues);
+ if (ret)
+ return ret;
+
+ ctrl->queue_count = nr_io_queues + 1;
+ if (ctrl->queue_count < 2)
+ return 0;
+
+ dev_info(ctrl->device,
+ "creating %d I/O queues.\n", nr_io_queues);
+
+ return nvme_tcp_alloc_io_queues(ctrl);
+}
+
+static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
+{
+ nvme_tcp_stop_io_queues(ctrl);
+ if (remove) {
+ if (ctrl->ops->flags & NVME_F_FABRICS)
+ blk_cleanup_queue(ctrl->connect_q);
+ blk_mq_free_tag_set(ctrl->tagset);
+ }
+ nvme_tcp_free_io_queues(ctrl);
+}
+
+static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
+{
+ int ret;
+
+ ret = nvme_alloc_io_queues(ctrl);
+ if (ret)
+ return ret;
+
+ if (new) {
+ ctrl->tagset = nvme_tcp_alloc_tagset(ctrl, false);
+ if (IS_ERR(ctrl->tagset)) {
+ ret = PTR_ERR(ctrl->tagset);
+ goto out_free_io_queues;
+ }
+
+ if (ctrl->ops->flags & NVME_F_FABRICS) {
+ ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
+ if (IS_ERR(ctrl->connect_q)) {
+ ret = PTR_ERR(ctrl->connect_q);
+ goto out_free_tag_set;
+ }
+ }
+ } else {
+ blk_mq_update_nr_hw_queues(ctrl->tagset,
+ ctrl->queue_count - 1);
+ }
+
+ ret = nvme_tcp_start_io_queues(ctrl);
+ if (ret)
+ goto out_cleanup_connect_q;
+
+ return 0;
+
+out_cleanup_connect_q:
+ if (new && (ctrl->ops->flags & NVME_F_FABRICS))
+ blk_cleanup_queue(ctrl->connect_q);
+out_free_tag_set:
+ if (new)
+ blk_mq_free_tag_set(ctrl->tagset);
+out_free_io_queues:
+ nvme_tcp_free_io_queues(ctrl);
+ return ret;
+}
+
+static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
+{
+ nvme_tcp_stop_queue(ctrl, 0);
+ if (remove) {
+ free_opal_dev(ctrl->opal_dev);
+ blk_cleanup_queue(ctrl->admin_q);
+ blk_mq_free_tag_set(ctrl->admin_tagset);
+ }
+ nvme_tcp_free_admin_queue(ctrl);
+}
+
+static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
+{
+ int error;
+
+ error = nvme_tcp_alloc_admin_queue(ctrl);
+ if (error)
+ return error;
+
+ if (new) {
+ ctrl->admin_tagset = nvme_tcp_alloc_tagset(ctrl, true);
+ if (IS_ERR(ctrl->admin_tagset)) {
+ error = PTR_ERR(ctrl->admin_tagset);
+ goto out_free_queue;
+ }
+
+ ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset);
+ if (IS_ERR(ctrl->admin_q)) {
+ error = PTR_ERR(ctrl->admin_q);
+ goto out_free_tagset;
+ }
+ }
+
+ error = nvme_tcp_start_queue(ctrl, 0);
+ if (error)
+ goto out_cleanup_queue;
+
+ error = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
+ if (error) {
+ dev_err(ctrl->device,
+ "prop_get NVME_REG_CAP failed\n");
+ goto out_stop_queue;
+ }
+
+ ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
+
+ error = nvme_enable_ctrl(ctrl, ctrl->cap);
+ if (error)
+ goto out_stop_queue;
+
+ error = nvme_init_identify(ctrl);
+ if (error)
+ goto out_stop_queue;
+
+ return 0;
+
+out_stop_queue:
+ nvme_tcp_stop_queue(ctrl, 0);
+out_cleanup_queue:
+ if (new)
+ blk_cleanup_queue(ctrl->admin_q);
+out_free_tagset:
+ if (new)
+ blk_mq_free_tag_set(ctrl->admin_tagset);
+out_free_queue:
+ nvme_tcp_free_admin_queue(ctrl);
+ return error;
+}
+
+static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
+ bool remove)
+{
+ blk_mq_quiesce_queue(ctrl->admin_q);
+ nvme_tcp_stop_queue(ctrl, 0);
+ blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl);
+ blk_mq_unquiesce_queue(ctrl->admin_q);
+ nvme_tcp_destroy_admin_queue(ctrl, remove);
+}
+
+static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
+ bool remove)
+{
+ if (ctrl->queue_count <= 1)
+ return;
+ nvme_stop_queues(ctrl);
+ nvme_tcp_stop_io_queues(ctrl);
+ blk_mq_tagset_busy_iter(ctrl->tagset, nvme_cancel_request, ctrl);
+ if (remove)
+ nvme_start_queues(ctrl);
+ nvme_tcp_destroy_io_queues(ctrl, remove);
+}
+
+static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
+{
+ /* If we are resetting/deleting then do nothing */
+ if (ctrl->state != NVME_CTRL_CONNECTING) {
+ WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
+ ctrl->state == NVME_CTRL_LIVE);
+ return;
+ }
+
+ if (nvmf_should_reconnect(ctrl)) {
+ dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
+ ctrl->opts->reconnect_delay);
+ queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
+ ctrl->opts->reconnect_delay * HZ);
+ } else {
+ dev_info(ctrl->device, "Removing controller...\n");
+ nvme_delete_ctrl(ctrl);
+ }
+}
+
+static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
+{
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ int ret = -EINVAL;
+
+ ret = nvme_tcp_configure_admin_queue(ctrl, new);
+ if (ret)
+ return ret;
+
+ if (ctrl->icdoff) {
+ dev_err(ctrl->device, "icdoff is not supported!\n");
+ goto destroy_admin;
+ }
+
+ if (opts->queue_size > ctrl->sqsize + 1)
+ dev_warn(ctrl->device,
+ "queue_size %zu > ctrl sqsize %u, clamping down\n",
+ opts->queue_size, ctrl->sqsize + 1);
+
+ if (ctrl->sqsize + 1 > ctrl->maxcmd) {
+ dev_warn(ctrl->device,
+ "sqsize %u > ctrl maxcmd %u, clamping down\n",
+ ctrl->sqsize + 1, ctrl->maxcmd);
+ ctrl->sqsize = ctrl->maxcmd - 1;
+ }
+
+ if (ctrl->queue_count > 1) {
+ ret = nvme_tcp_configure_io_queues(ctrl, new);
+ if (ret)
+ goto destroy_admin;
+ }
+
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
+ /* state change failure is ok if we're in DELETING state */
+ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
+ ret = -EINVAL;
+ goto destroy_io;
+ }
+
+ nvme_start_ctrl(ctrl);
+ return 0;
+
+destroy_io:
+ if (ctrl->queue_count > 1)
+ nvme_tcp_destroy_io_queues(ctrl, new);
+destroy_admin:
+ nvme_tcp_stop_queue(ctrl, 0);
+ nvme_tcp_destroy_admin_queue(ctrl, new);
+ return ret;
+}
+
+static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
+{
+ struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
+ struct nvme_tcp_ctrl, connect_work);
+ struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+
+ ++ctrl->nr_reconnects;
+
+ if (nvme_tcp_setup_ctrl(ctrl, false))
+ goto requeue;
+
+ dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
+ ctrl->nr_reconnects);
+
+ ctrl->nr_reconnects = 0;
+
+ return;
+
+requeue:
+ dev_info(ctrl->device, "Failed reconnect attempt %d\n",
+ ctrl->nr_reconnects);
+ nvme_tcp_reconnect_or_remove(ctrl);
+}
+
+static void nvme_tcp_error_recovery_work(struct work_struct *work)
+{
+ struct nvme_tcp_ctrl *tcp_ctrl = container_of(work,
+ struct nvme_tcp_ctrl, err_work);
+ struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+
+ nvme_stop_keep_alive(ctrl);
+ nvme_tcp_teardown_io_queues(ctrl, false);
+ /* unquiesce to fail fast pending requests */
+ nvme_start_queues(ctrl);
+ nvme_tcp_teardown_admin_queue(ctrl, false);
+
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
+ /* state change failure is ok if we're in DELETING state */
+ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
+ return;
+ }
+
+ nvme_tcp_reconnect_or_remove(ctrl);
+}
+
+static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
+{
+ nvme_tcp_teardown_io_queues(ctrl, shutdown);
+ if (shutdown)
+ nvme_shutdown_ctrl(ctrl);
+ else
+ nvme_disable_ctrl(ctrl, ctrl->cap);
+ nvme_tcp_teardown_admin_queue(ctrl, shutdown);
+}
+
+static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
+{
+ nvme_tcp_teardown_ctrl(ctrl, true);
+}
+
+static void nvme_reset_ctrl_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl =
+ container_of(work, struct nvme_ctrl, reset_work);
+
+ nvme_stop_ctrl(ctrl);
+ nvme_tcp_teardown_ctrl(ctrl, false);
+
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
+ /* state change failure is ok if we're in DELETING state */
+ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
+ return;
+ }
+
+ if (nvme_tcp_setup_ctrl(ctrl, false))
+ goto out_fail;
+
+ return;
+
+out_fail:
+ ++ctrl->nr_reconnects;
+ nvme_tcp_reconnect_or_remove(ctrl);
+}
+
+static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
+{
+ cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
+ cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+}
+
+static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
+{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+
+ if (list_empty(&ctrl->list))
+ goto free_ctrl;
+
+ mutex_lock(&nvme_tcp_ctrl_mutex);
+ list_del(&ctrl->list);
+ mutex_unlock(&nvme_tcp_ctrl_mutex);
+
+ nvmf_free_options(nctrl->opts);
+free_ctrl:
+ kfree(ctrl->queues);
+ kfree(ctrl);
+}
+
+static void nvme_tcp_set_sg_null(struct nvme_command *c)
+{
+ struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+
+ sg->addr = 0;
+ sg->length = 0;
+ sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
+ NVME_SGL_FMT_TRANSPORT_A;
+}
+
+static void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue,
+ struct nvme_command *c, u32 data_len)
+{
+ struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+
+ sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
+ sg->length = cpu_to_le32(data_len);
+ sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
+}
+
+static void nvme_tcp_set_sg_host_data(struct nvme_command *c,
+ u32 data_len)
+{
+ struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+
+ sg->addr = 0;
+ sg->length = cpu_to_le32(data_len);
+ sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
+ NVME_SGL_FMT_TRANSPORT_A;
+}
+
+static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
+{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg);
+ struct nvme_tcp_queue *queue = &ctrl->queues[0];
+ struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu;
+ struct nvme_command *cmd = &pdu->cmd;
+ u8 hdgst = nvme_tcp_hdgst_len(queue);
+
+ memset(pdu, 0, sizeof(*pdu));
+ pdu->hdr.type = nvme_tcp_cmd;
+ if (queue->hdr_digest)
+ pdu->hdr.flags |= NVME_TCP_F_HDGST;
+ pdu->hdr.hlen = sizeof(*pdu);
+ pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
+
+ cmd->common.opcode = nvme_admin_async_event;
+ cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
+ cmd->common.flags |= NVME_CMD_SGL_METABUF;
+ nvme_tcp_set_sg_null(cmd);
+
+ ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU;
+ ctrl->async_req.offset = 0;
+ ctrl->async_req.curr_bio = NULL;
+ ctrl->async_req.data_len = 0;
+
+ nvme_tcp_queue_request(&ctrl->async_req);
+}
+
+static enum blk_eh_timer_return
+nvme_tcp_timeout(struct request *rq, bool reserved)
+{
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
+ struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+
+ dev_dbg(ctrl->ctrl.device,
+ "queue %d: timeout request %#x type %d\n",
+ nvme_tcp_queue_id(req->queue), rq->tag,
+ pdu->hdr.type);
+
+ if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+ union nvme_result res = {};
+
+ nvme_req(rq)->flags |= NVME_REQ_CANCELLED;
+ nvme_end_request(rq, cpu_to_le16(NVME_SC_ABORT_REQ), res);
+ return BLK_EH_DONE;
+ }
+
+ /* queue error recovery */
+ nvme_tcp_error_recovery(&ctrl->ctrl);
+
+ return BLK_EH_RESET_TIMER;
+}
+
+static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
+ struct request *rq)
+{
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+ struct nvme_command *c = &pdu->cmd;
+
+ c->common.flags |= NVME_CMD_SGL_METABUF;
+
+ if (rq_data_dir(rq) == WRITE && req->data_len &&
+ req->data_len <= nvme_tcp_inline_data_size(queue))
+ nvme_tcp_set_sg_inline(queue, c, req->data_len);
+ else
+ nvme_tcp_set_sg_host_data(c, req->data_len);
+
+ return 0;
+}
+
+static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
+ struct request *rq)
+{
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+ struct nvme_tcp_queue *queue = req->queue;
+ u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
+ blk_status_t ret;
+
+ ret = nvme_setup_cmd(ns, rq, &pdu->cmd);
+ if (ret)
+ return ret;
+
+ req->state = NVME_TCP_SEND_CMD_PDU;
+ req->offset = 0;
+ req->data_sent = 0;
+ req->pdu_len = 0;
+ req->pdu_sent = 0;
+ req->data_len = blk_rq_payload_bytes(rq);
+ req->curr_bio = rq->bio;
+
+ if (rq_data_dir(rq) == WRITE &&
+ req->data_len <= nvme_tcp_inline_data_size(queue))
+ req->pdu_len = req->data_len;
+ else if (req->curr_bio)
+ nvme_tcp_init_iter(req, READ);
+
+ pdu->hdr.type = nvme_tcp_cmd;
+ pdu->hdr.flags = 0;
+ if (queue->hdr_digest)
+ pdu->hdr.flags |= NVME_TCP_F_HDGST;
+ if (queue->data_digest && req->pdu_len) {
+ pdu->hdr.flags |= NVME_TCP_F_DDGST;
+ ddgst = nvme_tcp_ddgst_len(queue);
+ }
+ pdu->hdr.hlen = sizeof(*pdu);
+ pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0;
+ pdu->hdr.plen =
+ cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst);
+
+ ret = nvme_tcp_map_data(queue, rq);
+ if (unlikely(ret)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "Failed to map data (%d)\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct nvme_ns *ns = hctx->queue->queuedata;
+ struct nvme_tcp_queue *queue = hctx->driver_data;
+ struct request *rq = bd->rq;
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+ bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
+ blk_status_t ret;
+
+ if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+ return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+
+ ret = nvme_tcp_setup_cmd_pdu(ns, rq);
+ if (unlikely(ret))
+ return ret;
+
+ blk_mq_start_request(rq);
+
+ nvme_tcp_queue_request(req);
+
+ return BLK_STS_OK;
+}
+
+static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
+{
+ struct nvme_tcp_ctrl *ctrl = set->driver_data;
+
+ set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+ set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues;
+ if (ctrl->ctrl.opts->nr_write_queues) {
+ /* separate read/write queues */
+ set->map[HCTX_TYPE_DEFAULT].nr_queues =
+ ctrl->ctrl.opts->nr_write_queues;
+ set->map[HCTX_TYPE_READ].queue_offset =
+ ctrl->ctrl.opts->nr_write_queues;
+ } else {
+ /* mixed read/write queues */
+ set->map[HCTX_TYPE_DEFAULT].nr_queues =
+ ctrl->ctrl.opts->nr_io_queues;
+ set->map[HCTX_TYPE_READ].queue_offset = 0;
+ }
+ blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
+ blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+ return 0;
+}
+
+static struct blk_mq_ops nvme_tcp_mq_ops = {
+ .queue_rq = nvme_tcp_queue_rq,
+ .complete = nvme_complete_rq,
+ .init_request = nvme_tcp_init_request,
+ .exit_request = nvme_tcp_exit_request,
+ .init_hctx = nvme_tcp_init_hctx,
+ .timeout = nvme_tcp_timeout,
+ .map_queues = nvme_tcp_map_queues,
+};
+
+static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
+ .queue_rq = nvme_tcp_queue_rq,
+ .complete = nvme_complete_rq,
+ .init_request = nvme_tcp_init_request,
+ .exit_request = nvme_tcp_exit_request,
+ .init_hctx = nvme_tcp_init_admin_hctx,
+ .timeout = nvme_tcp_timeout,
+};
+
+static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
+ .name = "tcp",
+ .module = THIS_MODULE,
+ .flags = NVME_F_FABRICS,
+ .reg_read32 = nvmf_reg_read32,
+ .reg_read64 = nvmf_reg_read64,
+ .reg_write32 = nvmf_reg_write32,
+ .free_ctrl = nvme_tcp_free_ctrl,
+ .submit_async_event = nvme_tcp_submit_async_event,
+ .delete_ctrl = nvme_tcp_delete_ctrl,
+ .get_address = nvmf_get_address,
+ .stop_ctrl = nvme_tcp_stop_ctrl,
+};
+
+static bool
+nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
+{
+ struct nvme_tcp_ctrl *ctrl;
+ bool found = false;
+
+ mutex_lock(&nvme_tcp_ctrl_mutex);
+ list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) {
+ found = nvmf_ip_options_match(&ctrl->ctrl, opts);
+ if (found)
+ break;
+ }
+ mutex_unlock(&nvme_tcp_ctrl_mutex);
+
+ return found;
+}
+
+static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
+ struct nvmf_ctrl_options *opts)
+{
+ struct nvme_tcp_ctrl *ctrl;
+ int ret;
+
+ ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+ if (!ctrl)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&ctrl->list);
+ ctrl->ctrl.opts = opts;
+ ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1;
+ ctrl->ctrl.sqsize = opts->queue_size - 1;
+ ctrl->ctrl.kato = opts->kato;
+
+ INIT_DELAYED_WORK(&ctrl->connect_work,
+ nvme_tcp_reconnect_ctrl_work);
+ INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
+
+ if (!(opts->mask & NVMF_OPT_TRSVCID)) {
+ opts->trsvcid =
+ kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
+ if (!opts->trsvcid) {
+ ret = -ENOMEM;
+ goto out_free_ctrl;
+ }
+ opts->mask |= NVMF_OPT_TRSVCID;
+ }
+
+ ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
+ opts->traddr, opts->trsvcid, &ctrl->addr);
+ if (ret) {
+ pr_err("malformed address passed: %s:%s\n",
+ opts->traddr, opts->trsvcid);
+ goto out_free_ctrl;
+ }
+
+ if (opts->mask & NVMF_OPT_HOST_TRADDR) {
+ ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
+ opts->host_traddr, NULL, &ctrl->src_addr);
+ if (ret) {
+ pr_err("malformed src address passed: %s\n",
+ opts->host_traddr);
+ goto out_free_ctrl;
+ }
+ }
+
+ if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {
+ ret = -EALREADY;
+ goto out_free_ctrl;
+ }
+
+ ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
+ GFP_KERNEL);
+ if (!ctrl->queues) {
+ ret = -ENOMEM;
+ goto out_free_ctrl;
+ }
+
+ ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0);
+ if (ret)
+ goto out_kfree_queues;
+
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
+ WARN_ON_ONCE(1);
+ ret = -EINTR;
+ goto out_uninit_ctrl;
+ }
+
+ ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true);
+ if (ret)
+ goto out_uninit_ctrl;
+
+ dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
+ ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
+
+ nvme_get_ctrl(&ctrl->ctrl);
+
+ mutex_lock(&nvme_tcp_ctrl_mutex);
+ list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
+ mutex_unlock(&nvme_tcp_ctrl_mutex);
+
+ return &ctrl->ctrl;
+
+out_uninit_ctrl:
+ nvme_uninit_ctrl(&ctrl->ctrl);
+ nvme_put_ctrl(&ctrl->ctrl);
+ if (ret > 0)
+ ret = -EIO;
+ return ERR_PTR(ret);
+out_kfree_queues:
+ kfree(ctrl->queues);
+out_free_ctrl:
+ kfree(ctrl);
+ return ERR_PTR(ret);
+}
+
+static struct nvmf_transport_ops nvme_tcp_transport = {
+ .name = "tcp",
+ .module = THIS_MODULE,
+ .required_opts = NVMF_OPT_TRADDR,
+ .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
+ NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
+ NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
+ NVMF_OPT_NR_WRITE_QUEUES,
+ .create_ctrl = nvme_tcp_create_ctrl,
+};
+
+static int __init nvme_tcp_init_module(void)
+{
+ nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ if (!nvme_tcp_wq)
+ return -ENOMEM;
+
+ nvmf_register_transport(&nvme_tcp_transport);
+ return 0;
+}
+
+static void __exit nvme_tcp_cleanup_module(void)
+{
+ struct nvme_tcp_ctrl *ctrl;
+
+ nvmf_unregister_transport(&nvme_tcp_transport);
+
+ mutex_lock(&nvme_tcp_ctrl_mutex);
+ list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list)
+ nvme_delete_ctrl(&ctrl->ctrl);
+ mutex_unlock(&nvme_tcp_ctrl_mutex);
+ flush_workqueue(nvme_delete_wq);
+
+ destroy_workqueue(nvme_tcp_wq);
+}
+
+module_init(nvme_tcp_init_module);
+module_exit(nvme_tcp_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index 25b0e310f4a8..5566dda3237a 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -139,3 +139,6 @@ const char *nvme_trace_disk_name(struct trace_seq *p, char *name)
return ret;
}
+EXPORT_SYMBOL_GPL(nvme_trace_disk_name);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(nvme_sq);
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index 196d5bd56718..3564120aa7b3 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -115,8 +115,8 @@ TRACE_EVENT(nvme_setup_cmd,
__entry->nsid = le32_to_cpu(cmd->common.nsid);
__entry->metadata = le64_to_cpu(cmd->common.metadata);
__assign_disk_name(__entry->disk, req->rq_disk);
- memcpy(__entry->cdw10, cmd->common.cdw10,
- sizeof(__entry->cdw10));
+ memcpy(__entry->cdw10, &cmd->common.cdw10,
+ 6 * sizeof(__entry->cdw10));
),
TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
__entry->ctrl_id, __print_disk_name(__entry->disk),
@@ -184,6 +184,29 @@ TRACE_EVENT(nvme_async_event,
#undef aer_name
+TRACE_EVENT(nvme_sq,
+ TP_PROTO(struct request *req, __le16 sq_head, int sq_tail),
+ TP_ARGS(req, sq_head, sq_tail),
+ TP_STRUCT__entry(
+ __field(int, ctrl_id)
+ __array(char, disk, DISK_NAME_LEN)
+ __field(int, qid)
+ __field(u16, sq_head)
+ __field(u16, sq_tail)
+ ),
+ TP_fast_assign(
+ __entry->ctrl_id = nvme_req(req)->ctrl->instance;
+ __assign_disk_name(__entry->disk, req->rq_disk);
+ __entry->qid = nvme_req_qid(req);
+ __entry->sq_head = le16_to_cpu(sq_head);
+ __entry->sq_tail = sq_tail;
+ ),
+ TP_printk("nvme%d: %sqid=%d, head=%u, tail=%u",
+ __entry->ctrl_id, __print_disk_name(__entry->disk),
+ __entry->qid, __entry->sq_head, __entry->sq_tail
+ )
+);
+
#endif /* _TRACE_NVME_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 3c7b61ddb0d1..d94f25cde019 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -60,3 +60,13 @@ config NVME_TARGET_FCLOOP
to test NVMe-FC transport interfaces.
If unsure, say N.
+
+config NVME_TARGET_TCP
+ tristate "NVMe over Fabrics TCP target support"
+ depends on INET
+ depends on NVME_TARGET
+ help
+ This enables the NVMe TCP target support, which allows exporting NVMe
+ devices over TCP.
+
+ If unsure, say N.
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 8118c93391c6..8c3ad0fb6860 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_NVME_TARGET_LOOP) += nvme-loop.o
obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o
obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o
obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
+obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
discovery.o io-cmd-file.o io-cmd-bdev.o
@@ -12,3 +13,4 @@ nvme-loop-y += loop.o
nvmet-rdma-y += rdma.o
nvmet-fc-y += fc.o
nvme-fcloop-y += fcloop.o
+nvmet-tcp-y += tcp.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 1179f6314323..11baeb14c388 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -19,19 +19,6 @@
#include <asm/unaligned.h>
#include "nvmet.h"
-/*
- * This helper allows us to clear the AEN based on the RAE bit,
- * Please use this helper when processing the log pages which are
- * associated with the AEN.
- */
-static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit)
-{
- int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15;
-
- if (!rae)
- clear_bit(aen_bit, &req->sq->ctrl->aen_masked);
-}
-
u32 nvmet_get_log_page_len(struct nvme_command *cmd)
{
u32 len = le16_to_cpu(cmd->get_log_page.numdu);
@@ -50,6 +37,34 @@ static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
}
+static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ u16 status = NVME_SC_SUCCESS;
+ unsigned long flags;
+ off_t offset = 0;
+ u64 slot;
+ u64 i;
+
+ spin_lock_irqsave(&ctrl->error_lock, flags);
+ slot = ctrl->err_counter % NVMET_ERROR_LOG_SLOTS;
+
+ for (i = 0; i < NVMET_ERROR_LOG_SLOTS; i++) {
+ status = nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot],
+ sizeof(struct nvme_error_slot));
+ if (status)
+ break;
+
+ if (slot == 0)
+ slot = NVMET_ERROR_LOG_SLOTS - 1;
+ else
+ slot--;
+ offset += sizeof(struct nvme_error_slot);
+ }
+ spin_unlock_irqrestore(&ctrl->error_lock, flags);
+ nvmet_req_complete(req, status);
+}
+
static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
struct nvme_smart_log *slog)
{
@@ -60,6 +75,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
if (!ns) {
pr_err("Could not find namespace id : %d\n",
le32_to_cpu(req->cmd->get_log_page.nsid));
+ req->error_loc = offsetof(struct nvme_rw_command, nsid);
return NVME_SC_INVALID_NS;
}
@@ -119,6 +135,7 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
{
struct nvme_smart_log *log;
u16 status = NVME_SC_INTERNAL;
+ unsigned long flags;
if (req->data_len != sizeof(*log))
goto out;
@@ -134,6 +151,11 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
if (status)
goto out_free_log;
+ spin_lock_irqsave(&req->sq->ctrl->error_lock, flags);
+ put_unaligned_le64(req->sq->ctrl->err_counter,
+ &log->num_err_log_entries);
+ spin_unlock_irqrestore(&req->sq->ctrl->error_lock, flags);
+
status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
out_free_log:
kfree(log);
@@ -189,7 +211,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
if (!status)
status = nvmet_zero_sgl(req, len, req->data_len - len);
ctrl->nr_changed_ns = 0;
- nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR);
+ nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR);
mutex_unlock(&ctrl->lock);
out:
nvmet_req_complete(req, status);
@@ -252,7 +274,7 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
hdr.ngrps = cpu_to_le16(ngrps);
- nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE);
+ nvmet_clear_aen_bit(req, NVME_AEN_BIT_ANA_CHANGE);
up_read(&nvmet_ana_sem);
kfree(desc);
@@ -304,7 +326,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
/* XXX: figure out what to do about RTD3R/RTD3 */
id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL);
- id->ctratt = cpu_to_le32(1 << 0);
+ id->ctratt = cpu_to_le32(NVME_CTRL_ATTR_HID_128_BIT |
+ NVME_CTRL_ATTR_TBKAS);
id->oacs = 0;
@@ -392,6 +415,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
u16 status = 0;
if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
+ req->error_loc = offsetof(struct nvme_identify, nsid);
status = NVME_SC_INVALID_NS | NVME_SC_DNR;
goto out;
}
@@ -512,6 +536,7 @@ static void nvmet_execute_identify_desclist(struct nvmet_req *req)
ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
if (!ns) {
+ req->error_loc = offsetof(struct nvme_identify, nsid);
status = NVME_SC_INVALID_NS | NVME_SC_DNR;
goto out;
}
@@ -569,13 +594,15 @@ static u16 nvmet_write_protect_flush_sync(struct nvmet_req *req)
static u16 nvmet_set_feat_write_protect(struct nvmet_req *req)
{
- u32 write_protect = le32_to_cpu(req->cmd->common.cdw10[1]);
+ u32 write_protect = le32_to_cpu(req->cmd->common.cdw11);
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
u16 status = NVME_SC_FEATURE_NOT_CHANGEABLE;
req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->rw.nsid);
- if (unlikely(!req->ns))
+ if (unlikely(!req->ns)) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return status;
+ }
mutex_lock(&subsys->lock);
switch (write_protect) {
@@ -599,11 +626,36 @@ static u16 nvmet_set_feat_write_protect(struct nvmet_req *req)
return status;
}
+u16 nvmet_set_feat_kato(struct nvmet_req *req)
+{
+ u32 val32 = le32_to_cpu(req->cmd->common.cdw11);
+
+ req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
+
+ nvmet_set_result(req, req->sq->ctrl->kato);
+
+ return 0;
+}
+
+u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask)
+{
+ u32 val32 = le32_to_cpu(req->cmd->common.cdw11);
+
+ if (val32 & ~mask) {
+ req->error_loc = offsetof(struct nvme_common_command, cdw11);
+ return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ }
+
+ WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
+ nvmet_set_result(req, val32);
+
+ return 0;
+}
+
static void nvmet_execute_set_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
- u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
- u32 val32;
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
switch (cdw10 & 0xff) {
@@ -612,19 +664,10 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
break;
case NVME_FEAT_KATO:
- val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
- req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
- nvmet_set_result(req, req->sq->ctrl->kato);
+ status = nvmet_set_feat_kato(req);
break;
case NVME_FEAT_ASYNC_EVENT:
- val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
- if (val32 & ~NVMET_AEN_CFG_ALL) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
- break;
- }
-
- WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
- nvmet_set_result(req, val32);
+ status = nvmet_set_feat_async_event(req, NVMET_AEN_CFG_ALL);
break;
case NVME_FEAT_HOST_ID:
status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
@@ -633,6 +676,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
status = nvmet_set_feat_write_protect(req);
break;
default:
+ req->error_loc = offsetof(struct nvme_common_command, cdw10);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
}
@@ -646,9 +690,10 @@ static u16 nvmet_get_feat_write_protect(struct nvmet_req *req)
u32 result;
req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->common.nsid);
- if (!req->ns)
+ if (!req->ns) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return NVME_SC_INVALID_NS | NVME_SC_DNR;
-
+ }
mutex_lock(&subsys->lock);
if (req->ns->readonly == true)
result = NVME_NS_WRITE_PROTECT;
@@ -660,10 +705,20 @@ static u16 nvmet_get_feat_write_protect(struct nvmet_req *req)
return 0;
}
+void nvmet_get_feat_kato(struct nvmet_req *req)
+{
+ nvmet_set_result(req, req->sq->ctrl->kato * 1000);
+}
+
+void nvmet_get_feat_async_event(struct nvmet_req *req)
+{
+ nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
+}
+
static void nvmet_execute_get_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
- u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
switch (cdw10 & 0xff) {
@@ -689,7 +744,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
break;
#endif
case NVME_FEAT_ASYNC_EVENT:
- nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
+ nvmet_get_feat_async_event(req);
break;
case NVME_FEAT_VOLATILE_WC:
nvmet_set_result(req, 1);
@@ -699,11 +754,13 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
(subsys->max_qid-1) | ((subsys->max_qid-1) << 16));
break;
case NVME_FEAT_KATO:
- nvmet_set_result(req, req->sq->ctrl->kato * 1000);
+ nvmet_get_feat_kato(req);
break;
case NVME_FEAT_HOST_ID:
/* need 128-bit host identifier flag */
- if (!(req->cmd->common.cdw10[1] & cpu_to_le32(1 << 0))) {
+ if (!(req->cmd->common.cdw11 & cpu_to_le32(1 << 0))) {
+ req->error_loc =
+ offsetof(struct nvme_common_command, cdw11);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
}
@@ -715,6 +772,8 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
status = nvmet_get_feat_write_protect(req);
break;
default:
+ req->error_loc =
+ offsetof(struct nvme_common_command, cdw10);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
}
@@ -722,7 +781,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
nvmet_req_complete(req, status);
}
-static void nvmet_execute_async_event(struct nvmet_req *req)
+void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -738,7 +797,7 @@ static void nvmet_execute_async_event(struct nvmet_req *req)
schedule_work(&ctrl->async_event_work);
}
-static void nvmet_execute_keep_alive(struct nvmet_req *req)
+void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -764,13 +823,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
switch (cmd->get_log_page.lid) {
case NVME_LOG_ERROR:
- /*
- * We currently never set the More bit in the status
- * field, so all error log entries are invalid and can
- * be zeroed out. This is called a minum viable
- * implementation (TM) of this mandatory log page.
- */
- req->execute = nvmet_execute_get_log_page_noop;
+ req->execute = nvmet_execute_get_log_page_error;
return 0;
case NVME_LOG_SMART:
req->execute = nvmet_execute_get_log_page_smart;
@@ -836,5 +889,6 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
req->sq->qid);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index d895579b6c5d..618bbd006544 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -25,12 +25,16 @@
static const struct config_item_type nvmet_host_type;
static const struct config_item_type nvmet_subsys_type;
+static LIST_HEAD(nvmet_ports_list);
+struct list_head *nvmet_ports = &nvmet_ports_list;
+
static const struct nvmet_transport_name {
u8 type;
const char *name;
} nvmet_transport_names[] = {
{ NVMF_TRTYPE_RDMA, "rdma" },
{ NVMF_TRTYPE_FC, "fc" },
+ { NVMF_TRTYPE_TCP, "tcp" },
{ NVMF_TRTYPE_LOOP, "loop" },
};
@@ -150,7 +154,8 @@ CONFIGFS_ATTR(nvmet_, addr_traddr);
static ssize_t nvmet_addr_treq_show(struct config_item *item,
char *page)
{
- switch (to_nvmet_port(item)->disc_addr.treq) {
+ switch (to_nvmet_port(item)->disc_addr.treq &
+ NVME_TREQ_SECURE_CHANNEL_MASK) {
case NVMF_TREQ_NOT_SPECIFIED:
return sprintf(page, "not specified\n");
case NVMF_TREQ_REQUIRED:
@@ -166,6 +171,7 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
+ u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK;
if (port->enabled) {
pr_err("Cannot modify address while enabled\n");
@@ -174,15 +180,16 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item,
}
if (sysfs_streq(page, "not specified")) {
- port->disc_addr.treq = NVMF_TREQ_NOT_SPECIFIED;
+ treq |= NVMF_TREQ_NOT_SPECIFIED;
} else if (sysfs_streq(page, "required")) {
- port->disc_addr.treq = NVMF_TREQ_REQUIRED;
+ treq |= NVMF_TREQ_REQUIRED;
} else if (sysfs_streq(page, "not required")) {
- port->disc_addr.treq = NVMF_TREQ_NOT_REQUIRED;
+ treq |= NVMF_TREQ_NOT_REQUIRED;
} else {
pr_err("Invalid value '%s' for treq\n", page);
return -EINVAL;
}
+ port->disc_addr.treq = treq;
return count;
}
@@ -646,7 +653,8 @@ static int nvmet_port_subsys_allow_link(struct config_item *parent,
}
list_add_tail(&link->entry, &port->subsystems);
- nvmet_genctr++;
+ nvmet_port_disc_changed(port, subsys);
+
up_write(&nvmet_config_sem);
return 0;
@@ -673,7 +681,8 @@ static void nvmet_port_subsys_drop_link(struct config_item *parent,
found:
list_del(&p->entry);
- nvmet_genctr++;
+ nvmet_port_disc_changed(port, subsys);
+
if (list_empty(&port->subsystems))
nvmet_disable_port(port);
up_write(&nvmet_config_sem);
@@ -722,7 +731,8 @@ static int nvmet_allowed_hosts_allow_link(struct config_item *parent,
goto out_free_link;
}
list_add_tail(&link->entry, &subsys->hosts);
- nvmet_genctr++;
+ nvmet_subsys_disc_changed(subsys, host);
+
up_write(&nvmet_config_sem);
return 0;
out_free_link:
@@ -748,7 +758,8 @@ static void nvmet_allowed_hosts_drop_link(struct config_item *parent,
found:
list_del(&p->entry);
- nvmet_genctr++;
+ nvmet_subsys_disc_changed(subsys, host);
+
up_write(&nvmet_config_sem);
kfree(p);
}
@@ -787,7 +798,11 @@ static ssize_t nvmet_subsys_attr_allow_any_host_store(struct config_item *item,
goto out_unlock;
}
- subsys->allow_any_host = allow_any_host;
+ if (subsys->allow_any_host != allow_any_host) {
+ subsys->allow_any_host = allow_any_host;
+ nvmet_subsys_disc_changed(subsys, NULL);
+ }
+
out_unlock:
up_write(&nvmet_config_sem);
return ret ? ret : count;
@@ -936,7 +951,7 @@ static ssize_t nvmet_referral_enable_store(struct config_item *item,
if (enable)
nvmet_referral_enable(parent, port);
else
- nvmet_referral_disable(port);
+ nvmet_referral_disable(parent, port);
return count;
inval:
@@ -962,9 +977,10 @@ static struct configfs_attribute *nvmet_referral_attrs[] = {
static void nvmet_referral_release(struct config_item *item)
{
+ struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent);
struct nvmet_port *port = to_nvmet_port(item);
- nvmet_referral_disable(port);
+ nvmet_referral_disable(parent, port);
kfree(port);
}
@@ -1137,6 +1153,8 @@ static void nvmet_port_release(struct config_item *item)
{
struct nvmet_port *port = to_nvmet_port(item);
+ list_del(&port->global_entry);
+
kfree(port->ana_state);
kfree(port);
}
@@ -1189,12 +1207,15 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
port->ana_state[i] = NVME_ANA_INACCESSIBLE;
}
+ list_add(&port->global_entry, &nvmet_ports_list);
+
INIT_LIST_HEAD(&port->entry);
INIT_LIST_HEAD(&port->subsystems);
INIT_LIST_HEAD(&port->referrals);
port->inline_data_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid);
+ port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW;
config_group_init_type_name(&port->group, name, &nvmet_port_type);
config_group_init_type_name(&port->subsys_group,
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a5f9bbce863f..88d260f31835 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -45,28 +45,72 @@ u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);
+inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
+{
+ u16 status;
+
+ switch (errno) {
+ case -ENOSPC:
+ req->error_loc = offsetof(struct nvme_rw_command, length);
+ status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
+ break;
+ case -EREMOTEIO:
+ req->error_loc = offsetof(struct nvme_rw_command, slba);
+ status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ break;
+ case -EOPNOTSUPP:
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
+ switch (req->cmd->common.opcode) {
+ case nvme_cmd_dsm:
+ case nvme_cmd_write_zeroes:
+ status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
+ break;
+ default:
+ status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+ break;
+ case -ENODATA:
+ req->error_loc = offsetof(struct nvme_rw_command, nsid);
+ status = NVME_SC_ACCESS_DENIED;
+ break;
+ case -EIO:
+ /* FALLTHRU */
+ default:
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
+ status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ }
+
+ return status;
+}
+
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
const char *subsysnqn);
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
size_t len)
{
- if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
+ if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ }
return 0;
}
u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
- if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
+ if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ }
return 0;
}
u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
- if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
+ if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ }
return 0;
}
@@ -130,7 +174,7 @@ static void nvmet_async_event_work(struct work_struct *work)
}
}
-static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
+void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
u8 event_info, u8 log_page)
{
struct nvmet_async_event *aen;
@@ -150,13 +194,6 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
schedule_work(&ctrl->async_event_work);
}
-static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
-{
- if (!(READ_ONCE(ctrl->aen_enabled) & aen))
- return true;
- return test_and_set_bit(aen, &ctrl->aen_masked);
-}
-
static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
u32 i;
@@ -187,7 +224,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
- if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
+ if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
continue;
nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
NVME_AER_NOTICE_NS_CHANGED,
@@ -204,7 +241,7 @@ void nvmet_send_ana_event(struct nvmet_subsys *subsys,
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
if (port && ctrl->port != port)
continue;
- if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE))
+ if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
continue;
nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
@@ -299,6 +336,15 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
{
struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvmet_ctrl, ka_work);
+ bool cmd_seen = ctrl->cmd_seen;
+
+ ctrl->cmd_seen = false;
+ if (cmd_seen) {
+ pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
+ ctrl->cntlid);
+ schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ return;
+ }
pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
ctrl->cntlid, ctrl->kato);
@@ -595,26 +641,58 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
return ns;
}
-static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
+static void nvmet_update_sq_head(struct nvmet_req *req)
{
- u32 old_sqhd, new_sqhd;
- u16 sqhd;
-
- if (status)
- nvmet_set_status(req, status);
-
if (req->sq->size) {
+ u32 old_sqhd, new_sqhd;
+
do {
old_sqhd = req->sq->sqhd;
new_sqhd = (old_sqhd + 1) % req->sq->size;
} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
old_sqhd);
}
- sqhd = req->sq->sqhd & 0x0000FFFF;
- req->rsp->sq_head = cpu_to_le16(sqhd);
+ req->rsp->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
+}
+
+static void nvmet_set_error(struct nvmet_req *req, u16 status)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvme_error_slot *new_error_slot;
+ unsigned long flags;
+
+ req->rsp->status = cpu_to_le16(status << 1);
+
+ if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
+ return;
+
+ spin_lock_irqsave(&ctrl->error_lock, flags);
+ ctrl->err_counter++;
+ new_error_slot =
+ &ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];
+
+ new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
+ new_error_slot->sqid = cpu_to_le16(req->sq->qid);
+ new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
+ new_error_slot->status_field = cpu_to_le16(status << 1);
+ new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
+ new_error_slot->lba = cpu_to_le64(req->error_slba);
+ new_error_slot->nsid = req->cmd->common.nsid;
+ spin_unlock_irqrestore(&ctrl->error_lock, flags);
+
+ /* set the more bit for this request */
+ req->rsp->status |= cpu_to_le16(1 << 14);
+}
+
+static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
+{
+ if (!req->sq->sqhd_disabled)
+ nvmet_update_sq_head(req);
req->rsp->sq_id = cpu_to_le16(req->sq->qid);
req->rsp->command_id = req->cmd->common.command_id;
+ if (unlikely(status))
+ nvmet_set_error(req, status);
if (req->ns)
nvmet_put_namespace(req->ns);
req->ops->queue_response(req);
@@ -735,14 +813,20 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
return ret;
req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
- if (unlikely(!req->ns))
+ if (unlikely(!req->ns)) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return NVME_SC_INVALID_NS | NVME_SC_DNR;
+ }
ret = nvmet_check_ana_state(req->port, req->ns);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return ret;
+ }
ret = nvmet_io_cmd_check_access(req);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ req->error_loc = offsetof(struct nvme_common_command, nsid);
return ret;
+ }
if (req->ns->file)
return nvmet_file_parse_io_cmd(req);
@@ -763,10 +847,14 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->sg_cnt = 0;
req->transfer_len = 0;
req->rsp->status = 0;
+ req->rsp->sq_head = 0;
req->ns = NULL;
+ req->error_loc = NVMET_NO_ERROR_LOC;
+ req->error_slba = 0;
/* no support for fused commands yet */
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
+ req->error_loc = offsetof(struct nvme_common_command, flags);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto fail;
}
@@ -777,6 +865,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
* byte aligned.
*/
if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
+ req->error_loc = offsetof(struct nvme_common_command, flags);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto fail;
}
@@ -801,6 +890,9 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
goto fail;
}
+ if (sq->ctrl)
+ sq->ctrl->cmd_seen = true;
+
return true;
fail:
@@ -819,9 +911,10 @@ EXPORT_SYMBOL_GPL(nvmet_req_uninit);
void nvmet_req_execute(struct nvmet_req *req)
{
- if (unlikely(req->data_len != req->transfer_len))
+ if (unlikely(req->data_len != req->transfer_len)) {
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
- else
+ } else
req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);
@@ -1027,14 +1120,18 @@ u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
return 0;
}
-static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
- const char *hostnqn)
+bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
struct nvmet_host_link *p;
+ lockdep_assert_held(&nvmet_config_sem);
+
if (subsys->allow_any_host)
return true;
+ if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */
+ return true;
+
list_for_each_entry(p, &subsys->hosts, entry) {
if (!strcmp(nvmet_host_name(p->host), hostnqn))
return true;
@@ -1043,30 +1140,6 @@ static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
return false;
}
-static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
- const char *hostnqn)
-{
- struct nvmet_subsys_link *s;
-
- list_for_each_entry(s, &req->port->subsystems, entry) {
- if (__nvmet_host_allowed(s->subsys, hostnqn))
- return true;
- }
-
- return false;
-}
-
-bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
- const char *hostnqn)
-{
- lockdep_assert_held(&nvmet_config_sem);
-
- if (subsys->type == NVME_NQN_DISC)
- return nvmet_host_discovery_allowed(req, hostnqn);
- else
- return __nvmet_host_allowed(subsys, hostnqn);
-}
-
/*
* Note: ctrl->subsys->lock should be held when calling this function
*/
@@ -1117,7 +1190,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
down_read(&nvmet_config_sem);
- if (!nvmet_host_allowed(req, subsys, hostnqn)) {
+ if (!nvmet_host_allowed(subsys, hostnqn)) {
pr_info("connect by host %s for subsystem %s not allowed\n",
hostnqn, subsysnqn);
req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
@@ -1175,31 +1248,20 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
ctrl->cntlid = ret;
ctrl->ops = req->ops;
- if (ctrl->subsys->type == NVME_NQN_DISC) {
- /* Don't accept keep-alive timeout for discovery controllers */
- if (kato) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
- goto out_remove_ida;
- }
- /*
- * Discovery controllers use some arbitrary high value in order
- * to cleanup stale discovery sessions
- *
- * From the latest base diff RC:
- * "The Keep Alive command is not supported by
- * Discovery controllers. A transport may specify a
- * fixed Discovery controller activity timeout value
- * (e.g., 2 minutes). If no commands are received
- * by a Discovery controller within that time
- * period, the controller may perform the
- * actions for Keep Alive Timer expiration".
- */
- ctrl->kato = NVMET_DISC_KATO;
- } else {
- /* keep-alive timeout in seconds */
- ctrl->kato = DIV_ROUND_UP(kato, 1000);
- }
+ /*
+ * Discovery controllers may use some arbitrary high value
+ * in order to cleanup stale discovery sessions
+ */
+ if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato)
+ kato = NVMET_DISC_KATO_MS;
+
+ /* keep-alive timeout in seconds */
+ ctrl->kato = DIV_ROUND_UP(kato, 1000);
+
+ ctrl->err_counter = 0;
+ spin_lock_init(&ctrl->error_lock);
+
nvmet_start_keep_alive_timer(ctrl);
mutex_lock(&subsys->lock);
@@ -1210,8 +1272,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
*ctrlp = ctrl;
return 0;
-out_remove_ida:
- ida_simple_remove(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
kfree(ctrl->sqs);
out_free_cqs:
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index bc0aa0bf1543..d2cb71a0b419 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -18,7 +18,65 @@
struct nvmet_subsys *nvmet_disc_subsys;
-u64 nvmet_genctr;
+static u64 nvmet_genctr;
+
+static void __nvmet_disc_changed(struct nvmet_port *port,
+ struct nvmet_ctrl *ctrl)
+{
+ if (ctrl->port != port)
+ return;
+
+ if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_DISC_CHANGE))
+ return;
+
+ nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+ NVME_AER_NOTICE_DISC_CHANGED, NVME_LOG_DISC);
+}
+
+void nvmet_port_disc_changed(struct nvmet_port *port,
+ struct nvmet_subsys *subsys)
+{
+ struct nvmet_ctrl *ctrl;
+
+ nvmet_genctr++;
+
+ list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) {
+ if (subsys && !nvmet_host_allowed(subsys, ctrl->hostnqn))
+ continue;
+
+ __nvmet_disc_changed(port, ctrl);
+ }
+}
+
+static void __nvmet_subsys_disc_changed(struct nvmet_port *port,
+ struct nvmet_subsys *subsys,
+ struct nvmet_host *host)
+{
+ struct nvmet_ctrl *ctrl;
+
+ list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) {
+ if (host && strcmp(nvmet_host_name(host), ctrl->hostnqn))
+ continue;
+
+ __nvmet_disc_changed(port, ctrl);
+ }
+}
+
+void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
+ struct nvmet_host *host)
+{
+ struct nvmet_port *port;
+ struct nvmet_subsys_link *s;
+
+ nvmet_genctr++;
+
+ list_for_each_entry(port, nvmet_ports, global_entry)
+ list_for_each_entry(s, &port->subsystems, entry) {
+ if (s->subsys != subsys)
+ continue;
+ __nvmet_subsys_disc_changed(port, subsys, host);
+ }
+}
void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port)
{
@@ -26,18 +84,18 @@ void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port)
if (list_empty(&port->entry)) {
list_add_tail(&port->entry, &parent->referrals);
port->enabled = true;
- nvmet_genctr++;
+ nvmet_port_disc_changed(parent, NULL);
}
up_write(&nvmet_config_sem);
}
-void nvmet_referral_disable(struct nvmet_port *port)
+void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port)
{
down_write(&nvmet_config_sem);
if (!list_empty(&port->entry)) {
port->enabled = false;
list_del_init(&port->entry);
- nvmet_genctr++;
+ nvmet_port_disc_changed(parent, NULL);
}
up_write(&nvmet_config_sem);
}
@@ -107,7 +165,7 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
down_read(&nvmet_config_sem);
list_for_each_entry(p, &req->port->subsystems, entry) {
- if (!nvmet_host_allowed(req, p->subsys, ctrl->hostnqn))
+ if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
continue;
if (residual_len >= entry_size) {
char traddr[NVMF_TRADDR_SIZE];
@@ -136,6 +194,8 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
hdr->numrec = cpu_to_le64(numrec);
hdr->recfmt = cpu_to_le16(0);
+ nvmet_clear_aen_bit(req, NVME_AEN_BIT_DISC_CHANGE);
+
up_read(&nvmet_config_sem);
status = nvmet_copy_to_sgl(req, 0, hdr, data_len);
@@ -174,6 +234,8 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20);
+ id->oaes = cpu_to_le32(NVMET_DISC_AEN_CFG_OPTIONAL);
+
strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
@@ -183,6 +245,51 @@ out:
nvmet_req_complete(req, status);
}
+static void nvmet_execute_disc_set_features(struct nvmet_req *req)
+{
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ u16 stat;
+
+ switch (cdw10 & 0xff) {
+ case NVME_FEAT_KATO:
+ stat = nvmet_set_feat_kato(req);
+ break;
+ case NVME_FEAT_ASYNC_EVENT:
+ stat = nvmet_set_feat_async_event(req,
+ NVMET_DISC_AEN_CFG_OPTIONAL);
+ break;
+ default:
+ req->error_loc =
+ offsetof(struct nvme_common_command, cdw10);
+ stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
+
+ nvmet_req_complete(req, stat);
+}
+
+static void nvmet_execute_disc_get_features(struct nvmet_req *req)
+{
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ u16 stat = 0;
+
+ switch (cdw10 & 0xff) {
+ case NVME_FEAT_KATO:
+ nvmet_get_feat_kato(req);
+ break;
+ case NVME_FEAT_ASYNC_EVENT:
+ nvmet_get_feat_async_event(req);
+ break;
+ default:
+ req->error_loc =
+ offsetof(struct nvme_common_command, cdw10);
+ stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
+
+ nvmet_req_complete(req, stat);
+}
+
u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
@@ -190,10 +297,28 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("got cmd %d while not ready\n",
cmd->common.opcode);
+ req->error_loc =
+ offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
switch (cmd->common.opcode) {
+ case nvme_admin_set_features:
+ req->execute = nvmet_execute_disc_set_features;
+ req->data_len = 0;
+ return 0;
+ case nvme_admin_get_features:
+ req->execute = nvmet_execute_disc_get_features;
+ req->data_len = 0;
+ return 0;
+ case nvme_admin_async_event:
+ req->execute = nvmet_execute_async_event;
+ req->data_len = 0;
+ return 0;
+ case nvme_admin_keep_alive:
+ req->execute = nvmet_execute_keep_alive;
+ req->data_len = 0;
+ return 0;
case nvme_admin_get_log_page:
req->data_len = nvmet_get_log_page_len(cmd);
@@ -204,6 +329,8 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
default:
pr_err("unsupported get_log_page lid %d\n",
cmd->get_log_page.lid);
+ req->error_loc =
+ offsetof(struct nvme_get_log_page_command, lid);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
case nvme_admin_identify:
@@ -216,10 +343,12 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
default:
pr_err("unsupported identify cns %d\n",
cmd->identify.cns);
+ req->error_loc = offsetof(struct nvme_identify, cns);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
default:
pr_err("unhandled cmd %d\n", cmd->common.opcode);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index d84ae004cb85..6cf1fd9eb32e 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -17,23 +17,26 @@
static void nvmet_execute_prop_set(struct nvmet_req *req)
{
+ u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
- if (!(req->cmd->prop_set.attrib & 1)) {
- u64 val = le64_to_cpu(req->cmd->prop_set.value);
-
- switch (le32_to_cpu(req->cmd->prop_set.offset)) {
- case NVME_REG_CC:
- nvmet_update_cc(req->sq->ctrl, val);
- break;
- default:
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
- break;
- }
- } else {
+ if (req->cmd->prop_set.attrib & 1) {
+ req->error_loc =
+ offsetof(struct nvmf_property_set_command, attrib);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ goto out;
}
+ switch (le32_to_cpu(req->cmd->prop_set.offset)) {
+ case NVME_REG_CC:
+ nvmet_update_cc(req->sq->ctrl, val);
+ break;
+ default:
+ req->error_loc =
+ offsetof(struct nvmf_property_set_command, offset);
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ }
+out:
nvmet_req_complete(req, status);
}
@@ -69,6 +72,14 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
}
}
+ if (status && req->cmd->prop_get.attrib & 1) {
+ req->error_loc =
+ offsetof(struct nvmf_property_get_command, offset);
+ } else {
+ req->error_loc =
+ offsetof(struct nvmf_property_get_command, attrib);
+ }
+
req->rsp->result.u64 = cpu_to_le64(val);
nvmet_req_complete(req, status);
}
@@ -89,6 +100,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
default:
pr_err("received unknown capsule type 0x%x\n",
cmd->fabrics.fctype);
+ req->error_loc = offsetof(struct nvmf_common_command, fctype);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
@@ -105,16 +117,34 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
if (old) {
pr_warn("queue already connected!\n");
+ req->error_loc = offsetof(struct nvmf_connect_command, opcode);
return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
}
if (!sqsize) {
pr_warn("queue size zero!\n");
+ req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
}
/* note: convert queue size from 0's-based value to 1's-based value */
nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
+
+ if (c->cattr & NVME_CONNECT_DISABLE_SQFLOW) {
+ req->sq->sqhd_disabled = true;
+ req->rsp->sq_head = cpu_to_le16(0xffff);
+ }
+
+ if (ctrl->ops->install_queue) {
+ u16 ret = ctrl->ops->install_queue(req->sq);
+
+ if (ret) {
+ pr_err("failed to install queue %d cntlid %d ret %x\n",
+ qid, ret, ctrl->cntlid);
+ return ret;
+ }
+ }
+
return 0;
}
@@ -141,6 +171,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
if (c->recfmt != 0) {
pr_warn("invalid connect version (%d).\n",
le16_to_cpu(c->recfmt));
+ req->error_loc = offsetof(struct nvmf_connect_command, recfmt);
status = NVME_SC_CONNECT_FORMAT | NVME_SC_DNR;
goto out;
}
@@ -155,8 +186,13 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
le32_to_cpu(c->kato), &ctrl);
- if (status)
+ if (status) {
+ if (status == (NVME_SC_INVALID_FIELD | NVME_SC_DNR))
+ req->error_loc =
+ offsetof(struct nvme_common_command, opcode);
goto out;
+ }
+
uuid_copy(&ctrl->hostid, &d->hostid);
status = nvmet_install_queue(ctrl, req);
@@ -243,11 +279,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
if (cmd->common.opcode != nvme_fabrics_command) {
pr_err("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
if (cmd->fabrics.fctype != nvme_fabrics_type_connect) {
pr_err("invalid capsule type 0x%x on unconnected queue.\n",
cmd->fabrics.fctype);
+ req->error_loc = offsetof(struct nvmf_common_command, fctype);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 409081a03b24..f98f5c5bea26 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -86,8 +86,6 @@ struct nvmet_fc_fcp_iod {
spinlock_t flock;
struct nvmet_req req;
- struct work_struct work;
- struct work_struct done_work;
struct work_struct defer_work;
struct nvmet_fc_tgtport *tgtport;
@@ -134,7 +132,6 @@ struct nvmet_fc_tgt_queue {
u16 sqsize;
u16 ersp_ratio;
__le16 sqhd;
- int cpu;
atomic_t connected;
atomic_t sqtail;
atomic_t zrspcnt;
@@ -232,8 +229,6 @@ static LIST_HEAD(nvmet_fc_portentry_list);
static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
-static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work);
-static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work);
static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
@@ -438,8 +433,6 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport,
int i;
for (i = 0; i < queue->sqsize; fod++, i++) {
- INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work);
- INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work);
INIT_WORK(&fod->defer_work, nvmet_fc_fcp_rqst_op_defer_work);
fod->tgtport = tgtport;
fod->queue = queue;
@@ -517,10 +510,7 @@ nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport,
fcpreq->hwqid = queue->qid ?
((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
- if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
- queue_work_on(queue->cpu, queue->work_q, &fod->work);
- else
- nvmet_fc_handle_fcp_rqst(tgtport, fod);
+ nvmet_fc_handle_fcp_rqst(tgtport, fod);
}
static void
@@ -599,30 +589,6 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
queue_work(queue->work_q, &fod->defer_work);
}
-static int
-nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid)
-{
- int cpu, idx, cnt;
-
- if (tgtport->ops->max_hw_queues == 1)
- return WORK_CPU_UNBOUND;
-
- /* Simple cpu selection based on qid modulo active cpu count */
- idx = !qid ? 0 : (qid - 1) % num_active_cpus();
-
- /* find the n'th active cpu */
- for (cpu = 0, cnt = 0; ; ) {
- if (cpu_active(cpu)) {
- if (cnt == idx)
- break;
- cnt++;
- }
- cpu = (cpu + 1) % num_possible_cpus();
- }
-
- return cpu;
-}
-
static struct nvmet_fc_tgt_queue *
nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
u16 qid, u16 sqsize)
@@ -653,7 +619,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
queue->qid = qid;
queue->sqsize = sqsize;
queue->assoc = assoc;
- queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid);
INIT_LIST_HEAD(&queue->fod_list);
INIT_LIST_HEAD(&queue->avail_defer_list);
INIT_LIST_HEAD(&queue->pending_cmd_list);
@@ -2146,25 +2111,11 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
}
static void
-nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work)
-{
- struct nvmet_fc_fcp_iod *fod =
- container_of(work, struct nvmet_fc_fcp_iod, done_work);
-
- nvmet_fc_fod_op_done(fod);
-}
-
-static void
nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
{
struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
- struct nvmet_fc_tgt_queue *queue = fod->queue;
- if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR)
- /* context switch so completion is not in ISR context */
- queue_work_on(queue->cpu, queue->work_q, &fod->done_work);
- else
- nvmet_fc_fod_op_done(fod);
+ nvmet_fc_fod_op_done(fod);
}
/*
@@ -2332,19 +2283,6 @@ transport_error:
nvmet_fc_abort_op(tgtport, fod);
}
-/*
- * Actual processing routine for received FC-NVME LS Requests from the LLD
- */
-static void
-nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
-{
- struct nvmet_fc_fcp_iod *fod =
- container_of(work, struct nvmet_fc_fcp_iod, work);
- struct nvmet_fc_tgtport *tgtport = fod->tgtport;
-
- nvmet_fc_handle_fcp_rqst(tgtport, fod);
-}
-
/**
* nvmet_fc_rcv_fcp_req - transport entry point called by an LLDD
* upon the reception of a NVME FCP CMD IU.
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index c1ec3475a140..b6d030d3259f 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -44,13 +44,69 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
}
}
+static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
+{
+ u16 status = NVME_SC_SUCCESS;
+
+ if (likely(blk_sts == BLK_STS_OK))
+ return status;
+ /*
+ * Right now there exists M : 1 mapping between block layer error
+ * to the NVMe status code (see nvme_error_status()). For consistency,
+ * when we reverse map we use most appropriate NVMe Status code from
+ * the group of the NVMe staus codes used in the nvme_error_status().
+ */
+ switch (blk_sts) {
+ case BLK_STS_NOSPC:
+ status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
+ req->error_loc = offsetof(struct nvme_rw_command, length);
+ break;
+ case BLK_STS_TARGET:
+ status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ req->error_loc = offsetof(struct nvme_rw_command, slba);
+ break;
+ case BLK_STS_NOTSUPP:
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
+ switch (req->cmd->common.opcode) {
+ case nvme_cmd_dsm:
+ case nvme_cmd_write_zeroes:
+ status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
+ break;
+ default:
+ status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+ break;
+ case BLK_STS_MEDIUM:
+ status = NVME_SC_ACCESS_DENIED;
+ req->error_loc = offsetof(struct nvme_rw_command, nsid);
+ break;
+ case BLK_STS_IOERR:
+ /* fallthru */
+ default:
+ status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
+ }
+
+ switch (req->cmd->common.opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_write:
+ req->error_slba = le64_to_cpu(req->cmd->rw.slba);
+ break;
+ case nvme_cmd_write_zeroes:
+ req->error_slba =
+ le64_to_cpu(req->cmd->write_zeroes.slba);
+ break;
+ default:
+ req->error_slba = 0;
+ }
+ return status;
+}
+
static void nvmet_bio_done(struct bio *bio)
{
struct nvmet_req *req = bio->bi_private;
- nvmet_req_complete(req,
- bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
-
+ nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
if (bio != &req->b.inline_bio)
bio_put(bio);
}
@@ -61,7 +117,6 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
struct bio *bio;
struct scatterlist *sg;
sector_t sector;
- blk_qc_t cookie;
int op, op_flags = 0, i;
if (!req->sg_cnt) {
@@ -114,9 +169,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
sg_cnt--;
}
- cookie = submit_bio(bio);
-
- blk_poll(bdev_get_queue(req->ns->bdev), cookie);
+ submit_bio(bio);
}
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
@@ -139,18 +192,21 @@ u16 nvmet_bdev_flush(struct nvmet_req *req)
return 0;
}
-static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
+static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
struct nvme_dsm_range *range, struct bio **bio)
{
+ struct nvmet_ns *ns = req->ns;
int ret;
ret = __blkdev_issue_discard(ns->bdev,
le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
GFP_KERNEL, 0, bio);
- if (ret && ret != -EOPNOTSUPP)
- return NVME_SC_INTERNAL | NVME_SC_DNR;
- return 0;
+
+ if (ret)
+ req->error_slba = le64_to_cpu(range->slba);
+
+ return blk_to_nvme_status(req, errno_to_blk_status(ret));
}
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
@@ -166,7 +222,7 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
if (status)
break;
- status = nvmet_bdev_discard_range(req->ns, &range, &bio);
+ status = nvmet_bdev_discard_range(req, &range, &bio);
if (status)
break;
}
@@ -207,16 +263,16 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
u16 status = NVME_SC_SUCCESS;
sector_t sector;
sector_t nr_sector;
+ int ret;
sector = le64_to_cpu(write_zeroes->slba) <<
(req->ns->blksize_shift - 9);
nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
(req->ns->blksize_shift - 9));
- if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
- GFP_KERNEL, &bio, 0))
- status = NVME_SC_INTERNAL | NVME_SC_DNR;
-
+ ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
+ GFP_KERNEL, &bio, 0);
+ status = blk_to_nvme_status(req, errno_to_blk_status(ret));
if (bio) {
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
@@ -251,6 +307,7 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
default:
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
req->sq->qid);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
}
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 01feebec29ea..517522305e5c 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -83,17 +83,16 @@ static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
}
static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
- unsigned long nr_segs, size_t count)
+ unsigned long nr_segs, size_t count, int ki_flags)
{
struct kiocb *iocb = &req->f.iocb;
ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
struct iov_iter iter;
- int ki_flags = 0, rw;
- ssize_t ret;
+ int rw;
if (req->cmd->rw.opcode == nvme_cmd_write) {
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
- ki_flags = IOCB_DSYNC;
+ ki_flags |= IOCB_DSYNC;
call_iter = req->ns->file->f_op->write_iter;
rw = WRITE;
} else {
@@ -107,17 +106,13 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
iocb->ki_filp = req->ns->file;
iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);
- ret = call_iter(iocb, &iter);
-
- if (ret != -EIOCBQUEUED && iocb->ki_complete)
- iocb->ki_complete(iocb, ret, 0);
-
- return ret;
+ return call_iter(iocb, &iter);
}
static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
{
struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
+ u16 status = NVME_SC_SUCCESS;
if (req->f.bvec != req->inline_bvec) {
if (likely(req->f.mpool_alloc == false))
@@ -126,11 +121,12 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
mempool_free(req->f.bvec, req->ns->bvec_pool);
}
- nvmet_req_complete(req, ret != req->data_len ?
- NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+ if (unlikely(ret != req->data_len))
+ status = errno_to_nvme_status(req, ret);
+ nvmet_req_complete(req, status);
}
-static void nvmet_file_execute_rw(struct nvmet_req *req)
+static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
{
ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
struct sg_page_iter sg_pg_iter;
@@ -140,30 +136,14 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
ssize_t ret = 0;
loff_t pos;
- if (!req->sg_cnt || !nr_bvec) {
- nvmet_req_complete(req, 0);
- return;
- }
+
+ if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
+ is_sync = true;
pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
if (unlikely(pos + req->data_len > req->ns->size)) {
- nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
- return;
- }
-
- if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
- req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
- GFP_KERNEL);
- else
- req->f.bvec = req->inline_bvec;
-
- req->f.mpool_alloc = false;
- if (unlikely(!req->f.bvec)) {
- /* fallback under memory pressure */
- req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
- req->f.mpool_alloc = true;
- if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
- is_sync = true;
+ nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
+ return true;
}
memset(&req->f.iocb, 0, sizeof(struct kiocb));
@@ -177,9 +157,10 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
if (unlikely(is_sync) &&
(nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
- ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
+ ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len, 0);
if (ret < 0)
- goto out;
+ goto complete;
+
pos += len;
bv_cnt = 0;
len = 0;
@@ -187,35 +168,95 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
nr_bvec--;
}
- if (WARN_ON_ONCE(total_len != req->data_len))
+ if (WARN_ON_ONCE(total_len != req->data_len)) {
ret = -EIO;
-out:
- if (unlikely(is_sync || ret)) {
- nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
- return;
+ goto complete;
+ }
+
+ if (unlikely(is_sync)) {
+ ret = total_len;
+ goto complete;
}
- req->f.iocb.ki_complete = nvmet_file_io_done;
- nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
+
+ /*
+ * A NULL ki_complete ask for synchronous execution, which we want
+ * for the IOCB_NOWAIT case.
+ */
+ if (!(ki_flags & IOCB_NOWAIT))
+ req->f.iocb.ki_complete = nvmet_file_io_done;
+
+ ret = nvmet_file_submit_bvec(req, pos, bv_cnt, total_len, ki_flags);
+
+ switch (ret) {
+ case -EIOCBQUEUED:
+ return true;
+ case -EAGAIN:
+ if (WARN_ON_ONCE(!(ki_flags & IOCB_NOWAIT)))
+ goto complete;
+ return false;
+ case -EOPNOTSUPP:
+ /*
+ * For file systems returning error -EOPNOTSUPP, handle
+ * IOCB_NOWAIT error case separately and retry without
+ * IOCB_NOWAIT.
+ */
+ if ((ki_flags & IOCB_NOWAIT))
+ return false;
+ break;
+ }
+
+complete:
+ nvmet_file_io_done(&req->f.iocb, ret, 0);
+ return true;
}
static void nvmet_file_buffered_io_work(struct work_struct *w)
{
struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
- nvmet_file_execute_rw(req);
+ nvmet_file_execute_io(req, 0);
}
-static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
+static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
{
INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
queue_work(buffered_io_wq, &req->f.work);
}
+static void nvmet_file_execute_rw(struct nvmet_req *req)
+{
+ ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
+
+ if (!req->sg_cnt || !nr_bvec) {
+ nvmet_req_complete(req, 0);
+ return;
+ }
+
+ if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
+ req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ else
+ req->f.bvec = req->inline_bvec;
+
+ if (unlikely(!req->f.bvec)) {
+ /* fallback under memory pressure */
+ req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
+ req->f.mpool_alloc = true;
+ } else
+ req->f.mpool_alloc = false;
+
+ if (req->ns->buffered_io) {
+ if (likely(!req->f.mpool_alloc) &&
+ nvmet_file_execute_io(req, IOCB_NOWAIT))
+ return;
+ nvmet_file_submit_buffered_io(req);
+ } else
+ nvmet_file_execute_io(req, 0);
+}
+
u16 nvmet_file_flush(struct nvmet_req *req)
{
- if (vfs_fsync(req->ns->file, 1) < 0)
- return NVME_SC_INTERNAL | NVME_SC_DNR;
- return 0;
+ return errno_to_nvme_status(req, vfs_fsync(req->ns->file, 1));
}
static void nvmet_file_flush_work(struct work_struct *w)
@@ -236,30 +277,34 @@ static void nvmet_file_execute_discard(struct nvmet_req *req)
int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
struct nvme_dsm_range range;
loff_t offset, len;
- u16 ret;
+ u16 status = 0;
+ int ret;
int i;
for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
- ret = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+ status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
sizeof(range));
- if (ret)
+ if (status)
break;
offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
len = le32_to_cpu(range.nlb);
len <<= req->ns->blksize_shift;
if (offset + len > req->ns->size) {
- ret = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ req->error_slba = le64_to_cpu(range.slba);
+ status = errno_to_nvme_status(req, -ENOSPC);
break;
}
- if (vfs_fallocate(req->ns->file, mode, offset, len)) {
- ret = NVME_SC_INTERNAL | NVME_SC_DNR;
+ ret = vfs_fallocate(req->ns->file, mode, offset, len);
+ if (ret) {
+ req->error_slba = le64_to_cpu(range.slba);
+ status = errno_to_nvme_status(req, ret);
break;
}
}
- nvmet_req_complete(req, ret);
+ nvmet_req_complete(req, status);
}
static void nvmet_file_dsm_work(struct work_struct *w)
@@ -299,12 +344,12 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
req->ns->blksize_shift);
if (unlikely(offset + len > req->ns->size)) {
- nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
+ nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
return;
}
ret = vfs_fallocate(req->ns->file, mode, offset, len);
- nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+ nvmet_req_complete(req, ret < 0 ? errno_to_nvme_status(req, ret) : 0);
}
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
@@ -320,10 +365,7 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
switch (cmd->common.opcode) {
case nvme_cmd_read:
case nvme_cmd_write:
- if (req->ns->buffered_io)
- req->execute = nvmet_file_execute_rw_buffered_io;
- else
- req->execute = nvmet_file_execute_rw;
+ req->execute = nvmet_file_execute_rw;
req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
@@ -342,6 +384,7 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
default:
pr_err("unhandled cmd for file ns %d on qid %d\n",
cmd->common.opcode, req->sq->qid);
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 9908082b32c4..4aac1b4a8112 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -345,7 +345,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
int i, ret;
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
- ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
+ ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false);
if (ret)
return ret;
set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index c2b4d9ee6391..3e4719fdba85 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -30,12 +30,15 @@
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
+#define NVMET_NO_ERROR_LOC ((u16)-1)
/*
* Supported optional AENs:
*/
#define NVMET_AEN_CFG_OPTIONAL \
(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE)
+#define NVMET_DISC_AEN_CFG_OPTIONAL \
+ (NVME_AEN_CFG_DISC_CHANGE)
/*
* Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
@@ -104,6 +107,7 @@ struct nvmet_sq {
u16 qid;
u16 size;
u32 sqhd;
+ bool sqhd_disabled;
struct completion free_done;
struct completion confirm_done;
};
@@ -137,6 +141,7 @@ struct nvmet_port {
struct list_head subsystems;
struct config_group referrals_group;
struct list_head referrals;
+ struct list_head global_entry;
struct config_group ana_groups_group;
struct nvmet_ana_group ana_default_group;
enum nvme_ana_state *ana_state;
@@ -163,6 +168,8 @@ struct nvmet_ctrl {
struct nvmet_cq **cqs;
struct nvmet_sq **sqs;
+ bool cmd_seen;
+
struct mutex lock;
u64 cap;
u32 cc;
@@ -194,8 +201,12 @@ struct nvmet_ctrl {
char subsysnqn[NVMF_NQN_FIELD_LEN];
char hostnqn[NVMF_NQN_FIELD_LEN];
- struct device *p2p_client;
- struct radix_tree_root p2p_ns_map;
+ struct device *p2p_client;
+ struct radix_tree_root p2p_ns_map;
+
+ spinlock_t error_lock;
+ u64 err_counter;
+ struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
};
struct nvmet_subsys {
@@ -273,6 +284,7 @@ struct nvmet_fabrics_ops {
void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
void (*disc_traddr)(struct nvmet_req *req,
struct nvmet_port *port, char *traddr);
+ u16 (*install_queue)(struct nvmet_sq *nvme_sq);
};
#define NVMET_MAX_INLINE_BIOVEC 8
@@ -308,17 +320,14 @@ struct nvmet_req {
void (*execute)(struct nvmet_req *req);
const struct nvmet_fabrics_ops *ops;
- struct pci_dev *p2p_dev;
- struct device *p2p_client;
+ struct pci_dev *p2p_dev;
+ struct device *p2p_client;
+ u16 error_loc;
+ u64 error_slba;
};
extern struct workqueue_struct *buffered_io_wq;
-static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
-{
- req->rsp->status = cpu_to_le16(status << 1);
-}
-
static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
{
req->rsp->result.u32 = cpu_to_le32(result);
@@ -340,6 +349,27 @@ struct nvmet_async_event {
u8 log_page;
};
+static inline void nvmet_clear_aen_bit(struct nvmet_req *req, u32 bn)
+{
+ int rae = le32_to_cpu(req->cmd->common.cdw10) & 1 << 15;
+
+ if (!rae)
+ clear_bit(bn, &req->sq->ctrl->aen_masked);
+}
+
+static inline bool nvmet_aen_bit_disabled(struct nvmet_ctrl *ctrl, u32 bn)
+{
+ if (!(READ_ONCE(ctrl->aen_enabled) & (1 << bn)))
+ return true;
+ return test_and_set_bit(bn, &ctrl->aen_masked);
+}
+
+void nvmet_get_feat_kato(struct nvmet_req *req);
+void nvmet_get_feat_async_event(struct nvmet_req *req);
+u16 nvmet_set_feat_kato(struct nvmet_req *req);
+u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
+void nvmet_execute_async_event(struct nvmet_req *req);
+
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
@@ -355,6 +385,8 @@ void nvmet_req_complete(struct nvmet_req *req, u16 status);
int nvmet_req_alloc_sgl(struct nvmet_req *req);
void nvmet_req_free_sgl(struct nvmet_req *req);
+void nvmet_execute_keep_alive(struct nvmet_req *req);
+
void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
u16 size);
void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
@@ -395,7 +427,7 @@ int nvmet_enable_port(struct nvmet_port *port);
void nvmet_disable_port(struct nvmet_port *port);
void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port);
-void nvmet_referral_disable(struct nvmet_port *port);
+void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port);
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
size_t len);
@@ -405,6 +437,14 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
u32 nvmet_get_log_page_len(struct nvme_command *cmd);
+extern struct list_head *nvmet_ports;
+void nvmet_port_disc_changed(struct nvmet_port *port,
+ struct nvmet_subsys *subsys);
+void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
+ struct nvmet_host *host);
+void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
+ u8 event_info, u8 log_page);
+
#define NVMET_QUEUE_SIZE 1024
#define NVMET_NR_QUEUES 128
#define NVMET_MAX_CMD NVMET_QUEUE_SIZE
@@ -425,7 +465,7 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd);
#define NVMET_DEFAULT_ANA_GRPID 1
#define NVMET_KAS 10
-#define NVMET_DISC_KATO 120
+#define NVMET_DISC_KATO_MS 120000
int __init nvmet_init_configfs(void);
void __exit nvmet_exit_configfs(void);
@@ -434,15 +474,13 @@ int __init nvmet_init_discovery(void);
void nvmet_exit_discovery(void);
extern struct nvmet_subsys *nvmet_disc_subsys;
-extern u64 nvmet_genctr;
extern struct rw_semaphore nvmet_config_sem;
extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
extern u64 nvmet_ana_chgcnt;
extern struct rw_semaphore nvmet_ana_sem;
-bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
- const char *hostnqn);
+bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn);
int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
int nvmet_file_ns_enable(struct nvmet_ns *ns);
@@ -457,4 +495,6 @@ static inline u32 nvmet_rw_len(struct nvmet_req *req)
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
+
+u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
#endif /* _NVMET_H */
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 583086dd9cb9..a8d23eb80192 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -196,7 +196,7 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
{
unsigned long flags;
- if (rsp->allocated) {
+ if (unlikely(rsp->allocated)) {
kfree(rsp);
return;
}
@@ -630,8 +630,11 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
u64 off = le64_to_cpu(sgl->addr);
u32 len = le32_to_cpu(sgl->length);
- if (!nvme_is_write(rsp->req.cmd))
+ if (!nvme_is_write(rsp->req.cmd)) {
+ rsp->req.error_loc =
+ offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ }
if (off + len > rsp->queue->dev->inline_data_size) {
pr_err("invalid inline data offset!\n");
@@ -696,6 +699,8 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
return nvmet_rdma_map_sgl_inline(rsp);
default:
pr_err("invalid SGL subtype: %#x\n", sgl->type);
+ rsp->req.error_loc =
+ offsetof(struct nvme_common_command, dptr);
return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
}
case NVME_KEY_SGL_FMT_DATA_DESC:
@@ -706,10 +711,13 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
return nvmet_rdma_map_sgl_keyed(rsp, sgl, false);
default:
pr_err("invalid SGL subtype: %#x\n", sgl->type);
+ rsp->req.error_loc =
+ offsetof(struct nvme_common_command, dptr);
return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
}
default:
pr_err("invalid SGL type: %#x\n", sgl->type);
+ rsp->req.error_loc = offsetof(struct nvme_common_command, dptr);
return NVME_SC_SGL_INVALID_TYPE | NVME_SC_DNR;
}
}
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
new file mode 100644
index 000000000000..44b37b202e39
--- /dev/null
+++ b/drivers/nvme/target/tcp.c
@@ -0,0 +1,1737 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe over Fabrics TCP target.
+ * Copyright (c) 2018 Lightbits Labs. All rights reserved.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/nvme-tcp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/inet.h>
+#include <linux/llist.h>
+#include <crypto/hash.h>
+
+#include "nvmet.h"
+
+#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
+
+#define NVMET_TCP_RECV_BUDGET 8
+#define NVMET_TCP_SEND_BUDGET 8
+#define NVMET_TCP_IO_WORK_BUDGET 64
+
+enum nvmet_tcp_send_state {
+ NVMET_TCP_SEND_DATA_PDU,
+ NVMET_TCP_SEND_DATA,
+ NVMET_TCP_SEND_R2T,
+ NVMET_TCP_SEND_DDGST,
+ NVMET_TCP_SEND_RESPONSE
+};
+
+enum nvmet_tcp_recv_state {
+ NVMET_TCP_RECV_PDU,
+ NVMET_TCP_RECV_DATA,
+ NVMET_TCP_RECV_DDGST,
+ NVMET_TCP_RECV_ERR,
+};
+
+enum {
+ NVMET_TCP_F_INIT_FAILED = (1 << 0),
+};
+
+struct nvmet_tcp_cmd {
+ struct nvmet_tcp_queue *queue;
+ struct nvmet_req req;
+
+ struct nvme_tcp_cmd_pdu *cmd_pdu;
+ struct nvme_tcp_rsp_pdu *rsp_pdu;
+ struct nvme_tcp_data_pdu *data_pdu;
+ struct nvme_tcp_r2t_pdu *r2t_pdu;
+
+ u32 rbytes_done;
+ u32 wbytes_done;
+
+ u32 pdu_len;
+ u32 pdu_recv;
+ int sg_idx;
+ int nr_mapped;
+ struct msghdr recv_msg;
+ struct kvec *iov;
+ u32 flags;
+
+ struct list_head entry;
+ struct llist_node lentry;
+
+ /* send state */
+ u32 offset;
+ struct scatterlist *cur_sg;
+ enum nvmet_tcp_send_state state;
+
+ __le32 exp_ddgst;
+ __le32 recv_ddgst;
+};
+
+enum nvmet_tcp_queue_state {
+ NVMET_TCP_Q_CONNECTING,
+ NVMET_TCP_Q_LIVE,
+ NVMET_TCP_Q_DISCONNECTING,
+};
+
+struct nvmet_tcp_queue {
+ struct socket *sock;
+ struct nvmet_tcp_port *port;
+ struct work_struct io_work;
+ int cpu;
+ struct nvmet_cq nvme_cq;
+ struct nvmet_sq nvme_sq;
+
+ /* send state */
+ struct nvmet_tcp_cmd *cmds;
+ unsigned int nr_cmds;
+ struct list_head free_list;
+ struct llist_head resp_list;
+ struct list_head resp_send_list;
+ int send_list_len;
+ struct nvmet_tcp_cmd *snd_cmd;
+
+ /* recv state */
+ int offset;
+ int left;
+ enum nvmet_tcp_recv_state rcv_state;
+ struct nvmet_tcp_cmd *cmd;
+ union nvme_tcp_pdu pdu;
+
+ /* digest state */
+ bool hdr_digest;
+ bool data_digest;
+ struct ahash_request *snd_hash;
+ struct ahash_request *rcv_hash;
+
+ spinlock_t state_lock;
+ enum nvmet_tcp_queue_state state;
+
+ struct sockaddr_storage sockaddr;
+ struct sockaddr_storage sockaddr_peer;
+ struct work_struct release_work;
+
+ int idx;
+ struct list_head queue_list;
+
+ struct nvmet_tcp_cmd connect;
+
+ struct page_frag_cache pf_cache;
+
+ void (*data_ready)(struct sock *);
+ void (*state_change)(struct sock *);
+ void (*write_space)(struct sock *);
+};
+
+struct nvmet_tcp_port {
+ struct socket *sock;
+ struct work_struct accept_work;
+ struct nvmet_port *nport;
+ struct sockaddr_storage addr;
+ int last_cpu;
+ void (*data_ready)(struct sock *);
+};
+
+static DEFINE_IDA(nvmet_tcp_queue_ida);
+static LIST_HEAD(nvmet_tcp_queue_list);
+static DEFINE_MUTEX(nvmet_tcp_queue_mutex);
+
+static struct workqueue_struct *nvmet_tcp_wq;
+static struct nvmet_fabrics_ops nvmet_tcp_ops;
+static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
+static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
+
+static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
+ struct nvmet_tcp_cmd *cmd)
+{
+ return cmd - queue->cmds;
+}
+
+static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd)
+{
+ return nvme_is_write(cmd->req.cmd) &&
+ cmd->rbytes_done < cmd->req.transfer_len;
+}
+
+static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd)
+{
+ return nvmet_tcp_has_data_in(cmd) && !cmd->req.rsp->status;
+}
+
+static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd)
+{
+ return !nvme_is_write(cmd->req.cmd) &&
+ cmd->req.transfer_len > 0 &&
+ !cmd->req.rsp->status;
+}
+
+static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd)
+{
+ return nvme_is_write(cmd->req.cmd) && cmd->pdu_len &&
+ !cmd->rbytes_done;
+}
+
+static inline struct nvmet_tcp_cmd *
+nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd;
+
+ cmd = list_first_entry_or_null(&queue->free_list,
+ struct nvmet_tcp_cmd, entry);
+ if (!cmd)
+ return NULL;
+ list_del_init(&cmd->entry);
+
+ cmd->rbytes_done = cmd->wbytes_done = 0;
+ cmd->pdu_len = 0;
+ cmd->pdu_recv = 0;
+ cmd->iov = NULL;
+ cmd->flags = 0;
+ return cmd;
+}
+
+static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd)
+{
+ if (unlikely(cmd == &cmd->queue->connect))
+ return;
+
+ list_add_tail(&cmd->entry, &cmd->queue->free_list);
+}
+
+static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue)
+{
+ return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
+}
+
+static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue)
+{
+ return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
+}
+
+static inline void nvmet_tcp_hdgst(struct ahash_request *hash,
+ void *pdu, size_t len)
+{
+ struct scatterlist sg;
+
+ sg_init_one(&sg, pdu, len);
+ ahash_request_set_crypt(hash, &sg, pdu + len, len);
+ crypto_ahash_digest(hash);
+}
+
+static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue,
+ void *pdu, size_t len)
+{
+ struct nvme_tcp_hdr *hdr = pdu;
+ __le32 recv_digest;
+ __le32 exp_digest;
+
+ if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
+ pr_err("queue %d: header digest enabled but no header digest\n",
+ queue->idx);
+ return -EPROTO;
+ }
+
+ recv_digest = *(__le32 *)(pdu + hdr->hlen);
+ nvmet_tcp_hdgst(queue->rcv_hash, pdu, len);
+ exp_digest = *(__le32 *)(pdu + hdr->hlen);
+ if (recv_digest != exp_digest) {
+ pr_err("queue %d: header digest error: recv %#x expected %#x\n",
+ queue->idx, le32_to_cpu(recv_digest),
+ le32_to_cpu(exp_digest));
+ return -EPROTO;
+ }
+
+ return 0;
+}
+
+static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
+{
+ struct nvme_tcp_hdr *hdr = pdu;
+ u8 digest_len = nvmet_tcp_hdgst_len(queue);
+ u32 len;
+
+ len = le32_to_cpu(hdr->plen) - hdr->hlen -
+ (hdr->flags & NVME_TCP_F_HDGST ? digest_len : 0);
+
+ if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
+ pr_err("queue %d: data digest flag is cleared\n", queue->idx);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+
+static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
+{
+ struct scatterlist *sg;
+ int i;
+
+ sg = &cmd->req.sg[cmd->sg_idx];
+
+ for (i = 0; i < cmd->nr_mapped; i++)
+ kunmap(sg_page(&sg[i]));
+}
+
+static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
+{
+ struct kvec *iov = cmd->iov;
+ struct scatterlist *sg;
+ u32 length, offset, sg_offset;
+
+ length = cmd->pdu_len;
+ cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE);
+ offset = cmd->rbytes_done;
+ cmd->sg_idx = DIV_ROUND_UP(offset, PAGE_SIZE);
+ sg_offset = offset % PAGE_SIZE;
+ sg = &cmd->req.sg[cmd->sg_idx];
+
+ while (length) {
+ u32 iov_len = min_t(u32, length, sg->length - sg_offset);
+
+ iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
+ iov->iov_len = iov_len;
+
+ length -= iov_len;
+ sg = sg_next(sg);
+ iov++;
+ }
+
+ iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
+ cmd->nr_mapped, cmd->pdu_len);
+}
+
+static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
+{
+ queue->rcv_state = NVMET_TCP_RECV_ERR;
+ if (queue->nvme_sq.ctrl)
+ nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
+ else
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+}
+
+static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
+ u32 len = le32_to_cpu(sgl->length);
+
+ if (!cmd->req.data_len)
+ return 0;
+
+ if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
+ NVME_SGL_FMT_OFFSET)) {
+ if (!nvme_is_write(cmd->req.cmd))
+ return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+
+ if (len > cmd->req.port->inline_data_size)
+ return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
+ cmd->pdu_len = len;
+ }
+ cmd->req.transfer_len += len;
+
+ cmd->req.sg = sgl_alloc(len, GFP_KERNEL, &cmd->req.sg_cnt);
+ if (!cmd->req.sg)
+ return NVME_SC_INTERNAL;
+ cmd->cur_sg = cmd->req.sg;
+
+ if (nvmet_tcp_has_data_in(cmd)) {
+ cmd->iov = kmalloc_array(cmd->req.sg_cnt,
+ sizeof(*cmd->iov), GFP_KERNEL);
+ if (!cmd->iov)
+ goto err;
+ }
+
+ return 0;
+err:
+ sgl_free(cmd->req.sg);
+ return NVME_SC_INTERNAL;
+}
+
+static void nvmet_tcp_ddgst(struct ahash_request *hash,
+ struct nvmet_tcp_cmd *cmd)
+{
+ ahash_request_set_crypt(hash, cmd->req.sg,
+ (void *)&cmd->exp_ddgst, cmd->req.transfer_len);
+ crypto_ahash_digest(hash);
+}
+
+static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvme_tcp_data_pdu *pdu = cmd->data_pdu;
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+ u8 ddgst = nvmet_tcp_ddgst_len(cmd->queue);
+
+ cmd->offset = 0;
+ cmd->state = NVMET_TCP_SEND_DATA_PDU;
+
+ pdu->hdr.type = nvme_tcp_c2h_data;
+ pdu->hdr.flags = NVME_TCP_F_DATA_LAST;
+ pdu->hdr.hlen = sizeof(*pdu);
+ pdu->hdr.pdo = pdu->hdr.hlen + hdgst;
+ pdu->hdr.plen =
+ cpu_to_le32(pdu->hdr.hlen + hdgst +
+ cmd->req.transfer_len + ddgst);
+ pdu->command_id = cmd->req.rsp->command_id;
+ pdu->data_length = cpu_to_le32(cmd->req.transfer_len);
+ pdu->data_offset = cpu_to_le32(cmd->wbytes_done);
+
+ if (queue->data_digest) {
+ pdu->hdr.flags |= NVME_TCP_F_DDGST;
+ nvmet_tcp_ddgst(queue->snd_hash, cmd);
+ }
+
+ if (cmd->queue->hdr_digest) {
+ pdu->hdr.flags |= NVME_TCP_F_HDGST;
+ nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ }
+}
+
+static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu;
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+
+ cmd->offset = 0;
+ cmd->state = NVMET_TCP_SEND_R2T;
+
+ pdu->hdr.type = nvme_tcp_r2t;
+ pdu->hdr.flags = 0;
+ pdu->hdr.hlen = sizeof(*pdu);
+ pdu->hdr.pdo = 0;
+ pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
+
+ pdu->command_id = cmd->req.cmd->common.command_id;
+ pdu->ttag = nvmet_tcp_cmd_tag(cmd->queue, cmd);
+ pdu->r2t_length = cpu_to_le32(cmd->req.transfer_len - cmd->rbytes_done);
+ pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done);
+ if (cmd->queue->hdr_digest) {
+ pdu->hdr.flags |= NVME_TCP_F_HDGST;
+ nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ }
+}
+
+static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu;
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+
+ cmd->offset = 0;
+ cmd->state = NVMET_TCP_SEND_RESPONSE;
+
+ pdu->hdr.type = nvme_tcp_rsp;
+ pdu->hdr.flags = 0;
+ pdu->hdr.hlen = sizeof(*pdu);
+ pdu->hdr.pdo = 0;
+ pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
+ if (cmd->queue->hdr_digest) {
+ pdu->hdr.flags |= NVME_TCP_F_HDGST;
+ nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
+ }
+}
+
+static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue)
+{
+ struct llist_node *node;
+
+ node = llist_del_all(&queue->resp_list);
+ if (!node)
+ return;
+
+ while (node) {
+ struct nvmet_tcp_cmd *cmd = llist_entry(node,
+ struct nvmet_tcp_cmd, lentry);
+
+ list_add(&cmd->entry, &queue->resp_send_list);
+ node = node->next;
+ queue->send_list_len++;
+ }
+}
+
+static struct nvmet_tcp_cmd *nvmet_tcp_fetch_cmd(struct nvmet_tcp_queue *queue)
+{
+ queue->snd_cmd = list_first_entry_or_null(&queue->resp_send_list,
+ struct nvmet_tcp_cmd, entry);
+ if (!queue->snd_cmd) {
+ nvmet_tcp_process_resp_list(queue);
+ queue->snd_cmd =
+ list_first_entry_or_null(&queue->resp_send_list,
+ struct nvmet_tcp_cmd, entry);
+ if (unlikely(!queue->snd_cmd))
+ return NULL;
+ }
+
+ list_del_init(&queue->snd_cmd->entry);
+ queue->send_list_len--;
+
+ if (nvmet_tcp_need_data_out(queue->snd_cmd))
+ nvmet_setup_c2h_data_pdu(queue->snd_cmd);
+ else if (nvmet_tcp_need_data_in(queue->snd_cmd))
+ nvmet_setup_r2t_pdu(queue->snd_cmd);
+ else
+ nvmet_setup_response_pdu(queue->snd_cmd);
+
+ return queue->snd_cmd;
+}
+
+static void nvmet_tcp_queue_response(struct nvmet_req *req)
+{
+ struct nvmet_tcp_cmd *cmd =
+ container_of(req, struct nvmet_tcp_cmd, req);
+ struct nvmet_tcp_queue *queue = cmd->queue;
+
+ llist_add(&cmd->lentry, &queue->resp_list);
+ queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work);
+}
+
+static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
+{
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+ int left = sizeof(*cmd->data_pdu) - cmd->offset + hdgst;
+ int ret;
+
+ ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
+ offset_in_page(cmd->data_pdu) + cmd->offset,
+ left, MSG_DONTWAIT | MSG_MORE);
+ if (ret <= 0)
+ return ret;
+
+ cmd->offset += ret;
+ left -= ret;
+
+ if (left)
+ return -EAGAIN;
+
+ cmd->state = NVMET_TCP_SEND_DATA;
+ cmd->offset = 0;
+ return 1;
+}
+
+static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ int ret;
+
+ while (cmd->cur_sg) {
+ struct page *page = sg_page(cmd->cur_sg);
+ u32 left = cmd->cur_sg->length - cmd->offset;
+
+ ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
+ left, MSG_DONTWAIT | MSG_MORE);
+ if (ret <= 0)
+ return ret;
+
+ cmd->offset += ret;
+ cmd->wbytes_done += ret;
+
+ /* Done with sg?*/
+ if (cmd->offset == cmd->cur_sg->length) {
+ cmd->cur_sg = sg_next(cmd->cur_sg);
+ cmd->offset = 0;
+ }
+ }
+
+ if (queue->data_digest) {
+ cmd->state = NVMET_TCP_SEND_DDGST;
+ cmd->offset = 0;
+ } else {
+ nvmet_setup_response_pdu(cmd);
+ }
+ return 1;
+
+}
+
+static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
+ bool last_in_batch)
+{
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+ int left = sizeof(*cmd->rsp_pdu) - cmd->offset + hdgst;
+ int flags = MSG_DONTWAIT;
+ int ret;
+
+ if (!last_in_batch && cmd->queue->send_list_len)
+ flags |= MSG_MORE;
+ else
+ flags |= MSG_EOR;
+
+ ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->rsp_pdu),
+ offset_in_page(cmd->rsp_pdu) + cmd->offset, left, flags);
+ if (ret <= 0)
+ return ret;
+ cmd->offset += ret;
+ left -= ret;
+
+ if (left)
+ return -EAGAIN;
+
+ kfree(cmd->iov);
+ sgl_free(cmd->req.sg);
+ cmd->queue->snd_cmd = NULL;
+ nvmet_tcp_put_cmd(cmd);
+ return 1;
+}
+
+static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
+{
+ u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
+ int left = sizeof(*cmd->r2t_pdu) - cmd->offset + hdgst;
+ int flags = MSG_DONTWAIT;
+ int ret;
+
+ if (!last_in_batch && cmd->queue->send_list_len)
+ flags |= MSG_MORE;
+ else
+ flags |= MSG_EOR;
+
+ ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->r2t_pdu),
+ offset_in_page(cmd->r2t_pdu) + cmd->offset, left, flags);
+ if (ret <= 0)
+ return ret;
+ cmd->offset += ret;
+ left -= ret;
+
+ if (left)
+ return -EAGAIN;
+
+ cmd->queue->snd_cmd = NULL;
+ return 1;
+}
+
+static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvmet_tcp_queue *queue = cmd->queue;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+ struct kvec iov = {
+ .iov_base = &cmd->exp_ddgst + cmd->offset,
+ .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
+ };
+ int ret;
+
+ ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
+ if (unlikely(ret <= 0))
+ return ret;
+
+ cmd->offset += ret;
+ nvmet_setup_response_pdu(cmd);
+ return 1;
+}
+
+static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
+ bool last_in_batch)
+{
+ struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
+ int ret = 0;
+
+ if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
+ cmd = nvmet_tcp_fetch_cmd(queue);
+ if (unlikely(!cmd))
+ return 0;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_DATA_PDU) {
+ ret = nvmet_try_send_data_pdu(cmd);
+ if (ret <= 0)
+ goto done_send;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_DATA) {
+ ret = nvmet_try_send_data(cmd);
+ if (ret <= 0)
+ goto done_send;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_DDGST) {
+ ret = nvmet_try_send_ddgst(cmd);
+ if (ret <= 0)
+ goto done_send;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_R2T) {
+ ret = nvmet_try_send_r2t(cmd, last_in_batch);
+ if (ret <= 0)
+ goto done_send;
+ }
+
+ if (cmd->state == NVMET_TCP_SEND_RESPONSE)
+ ret = nvmet_try_send_response(cmd, last_in_batch);
+
+done_send:
+ if (ret < 0) {
+ if (ret == -EAGAIN)
+ return 0;
+ return ret;
+ }
+
+ return 1;
+}
+
+static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
+ int budget, int *sends)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < budget; i++) {
+ ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
+ if (ret <= 0)
+ break;
+ (*sends)++;
+ }
+
+ return ret;
+}
+
+static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue)
+{
+ queue->offset = 0;
+ queue->left = sizeof(struct nvme_tcp_hdr);
+ queue->cmd = NULL;
+ queue->rcv_state = NVMET_TCP_RECV_PDU;
+}
+
+static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
+
+ ahash_request_free(queue->rcv_hash);
+ ahash_request_free(queue->snd_hash);
+ crypto_free_ahash(tfm);
+}
+
+static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue)
+{
+ struct crypto_ahash *tfm;
+
+ tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!queue->snd_hash)
+ goto free_tfm;
+ ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
+
+ queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!queue->rcv_hash)
+ goto free_snd_hash;
+ ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
+
+ return 0;
+free_snd_hash:
+ ahash_request_free(queue->snd_hash);
+free_tfm:
+ crypto_free_ahash(tfm);
+ return -ENOMEM;
+}
+
+
+static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
+{
+ struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq;
+ struct nvme_tcp_icresp_pdu *icresp = &queue->pdu.icresp;
+ struct msghdr msg = {};
+ struct kvec iov;
+ int ret;
+
+ if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) {
+ pr_err("bad nvme-tcp pdu length (%d)\n",
+ le32_to_cpu(icreq->hdr.plen));
+ nvmet_tcp_fatal_error(queue);
+ }
+
+ if (icreq->pfv != NVME_TCP_PFV_1_0) {
+ pr_err("queue %d: bad pfv %d\n", queue->idx, icreq->pfv);
+ return -EPROTO;
+ }
+
+ if (icreq->hpda != 0) {
+ pr_err("queue %d: unsupported hpda %d\n", queue->idx,
+ icreq->hpda);
+ return -EPROTO;
+ }
+
+ if (icreq->maxr2t != 0) {
+ pr_err("queue %d: unsupported maxr2t %d\n", queue->idx,
+ le32_to_cpu(icreq->maxr2t) + 1);
+ return -EPROTO;
+ }
+
+ queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE);
+ queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE);
+ if (queue->hdr_digest || queue->data_digest) {
+ ret = nvmet_tcp_alloc_crypto(queue);
+ if (ret)
+ return ret;
+ }
+
+ memset(icresp, 0, sizeof(*icresp));
+ icresp->hdr.type = nvme_tcp_icresp;
+ icresp->hdr.hlen = sizeof(*icresp);
+ icresp->hdr.pdo = 0;
+ icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
+ icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
+ icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */
+ icresp->cpda = 0;
+ if (queue->hdr_digest)
+ icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
+ if (queue->data_digest)
+ icresp->digest |= NVME_TCP_DATA_DIGEST_ENABLE;
+
+ iov.iov_base = icresp;
+ iov.iov_len = sizeof(*icresp);
+ ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
+ if (ret < 0)
+ goto free_crypto;
+
+ queue->state = NVMET_TCP_Q_LIVE;
+ nvmet_prepare_receive_pdu(queue);
+ return 0;
+free_crypto:
+ if (queue->hdr_digest || queue->data_digest)
+ nvmet_tcp_free_crypto(queue);
+ return ret;
+}
+
+static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
+ struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
+{
+ int ret;
+
+ /* recover the expected data transfer length */
+ req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
+
+ if (!nvme_is_write(cmd->req.cmd) ||
+ req->data_len > cmd->req.port->inline_data_size) {
+ nvmet_prepare_receive_pdu(queue);
+ return;
+ }
+
+ ret = nvmet_tcp_map_data(cmd);
+ if (unlikely(ret)) {
+ pr_err("queue %d: failed to map data\n", queue->idx);
+ nvmet_tcp_fatal_error(queue);
+ return;
+ }
+
+ queue->rcv_state = NVMET_TCP_RECV_DATA;
+ nvmet_tcp_map_pdu_iovec(cmd);
+ cmd->flags |= NVMET_TCP_F_INIT_FAILED;
+}
+
+static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
+{
+ struct nvme_tcp_data_pdu *data = &queue->pdu.data;
+ struct nvmet_tcp_cmd *cmd;
+
+ cmd = &queue->cmds[data->ttag];
+
+ if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) {
+ pr_err("ttag %u unexpected data offset %u (expected %u)\n",
+ data->ttag, le32_to_cpu(data->data_offset),
+ cmd->rbytes_done);
+ /* FIXME: use path and transport errors */
+ nvmet_req_complete(&cmd->req,
+ NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+ return -EPROTO;
+ }
+
+ cmd->pdu_len = le32_to_cpu(data->data_length);
+ cmd->pdu_recv = 0;
+ nvmet_tcp_map_pdu_iovec(cmd);
+ queue->cmd = cmd;
+ queue->rcv_state = NVMET_TCP_RECV_DATA;
+
+ return 0;
+}
+
+static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
+{
+ struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
+ struct nvme_command *nvme_cmd = &queue->pdu.cmd.cmd;
+ struct nvmet_req *req;
+ int ret;
+
+ if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) {
+ if (hdr->type != nvme_tcp_icreq) {
+ pr_err("unexpected pdu type (%d) before icreq\n",
+ hdr->type);
+ nvmet_tcp_fatal_error(queue);
+ return -EPROTO;
+ }
+ return nvmet_tcp_handle_icreq(queue);
+ }
+
+ if (hdr->type == nvme_tcp_h2c_data) {
+ ret = nvmet_tcp_handle_h2c_data_pdu(queue);
+ if (unlikely(ret))
+ return ret;
+ return 0;
+ }
+
+ queue->cmd = nvmet_tcp_get_cmd(queue);
+ if (unlikely(!queue->cmd)) {
+ /* This should never happen */
+ pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d",
+ queue->idx, queue->nr_cmds, queue->send_list_len,
+ nvme_cmd->common.opcode);
+ nvmet_tcp_fatal_error(queue);
+ return -ENOMEM;
+ }
+
+ req = &queue->cmd->req;
+ memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd));
+
+ if (unlikely(!nvmet_req_init(req, &queue->nvme_cq,
+ &queue->nvme_sq, &nvmet_tcp_ops))) {
+ pr_err("failed cmd %p id %d opcode %d, data_len: %d\n",
+ req->cmd, req->cmd->common.command_id,
+ req->cmd->common.opcode,
+ le32_to_cpu(req->cmd->common.dptr.sgl.length));
+
+ nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
+ return -EAGAIN;
+ }
+
+ ret = nvmet_tcp_map_data(queue->cmd);
+ if (unlikely(ret)) {
+ pr_err("queue %d: failed to map data\n", queue->idx);
+ if (nvmet_tcp_has_inline_data(queue->cmd))
+ nvmet_tcp_fatal_error(queue);
+ else
+ nvmet_req_complete(req, ret);
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (nvmet_tcp_need_data_in(queue->cmd)) {
+ if (nvmet_tcp_has_inline_data(queue->cmd)) {
+ queue->rcv_state = NVMET_TCP_RECV_DATA;
+ nvmet_tcp_map_pdu_iovec(queue->cmd);
+ return 0;
+ }
+ /* send back R2T */
+ nvmet_tcp_queue_response(&queue->cmd->req);
+ goto out;
+ }
+
+ nvmet_req_execute(&queue->cmd->req);
+out:
+ nvmet_prepare_receive_pdu(queue);
+ return ret;
+}
+
+static const u8 nvme_tcp_pdu_sizes[] = {
+ [nvme_tcp_icreq] = sizeof(struct nvme_tcp_icreq_pdu),
+ [nvme_tcp_cmd] = sizeof(struct nvme_tcp_cmd_pdu),
+ [nvme_tcp_h2c_data] = sizeof(struct nvme_tcp_data_pdu),
+};
+
+static inline u8 nvmet_tcp_pdu_size(u8 type)
+{
+ size_t idx = type;
+
+ return (idx < ARRAY_SIZE(nvme_tcp_pdu_sizes) &&
+ nvme_tcp_pdu_sizes[idx]) ?
+ nvme_tcp_pdu_sizes[idx] : 0;
+}
+
+static inline bool nvmet_tcp_pdu_valid(u8 type)
+{
+ switch (type) {
+ case nvme_tcp_icreq:
+ case nvme_tcp_cmd:
+ case nvme_tcp_h2c_data:
+ /* fallthru */
+ return true;
+ }
+
+ return false;
+}
+
+static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
+{
+ struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
+ int len;
+ struct kvec iov;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+
+recv:
+ iov.iov_base = (void *)&queue->pdu + queue->offset;
+ iov.iov_len = queue->left;
+ len = kernel_recvmsg(queue->sock, &msg, &iov, 1,
+ iov.iov_len, msg.msg_flags);
+ if (unlikely(len < 0))
+ return len;
+
+ queue->offset += len;
+ queue->left -= len;
+ if (queue->left)
+ return -EAGAIN;
+
+ if (queue->offset == sizeof(struct nvme_tcp_hdr)) {
+ u8 hdgst = nvmet_tcp_hdgst_len(queue);
+
+ if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) {
+ pr_err("unexpected pdu type %d\n", hdr->type);
+ nvmet_tcp_fatal_error(queue);
+ return -EIO;
+ }
+
+ if (unlikely(hdr->hlen != nvmet_tcp_pdu_size(hdr->type))) {
+ pr_err("pdu %d bad hlen %d\n", hdr->type, hdr->hlen);
+ return -EIO;
+ }
+
+ queue->left = hdr->hlen - queue->offset + hdgst;
+ goto recv;
+ }
+
+ if (queue->hdr_digest &&
+ nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) {
+ nvmet_tcp_fatal_error(queue); /* fatal */
+ return -EPROTO;
+ }
+
+ if (queue->data_digest &&
+ nvmet_tcp_check_ddgst(queue, &queue->pdu)) {
+ nvmet_tcp_fatal_error(queue); /* fatal */
+ return -EPROTO;
+ }
+
+ return nvmet_tcp_done_recv_pdu(queue);
+}
+
+static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd)
+{
+ struct nvmet_tcp_queue *queue = cmd->queue;
+
+ nvmet_tcp_ddgst(queue->rcv_hash, cmd);
+ queue->offset = 0;
+ queue->left = NVME_TCP_DIGEST_LENGTH;
+ queue->rcv_state = NVMET_TCP_RECV_DDGST;
+}
+
+static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd = queue->cmd;
+ int ret;
+
+ while (msg_data_left(&cmd->recv_msg)) {
+ ret = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg,
+ cmd->recv_msg.msg_flags);
+ if (ret <= 0)
+ return ret;
+
+ cmd->pdu_recv += ret;
+ cmd->rbytes_done += ret;
+ }
+
+ nvmet_tcp_unmap_pdu_iovec(cmd);
+
+ if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
+ cmd->rbytes_done == cmd->req.transfer_len) {
+ if (queue->data_digest) {
+ nvmet_tcp_prep_recv_ddgst(cmd);
+ return 0;
+ }
+ nvmet_req_execute(&cmd->req);
+ }
+
+ nvmet_prepare_receive_pdu(queue);
+ return 0;
+}
+
+static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd = queue->cmd;
+ int ret;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+ struct kvec iov = {
+ .iov_base = (void *)&cmd->recv_ddgst + queue->offset,
+ .iov_len = queue->left
+ };
+
+ ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
+ iov.iov_len, msg.msg_flags);
+ if (unlikely(ret < 0))
+ return ret;
+
+ queue->offset += ret;
+ queue->left -= ret;
+ if (queue->left)
+ return -EAGAIN;
+
+ if (queue->data_digest && cmd->exp_ddgst != cmd->recv_ddgst) {
+ pr_err("queue %d: cmd %d pdu (%d) data digest error: recv %#x expected %#x\n",
+ queue->idx, cmd->req.cmd->common.command_id,
+ queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst),
+ le32_to_cpu(cmd->exp_ddgst));
+ nvmet_tcp_finish_cmd(cmd);
+ nvmet_tcp_fatal_error(queue);
+ ret = -EPROTO;
+ goto out;
+ }
+
+ if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
+ cmd->rbytes_done == cmd->req.transfer_len)
+ nvmet_req_execute(&cmd->req);
+ ret = 0;
+out:
+ nvmet_prepare_receive_pdu(queue);
+ return ret;
+}
+
+static int nvmet_tcp_try_recv_one(struct nvmet_tcp_queue *queue)
+{
+ int result;
+
+ if (unlikely(queue->rcv_state == NVMET_TCP_RECV_ERR))
+ return 0;
+
+ if (queue->rcv_state == NVMET_TCP_RECV_PDU) {
+ result = nvmet_tcp_try_recv_pdu(queue);
+ if (result != 0)
+ goto done_recv;
+ }
+
+ if (queue->rcv_state == NVMET_TCP_RECV_DATA) {
+ result = nvmet_tcp_try_recv_data(queue);
+ if (result != 0)
+ goto done_recv;
+ }
+
+ if (queue->rcv_state == NVMET_TCP_RECV_DDGST) {
+ result = nvmet_tcp_try_recv_ddgst(queue);
+ if (result != 0)
+ goto done_recv;
+ }
+
+done_recv:
+ if (result < 0) {
+ if (result == -EAGAIN)
+ return 0;
+ return result;
+ }
+ return 1;
+}
+
+static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
+ int budget, int *recvs)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < budget; i++) {
+ ret = nvmet_tcp_try_recv_one(queue);
+ if (ret <= 0)
+ break;
+ (*recvs)++;
+ }
+
+ return ret;
+}
+
+static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
+{
+ spin_lock(&queue->state_lock);
+ if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
+ queue->state = NVMET_TCP_Q_DISCONNECTING;
+ schedule_work(&queue->release_work);
+ }
+ spin_unlock(&queue->state_lock);
+}
+
+static void nvmet_tcp_io_work(struct work_struct *w)
+{
+ struct nvmet_tcp_queue *queue =
+ container_of(w, struct nvmet_tcp_queue, io_work);
+ bool pending;
+ int ret, ops = 0;
+
+ do {
+ pending = false;
+
+ ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
+ if (ret > 0) {
+ pending = true;
+ } else if (ret < 0) {
+ if (ret == -EPIPE || ret == -ECONNRESET)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ else
+ nvmet_tcp_fatal_error(queue);
+ return;
+ }
+
+ ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
+ if (ret > 0) {
+ /* transmitted message/data */
+ pending = true;
+ } else if (ret < 0) {
+ if (ret == -EPIPE || ret == -ECONNRESET)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ else
+ nvmet_tcp_fatal_error(queue);
+ return;
+ }
+
+ } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
+
+ /*
+ * We exahusted our budget, requeue our selves
+ */
+ if (pending)
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+}
+
+static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
+ struct nvmet_tcp_cmd *c)
+{
+ u8 hdgst = nvmet_tcp_hdgst_len(queue);
+
+ c->queue = queue;
+ c->req.port = queue->port->nport;
+
+ c->cmd_pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(*c->cmd_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
+ if (!c->cmd_pdu)
+ return -ENOMEM;
+ c->req.cmd = &c->cmd_pdu->cmd;
+
+ c->rsp_pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
+ if (!c->rsp_pdu)
+ goto out_free_cmd;
+ c->req.rsp = &c->rsp_pdu->cqe;
+
+ c->data_pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
+ if (!c->data_pdu)
+ goto out_free_rsp;
+
+ c->r2t_pdu = page_frag_alloc(&queue->pf_cache,
+ sizeof(*c->r2t_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
+ if (!c->r2t_pdu)
+ goto out_free_data;
+
+ c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
+
+ list_add_tail(&c->entry, &queue->free_list);
+
+ return 0;
+out_free_data:
+ page_frag_free(c->data_pdu);
+out_free_rsp:
+ page_frag_free(c->rsp_pdu);
+out_free_cmd:
+ page_frag_free(c->cmd_pdu);
+ return -ENOMEM;
+}
+
+static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c)
+{
+ page_frag_free(c->r2t_pdu);
+ page_frag_free(c->data_pdu);
+ page_frag_free(c->rsp_pdu);
+ page_frag_free(c->cmd_pdu);
+}
+
+static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmds;
+ int i, ret = -EINVAL, nr_cmds = queue->nr_cmds;
+
+ cmds = kcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL);
+ if (!cmds)
+ goto out;
+
+ for (i = 0; i < nr_cmds; i++) {
+ ret = nvmet_tcp_alloc_cmd(queue, cmds + i);
+ if (ret)
+ goto out_free;
+ }
+
+ queue->cmds = cmds;
+
+ return 0;
+out_free:
+ while (--i >= 0)
+ nvmet_tcp_free_cmd(cmds + i);
+ kfree(cmds);
+out:
+ return ret;
+}
+
+static void nvmet_tcp_free_cmds(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmds = queue->cmds;
+ int i;
+
+ for (i = 0; i < queue->nr_cmds; i++)
+ nvmet_tcp_free_cmd(cmds + i);
+
+ nvmet_tcp_free_cmd(&queue->connect);
+ kfree(cmds);
+}
+
+static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)
+{
+ struct socket *sock = queue->sock;
+
+ write_lock_bh(&sock->sk->sk_callback_lock);
+ sock->sk->sk_data_ready = queue->data_ready;
+ sock->sk->sk_state_change = queue->state_change;
+ sock->sk->sk_write_space = queue->write_space;
+ sock->sk->sk_user_data = NULL;
+ write_unlock_bh(&sock->sk->sk_callback_lock);
+}
+
+static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
+{
+ nvmet_req_uninit(&cmd->req);
+ nvmet_tcp_unmap_pdu_iovec(cmd);
+ sgl_free(cmd->req.sg);
+}
+
+static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd = queue->cmds;
+ int i;
+
+ for (i = 0; i < queue->nr_cmds; i++, cmd++) {
+ if (nvmet_tcp_need_data_in(cmd))
+ nvmet_tcp_finish_cmd(cmd);
+ }
+
+ if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
+ /* failed in connect */
+ nvmet_tcp_finish_cmd(&queue->connect);
+ }
+}
+
+static void nvmet_tcp_release_queue_work(struct work_struct *w)
+{
+ struct nvmet_tcp_queue *queue =
+ container_of(w, struct nvmet_tcp_queue, release_work);
+
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_del_init(&queue->queue_list);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+
+ nvmet_tcp_restore_socket_callbacks(queue);
+ flush_work(&queue->io_work);
+
+ nvmet_tcp_uninit_data_in_cmds(queue);
+ nvmet_sq_destroy(&queue->nvme_sq);
+ cancel_work_sync(&queue->io_work);
+ sock_release(queue->sock);
+ nvmet_tcp_free_cmds(queue);
+ if (queue->hdr_digest || queue->data_digest)
+ nvmet_tcp_free_crypto(queue);
+ ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
+
+ kfree(queue);
+}
+
+static void nvmet_tcp_data_ready(struct sock *sk)
+{
+ struct nvmet_tcp_queue *queue;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ queue = sk->sk_user_data;
+ if (likely(queue))
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void nvmet_tcp_write_space(struct sock *sk)
+{
+ struct nvmet_tcp_queue *queue;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ queue = sk->sk_user_data;
+ if (unlikely(!queue))
+ goto out;
+
+ if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) {
+ queue->write_space(sk);
+ goto out;
+ }
+
+ if (sk_stream_is_writeable(sk)) {
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+ }
+out:
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void nvmet_tcp_state_change(struct sock *sk)
+{
+ struct nvmet_tcp_queue *queue;
+
+ write_lock_bh(&sk->sk_callback_lock);
+ queue = sk->sk_user_data;
+ if (!queue)
+ goto done;
+
+ switch (sk->sk_state) {
+ case TCP_FIN_WAIT1:
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSE:
+ /* FALLTHRU */
+ sk->sk_user_data = NULL;
+ nvmet_tcp_schedule_release_queue(queue);
+ break;
+ default:
+ pr_warn("queue %d unhandled state %d\n",
+ queue->idx, sk->sk_state);
+ }
+done:
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
+{
+ struct socket *sock = queue->sock;
+ struct linger sol = { .l_onoff = 1, .l_linger = 0 };
+ int ret;
+
+ ret = kernel_getsockname(sock,
+ (struct sockaddr *)&queue->sockaddr);
+ if (ret < 0)
+ return ret;
+
+ ret = kernel_getpeername(sock,
+ (struct sockaddr *)&queue->sockaddr_peer);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Cleanup whatever is sitting in the TCP transmit queue on socket
+ * close. This is done to prevent stale data from being sent should
+ * the network connection be restored before TCP times out.
+ */
+ ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
+ (char *)&sol, sizeof(sol));
+ if (ret)
+ return ret;
+
+ write_lock_bh(&sock->sk->sk_callback_lock);
+ sock->sk->sk_user_data = queue;
+ queue->data_ready = sock->sk->sk_data_ready;
+ sock->sk->sk_data_ready = nvmet_tcp_data_ready;
+ queue->state_change = sock->sk->sk_state_change;
+ sock->sk->sk_state_change = nvmet_tcp_state_change;
+ queue->write_space = sock->sk->sk_write_space;
+ sock->sk->sk_write_space = nvmet_tcp_write_space;
+ write_unlock_bh(&sock->sk->sk_callback_lock);
+
+ return 0;
+}
+
+static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
+ struct socket *newsock)
+{
+ struct nvmet_tcp_queue *queue;
+ int ret;
+
+ queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+ if (!queue)
+ return -ENOMEM;
+
+ INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
+ INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
+ queue->sock = newsock;
+ queue->port = port;
+ queue->nr_cmds = 0;
+ spin_lock_init(&queue->state_lock);
+ queue->state = NVMET_TCP_Q_CONNECTING;
+ INIT_LIST_HEAD(&queue->free_list);
+ init_llist_head(&queue->resp_list);
+ INIT_LIST_HEAD(&queue->resp_send_list);
+
+ queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL);
+ if (queue->idx < 0) {
+ ret = queue->idx;
+ goto out_free_queue;
+ }
+
+ ret = nvmet_tcp_alloc_cmd(queue, &queue->connect);
+ if (ret)
+ goto out_ida_remove;
+
+ ret = nvmet_sq_init(&queue->nvme_sq);
+ if (ret)
+ goto out_free_connect;
+
+ port->last_cpu = cpumask_next_wrap(port->last_cpu,
+ cpu_online_mask, -1, false);
+ queue->cpu = port->last_cpu;
+ nvmet_prepare_receive_pdu(queue);
+
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+
+ ret = nvmet_tcp_set_queue_sock(queue);
+ if (ret)
+ goto out_destroy_sq;
+
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+
+ return 0;
+out_destroy_sq:
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_del_init(&queue->queue_list);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+ nvmet_sq_destroy(&queue->nvme_sq);
+out_free_connect:
+ nvmet_tcp_free_cmd(&queue->connect);
+out_ida_remove:
+ ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
+out_free_queue:
+ kfree(queue);
+ return ret;
+}
+
+static void nvmet_tcp_accept_work(struct work_struct *w)
+{
+ struct nvmet_tcp_port *port =
+ container_of(w, struct nvmet_tcp_port, accept_work);
+ struct socket *newsock;
+ int ret;
+
+ while (true) {
+ ret = kernel_accept(port->sock, &newsock, O_NONBLOCK);
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ pr_warn("failed to accept err=%d\n", ret);
+ return;
+ }
+ ret = nvmet_tcp_alloc_queue(port, newsock);
+ if (ret) {
+ pr_err("failed to allocate queue\n");
+ sock_release(newsock);
+ }
+ }
+}
+
+static void nvmet_tcp_listen_data_ready(struct sock *sk)
+{
+ struct nvmet_tcp_port *port;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ port = sk->sk_user_data;
+ if (!port)
+ goto out;
+
+ if (sk->sk_state == TCP_LISTEN)
+ schedule_work(&port->accept_work);
+out:
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int nvmet_tcp_add_port(struct nvmet_port *nport)
+{
+ struct nvmet_tcp_port *port;
+ __kernel_sa_family_t af;
+ int opt, ret;
+
+ port = kzalloc(sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ switch (nport->disc_addr.adrfam) {
+ case NVMF_ADDR_FAMILY_IP4:
+ af = AF_INET;
+ break;
+ case NVMF_ADDR_FAMILY_IP6:
+ af = AF_INET6;
+ break;
+ default:
+ pr_err("address family %d not supported\n",
+ nport->disc_addr.adrfam);
+ ret = -EINVAL;
+ goto err_port;
+ }
+
+ ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
+ nport->disc_addr.trsvcid, &port->addr);
+ if (ret) {
+ pr_err("malformed ip/port passed: %s:%s\n",
+ nport->disc_addr.traddr, nport->disc_addr.trsvcid);
+ goto err_port;
+ }
+
+ port->nport = nport;
+ port->last_cpu = -1;
+ INIT_WORK(&port->accept_work, nvmet_tcp_accept_work);
+ if (port->nport->inline_data_size < 0)
+ port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE;
+
+ ret = sock_create(port->addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &port->sock);
+ if (ret) {
+ pr_err("failed to create a socket\n");
+ goto err_port;
+ }
+
+ port->sock->sk->sk_user_data = port;
+ port->data_ready = port->sock->sk->sk_data_ready;
+ port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
+
+ opt = 1;
+ ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
+ TCP_NODELAY, (char *)&opt, sizeof(opt));
+ if (ret) {
+ pr_err("failed to set TCP_NODELAY sock opt %d\n", ret);
+ goto err_sock;
+ }
+
+ ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&opt, sizeof(opt));
+ if (ret) {
+ pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret);
+ goto err_sock;
+ }
+
+ ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
+ sizeof(port->addr));
+ if (ret) {
+ pr_err("failed to bind port socket %d\n", ret);
+ goto err_sock;
+ }
+
+ ret = kernel_listen(port->sock, 128);
+ if (ret) {
+ pr_err("failed to listen %d on port sock\n", ret);
+ goto err_sock;
+ }
+
+ nport->priv = port;
+ pr_info("enabling port %d (%pISpc)\n",
+ le16_to_cpu(nport->disc_addr.portid), &port->addr);
+
+ return 0;
+
+err_sock:
+ sock_release(port->sock);
+err_port:
+ kfree(port);
+ return ret;
+}
+
+static void nvmet_tcp_remove_port(struct nvmet_port *nport)
+{
+ struct nvmet_tcp_port *port = nport->priv;
+
+ write_lock_bh(&port->sock->sk->sk_callback_lock);
+ port->sock->sk->sk_data_ready = port->data_ready;
+ port->sock->sk->sk_user_data = NULL;
+ write_unlock_bh(&port->sock->sk->sk_callback_lock);
+ cancel_work_sync(&port->accept_work);
+
+ sock_release(port->sock);
+ kfree(port);
+}
+
+static void nvmet_tcp_delete_ctrl(struct nvmet_ctrl *ctrl)
+{
+ struct nvmet_tcp_queue *queue;
+
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
+ if (queue->nvme_sq.ctrl == ctrl)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+}
+
+static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
+{
+ struct nvmet_tcp_queue *queue =
+ container_of(sq, struct nvmet_tcp_queue, nvme_sq);
+
+ if (sq->qid == 0) {
+ /* Let inflight controller teardown complete */
+ flush_scheduled_work();
+ }
+
+ queue->nr_cmds = sq->size * 2;
+ if (nvmet_tcp_alloc_cmds(queue))
+ return NVME_SC_INTERNAL;
+ return 0;
+}
+
+static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
+ struct nvmet_port *nport, char *traddr)
+{
+ struct nvmet_tcp_port *port = nport->priv;
+
+ if (inet_addr_is_any((struct sockaddr *)&port->addr)) {
+ struct nvmet_tcp_cmd *cmd =
+ container_of(req, struct nvmet_tcp_cmd, req);
+ struct nvmet_tcp_queue *queue = cmd->queue;
+
+ sprintf(traddr, "%pISc", (struct sockaddr *)&queue->sockaddr);
+ } else {
+ memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
+ }
+}
+
+static struct nvmet_fabrics_ops nvmet_tcp_ops = {
+ .owner = THIS_MODULE,
+ .type = NVMF_TRTYPE_TCP,
+ .msdbd = 1,
+ .has_keyed_sgls = 0,
+ .add_port = nvmet_tcp_add_port,
+ .remove_port = nvmet_tcp_remove_port,
+ .queue_response = nvmet_tcp_queue_response,
+ .delete_ctrl = nvmet_tcp_delete_ctrl,
+ .install_queue = nvmet_tcp_install_queue,
+ .disc_traddr = nvmet_tcp_disc_port_addr,
+};
+
+static int __init nvmet_tcp_init(void)
+{
+ int ret;
+
+ nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", WQ_HIGHPRI, 0);
+ if (!nvmet_tcp_wq)
+ return -ENOMEM;
+
+ ret = nvmet_register_transport(&nvmet_tcp_ops);
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ destroy_workqueue(nvmet_tcp_wq);
+ return ret;
+}
+
+static void __exit nvmet_tcp_exit(void)
+{
+ struct nvmet_tcp_queue *queue;
+
+ nvmet_unregister_transport(&nvmet_tcp_ops);
+
+ flush_scheduled_work();
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+ flush_scheduled_work();
+
+ destroy_workqueue(nvmet_tcp_wq);
+}
+
+module_init(nvmet_tcp_init);
+module_exit(nvmet_tcp_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 358e380eb7fa..8d2fc84119c6 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -110,8 +110,6 @@
#define CMD_TLB_DIRECT_WRITE 35 /* IO_COMMAND for I/O TLB Writes */
#define CMD_TLB_PURGE 33 /* IO_COMMAND to Purge I/O TLB entry */
-#define CCIO_MAPPING_ERROR (~(dma_addr_t)0)
-
struct ioa_registers {
/* Runway Supervisory Set */
int32_t unused1[12];
@@ -740,7 +738,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
BUG_ON(!dev);
ioc = GET_IOC(dev);
if (!ioc)
- return CCIO_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
BUG_ON(size <= 0);
@@ -1021,11 +1019,6 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents);
}
-static int ccio_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == CCIO_MAPPING_ERROR;
-}
-
static const struct dma_map_ops ccio_ops = {
.dma_supported = ccio_dma_supported,
.alloc = ccio_alloc,
@@ -1034,7 +1027,6 @@ static const struct dma_map_ops ccio_ops = {
.unmap_page = ccio_unmap_page,
.map_sg = ccio_map_sg,
.unmap_sg = ccio_unmap_sg,
- .mapping_error = ccio_mapping_error,
};
#ifdef CONFIG_PROC_FS
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index e0655949480a..42172eb32235 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -93,8 +93,6 @@
#define DEFAULT_DMA_HINT_REG 0
-#define SBA_MAPPING_ERROR (~(dma_addr_t)0)
-
struct sba_device *sba_list;
EXPORT_SYMBOL_GPL(sba_list);
@@ -725,7 +723,7 @@ sba_map_single(struct device *dev, void *addr, size_t size,
ioc = GET_IOC(dev);
if (!ioc)
- return SBA_MAPPING_ERROR;
+ return DMA_MAPPING_ERROR;
/* save offset bits */
offset = ((dma_addr_t) (long) addr) & ~IOVP_MASK;
@@ -1080,11 +1078,6 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
}
-static int sba_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == SBA_MAPPING_ERROR;
-}
-
static const struct dma_map_ops sba_ops = {
.dma_supported = sba_dma_supported,
.alloc = sba_alloc,
@@ -1093,7 +1086,6 @@ static const struct dma_map_ops sba_ops = {
.unmap_page = sba_unmap_page,
.map_sg = sba_map_sg,
.unmap_sg = sba_unmap_sg,
- .mapping_error = sba_mapping_error,
};
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index e50b0b5815ff..3890812cdf87 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -307,39 +307,32 @@ static struct device *to_vmd_dev(struct device *dev)
return &vmd->dev->dev;
}
-static const struct dma_map_ops *vmd_dma_ops(struct device *dev)
-{
- return get_dma_ops(to_vmd_dev(dev));
-}
-
static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr,
gfp_t flag, unsigned long attrs)
{
- return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag,
- attrs);
+ return dma_alloc_attrs(to_vmd_dev(dev), size, addr, flag, attrs);
}
static void vmd_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t addr, unsigned long attrs)
{
- return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr,
- attrs);
+ return dma_free_attrs(to_vmd_dev(dev), size, vaddr, addr, attrs);
}
static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t addr, size_t size,
unsigned long attrs)
{
- return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr,
- size, attrs);
+ return dma_mmap_attrs(to_vmd_dev(dev), vma, cpu_addr, addr, size,
+ attrs);
}
static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t addr, size_t size,
unsigned long attrs)
{
- return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr,
- addr, size, attrs);
+ return dma_get_sgtable_attrs(to_vmd_dev(dev), sgt, cpu_addr, addr, size,
+ attrs);
}
static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
@@ -347,66 +340,60 @@ static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
unsigned long attrs)
{
- return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size,
- dir, attrs);
+ return dma_map_page_attrs(to_vmd_dev(dev), page, offset, size, dir,
+ attrs);
}
static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
- vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs);
+ dma_unmap_page_attrs(to_vmd_dev(dev), addr, size, dir, attrs);
}
static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
- return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
+ return dma_map_sg_attrs(to_vmd_dev(dev), sg, nents, dir, attrs);
}
static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
- vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
+ dma_unmap_sg_attrs(to_vmd_dev(dev), sg, nents, dir, attrs);
}
static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
- vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir);
+ dma_sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir);
}
static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
- vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size,
- dir);
+ dma_sync_single_for_device(to_vmd_dev(dev), addr, size, dir);
}
static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir)
{
- vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir);
+ dma_sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir);
}
static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir)
{
- vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir);
-}
-
-static int vmd_mapping_error(struct device *dev, dma_addr_t addr)
-{
- return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr);
+ dma_sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir);
}
static int vmd_dma_supported(struct device *dev, u64 mask)
{
- return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask);
+ return dma_supported(to_vmd_dev(dev), mask);
}
static u64 vmd_get_required_mask(struct device *dev)
{
- return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev));
+ return dma_get_required_mask(to_vmd_dev(dev));
}
static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
@@ -446,7 +433,6 @@ static void vmd_setup_dma_ops(struct vmd_dev *vmd)
ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device);
ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu);
ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device);
- ASSIGN_VMD_DMA_OPS(source, dest, mapping_error);
ASSIGN_VMD_DMA_OPS(source, dest, dma_supported);
ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask);
add_dma_domain(domain);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index bef17c3fca67..ea55444e6ead 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1600,10 +1600,8 @@ static int pci_dma_configure(struct device *dev)
ret = of_dma_configure(dev, bridge->parent->of_node, true);
} else if (has_acpi_companion(bridge)) {
struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
- enum dev_dma_attr attr = acpi_get_dma_attr(adev);
- if (attr != DEV_DMA_NOT_SUPPORTED)
- ret = acpi_dma_configure(dev, attr);
+ ret = acpi_dma_configure(dev, acpi_get_dma_attr(adev));
}
pci_put_host_bridge_device(bridge);
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 2016e0ed5865..8e26001dc11c 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -412,6 +412,7 @@ static int dasd_ioctl_information(struct dasd_block *block,
struct ccw_dev_id dev_id;
struct dasd_device *base;
struct ccw_device *cdev;
+ struct list_head *l;
unsigned long flags;
int rc;
@@ -462,23 +463,10 @@ static int dasd_ioctl_information(struct dasd_block *block,
memcpy(dasd_info->type, base->discipline->name, 4);
- if (block->request_queue->request_fn) {
- struct list_head *l;
-#ifdef DASD_EXTENDED_PROFILING
- {
- struct list_head *l;
- spin_lock_irqsave(&block->lock, flags);
- list_for_each(l, &block->request_queue->queue_head)
- dasd_info->req_queue_len++;
- spin_unlock_irqrestore(&block->lock, flags);
- }
-#endif /* DASD_EXTENDED_PROFILING */
- spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
- list_for_each(l, &base->ccw_queue)
- dasd_info->chanq_len++;
- spin_unlock_irqrestore(get_ccwdev_lock(base->cdev),
- flags);
- }
+ spin_lock_irqsave(&block->queue_lock, flags);
+ list_for_each(l, &base->ccw_queue)
+ dasd_info->chanq_len++;
+ spin_unlock_irqrestore(&block->queue_lock, flags);
rc = 0;
if (copy_to_user(argp, dasd_info,
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 94f4d8fe85e0..9cf30d124b9e 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -4,7 +4,7 @@
*
* Module interface and handling of zfcp data structures.
*
- * Copyright IBM Corp. 2002, 2013
+ * Copyright IBM Corp. 2002, 2017
*/
/*
@@ -124,6 +124,9 @@ static int __init zfcp_module_init(void)
{
int retval = -ENOMEM;
+ if (zfcp_experimental_dix)
+ pr_warn("DIX is enabled. It is experimental and might cause problems\n");
+
zfcp_fsf_qtcb_cache = zfcp_cache_hw_align("zfcp_fsf_qtcb",
sizeof(struct fsf_qtcb));
if (!zfcp_fsf_qtcb_cache)
@@ -248,43 +251,36 @@ static int zfcp_allocate_low_mem_buffers(struct zfcp_adapter *adapter)
static void zfcp_free_low_mem_buffers(struct zfcp_adapter *adapter)
{
- if (adapter->pool.erp_req)
- mempool_destroy(adapter->pool.erp_req);
- if (adapter->pool.scsi_req)
- mempool_destroy(adapter->pool.scsi_req);
- if (adapter->pool.scsi_abort)
- mempool_destroy(adapter->pool.scsi_abort);
- if (adapter->pool.qtcb_pool)
- mempool_destroy(adapter->pool.qtcb_pool);
- if (adapter->pool.status_read_req)
- mempool_destroy(adapter->pool.status_read_req);
- if (adapter->pool.sr_data)
- mempool_destroy(adapter->pool.sr_data);
- if (adapter->pool.gid_pn)
- mempool_destroy(adapter->pool.gid_pn);
+ mempool_destroy(adapter->pool.erp_req);
+ mempool_destroy(adapter->pool.scsi_req);
+ mempool_destroy(adapter->pool.scsi_abort);
+ mempool_destroy(adapter->pool.qtcb_pool);
+ mempool_destroy(adapter->pool.status_read_req);
+ mempool_destroy(adapter->pool.sr_data);
+ mempool_destroy(adapter->pool.gid_pn);
}
/**
* zfcp_status_read_refill - refill the long running status_read_requests
* @adapter: ptr to struct zfcp_adapter for which the buffers should be refilled
*
- * Returns: 0 on success, 1 otherwise
- *
- * if there are 16 or more status_read requests missing an adapter_reopen
- * is triggered
+ * Return:
+ * * 0 on success meaning at least one status read is pending
+ * * 1 if posting failed and not a single status read buffer is pending,
+ * also triggers adapter reopen recovery
*/
int zfcp_status_read_refill(struct zfcp_adapter *adapter)
{
- while (atomic_read(&adapter->stat_miss) > 0)
+ while (atomic_add_unless(&adapter->stat_miss, -1, 0))
if (zfcp_fsf_status_read(adapter->qdio)) {
+ atomic_inc(&adapter->stat_miss); /* undo add -1 */
if (atomic_read(&adapter->stat_miss) >=
adapter->stat_read_buf_num) {
zfcp_erp_adapter_reopen(adapter, 0, "axsref1");
return 1;
}
break;
- } else
- atomic_dec(&adapter->stat_miss);
+ }
return 0;
}
@@ -542,45 +538,3 @@ err_out:
zfcp_ccw_adapter_put(adapter);
return ERR_PTR(retval);
}
-
-/**
- * zfcp_sg_free_table - free memory used by scatterlists
- * @sg: pointer to scatterlist
- * @count: number of scatterlist which are to be free'ed
- * the scatterlist are expected to reference pages always
- */
-void zfcp_sg_free_table(struct scatterlist *sg, int count)
-{
- int i;
-
- for (i = 0; i < count; i++, sg++)
- if (sg)
- free_page((unsigned long) sg_virt(sg));
- else
- break;
-}
-
-/**
- * zfcp_sg_setup_table - init scatterlist and allocate, assign buffers
- * @sg: pointer to struct scatterlist
- * @count: number of scatterlists which should be assigned with buffers
- * of size page
- *
- * Returns: 0 on success, -ENOMEM otherwise
- */
-int zfcp_sg_setup_table(struct scatterlist *sg, int count)
-{
- void *addr;
- int i;
-
- sg_init_table(sg, count);
- for (i = 0; i < count; i++, sg++) {
- addr = (void *) get_zeroed_page(GFP_KERNEL);
- if (!addr) {
- zfcp_sg_free_table(sg, i);
- return -ENOMEM;
- }
- sg_set_buf(sg, addr, PAGE_SIZE);
- }
- return 0;
-}
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index 3b368fcf13f4..dccdb41bed8c 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -63,7 +63,8 @@ void zfcp_dbf_pl_write(struct zfcp_dbf *dbf, void *data, u16 length, char *area,
/**
* zfcp_dbf_hba_fsf_res - trace event for fsf responses
- * @tag: tag indicating which kind of unsolicited status has been received
+ * @tag: tag indicating which kind of FSF response has been received
+ * @level: trace level to be used for event
* @req: request for which a response was received
*/
void zfcp_dbf_hba_fsf_res(char *tag, int level, struct zfcp_fsf_req *req)
@@ -81,8 +82,8 @@ void zfcp_dbf_hba_fsf_res(char *tag, int level, struct zfcp_fsf_req *req)
rec->id = ZFCP_DBF_HBA_RES;
rec->fsf_req_id = req->req_id;
rec->fsf_req_status = req->status;
- rec->fsf_cmd = req->fsf_command;
- rec->fsf_seq_no = req->seq_no;
+ rec->fsf_cmd = q_head->fsf_command;
+ rec->fsf_seq_no = q_pref->req_seq_no;
rec->u.res.req_issued = req->issued;
rec->u.res.prot_status = q_pref->prot_status;
rec->u.res.fsf_status = q_head->fsf_status;
@@ -94,7 +95,7 @@ void zfcp_dbf_hba_fsf_res(char *tag, int level, struct zfcp_fsf_req *req)
memcpy(rec->u.res.fsf_status_qual, &q_head->fsf_status_qual,
FSF_STATUS_QUALIFIER_SIZE);
- if (req->fsf_command != FSF_QTCB_FCP_CMND) {
+ if (q_head->fsf_command != FSF_QTCB_FCP_CMND) {
rec->pl_len = q_head->log_length;
zfcp_dbf_pl_write(dbf, (char *)q_pref + q_head->log_start,
rec->pl_len, "fsf_res", req->req_id);
@@ -127,7 +128,7 @@ void zfcp_dbf_hba_fsf_uss(char *tag, struct zfcp_fsf_req *req)
rec->id = ZFCP_DBF_HBA_USS;
rec->fsf_req_id = req->req_id;
rec->fsf_req_status = req->status;
- rec->fsf_cmd = req->fsf_command;
+ rec->fsf_cmd = FSF_QTCB_UNSOLICITED_STATUS;
if (!srb)
goto log;
@@ -153,7 +154,7 @@ log:
/**
* zfcp_dbf_hba_bit_err - trace event for bit error conditions
- * @tag: tag indicating which kind of unsolicited status has been received
+ * @tag: tag indicating which kind of bit error unsolicited status was received
* @req: request which caused the bit_error condition
*/
void zfcp_dbf_hba_bit_err(char *tag, struct zfcp_fsf_req *req)
@@ -174,7 +175,7 @@ void zfcp_dbf_hba_bit_err(char *tag, struct zfcp_fsf_req *req)
rec->id = ZFCP_DBF_HBA_BIT;
rec->fsf_req_id = req->req_id;
rec->fsf_req_status = req->status;
- rec->fsf_cmd = req->fsf_command;
+ rec->fsf_cmd = FSF_QTCB_UNSOLICITED_STATUS;
memcpy(&rec->u.be, &sr_buf->payload.bit_error,
sizeof(struct fsf_bit_error_payload));
@@ -224,6 +225,7 @@ void zfcp_dbf_hba_def_err(struct zfcp_adapter *adapter, u64 req_id, u16 scount,
/**
* zfcp_dbf_hba_basic - trace event for basic adapter events
+ * @tag: identifier for event
* @adapter: pointer to struct zfcp_adapter
*/
void zfcp_dbf_hba_basic(char *tag, struct zfcp_adapter *adapter)
@@ -357,7 +359,7 @@ void zfcp_dbf_rec_run_lvl(int level, char *tag, struct zfcp_erp_action *erp)
rec->u.run.fsf_req_id = erp->fsf_req_id;
rec->u.run.rec_status = erp->status;
rec->u.run.rec_step = erp->step;
- rec->u.run.rec_action = erp->action;
+ rec->u.run.rec_action = erp->type;
if (erp->sdev)
rec->u.run.rec_count =
@@ -478,7 +480,8 @@ out:
/**
* zfcp_dbf_san_req - trace event for issued SAN request
* @tag: identifier for event
- * @fsf_req: request containing issued CT data
+ * @fsf: request containing issued CT or ELS data
+ * @d_id: N_Port_ID where SAN request is sent to
* d_id: destination ID
*/
void zfcp_dbf_san_req(char *tag, struct zfcp_fsf_req *fsf, u32 d_id)
@@ -560,7 +563,7 @@ static u16 zfcp_dbf_san_res_cap_len_if_gpn_ft(char *tag,
/**
* zfcp_dbf_san_res - trace event for received SAN request
* @tag: identifier for event
- * @fsf_req: request containing issued CT data
+ * @fsf: request containing received CT or ELS data
*/
void zfcp_dbf_san_res(char *tag, struct zfcp_fsf_req *fsf)
{
@@ -580,7 +583,7 @@ void zfcp_dbf_san_res(char *tag, struct zfcp_fsf_req *fsf)
/**
* zfcp_dbf_san_in_els - trace event for incoming ELS
* @tag: identifier for event
- * @fsf_req: request containing issued CT data
+ * @fsf: request containing received ELS data
*/
void zfcp_dbf_san_in_els(char *tag, struct zfcp_fsf_req *fsf)
{
diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h
index d116c07ed77a..900c779cc39b 100644
--- a/drivers/s390/scsi/zfcp_dbf.h
+++ b/drivers/s390/scsi/zfcp_dbf.h
@@ -42,7 +42,8 @@ struct zfcp_dbf_rec_trigger {
* @fsf_req_id: request id for fsf requests
* @rec_status: status of the fsf request
* @rec_step: current step of the recovery action
- * rec_count: recovery counter
+ * @rec_action: ERP action type
+ * @rec_count: recoveries including retries for particular @rec_action
*/
struct zfcp_dbf_rec_running {
u64 fsf_req_id;
@@ -72,6 +73,7 @@ enum zfcp_dbf_rec_id {
* @adapter_status: current status of the adapter
* @port_status: current status of the port
* @lun_status: current status of the lun
+ * @u: record type specific data
* @u.trig: structure zfcp_dbf_rec_trigger
* @u.run: structure zfcp_dbf_rec_running
*/
@@ -126,6 +128,8 @@ struct zfcp_dbf_san {
* @prot_status_qual: protocol status qualifier
* @fsf_status: fsf status
* @fsf_status_qual: fsf status qualifier
+ * @port_handle: handle for port
+ * @lun_handle: handle for LUN
*/
struct zfcp_dbf_hba_res {
u64 req_issued;
@@ -158,6 +162,7 @@ struct zfcp_dbf_hba_uss {
* @ZFCP_DBF_HBA_RES: response trace record
* @ZFCP_DBF_HBA_USS: unsolicited status trace record
* @ZFCP_DBF_HBA_BIT: bit error trace record
+ * @ZFCP_DBF_HBA_BASIC: basic adapter event, only trace tag, no other data
*/
enum zfcp_dbf_hba_id {
ZFCP_DBF_HBA_RES = 1,
@@ -176,6 +181,9 @@ enum zfcp_dbf_hba_id {
* @fsf_seq_no: fsf sequence number
* @pl_len: length of payload stored as zfcp_dbf_pay
* @u: record type specific data
+ * @u.res: data for fsf responses
+ * @u.uss: data for unsolicited status buffer
+ * @u.be: data for bit error unsolicited status buffer
*/
struct zfcp_dbf_hba {
u8 id;
@@ -339,8 +347,8 @@ void zfcp_dbf_hba_fsf_response(struct zfcp_fsf_req *req)
zfcp_dbf_hba_fsf_resp_suppress(req)
? 5 : 1, req);
- } else if ((req->fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) ||
- (req->fsf_command == FSF_QTCB_OPEN_LUN)) {
+ } else if ((qtcb->header.fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) ||
+ (qtcb->header.fsf_command == FSF_QTCB_OPEN_LUN)) {
zfcp_dbf_hba_fsf_resp("fs_open", 4, req);
} else if (qtcb->header.log_length) {
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 3396a47721a7..87d2f47a6990 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -4,7 +4,7 @@
*
* Global definitions for the zfcp device driver.
*
- * Copyright IBM Corp. 2002, 2010
+ * Copyright IBM Corp. 2002, 2017
*/
#ifndef ZFCP_DEF_H
@@ -41,24 +41,16 @@
#include "zfcp_fc.h"
#include "zfcp_qdio.h"
-struct zfcp_reqlist;
-
-/********************* SCSI SPECIFIC DEFINES *********************************/
-#define ZFCP_SCSI_ER_TIMEOUT (10*HZ)
-
/********************* FSF SPECIFIC DEFINES *********************************/
/* ATTENTION: value must not be used by hardware */
#define FSF_QTCB_UNSOLICITED_STATUS 0x6305
-/* timeout value for "default timer" for fsf requests */
-#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ)
-
/*************** ADAPTER/PORT/UNIT AND FSF_REQ STATUS FLAGS ******************/
/*
- * Note, the leftmost status byte is common among adapter, port
- * and unit
+ * Note, the leftmost 12 status bits (3 nibbles) are common among adapter, port
+ * and unit. This is a mask for bitwise 'and' with status values.
*/
#define ZFCP_COMMON_FLAGS 0xfff00000
@@ -97,49 +89,60 @@ struct zfcp_reqlist;
/************************* STRUCTURE DEFINITIONS *****************************/
-struct zfcp_fsf_req;
+/**
+ * enum zfcp_erp_act_type - Type of ERP action object.
+ * @ZFCP_ERP_ACTION_REOPEN_LUN: LUN recovery.
+ * @ZFCP_ERP_ACTION_REOPEN_PORT: Port recovery.
+ * @ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: Forced port recovery.
+ * @ZFCP_ERP_ACTION_REOPEN_ADAPTER: Adapter recovery.
+ *
+ * Values must fit into u8 because of code dependencies:
+ * zfcp_dbf_rec_trig(), &zfcp_dbf_rec_trigger.want, &zfcp_dbf_rec_trigger.need;
+ * zfcp_dbf_rec_run_lvl(), zfcp_dbf_rec_run(), &zfcp_dbf_rec_running.rec_action.
+ */
+enum zfcp_erp_act_type {
+ ZFCP_ERP_ACTION_REOPEN_LUN = 1,
+ ZFCP_ERP_ACTION_REOPEN_PORT = 2,
+ ZFCP_ERP_ACTION_REOPEN_PORT_FORCED = 3,
+ ZFCP_ERP_ACTION_REOPEN_ADAPTER = 4,
+};
-/* holds various memory pools of an adapter */
-struct zfcp_adapter_mempool {
- mempool_t *erp_req;
- mempool_t *gid_pn_req;
- mempool_t *scsi_req;
- mempool_t *scsi_abort;
- mempool_t *status_read_req;
- mempool_t *sr_data;
- mempool_t *gid_pn;
- mempool_t *qtcb_pool;
+/*
+ * Values must fit into u16 because of code dependencies:
+ * zfcp_dbf_rec_run_lvl(), zfcp_dbf_rec_run(), zfcp_dbf_rec_run_wka(),
+ * &zfcp_dbf_rec_running.rec_step.
+ */
+enum zfcp_erp_steps {
+ ZFCP_ERP_STEP_UNINITIALIZED = 0x0000,
+ ZFCP_ERP_STEP_PHYS_PORT_CLOSING = 0x0010,
+ ZFCP_ERP_STEP_PORT_CLOSING = 0x0100,
+ ZFCP_ERP_STEP_PORT_OPENING = 0x0800,
+ ZFCP_ERP_STEP_LUN_CLOSING = 0x1000,
+ ZFCP_ERP_STEP_LUN_OPENING = 0x2000,
};
struct zfcp_erp_action {
struct list_head list;
- int action; /* requested action code */
+ enum zfcp_erp_act_type type; /* requested action code */
struct zfcp_adapter *adapter; /* device which should be recovered */
struct zfcp_port *port;
struct scsi_device *sdev;
u32 status; /* recovery status */
- u32 step; /* active step of this erp action */
+ enum zfcp_erp_steps step; /* active step of this erp action */
unsigned long fsf_req_id;
struct timer_list timer;
};
-struct fsf_latency_record {
- u32 min;
- u32 max;
- u64 sum;
-};
-
-struct latency_cont {
- struct fsf_latency_record channel;
- struct fsf_latency_record fabric;
- u64 counter;
-};
-
-struct zfcp_latencies {
- struct latency_cont read;
- struct latency_cont write;
- struct latency_cont cmd;
- spinlock_t lock;
+/* holds various memory pools of an adapter */
+struct zfcp_adapter_mempool {
+ mempool_t *erp_req;
+ mempool_t *gid_pn_req;
+ mempool_t *scsi_req;
+ mempool_t *scsi_abort;
+ mempool_t *status_read_req;
+ mempool_t *sr_data;
+ mempool_t *gid_pn;
+ mempool_t *qtcb_pool;
};
struct zfcp_adapter {
@@ -220,6 +223,25 @@ struct zfcp_port {
unsigned int starget_id;
};
+struct zfcp_latency_record {
+ u32 min;
+ u32 max;
+ u64 sum;
+};
+
+struct zfcp_latency_cont {
+ struct zfcp_latency_record channel;
+ struct zfcp_latency_record fabric;
+ u64 counter;
+};
+
+struct zfcp_latencies {
+ struct zfcp_latency_cont read;
+ struct zfcp_latency_cont write;
+ struct zfcp_latency_cont cmd;
+ spinlock_t lock;
+};
+
/**
* struct zfcp_unit - LUN configured via zfcp sysfs
* @dev: struct device for sysfs representation and reference counting
@@ -287,9 +309,7 @@ static inline u64 zfcp_scsi_dev_lun(struct scsi_device *sdev)
* @qdio_req: qdio queue related values
* @completion: used to signal the completion of the request
* @status: status of the request
- * @fsf_command: FSF command issued
* @qtcb: associated QTCB
- * @seq_no: sequence number of this request
* @data: private data
* @timer: timer data of this request
* @erp_action: reference to erp action if request issued on behalf of ERP
@@ -304,9 +324,7 @@ struct zfcp_fsf_req {
struct zfcp_qdio_req qdio_req;
struct completion completion;
u32 status;
- u32 fsf_command;
struct fsf_qtcb *qtcb;
- u32 seq_no;
void *data;
struct timer_list timer;
struct zfcp_erp_action *erp_action;
@@ -321,4 +339,9 @@ int zfcp_adapter_multi_buffer_active(struct zfcp_adapter *adapter)
return atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_MB_ACT;
}
+static inline bool zfcp_fsf_req_is_status_read_buffer(struct zfcp_fsf_req *req)
+{
+ return req->qtcb == NULL;
+}
+
#endif /* ZFCP_DEF_H */
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index e7e6b63905e2..744a64680d5b 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -4,7 +4,7 @@
*
* Error Recovery Procedures (ERP).
*
- * Copyright IBM Corp. 2002, 2016
+ * Copyright IBM Corp. 2002, 2017
*/
#define KMSG_COMPONENT "zfcp"
@@ -24,38 +24,18 @@ enum zfcp_erp_act_flags {
ZFCP_STATUS_ERP_NO_REF = 0x00800000,
};
-enum zfcp_erp_steps {
- ZFCP_ERP_STEP_UNINITIALIZED = 0x0000,
- ZFCP_ERP_STEP_PHYS_PORT_CLOSING = 0x0010,
- ZFCP_ERP_STEP_PORT_CLOSING = 0x0100,
- ZFCP_ERP_STEP_PORT_OPENING = 0x0800,
- ZFCP_ERP_STEP_LUN_CLOSING = 0x1000,
- ZFCP_ERP_STEP_LUN_OPENING = 0x2000,
-};
-
-/**
- * enum zfcp_erp_act_type - Type of ERP action object.
- * @ZFCP_ERP_ACTION_REOPEN_LUN: LUN recovery.
- * @ZFCP_ERP_ACTION_REOPEN_PORT: Port recovery.
- * @ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: Forced port recovery.
- * @ZFCP_ERP_ACTION_REOPEN_ADAPTER: Adapter recovery.
- * @ZFCP_ERP_ACTION_NONE: Eyecatcher pseudo flag to bitwise or-combine with
- * either of the first four enum values.
- * Used to indicate that an ERP action could not be
- * set up despite a detected need for some recovery.
- * @ZFCP_ERP_ACTION_FAILED: Eyecatcher pseudo flag to bitwise or-combine with
- * either of the first four enum values.
- * Used to indicate that ERP not needed because
- * the object has ZFCP_STATUS_COMMON_ERP_FAILED.
+/*
+ * Eyecatcher pseudo flag to bitwise or-combine with enum zfcp_erp_act_type.
+ * Used to indicate that an ERP action could not be set up despite a detected
+ * need for some recovery.
*/
-enum zfcp_erp_act_type {
- ZFCP_ERP_ACTION_REOPEN_LUN = 1,
- ZFCP_ERP_ACTION_REOPEN_PORT = 2,
- ZFCP_ERP_ACTION_REOPEN_PORT_FORCED = 3,
- ZFCP_ERP_ACTION_REOPEN_ADAPTER = 4,
- ZFCP_ERP_ACTION_NONE = 0xc0,
- ZFCP_ERP_ACTION_FAILED = 0xe0,
-};
+#define ZFCP_ERP_ACTION_NONE 0xc0
+/*
+ * Eyecatcher pseudo flag to bitwise or-combine with enum zfcp_erp_act_type.
+ * Used to indicate that ERP not needed because the object has
+ * ZFCP_STATUS_COMMON_ERP_FAILED.
+ */
+#define ZFCP_ERP_ACTION_FAILED 0xe0
enum zfcp_erp_act_result {
ZFCP_ERP_SUCCEEDED = 0,
@@ -136,11 +116,11 @@ static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
}
}
-static int zfcp_erp_handle_failed(int want, struct zfcp_adapter *adapter,
- struct zfcp_port *port,
- struct scsi_device *sdev)
+static enum zfcp_erp_act_type zfcp_erp_handle_failed(
+ enum zfcp_erp_act_type want, struct zfcp_adapter *adapter,
+ struct zfcp_port *port, struct scsi_device *sdev)
{
- int need = want;
+ enum zfcp_erp_act_type need = want;
struct zfcp_scsi_dev *zsdev;
switch (want) {
@@ -171,19 +151,17 @@ static int zfcp_erp_handle_failed(int want, struct zfcp_adapter *adapter,
adapter, ZFCP_STATUS_COMMON_ERP_FAILED);
}
break;
- default:
- need = 0;
- break;
}
return need;
}
-static int zfcp_erp_required_act(int want, struct zfcp_adapter *adapter,
+static enum zfcp_erp_act_type zfcp_erp_required_act(enum zfcp_erp_act_type want,
+ struct zfcp_adapter *adapter,
struct zfcp_port *port,
struct scsi_device *sdev)
{
- int need = want;
+ enum zfcp_erp_act_type need = want;
int l_status, p_status, a_status;
struct zfcp_scsi_dev *zfcp_sdev;
@@ -230,7 +208,8 @@ static int zfcp_erp_required_act(int want, struct zfcp_adapter *adapter,
return need;
}
-static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
+static struct zfcp_erp_action *zfcp_erp_setup_act(enum zfcp_erp_act_type need,
+ u32 act_status,
struct zfcp_adapter *adapter,
struct zfcp_port *port,
struct scsi_device *sdev)
@@ -278,9 +257,6 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
ZFCP_STATUS_COMMON_RUNNING))
act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY;
break;
-
- default:
- return NULL;
}
WARN_ON_ONCE(erp_action->adapter != adapter);
@@ -288,18 +264,19 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
memset(&erp_action->timer, 0, sizeof(erp_action->timer));
erp_action->step = ZFCP_ERP_STEP_UNINITIALIZED;
erp_action->fsf_req_id = 0;
- erp_action->action = need;
+ erp_action->type = need;
erp_action->status = act_status;
return erp_action;
}
-static void zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter,
+static void zfcp_erp_action_enqueue(enum zfcp_erp_act_type want,
+ struct zfcp_adapter *adapter,
struct zfcp_port *port,
struct scsi_device *sdev,
- char *id, u32 act_status)
+ char *dbftag, u32 act_status)
{
- int need;
+ enum zfcp_erp_act_type need;
struct zfcp_erp_action *act;
need = zfcp_erp_handle_failed(want, adapter, port, sdev);
@@ -327,10 +304,11 @@ static void zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter,
list_add_tail(&act->list, &adapter->erp_ready_head);
wake_up(&adapter->erp_ready_wq);
out:
- zfcp_dbf_rec_trig(id, adapter, port, sdev, want, need);
+ zfcp_dbf_rec_trig(dbftag, adapter, port, sdev, want, need);
}
-void zfcp_erp_port_forced_no_port_dbf(char *id, struct zfcp_adapter *adapter,
+void zfcp_erp_port_forced_no_port_dbf(char *dbftag,
+ struct zfcp_adapter *adapter,
u64 port_name, u32 port_id)
{
unsigned long flags;
@@ -344,29 +322,30 @@ void zfcp_erp_port_forced_no_port_dbf(char *id, struct zfcp_adapter *adapter,
atomic_set(&tmpport.status, -1); /* unknown */
tmpport.wwpn = port_name;
tmpport.d_id = port_id;
- zfcp_dbf_rec_trig(id, adapter, &tmpport, NULL,
+ zfcp_dbf_rec_trig(dbftag, adapter, &tmpport, NULL,
ZFCP_ERP_ACTION_REOPEN_PORT_FORCED,
ZFCP_ERP_ACTION_NONE);
write_unlock_irqrestore(&adapter->erp_lock, flags);
}
static void _zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter,
- int clear_mask, char *id)
+ int clear_mask, char *dbftag)
{
zfcp_erp_adapter_block(adapter, clear_mask);
zfcp_scsi_schedule_rports_block(adapter);
zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER,
- adapter, NULL, NULL, id, 0);
+ adapter, NULL, NULL, dbftag, 0);
}
/**
* zfcp_erp_adapter_reopen - Reopen adapter.
* @adapter: Adapter to reopen.
* @clear: Status flags to clear.
- * @id: Id for debug trace event.
+ * @dbftag: Tag for debug trace event.
*/
-void zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear, char *id)
+void zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear,
+ char *dbftag)
{
unsigned long flags;
@@ -375,7 +354,7 @@ void zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear, char *id)
write_lock_irqsave(&adapter->erp_lock, flags);
zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER, adapter,
- NULL, NULL, id, 0);
+ NULL, NULL, dbftag, 0);
write_unlock_irqrestore(&adapter->erp_lock, flags);
}
@@ -383,25 +362,25 @@ void zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear, char *id)
* zfcp_erp_adapter_shutdown - Shutdown adapter.
* @adapter: Adapter to shut down.
* @clear: Status flags to clear.
- * @id: Id for debug trace event.
+ * @dbftag: Tag for debug trace event.
*/
void zfcp_erp_adapter_shutdown(struct zfcp_adapter *adapter, int clear,
- char *id)
+ char *dbftag)
{
int flags = ZFCP_STATUS_COMMON_RUNNING | ZFCP_STATUS_COMMON_ERP_FAILED;
- zfcp_erp_adapter_reopen(adapter, clear | flags, id);
+ zfcp_erp_adapter_reopen(adapter, clear | flags, dbftag);
}
/**
* zfcp_erp_port_shutdown - Shutdown port
* @port: Port to shut down.
* @clear: Status flags to clear.
- * @id: Id for debug trace event.
+ * @dbftag: Tag for debug trace event.
*/
-void zfcp_erp_port_shutdown(struct zfcp_port *port, int clear, char *id)
+void zfcp_erp_port_shutdown(struct zfcp_port *port, int clear, char *dbftag)
{
int flags = ZFCP_STATUS_COMMON_RUNNING | ZFCP_STATUS_COMMON_ERP_FAILED;
- zfcp_erp_port_reopen(port, clear | flags, id);
+ zfcp_erp_port_reopen(port, clear | flags, dbftag);
}
static void zfcp_erp_port_block(struct zfcp_port *port, int clear)
@@ -411,53 +390,55 @@ static void zfcp_erp_port_block(struct zfcp_port *port, int clear)
}
static void _zfcp_erp_port_forced_reopen(struct zfcp_port *port, int clear,
- char *id)
+ char *dbftag)
{
zfcp_erp_port_block(port, clear);
zfcp_scsi_schedule_rport_block(port);
zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_PORT_FORCED,
- port->adapter, port, NULL, id, 0);
+ port->adapter, port, NULL, dbftag, 0);
}
/**
* zfcp_erp_port_forced_reopen - Forced close of port and open again
* @port: Port to force close and to reopen.
* @clear: Status flags to clear.
- * @id: Id for debug trace event.
+ * @dbftag: Tag for debug trace event.
*/
-void zfcp_erp_port_forced_reopen(struct zfcp_port *port, int clear, char *id)
+void zfcp_erp_port_forced_reopen(struct zfcp_port *port, int clear,
+ char *dbftag)
{
unsigned long flags;
struct zfcp_adapter *adapter = port->adapter;
write_lock_irqsave(&adapter->erp_lock, flags);
- _zfcp_erp_port_forced_reopen(port, clear, id);
+ _zfcp_erp_port_forced_reopen(port, clear, dbftag);
write_unlock_irqrestore(&adapter->erp_lock, flags);
}
-static void _zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *id)
+static void _zfcp_erp_port_reopen(struct zfcp_port *port, int clear,
+ char *dbftag)
{
zfcp_erp_port_block(port, clear);
zfcp_scsi_schedule_rport_block(port);
zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_PORT,
- port->adapter, port, NULL, id, 0);
+ port->adapter, port, NULL, dbftag, 0);
}
/**
* zfcp_erp_port_reopen - trigger remote port recovery
* @port: port to recover
- * @clear_mask: flags in port status to be cleared
- * @id: Id for debug trace event.
+ * @clear: flags in port status to be cleared
+ * @dbftag: Tag for debug trace event.
*/
-void zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *id)
+void zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *dbftag)
{
unsigned long flags;
struct zfcp_adapter *adapter = port->adapter;
write_lock_irqsave(&adapter->erp_lock, flags);
- _zfcp_erp_port_reopen(port, clear, id);
+ _zfcp_erp_port_reopen(port, clear, dbftag);
write_unlock_irqrestore(&adapter->erp_lock, flags);
}
@@ -467,8 +448,8 @@ static void zfcp_erp_lun_block(struct scsi_device *sdev, int clear_mask)
ZFCP_STATUS_COMMON_UNBLOCKED | clear_mask);
}
-static void _zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id,
- u32 act_status)
+static void _zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear,
+ char *dbftag, u32 act_status)
{
struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
struct zfcp_adapter *adapter = zfcp_sdev->port->adapter;
@@ -476,18 +457,18 @@ static void _zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id,
zfcp_erp_lun_block(sdev, clear);
zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_LUN, adapter,
- zfcp_sdev->port, sdev, id, act_status);
+ zfcp_sdev->port, sdev, dbftag, act_status);
}
/**
* zfcp_erp_lun_reopen - initiate reopen of a LUN
* @sdev: SCSI device / LUN to be reopened
- * @clear_mask: specifies flags in LUN status to be cleared
- * @id: Id for debug trace event.
+ * @clear: specifies flags in LUN status to be cleared
+ * @dbftag: Tag for debug trace event.
*
* Return: 0 on success, < 0 on error
*/
-void zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id)
+void zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *dbftag)
{
unsigned long flags;
struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
@@ -495,7 +476,7 @@ void zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id)
struct zfcp_adapter *adapter = port->adapter;
write_lock_irqsave(&adapter->erp_lock, flags);
- _zfcp_erp_lun_reopen(sdev, clear, id, 0);
+ _zfcp_erp_lun_reopen(sdev, clear, dbftag, 0);
write_unlock_irqrestore(&adapter->erp_lock, flags);
}
@@ -503,25 +484,25 @@ void zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id)
* zfcp_erp_lun_shutdown - Shutdown LUN
* @sdev: SCSI device / LUN to shut down.
* @clear: Status flags to clear.
- * @id: Id for debug trace event.
+ * @dbftag: Tag for debug trace event.
*/
-void zfcp_erp_lun_shutdown(struct scsi_device *sdev, int clear, char *id)
+void zfcp_erp_lun_shutdown(struct scsi_device *sdev, int clear, char *dbftag)
{
int flags = ZFCP_STATUS_COMMON_RUNNING | ZFCP_STATUS_COMMON_ERP_FAILED;
- zfcp_erp_lun_reopen(sdev, clear | flags, id);
+ zfcp_erp_lun_reopen(sdev, clear | flags, dbftag);
}
/**
* zfcp_erp_lun_shutdown_wait - Shutdown LUN and wait for erp completion
* @sdev: SCSI device / LUN to shut down.
- * @id: Id for debug trace event.
+ * @dbftag: Tag for debug trace event.
*
* Do not acquire a reference for the LUN when creating the ERP
* action. It is safe, because this function waits for the ERP to
* complete first. This allows to shutdown the LUN, even when the SCSI
* device is in the state SDEV_DEL when scsi_device_get will fail.
*/
-void zfcp_erp_lun_shutdown_wait(struct scsi_device *sdev, char *id)
+void zfcp_erp_lun_shutdown_wait(struct scsi_device *sdev, char *dbftag)
{
unsigned long flags;
struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
@@ -530,7 +511,7 @@ void zfcp_erp_lun_shutdown_wait(struct scsi_device *sdev, char *id)
int clear = ZFCP_STATUS_COMMON_RUNNING | ZFCP_STATUS_COMMON_ERP_FAILED;
write_lock_irqsave(&adapter->erp_lock, flags);
- _zfcp_erp_lun_reopen(sdev, clear, id, ZFCP_STATUS_ERP_NO_REF);
+ _zfcp_erp_lun_reopen(sdev, clear, dbftag, ZFCP_STATUS_ERP_NO_REF);
write_unlock_irqrestore(&adapter->erp_lock, flags);
zfcp_erp_wait(adapter);
@@ -619,7 +600,7 @@ void zfcp_erp_notify(struct zfcp_erp_action *erp_action, unsigned long set_mask)
/**
* zfcp_erp_timeout_handler - Trigger ERP action from timed out ERP request
- * @data: ERP action (from timer data)
+ * @t: timer list entry embedded in zfcp FSF request
*/
void zfcp_erp_timeout_handler(struct timer_list *t)
{
@@ -644,31 +625,31 @@ static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action)
}
static void _zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter,
- int clear, char *id)
+ int clear, char *dbftag)
{
struct zfcp_port *port;
read_lock(&adapter->port_list_lock);
list_for_each_entry(port, &adapter->port_list, list)
- _zfcp_erp_port_reopen(port, clear, id);
+ _zfcp_erp_port_reopen(port, clear, dbftag);
read_unlock(&adapter->port_list_lock);
}
static void _zfcp_erp_lun_reopen_all(struct zfcp_port *port, int clear,
- char *id)
+ char *dbftag)
{
struct scsi_device *sdev;
spin_lock(port->adapter->scsi_host->host_lock);
__shost_for_each_device(sdev, port->adapter->scsi_host)
if (sdev_to_zfcp(sdev)->port == port)
- _zfcp_erp_lun_reopen(sdev, clear, id, 0);
+ _zfcp_erp_lun_reopen(sdev, clear, dbftag, 0);
spin_unlock(port->adapter->scsi_host->host_lock);
}
static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act)
{
- switch (act->action) {
+ switch (act->type) {
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
_zfcp_erp_adapter_reopen(act->adapter, 0, "ersff_1");
break;
@@ -686,7 +667,7 @@ static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act)
static void zfcp_erp_strategy_followup_success(struct zfcp_erp_action *act)
{
- switch (act->action) {
+ switch (act->type) {
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
_zfcp_erp_port_reopen_all(act->adapter, 0, "ersfs_1");
break;
@@ -696,6 +677,9 @@ static void zfcp_erp_strategy_followup_success(struct zfcp_erp_action *act)
case ZFCP_ERP_ACTION_REOPEN_PORT:
_zfcp_erp_lun_reopen_all(act->port, 0, "ersfs_3");
break;
+ case ZFCP_ERP_ACTION_REOPEN_LUN:
+ /* NOP */
+ break;
}
}
@@ -723,7 +707,8 @@ static void zfcp_erp_enqueue_ptp_port(struct zfcp_adapter *adapter)
_zfcp_erp_port_reopen(port, 0, "ereptp1");
}
-static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_adapter_strat_fsf_xconf(
+ struct zfcp_erp_action *erp_action)
{
int retries;
int sleep = 1;
@@ -768,7 +753,8 @@ static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action)
return ZFCP_ERP_SUCCEEDED;
}
-static int zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *act)
+static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open_fsf_xport(
+ struct zfcp_erp_action *act)
{
int ret;
struct zfcp_adapter *adapter = act->adapter;
@@ -793,7 +779,8 @@ static int zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *act)
return ZFCP_ERP_SUCCEEDED;
}
-static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *act)
+static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open_fsf(
+ struct zfcp_erp_action *act)
{
if (zfcp_erp_adapter_strat_fsf_xconf(act) == ZFCP_ERP_FAILED)
return ZFCP_ERP_FAILED;
@@ -832,7 +819,8 @@ static void zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *act)
ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status);
}
-static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *act)
+static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open(
+ struct zfcp_erp_action *act)
{
struct zfcp_adapter *adapter = act->adapter;
@@ -853,7 +841,8 @@ static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *act)
return ZFCP_ERP_SUCCEEDED;
}
-static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *act)
+static enum zfcp_erp_act_result zfcp_erp_adapter_strategy(
+ struct zfcp_erp_action *act)
{
struct zfcp_adapter *adapter = act->adapter;
@@ -871,7 +860,8 @@ static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *act)
return ZFCP_ERP_SUCCEEDED;
}
-static int zfcp_erp_port_forced_strategy_close(struct zfcp_erp_action *act)
+static enum zfcp_erp_act_result zfcp_erp_port_forced_strategy_close(
+ struct zfcp_erp_action *act)
{
int retval;
@@ -885,7 +875,8 @@ static int zfcp_erp_port_forced_strategy_close(struct zfcp_erp_action *act)
return ZFCP_ERP_CONTINUES;
}
-static int zfcp_erp_port_forced_strategy(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_port_forced_strategy(
+ struct zfcp_erp_action *erp_action)
{
struct zfcp_port *port = erp_action->port;
int status = atomic_read(&port->status);
@@ -901,11 +892,19 @@ static int zfcp_erp_port_forced_strategy(struct zfcp_erp_action *erp_action)
case ZFCP_ERP_STEP_PHYS_PORT_CLOSING:
if (!(status & ZFCP_STATUS_PORT_PHYS_OPEN))
return ZFCP_ERP_SUCCEEDED;
+ break;
+ case ZFCP_ERP_STEP_PORT_CLOSING:
+ case ZFCP_ERP_STEP_PORT_OPENING:
+ case ZFCP_ERP_STEP_LUN_CLOSING:
+ case ZFCP_ERP_STEP_LUN_OPENING:
+ /* NOP */
+ break;
}
return ZFCP_ERP_FAILED;
}
-static int zfcp_erp_port_strategy_close(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_port_strategy_close(
+ struct zfcp_erp_action *erp_action)
{
int retval;
@@ -918,7 +917,8 @@ static int zfcp_erp_port_strategy_close(struct zfcp_erp_action *erp_action)
return ZFCP_ERP_CONTINUES;
}
-static int zfcp_erp_port_strategy_open_port(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_port_strategy_open_port(
+ struct zfcp_erp_action *erp_action)
{
int retval;
@@ -944,7 +944,8 @@ static int zfcp_erp_open_ptp_port(struct zfcp_erp_action *act)
return zfcp_erp_port_strategy_open_port(act);
}
-static int zfcp_erp_port_strategy_open_common(struct zfcp_erp_action *act)
+static enum zfcp_erp_act_result zfcp_erp_port_strategy_open_common(
+ struct zfcp_erp_action *act)
{
struct zfcp_adapter *adapter = act->adapter;
struct zfcp_port *port = act->port;
@@ -975,12 +976,18 @@ static int zfcp_erp_port_strategy_open_common(struct zfcp_erp_action *act)
port->d_id = 0;
return ZFCP_ERP_FAILED;
}
- /* fall through otherwise */
+ /* no early return otherwise, continue after switch case */
+ break;
+ case ZFCP_ERP_STEP_LUN_CLOSING:
+ case ZFCP_ERP_STEP_LUN_OPENING:
+ /* NOP */
+ break;
}
return ZFCP_ERP_FAILED;
}
-static int zfcp_erp_port_strategy(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_port_strategy(
+ struct zfcp_erp_action *erp_action)
{
struct zfcp_port *port = erp_action->port;
int p_status = atomic_read(&port->status);
@@ -999,6 +1006,12 @@ static int zfcp_erp_port_strategy(struct zfcp_erp_action *erp_action)
if (p_status & ZFCP_STATUS_COMMON_OPEN)
return ZFCP_ERP_FAILED;
break;
+ case ZFCP_ERP_STEP_PHYS_PORT_CLOSING:
+ case ZFCP_ERP_STEP_PORT_OPENING:
+ case ZFCP_ERP_STEP_LUN_CLOSING:
+ case ZFCP_ERP_STEP_LUN_OPENING:
+ /* NOP */
+ break;
}
close_init_done:
@@ -1016,7 +1029,8 @@ static void zfcp_erp_lun_strategy_clearstati(struct scsi_device *sdev)
&zfcp_sdev->status);
}
-static int zfcp_erp_lun_strategy_close(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_lun_strategy_close(
+ struct zfcp_erp_action *erp_action)
{
int retval = zfcp_fsf_close_lun(erp_action);
if (retval == -ENOMEM)
@@ -1027,7 +1041,8 @@ static int zfcp_erp_lun_strategy_close(struct zfcp_erp_action *erp_action)
return ZFCP_ERP_CONTINUES;
}
-static int zfcp_erp_lun_strategy_open(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_lun_strategy_open(
+ struct zfcp_erp_action *erp_action)
{
int retval = zfcp_fsf_open_lun(erp_action);
if (retval == -ENOMEM)
@@ -1038,7 +1053,8 @@ static int zfcp_erp_lun_strategy_open(struct zfcp_erp_action *erp_action)
return ZFCP_ERP_CONTINUES;
}
-static int zfcp_erp_lun_strategy(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_lun_strategy(
+ struct zfcp_erp_action *erp_action)
{
struct scsi_device *sdev = erp_action->sdev;
struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
@@ -1048,7 +1064,8 @@ static int zfcp_erp_lun_strategy(struct zfcp_erp_action *erp_action)
zfcp_erp_lun_strategy_clearstati(sdev);
if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_OPEN)
return zfcp_erp_lun_strategy_close(erp_action);
- /* already closed, fall through */
+ /* already closed */
+ /* fall through */
case ZFCP_ERP_STEP_LUN_CLOSING:
if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_OPEN)
return ZFCP_ERP_FAILED;
@@ -1059,11 +1076,18 @@ static int zfcp_erp_lun_strategy(struct zfcp_erp_action *erp_action)
case ZFCP_ERP_STEP_LUN_OPENING:
if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_OPEN)
return ZFCP_ERP_SUCCEEDED;
+ break;
+ case ZFCP_ERP_STEP_PHYS_PORT_CLOSING:
+ case ZFCP_ERP_STEP_PORT_CLOSING:
+ case ZFCP_ERP_STEP_PORT_OPENING:
+ /* NOP */
+ break;
}
return ZFCP_ERP_FAILED;
}
-static int zfcp_erp_strategy_check_lun(struct scsi_device *sdev, int result)
+static enum zfcp_erp_act_result zfcp_erp_strategy_check_lun(
+ struct scsi_device *sdev, enum zfcp_erp_act_result result)
{
struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
@@ -1084,6 +1108,12 @@ static int zfcp_erp_strategy_check_lun(struct scsi_device *sdev, int result)
ZFCP_STATUS_COMMON_ERP_FAILED);
}
break;
+ case ZFCP_ERP_CONTINUES:
+ case ZFCP_ERP_EXIT:
+ case ZFCP_ERP_DISMISSED:
+ case ZFCP_ERP_NOMEM:
+ /* NOP */
+ break;
}
if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_ERP_FAILED) {
@@ -1093,7 +1123,8 @@ static int zfcp_erp_strategy_check_lun(struct scsi_device *sdev, int result)
return result;
}
-static int zfcp_erp_strategy_check_port(struct zfcp_port *port, int result)
+static enum zfcp_erp_act_result zfcp_erp_strategy_check_port(
+ struct zfcp_port *port, enum zfcp_erp_act_result result)
{
switch (result) {
case ZFCP_ERP_SUCCEEDED :
@@ -1115,6 +1146,12 @@ static int zfcp_erp_strategy_check_port(struct zfcp_port *port, int result)
ZFCP_STATUS_COMMON_ERP_FAILED);
}
break;
+ case ZFCP_ERP_CONTINUES:
+ case ZFCP_ERP_EXIT:
+ case ZFCP_ERP_DISMISSED:
+ case ZFCP_ERP_NOMEM:
+ /* NOP */
+ break;
}
if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_FAILED) {
@@ -1124,8 +1161,8 @@ static int zfcp_erp_strategy_check_port(struct zfcp_port *port, int result)
return result;
}
-static int zfcp_erp_strategy_check_adapter(struct zfcp_adapter *adapter,
- int result)
+static enum zfcp_erp_act_result zfcp_erp_strategy_check_adapter(
+ struct zfcp_adapter *adapter, enum zfcp_erp_act_result result)
{
switch (result) {
case ZFCP_ERP_SUCCEEDED :
@@ -1143,6 +1180,12 @@ static int zfcp_erp_strategy_check_adapter(struct zfcp_adapter *adapter,
ZFCP_STATUS_COMMON_ERP_FAILED);
}
break;
+ case ZFCP_ERP_CONTINUES:
+ case ZFCP_ERP_EXIT:
+ case ZFCP_ERP_DISMISSED:
+ case ZFCP_ERP_NOMEM:
+ /* NOP */
+ break;
}
if (atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_ERP_FAILED) {
@@ -1152,14 +1195,14 @@ static int zfcp_erp_strategy_check_adapter(struct zfcp_adapter *adapter,
return result;
}
-static int zfcp_erp_strategy_check_target(struct zfcp_erp_action *erp_action,
- int result)
+static enum zfcp_erp_act_result zfcp_erp_strategy_check_target(
+ struct zfcp_erp_action *erp_action, enum zfcp_erp_act_result result)
{
struct zfcp_adapter *adapter = erp_action->adapter;
struct zfcp_port *port = erp_action->port;
struct scsi_device *sdev = erp_action->sdev;
- switch (erp_action->action) {
+ switch (erp_action->type) {
case ZFCP_ERP_ACTION_REOPEN_LUN:
result = zfcp_erp_strategy_check_lun(sdev, result);
@@ -1192,16 +1235,17 @@ static int zfcp_erp_strat_change_det(atomic_t *target_status, u32 erp_status)
return 0;
}
-static int zfcp_erp_strategy_statechange(struct zfcp_erp_action *act, int ret)
+static enum zfcp_erp_act_result zfcp_erp_strategy_statechange(
+ struct zfcp_erp_action *act, enum zfcp_erp_act_result result)
{
- int action = act->action;
+ enum zfcp_erp_act_type type = act->type;
struct zfcp_adapter *adapter = act->adapter;
struct zfcp_port *port = act->port;
struct scsi_device *sdev = act->sdev;
struct zfcp_scsi_dev *zfcp_sdev;
u32 erp_status = act->status;
- switch (action) {
+ switch (type) {
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
if (zfcp_erp_strat_change_det(&adapter->status, erp_status)) {
_zfcp_erp_adapter_reopen(adapter,
@@ -1231,7 +1275,7 @@ static int zfcp_erp_strategy_statechange(struct zfcp_erp_action *act, int ret)
}
break;
}
- return ret;
+ return result;
}
static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action)
@@ -1248,7 +1292,7 @@ static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action)
list_del(&erp_action->list);
zfcp_dbf_rec_run("eractd1", erp_action);
- switch (erp_action->action) {
+ switch (erp_action->type) {
case ZFCP_ERP_ACTION_REOPEN_LUN:
zfcp_sdev = sdev_to_zfcp(erp_action->sdev);
atomic_andnot(ZFCP_STATUS_COMMON_ERP_INUSE,
@@ -1324,13 +1368,14 @@ static void zfcp_erp_try_rport_unblock(struct zfcp_port *port)
write_unlock_irqrestore(&adapter->erp_lock, flags);
}
-static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result)
+static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act,
+ enum zfcp_erp_act_result result)
{
struct zfcp_adapter *adapter = act->adapter;
struct zfcp_port *port = act->port;
struct scsi_device *sdev = act->sdev;
- switch (act->action) {
+ switch (act->type) {
case ZFCP_ERP_ACTION_REOPEN_LUN:
if (!(act->status & ZFCP_STATUS_ERP_NO_REF))
scsi_device_put(sdev);
@@ -1364,9 +1409,10 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result)
}
}
-static int zfcp_erp_strategy_do_action(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_strategy_do_action(
+ struct zfcp_erp_action *erp_action)
{
- switch (erp_action->action) {
+ switch (erp_action->type) {
case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
return zfcp_erp_adapter_strategy(erp_action);
case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
@@ -1379,9 +1425,10 @@ static int zfcp_erp_strategy_do_action(struct zfcp_erp_action *erp_action)
return ZFCP_ERP_FAILED;
}
-static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action)
+static enum zfcp_erp_act_result zfcp_erp_strategy(
+ struct zfcp_erp_action *erp_action)
{
- int retval;
+ enum zfcp_erp_act_result result;
unsigned long flags;
struct zfcp_adapter *adapter = erp_action->adapter;
@@ -1392,12 +1439,12 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action)
if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) {
zfcp_erp_action_dequeue(erp_action);
- retval = ZFCP_ERP_DISMISSED;
+ result = ZFCP_ERP_DISMISSED;
goto unlock;
}
if (erp_action->status & ZFCP_STATUS_ERP_TIMEDOUT) {
- retval = ZFCP_ERP_FAILED;
+ result = ZFCP_ERP_FAILED;
goto check_target;
}
@@ -1405,13 +1452,13 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action)
/* no lock to allow for blocking operations */
write_unlock_irqrestore(&adapter->erp_lock, flags);
- retval = zfcp_erp_strategy_do_action(erp_action);
+ result = zfcp_erp_strategy_do_action(erp_action);
write_lock_irqsave(&adapter->erp_lock, flags);
if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED)
- retval = ZFCP_ERP_CONTINUES;
+ result = ZFCP_ERP_CONTINUES;
- switch (retval) {
+ switch (result) {
case ZFCP_ERP_NOMEM:
if (!(erp_action->status & ZFCP_STATUS_ERP_LOWMEM)) {
++adapter->erp_low_mem_count;
@@ -1421,7 +1468,7 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action)
_zfcp_erp_adapter_reopen(adapter, 0, "erstgy1");
else {
zfcp_erp_strategy_memwait(erp_action);
- retval = ZFCP_ERP_CONTINUES;
+ result = ZFCP_ERP_CONTINUES;
}
goto unlock;
@@ -1431,27 +1478,33 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action)
erp_action->status &= ~ZFCP_STATUS_ERP_LOWMEM;
}
goto unlock;
+ case ZFCP_ERP_SUCCEEDED:
+ case ZFCP_ERP_FAILED:
+ case ZFCP_ERP_EXIT:
+ case ZFCP_ERP_DISMISSED:
+ /* NOP */
+ break;
}
check_target:
- retval = zfcp_erp_strategy_check_target(erp_action, retval);
+ result = zfcp_erp_strategy_check_target(erp_action, result);
zfcp_erp_action_dequeue(erp_action);
- retval = zfcp_erp_strategy_statechange(erp_action, retval);
- if (retval == ZFCP_ERP_EXIT)
+ result = zfcp_erp_strategy_statechange(erp_action, result);
+ if (result == ZFCP_ERP_EXIT)
goto unlock;
- if (retval == ZFCP_ERP_SUCCEEDED)
+ if (result == ZFCP_ERP_SUCCEEDED)
zfcp_erp_strategy_followup_success(erp_action);
- if (retval == ZFCP_ERP_FAILED)
+ if (result == ZFCP_ERP_FAILED)
zfcp_erp_strategy_followup_failed(erp_action);
unlock:
write_unlock_irqrestore(&adapter->erp_lock, flags);
- if (retval != ZFCP_ERP_CONTINUES)
- zfcp_erp_action_cleanup(erp_action, retval);
+ if (result != ZFCP_ERP_CONTINUES)
+ zfcp_erp_action_cleanup(erp_action, result);
kref_put(&adapter->ref, zfcp_adapter_release);
- return retval;
+ return result;
}
static int zfcp_erp_thread(void *data)
@@ -1489,7 +1542,7 @@ static int zfcp_erp_thread(void *data)
* zfcp_erp_thread_setup - Start ERP thread for adapter
* @adapter: Adapter to start the ERP thread for
*
- * Returns 0 on success or error code from kernel_thread()
+ * Return: 0 on success, or error code from kthread_run().
*/
int zfcp_erp_thread_setup(struct zfcp_adapter *adapter)
{
@@ -1694,11 +1747,11 @@ void zfcp_erp_clear_lun_status(struct scsi_device *sdev, u32 mask)
/**
* zfcp_erp_adapter_reset_sync() - Really reopen adapter and wait.
* @adapter: Pointer to zfcp_adapter to reopen.
- * @id: Trace tag string of length %ZFCP_DBF_TAG_LEN.
+ * @dbftag: Trace tag string of length %ZFCP_DBF_TAG_LEN.
*/
-void zfcp_erp_adapter_reset_sync(struct zfcp_adapter *adapter, char *id)
+void zfcp_erp_adapter_reset_sync(struct zfcp_adapter *adapter, char *dbftag)
{
zfcp_erp_set_adapter_status(adapter, ZFCP_STATUS_COMMON_RUNNING);
- zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, id);
+ zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, dbftag);
zfcp_erp_wait(adapter);
}
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index bd0c5a9f04cb..3fce47b0b21b 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -59,14 +59,15 @@ extern void zfcp_dbf_scsi_eh(char *tag, struct zfcp_adapter *adapter,
/* zfcp_erp.c */
extern void zfcp_erp_set_adapter_status(struct zfcp_adapter *, u32);
extern void zfcp_erp_clear_adapter_status(struct zfcp_adapter *, u32);
-extern void zfcp_erp_port_forced_no_port_dbf(char *id,
+extern void zfcp_erp_port_forced_no_port_dbf(char *dbftag,
struct zfcp_adapter *adapter,
u64 port_name, u32 port_id);
extern void zfcp_erp_adapter_reopen(struct zfcp_adapter *, int, char *);
extern void zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int, char *);
extern void zfcp_erp_set_port_status(struct zfcp_port *, u32);
extern void zfcp_erp_clear_port_status(struct zfcp_port *, u32);
-extern void zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *id);
+extern void zfcp_erp_port_reopen(struct zfcp_port *port, int clear,
+ char *dbftag);
extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *);
extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *);
extern void zfcp_erp_set_lun_status(struct scsi_device *, u32);
@@ -79,7 +80,8 @@ extern void zfcp_erp_thread_kill(struct zfcp_adapter *);
extern void zfcp_erp_wait(struct zfcp_adapter *);
extern void zfcp_erp_notify(struct zfcp_erp_action *, unsigned long);
extern void zfcp_erp_timeout_handler(struct timer_list *t);
-extern void zfcp_erp_adapter_reset_sync(struct zfcp_adapter *adapter, char *id);
+extern void zfcp_erp_adapter_reset_sync(struct zfcp_adapter *adapter,
+ char *dbftag);
/* zfcp_fc.c */
extern struct kmem_cache *zfcp_fc_req_cache;
@@ -144,6 +146,7 @@ extern void zfcp_qdio_close(struct zfcp_qdio *);
extern void zfcp_qdio_siosl(struct zfcp_adapter *);
/* zfcp_scsi.c */
+extern bool zfcp_experimental_dix;
extern struct scsi_transport_template *zfcp_scsi_transport_template;
extern int zfcp_scsi_adapter_register(struct zfcp_adapter *);
extern void zfcp_scsi_adapter_unregister(struct zfcp_adapter *);
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index f6c415d6ef48..db00b5e3abbe 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -312,7 +312,7 @@ static void zfcp_fc_incoming_logo(struct zfcp_fsf_req *req)
/**
* zfcp_fc_incoming_els - handle incoming ELS
- * @fsf_req - request which contains incoming ELS
+ * @fsf_req: request which contains incoming ELS
*/
void zfcp_fc_incoming_els(struct zfcp_fsf_req *fsf_req)
{
@@ -597,6 +597,48 @@ void zfcp_fc_test_link(struct zfcp_port *port)
put_device(&port->dev);
}
+/**
+ * zfcp_fc_sg_free_table - free memory used by scatterlists
+ * @sg: pointer to scatterlist
+ * @count: number of scatterlist which are to be free'ed
+ * the scatterlist are expected to reference pages always
+ */
+static void zfcp_fc_sg_free_table(struct scatterlist *sg, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++, sg++)
+ if (sg)
+ free_page((unsigned long) sg_virt(sg));
+ else
+ break;
+}
+
+/**
+ * zfcp_fc_sg_setup_table - init scatterlist and allocate, assign buffers
+ * @sg: pointer to struct scatterlist
+ * @count: number of scatterlists which should be assigned with buffers
+ * of size page
+ *
+ * Returns: 0 on success, -ENOMEM otherwise
+ */
+static int zfcp_fc_sg_setup_table(struct scatterlist *sg, int count)
+{
+ void *addr;
+ int i;
+
+ sg_init_table(sg, count);
+ for (i = 0; i < count; i++, sg++) {
+ addr = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!addr) {
+ zfcp_fc_sg_free_table(sg, i);
+ return -ENOMEM;
+ }
+ sg_set_buf(sg, addr, PAGE_SIZE);
+ }
+ return 0;
+}
+
static struct zfcp_fc_req *zfcp_fc_alloc_sg_env(int buf_num)
{
struct zfcp_fc_req *fc_req;
@@ -605,7 +647,7 @@ static struct zfcp_fc_req *zfcp_fc_alloc_sg_env(int buf_num)
if (!fc_req)
return NULL;
- if (zfcp_sg_setup_table(&fc_req->sg_rsp, buf_num)) {
+ if (zfcp_fc_sg_setup_table(&fc_req->sg_rsp, buf_num)) {
kmem_cache_free(zfcp_fc_req_cache, fc_req);
return NULL;
}
@@ -763,7 +805,7 @@ void zfcp_fc_scan_ports(struct work_struct *work)
break;
}
}
- zfcp_sg_free_table(&fc_req->sg_rsp, buf_num);
+ zfcp_fc_sg_free_table(&fc_req->sg_rsp, buf_num);
kmem_cache_free(zfcp_fc_req_cache, fc_req);
out:
zfcp_fc_wka_port_put(&adapter->gs->ds);
diff --git a/drivers/s390/scsi/zfcp_fc.h b/drivers/s390/scsi/zfcp_fc.h
index 3cd74729cfb9..6902ae1f8e4f 100644
--- a/drivers/s390/scsi/zfcp_fc.h
+++ b/drivers/s390/scsi/zfcp_fc.h
@@ -121,9 +121,24 @@ struct zfcp_fc_rspn_req {
/**
* struct zfcp_fc_req - Container for FC ELS and CT requests sent from zfcp
* @ct_els: data required for issuing fsf command
- * @sg_req: scatterlist entry for request data
- * @sg_rsp: scatterlist entry for response data
- * @u: request specific data
+ * @sg_req: scatterlist entry for request data, refers to embedded @u submember
+ * @sg_rsp: scatterlist entry for response data, refers to embedded @u submember
+ * @u: request and response specific data
+ * @u.adisc: ADISC specific data
+ * @u.adisc.req: ADISC request
+ * @u.adisc.rsp: ADISC response
+ * @u.gid_pn: GID_PN specific data
+ * @u.gid_pn.req: GID_PN request
+ * @u.gid_pn.rsp: GID_PN response
+ * @u.gpn_ft: GPN_FT specific data
+ * @u.gpn_ft.sg_rsp2: GPN_FT response, not embedded here, allocated elsewhere
+ * @u.gpn_ft.req: GPN_FT request
+ * @u.gspn: GSPN specific data
+ * @u.gspn.req: GSPN request
+ * @u.gspn.rsp: GSPN response
+ * @u.rspn: RSPN specific data
+ * @u.rspn.req: RSPN request
+ * @u.rspn.rsp: RSPN response
*/
struct zfcp_fc_req {
struct zfcp_fsf_ct_els ct_els;
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 3c86e27f094d..d94496ee6883 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -19,6 +19,11 @@
#include "zfcp_qdio.h"
#include "zfcp_reqlist.h"
+/* timeout for FSF requests sent during scsi_eh: abort or FCP TMF */
+#define ZFCP_FSF_SCSI_ER_TIMEOUT (10*HZ)
+/* timeout for: exchange config/port data outside ERP, or open/close WKA port */
+#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ)
+
struct kmem_cache *zfcp_fsf_qtcb_cache;
static void zfcp_fsf_request_timeout_handler(struct timer_list *t)
@@ -74,18 +79,18 @@ static void zfcp_fsf_class_not_supp(struct zfcp_fsf_req *req)
/**
* zfcp_fsf_req_free - free memory used by fsf request
- * @fsf_req: pointer to struct zfcp_fsf_req
+ * @req: pointer to struct zfcp_fsf_req
*/
void zfcp_fsf_req_free(struct zfcp_fsf_req *req)
{
if (likely(req->pool)) {
- if (likely(req->qtcb))
+ if (likely(!zfcp_fsf_req_is_status_read_buffer(req)))
mempool_free(req->qtcb, req->adapter->pool.qtcb_pool);
mempool_free(req, req->pool);
return;
}
- if (likely(req->qtcb))
+ if (likely(!zfcp_fsf_req_is_status_read_buffer(req)))
kmem_cache_free(zfcp_fsf_qtcb_cache, req->qtcb);
kfree(req);
}
@@ -379,7 +384,7 @@ static void zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *req)
/**
* zfcp_fsf_req_complete - process completion of a FSF request
- * @fsf_req: The FSF request that has been completed.
+ * @req: The FSF request that has been completed.
*
* When a request has been completed either from the FCP adapter,
* or it has been dismissed due to a queue shutdown, this function
@@ -388,7 +393,7 @@ static void zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *req)
*/
static void zfcp_fsf_req_complete(struct zfcp_fsf_req *req)
{
- if (unlikely(req->fsf_command == FSF_QTCB_UNSOLICITED_STATUS)) {
+ if (unlikely(zfcp_fsf_req_is_status_read_buffer(req))) {
zfcp_fsf_status_read_handler(req);
return;
}
@@ -705,7 +710,6 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio,
init_completion(&req->completion);
req->adapter = adapter;
- req->fsf_command = fsf_cmd;
req->req_id = adapter->req_no;
if (likely(fsf_cmd != FSF_QTCB_UNSOLICITED_STATUS)) {
@@ -720,14 +724,13 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio,
return ERR_PTR(-ENOMEM);
}
- req->seq_no = adapter->fsf_req_seq_no;
req->qtcb->prefix.req_seq_no = adapter->fsf_req_seq_no;
req->qtcb->prefix.req_id = req->req_id;
req->qtcb->prefix.ulp_info = 26;
- req->qtcb->prefix.qtcb_type = fsf_qtcb_type[req->fsf_command];
+ req->qtcb->prefix.qtcb_type = fsf_qtcb_type[fsf_cmd];
req->qtcb->prefix.qtcb_version = FSF_QTCB_CURRENT_VERSION;
req->qtcb->header.req_handle = req->req_id;
- req->qtcb->header.fsf_command = req->fsf_command;
+ req->qtcb->header.fsf_command = fsf_cmd;
}
zfcp_qdio_req_init(adapter->qdio, &req->qdio_req, req->req_id, sbtype,
@@ -740,7 +743,6 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
{
struct zfcp_adapter *adapter = req->adapter;
struct zfcp_qdio *qdio = adapter->qdio;
- int with_qtcb = (req->qtcb != NULL);
int req_id = req->req_id;
zfcp_reqlist_add(adapter->req_list, req);
@@ -756,7 +758,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
}
/* Don't increase for unsolicited status */
- if (with_qtcb)
+ if (!zfcp_fsf_req_is_status_read_buffer(req))
adapter->fsf_req_seq_no++;
adapter->req_no++;
@@ -765,8 +767,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
/**
* zfcp_fsf_status_read - send status read request
- * @adapter: pointer to struct zfcp_adapter
- * @req_flags: request flags
+ * @qdio: pointer to struct zfcp_qdio
* Returns: 0 on success, ERROR otherwise
*/
int zfcp_fsf_status_read(struct zfcp_qdio *qdio)
@@ -912,7 +913,7 @@ struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *scmnd)
req->qtcb->header.port_handle = zfcp_sdev->port->handle;
req->qtcb->bottom.support.req_handle = (u64) old_req_id;
- zfcp_fsf_start_timer(req, ZFCP_SCSI_ER_TIMEOUT);
+ zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT);
if (!zfcp_fsf_req_send(req))
goto out;
@@ -1057,8 +1058,10 @@ static int zfcp_fsf_setup_ct_els(struct zfcp_fsf_req *req,
/**
* zfcp_fsf_send_ct - initiate a Generic Service request (FC-GS)
+ * @wka_port: pointer to zfcp WKA port to send CT/GS to
* @ct: pointer to struct zfcp_send_ct with data for request
* @pool: if non-null this mempool is used to allocate struct zfcp_fsf_req
+ * @timeout: timeout that hardware should use, and a later software timeout
*/
int zfcp_fsf_send_ct(struct zfcp_fc_wka_port *wka_port,
struct zfcp_fsf_ct_els *ct, mempool_t *pool,
@@ -1151,7 +1154,10 @@ skip_fsfstatus:
/**
* zfcp_fsf_send_els - initiate an ELS command (FC-FS)
+ * @adapter: pointer to zfcp adapter
+ * @d_id: N_Port_ID to send ELS to
* @els: pointer to struct zfcp_send_els with data for the command
+ * @timeout: timeout that hardware should use, and a later software timeout
*/
int zfcp_fsf_send_els(struct zfcp_adapter *adapter, u32 d_id,
struct zfcp_fsf_ct_els *els, unsigned int timeout)
@@ -1809,7 +1815,7 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req)
case FSF_LUN_SHARING_VIOLATION:
if (qual->word[0])
dev_warn(&zfcp_sdev->port->adapter->ccw_device->dev,
- "LUN 0x%Lx on port 0x%Lx is already in "
+ "LUN 0x%016Lx on port 0x%016Lx is already in "
"use by CSS%d, MIF Image ID %x\n",
zfcp_scsi_dev_lun(sdev),
(unsigned long long)zfcp_sdev->port->wwpn,
@@ -1986,7 +1992,7 @@ out:
return retval;
}
-static void zfcp_fsf_update_lat(struct fsf_latency_record *lat_rec, u32 lat)
+static void zfcp_fsf_update_lat(struct zfcp_latency_record *lat_rec, u32 lat)
{
lat_rec->sum += lat;
lat_rec->min = min(lat_rec->min, lat);
@@ -1996,7 +2002,7 @@ static void zfcp_fsf_update_lat(struct fsf_latency_record *lat_rec, u32 lat)
static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi)
{
struct fsf_qual_latency_info *lat_in;
- struct latency_cont *lat = NULL;
+ struct zfcp_latency_cont *lat = NULL;
struct zfcp_scsi_dev *zfcp_sdev;
struct zfcp_blk_drv_data blktrc;
int ticks = req->adapter->timer_ticks;
@@ -2088,11 +2094,8 @@ static void zfcp_fsf_fcp_handler_common(struct zfcp_fsf_req *req,
break;
case FSF_CMND_LENGTH_NOT_VALID:
dev_err(&req->adapter->ccw_device->dev,
- "Incorrect CDB length %d, LUN 0x%016Lx on "
- "port 0x%016Lx closed\n",
- req->qtcb->bottom.io.fcp_cmnd_length,
- (unsigned long long)zfcp_scsi_dev_lun(sdev),
- (unsigned long long)zfcp_sdev->port->wwpn);
+ "Incorrect FCP_CMND length %d, FCP device closed\n",
+ req->qtcb->bottom.io.fcp_cmnd_length);
zfcp_erp_adapter_shutdown(req->adapter, 0, "fssfch4");
req->status |= ZFCP_STATUS_FSFREQ_ERROR;
break;
@@ -2369,7 +2372,7 @@ struct zfcp_fsf_req *zfcp_fsf_fcp_task_mgmt(struct scsi_device *sdev,
fcp_cmnd = &req->qtcb->bottom.io.fcp_cmnd.iu;
zfcp_fc_fcp_tm(fcp_cmnd, sdev, tm_flags);
- zfcp_fsf_start_timer(req, ZFCP_SCSI_ER_TIMEOUT);
+ zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT);
if (!zfcp_fsf_req_send(req))
goto out;
@@ -2382,7 +2385,7 @@ out:
/**
* zfcp_fsf_reqid_check - validate req_id contained in SBAL returned by QDIO
- * @adapter: pointer to struct zfcp_adapter
+ * @qdio: pointer to struct zfcp_qdio
* @sbal_idx: response queue index of SBAL to be processed
*/
void zfcp_fsf_reqid_check(struct zfcp_qdio *qdio, int sbal_idx)
diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h
index 535628b92f0a..2c658b66318c 100644
--- a/drivers/s390/scsi/zfcp_fsf.h
+++ b/drivers/s390/scsi/zfcp_fsf.h
@@ -438,8 +438,8 @@ struct zfcp_blk_drv_data {
/**
* struct zfcp_fsf_ct_els - zfcp data for ct or els request
- * @req: scatter-gather list for request
- * @resp: scatter-gather list for response
+ * @req: scatter-gather list for request, points to &zfcp_fc_req.sg_req or BSG
+ * @resp: scatter-gather list for response, points to &zfcp_fc_req.sg_rsp or BSG
* @handler: handler function (called for response to the request)
* @handler_data: data passed to handler function
* @port: Optional pointer to port for zfcp internal ELS (only test link ADISC)
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 4ab02e8d36f3..10c4e8e3fd59 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -4,7 +4,7 @@
*
* Setup and helper functions to access QDIO.
*
- * Copyright IBM Corp. 2002, 2010
+ * Copyright IBM Corp. 2002, 2017
*/
#define KMSG_COMPONENT "zfcp"
@@ -19,7 +19,7 @@ static bool enable_multibuffer = true;
module_param_named(datarouter, enable_multibuffer, bool, 0400);
MODULE_PARM_DESC(datarouter, "Enable hardware data router support (default on)");
-static void zfcp_qdio_handler_error(struct zfcp_qdio *qdio, char *id,
+static void zfcp_qdio_handler_error(struct zfcp_qdio *qdio, char *dbftag,
unsigned int qdio_err)
{
struct zfcp_adapter *adapter = qdio->adapter;
@@ -28,12 +28,12 @@ static void zfcp_qdio_handler_error(struct zfcp_qdio *qdio, char *id,
if (qdio_err & QDIO_ERROR_SLSB_STATE) {
zfcp_qdio_siosl(adapter);
- zfcp_erp_adapter_shutdown(adapter, 0, id);
+ zfcp_erp_adapter_shutdown(adapter, 0, dbftag);
return;
}
zfcp_erp_adapter_reopen(adapter,
ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED |
- ZFCP_STATUS_COMMON_ERP_FAILED, id);
+ ZFCP_STATUS_COMMON_ERP_FAILED, dbftag);
}
static void zfcp_qdio_zero_sbals(struct qdio_buffer *sbal[], int first, int cnt)
@@ -180,7 +180,6 @@ zfcp_qdio_sbale_next(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req)
* @qdio: pointer to struct zfcp_qdio
* @q_req: pointer to struct zfcp_qdio_req
* @sg: scatter-gather list
- * @max_sbals: upper bound for number of SBALs to be used
* Returns: zero or -EINVAL on error
*/
int zfcp_qdio_sbals_from_sg(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req,
@@ -303,7 +302,7 @@ static void zfcp_qdio_setup_init_data(struct qdio_initialize *id,
/**
* zfcp_qdio_allocate - allocate queue memory and initialize QDIO data
- * @adapter: pointer to struct zfcp_adapter
+ * @qdio: pointer to struct zfcp_qdio
* Returns: -ENOMEM on memory allocation error or return value from
* qdio_allocate
*/
diff --git a/drivers/s390/scsi/zfcp_qdio.h b/drivers/s390/scsi/zfcp_qdio.h
index 886c662cc154..2a816a37b3c0 100644
--- a/drivers/s390/scsi/zfcp_qdio.h
+++ b/drivers/s390/scsi/zfcp_qdio.h
@@ -30,6 +30,8 @@
* @req_q_full: queue full incidents
* @req_q_wq: used to wait for SBAL availability
* @adapter: adapter used in conjunction with this qdio structure
+ * @max_sbale_per_sbal: qdio limit per sbal
+ * @max_sbale_per_req: qdio limit per request
*/
struct zfcp_qdio {
struct qdio_buffer *res_q[QDIO_MAX_BUFFERS_PER_Q];
@@ -70,7 +72,7 @@ struct zfcp_qdio_req {
/**
* zfcp_qdio_sbale_req - return pointer to sbale on req_q for a request
* @qdio: pointer to struct zfcp_qdio
- * @q_rec: pointer to struct zfcp_qdio_req
+ * @q_req: pointer to struct zfcp_qdio_req
* Returns: pointer to qdio_buffer_element (sbale) structure
*/
static inline struct qdio_buffer_element *
@@ -82,7 +84,7 @@ zfcp_qdio_sbale_req(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req)
/**
* zfcp_qdio_sbale_curr - return current sbale on req_q for a request
* @qdio: pointer to struct zfcp_qdio
- * @fsf_req: pointer to struct zfcp_fsf_req
+ * @q_req: pointer to struct zfcp_qdio_req
* Returns: pointer to qdio_buffer_element (sbale) structure
*/
static inline struct qdio_buffer_element *
@@ -135,6 +137,8 @@ void zfcp_qdio_req_init(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req,
* zfcp_qdio_fill_next - Fill next sbale, only for single sbal requests
* @qdio: pointer to struct zfcp_qdio
* @q_req: pointer to struct zfcp_queue_req
+ * @data: pointer to data
+ * @len: length of data
*
* This is only required for single sbal requests, calling it when
* wrapping around to the next sbal is a bug.
@@ -182,6 +186,7 @@ int zfcp_qdio_sg_one_sbale(struct scatterlist *sg)
/**
* zfcp_qdio_skip_to_last_sbale - skip to last sbale in sbal
+ * @qdio: pointer to struct zfcp_qdio
* @q_req: The current zfcp_qdio_req
*/
static inline
diff --git a/drivers/s390/scsi/zfcp_reqlist.h b/drivers/s390/scsi/zfcp_reqlist.h
index 59a943c0d51d..9b8ff249e31c 100644
--- a/drivers/s390/scsi/zfcp_reqlist.h
+++ b/drivers/s390/scsi/zfcp_reqlist.h
@@ -17,7 +17,7 @@
/**
* struct zfcp_reqlist - Container for request list (reqlist)
* @lock: Spinlock for protecting the hash list
- * @list: Array of hashbuckets, each is a list of requests in this bucket
+ * @buckets: Array of hashbuckets, each is a list of requests in this bucket
*/
struct zfcp_reqlist {
spinlock_t lock;
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index a8efcb330bc1..00acc7144bbc 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -27,7 +27,11 @@ MODULE_PARM_DESC(queue_depth, "Default queue depth for new SCSI devices");
static bool enable_dif;
module_param_named(dif, enable_dif, bool, 0400);
-MODULE_PARM_DESC(dif, "Enable DIF/DIX data integrity support");
+MODULE_PARM_DESC(dif, "Enable DIF data integrity support (default off)");
+
+bool zfcp_experimental_dix;
+module_param_named(dix, zfcp_experimental_dix, bool, 0400);
+MODULE_PARM_DESC(dix, "Enable experimental DIX (data integrity extension) support which implies DIF support (default off)");
static bool allow_lun_scan = true;
module_param(allow_lun_scan, bool, 0600);
@@ -226,7 +230,9 @@ static void zfcp_scsi_forget_cmnd(struct zfcp_fsf_req *old_req, void *data)
(struct zfcp_scsi_req_filter *)data;
/* already aborted - prevent side-effects - or not a SCSI command */
- if (old_req->data == NULL || old_req->fsf_command != FSF_QTCB_FCP_CMND)
+ if (old_req->data == NULL ||
+ zfcp_fsf_req_is_status_read_buffer(old_req) ||
+ old_req->qtcb->header.fsf_command != FSF_QTCB_FCP_CMND)
return;
/* (tmf_scope == FCP_TMF_TGT_RESET || tmf_scope == FCP_TMF_LUN_RESET) */
@@ -423,7 +429,6 @@ static struct scsi_host_template zfcp_scsi_host_template = {
* ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2) * 8,
/* GCD, adjusted later */
.dma_boundary = ZFCP_QDIO_SBALE_LEN - 1,
- .use_clustering = 1,
.shost_attrs = zfcp_sysfs_shost_attrs,
.sdev_attrs = zfcp_sysfs_sdev_attrs,
.track_queue_depth = 1,
@@ -788,11 +793,11 @@ void zfcp_scsi_set_prot(struct zfcp_adapter *adapter)
data_div = atomic_read(&adapter->status) &
ZFCP_STATUS_ADAPTER_DATA_DIV_ENABLED;
- if (enable_dif &&
+ if ((enable_dif || zfcp_experimental_dix) &&
adapter->adapter_features & FSF_FEATURE_DIF_PROT_TYPE1)
mask |= SHOST_DIF_TYPE1_PROTECTION;
- if (enable_dif && data_div &&
+ if (zfcp_experimental_dix && data_div &&
adapter->adapter_features & FSF_FEATURE_DIX_PROT_TCPIP) {
mask |= SHOST_DIX_TYPE1_PROTECTION;
scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index 2d655a97b959..a3c20e3a8b7c 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1998,7 +1998,6 @@ static struct scsi_host_template driver_template = {
.sg_tablesize = TW_APACHE_MAX_SGL_LENGTH,
.max_sectors = TW_MAX_SECTORS,
.cmd_per_lun = TW_MAX_CMDS_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = twa_host_attrs,
.emulated = 1,
.no_write_same = 1,
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 480cf82700e9..e8f5f7c63190 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -1550,7 +1550,6 @@ static struct scsi_host_template driver_template = {
.sg_tablesize = TW_LIBERATOR_MAX_SGL_LENGTH,
.max_sectors = TW_MAX_SECTORS,
.cmd_per_lun = TW_MAX_CMDS_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = twl_host_attrs,
.emulated = 1,
.no_write_same = 1,
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index a58257645e94..2b1e0d503020 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1174,7 +1174,7 @@ static int tw_setfeature(TW_Device_Extension *tw_dev, int parm, int param_size,
command_que_value = tw_dev->command_packet_physical_address[request_id];
if (command_que_value == 0) {
printk(KERN_WARNING "3w-xxxx: tw_setfeature(): Bad command packet physical address.\n");
- return 1;
+ return 1;
}
/* Send command packet to the board */
@@ -2247,7 +2247,6 @@ static struct scsi_host_template driver_template = {
.sg_tablesize = TW_MAX_SGL_LENGTH,
.max_sectors = TW_MAX_SECTORS,
.cmd_per_lun = TW_MAX_CMDS_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = tw_host_attrs,
.emulated = 1,
.no_write_same = 1,
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index 6be77b3aa8a5..128d658d472a 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -318,7 +318,6 @@ NCR_700_detect(struct scsi_host_template *tpnt,
tpnt->can_queue = NCR_700_COMMAND_SLOTS_PER_HOST;
tpnt->sg_tablesize = NCR_700_SG_SEGMENTS;
tpnt->cmd_per_lun = NCR_700_CMD_PER_LUN;
- tpnt->use_clustering = ENABLE_CLUSTERING;
tpnt->slave_configure = NCR_700_slave_configure;
tpnt->slave_destroy = NCR_700_slave_destroy;
tpnt->slave_alloc = NCR_700_slave_alloc;
diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 9cee941f97d6..e41e51f1da71 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -2641,6 +2641,7 @@ static int blogic_resultcode(struct blogic_adapter *adapter,
case BLOGIC_BAD_CMD_PARAM:
blogic_warn("BusLogic Driver Protocol Error 0x%02X\n",
adapter, adapter_status);
+ /* fall through */
case BLOGIC_DATA_UNDERRUN:
case BLOGIC_DATA_OVERRUN:
case BLOGIC_NOEXPECT_BUSFREE:
@@ -3857,7 +3858,6 @@ static struct scsi_host_template blogic_template = {
#endif
.unchecked_isa_dma = 1,
.max_sectors = 128,
- .use_clustering = ENABLE_CLUSTERING,
};
/*
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 640cd1b31a18..f38882f6f37d 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -50,18 +50,6 @@ config SCSI_NETLINK
default n
depends on NET
-config SCSI_MQ_DEFAULT
- bool "SCSI: use blk-mq I/O path by default"
- default y
- depends on SCSI
- ---help---
- This option enables the blk-mq based I/O path for SCSI devices by
- default. With this option the scsi_mod.use_blk_mq module/boot
- option defaults to Y, without it to N, but it can still be
- overridden either way.
-
- If unsure say Y.
-
config SCSI_PROC_FS
bool "legacy /proc/scsi/ support"
depends on SCSI && PROC_FS
diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c
index 00072ed9540b..ff53fd0d12f2 100644
--- a/drivers/scsi/a100u2w.c
+++ b/drivers/scsi/a100u2w.c
@@ -1078,7 +1078,6 @@ static struct scsi_host_template inia100_template = {
.can_queue = 1,
.this_id = 1,
.sg_tablesize = SG_ALL,
- .use_clustering = ENABLE_CLUSTERING,
};
static int inia100_probe_one(struct pci_dev *pdev,
diff --git a/drivers/scsi/a2091.c b/drivers/scsi/a2091.c
index 61aadc7acb49..c96bc7261a42 100644
--- a/drivers/scsi/a2091.c
+++ b/drivers/scsi/a2091.c
@@ -160,7 +160,7 @@ static struct scsi_host_template a2091_scsi_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING
+ .dma_boundary = PAGE_SIZE - 1,
};
static int a2091_probe(struct zorro_dev *z, const struct zorro_device_id *ent)
diff --git a/drivers/scsi/a3000.c b/drivers/scsi/a3000.c
index 2427a8541247..dcf435f312dd 100644
--- a/drivers/scsi/a3000.c
+++ b/drivers/scsi/a3000.c
@@ -175,7 +175,6 @@ static struct scsi_host_template amiga_a3000_scsi_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING
};
static int __init amiga_a3000_scsi_probe(struct platform_device *pdev)
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index bd7f352c28f3..75ab5ff6b78c 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -2892,6 +2892,7 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
!(dev->raw_io_64) ||
((scsicmd->cmnd[1] & 0x1f) != SAI_READ_CAPACITY_16))
break;
+ /* fall through */
case INQUIRY:
case READ_CAPACITY:
case TEST_UNIT_READY:
@@ -2966,6 +2967,7 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
/* Issue FIB to tell Firmware to flush it's cache */
if ((aac_cache & 6) != 2)
return aac_synchronize(scsicmd);
+ /* fall through */
case INQUIRY:
{
struct inquiry_data inq_data;
@@ -3319,8 +3321,9 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
min_t(size_t,
sizeof(dev->fsa_dev[cid].sense_data),
SCSI_SENSE_BUFFERSIZE));
- break;
+ break;
}
+ /* fall through */
case RESERVE:
case RELEASE:
case REZERO_UNIT:
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 39eb415987fc..3291d1c16864 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -40,6 +40,7 @@
#define nblank(x) _nblank(x)[0]
#include <linux/interrupt.h>
+#include <linux/completion.h>
#include <linux/pci.h>
#include <scsi/scsi_host.h>
@@ -1241,7 +1242,7 @@ struct aac_fib_context {
u32 unique; // unique value representing this context
ulong jiffies; // used for cleanup - dmb changed to ulong
struct list_head next; // used to link context's into a linked list
- struct semaphore wait_sem; // this is used to wait for the next fib to arrive.
+ struct completion completion; // this is used to wait for the next fib to arrive.
int wait; // Set to true when thread is in WaitForSingleObject
unsigned long count; // total number of FIBs on FibList
struct list_head fib_list; // this holds fibs and their attachd hw_fibs
@@ -1313,7 +1314,7 @@ struct fib {
* This is the event the sendfib routine will wait on if the
* caller did not pass one and this is synch io.
*/
- struct semaphore event_wait;
+ struct completion event_wait;
spinlock_t event_lock;
u32 done; /* gets set to 1 when fib is complete */
diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c
index 25f6600d6c09..e2899ff7913e 100644
--- a/drivers/scsi/aacraid/commctrl.c
+++ b/drivers/scsi/aacraid/commctrl.c
@@ -41,7 +41,6 @@
#include <linux/blkdev.h>
#include <linux/delay.h> /* ssleep prototype */
#include <linux/kthread.h>
-#include <linux/semaphore.h>
#include <linux/uaccess.h>
#include <scsi/scsi_host.h>
@@ -203,7 +202,7 @@ static int open_getadapter_fib(struct aac_dev * dev, void __user *arg)
/*
* Initialize the mutex used to wait for the next AIF.
*/
- sema_init(&fibctx->wait_sem, 0);
+ init_completion(&fibctx->completion);
fibctx->wait = 0;
/*
* Initialize the fibs and set the count of fibs on
@@ -335,7 +334,7 @@ return_fib:
ssleep(1);
}
if (f.wait) {
- if(down_interruptible(&fibctx->wait_sem) < 0) {
+ if (wait_for_completion_interruptible(&fibctx->completion) < 0) {
status = -ERESTARTSYS;
} else {
/* Lock again and retry */
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 1e77d96a18f2..d5a6aa9676c8 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -44,7 +44,6 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/interrupt.h>
-#include <linux/semaphore.h>
#include <linux/bcd.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
@@ -189,7 +188,7 @@ int aac_fib_setup(struct aac_dev * dev)
fibptr->hw_fib_va = hw_fib;
fibptr->data = (void *) fibptr->hw_fib_va->data;
fibptr->next = fibptr+1; /* Forward chain the fibs */
- sema_init(&fibptr->event_wait, 0);
+ init_completion(&fibptr->event_wait);
spin_lock_init(&fibptr->event_lock);
hw_fib->header.XferState = cpu_to_le32(0xffffffff);
hw_fib->header.SenderSize =
@@ -623,7 +622,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
}
if (wait) {
fibptr->flags |= FIB_CONTEXT_FLAG_WAIT;
- if (down_interruptible(&fibptr->event_wait)) {
+ if (wait_for_completion_interruptible(&fibptr->event_wait)) {
fibptr->flags &= ~FIB_CONTEXT_FLAG_WAIT;
return -EFAULT;
}
@@ -659,7 +658,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
* hardware failure has occurred.
*/
unsigned long timeout = jiffies + (180 * HZ); /* 3 minutes */
- while (down_trylock(&fibptr->event_wait)) {
+ while (!try_wait_for_completion(&fibptr->event_wait)) {
int blink;
if (time_is_before_eq_jiffies(timeout)) {
struct aac_queue * q = &dev->queues->queue[AdapNormCmdQueue];
@@ -689,9 +688,9 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
*/
schedule();
}
- } else if (down_interruptible(&fibptr->event_wait)) {
+ } else if (wait_for_completion_interruptible(&fibptr->event_wait)) {
/* Do nothing ... satisfy
- * down_interruptible must_check */
+ * wait_for_completion_interruptible must_check */
}
spin_lock_irqsave(&fibptr->event_lock, flags);
@@ -777,7 +776,7 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback,
return -EFAULT;
fibptr->flags |= FIB_CONTEXT_FLAG_WAIT;
- if (down_interruptible(&fibptr->event_wait))
+ if (wait_for_completion_interruptible(&fibptr->event_wait))
fibptr->done = 2;
fibptr->flags &= ~(FIB_CONTEXT_FLAG_WAIT);
@@ -1538,7 +1537,7 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type)
|| fib->flags & FIB_CONTEXT_FLAG_WAIT) {
unsigned long flagv;
spin_lock_irqsave(&fib->event_lock, flagv);
- up(&fib->event_wait);
+ complete(&fib->event_wait);
spin_unlock_irqrestore(&fib->event_lock, flagv);
schedule();
retval = 0;
@@ -1828,7 +1827,7 @@ int aac_check_health(struct aac_dev * aac)
* Set the event to wake up the
* thread that will waiting.
*/
- up(&fibctx->wait_sem);
+ complete(&fibctx->completion);
} else {
printk(KERN_WARNING "aifd: didn't allocate NewFib.\n");
kfree(fib);
@@ -2165,7 +2164,7 @@ static void wakeup_fibctx_threads(struct aac_dev *dev,
* Set the event to wake up the
* thread that is waiting.
*/
- up(&fibctx->wait_sem);
+ complete(&fibctx->completion);
entry = entry->next;
}
diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c
index ddc69738375f..40a771dd1c0e 100644
--- a/drivers/scsi/aacraid/dpcsup.c
+++ b/drivers/scsi/aacraid/dpcsup.c
@@ -38,7 +38,6 @@
#include <linux/slab.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
-#include <linux/semaphore.h>
#include "aacraid.h"
@@ -129,7 +128,7 @@ unsigned int aac_response_normal(struct aac_queue * q)
spin_lock_irqsave(&fib->event_lock, flagv);
if (!fib->done) {
fib->done = 1;
- up(&fib->event_wait);
+ complete(&fib->event_wait);
}
spin_unlock_irqrestore(&fib->event_lock, flagv);
@@ -376,16 +375,16 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, int isAif,
start_callback = 1;
} else {
unsigned long flagv;
- int complete = 0;
+ int completed = 0;
dprintk((KERN_INFO "event_wait up\n"));
spin_lock_irqsave(&fib->event_lock, flagv);
if (fib->done == 2) {
fib->done = 1;
- complete = 1;
+ completed = 1;
} else {
fib->done = 1;
- up(&fib->event_wait);
+ complete(&fib->event_wait);
}
spin_unlock_irqrestore(&fib->event_lock, flagv);
@@ -395,7 +394,7 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, int isAif,
mflags);
FIB_COUNTER_INCREMENT(aac_config.NativeRecved);
- if (complete)
+ if (completed)
aac_fib_complete(fib);
}
} else {
@@ -428,16 +427,16 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, int isAif,
start_callback = 1;
} else {
unsigned long flagv;
- int complete = 0;
+ int completed = 0;
dprintk((KERN_INFO "event_wait up\n"));
spin_lock_irqsave(&fib->event_lock, flagv);
if (fib->done == 2) {
fib->done = 1;
- complete = 1;
+ completed = 1;
} else {
fib->done = 1;
- up(&fib->event_wait);
+ complete(&fib->event_wait);
}
spin_unlock_irqrestore(&fib->event_lock, flagv);
@@ -447,7 +446,7 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, int isAif,
mflags);
FIB_COUNTER_INCREMENT(aac_config.NormalRecved);
- if (complete)
+ if (completed)
aac_fib_complete(fib);
}
}
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 2d4e4ddc5ace..634ddb90e7aa 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -759,6 +759,7 @@ static int aac_eh_abort(struct scsi_cmnd* cmd)
!(aac->raw_io_64) ||
((cmd->cmnd[1] & 0x1f) != SAI_READ_CAPACITY_16))
break;
+ /* fall through */
case INQUIRY:
case READ_CAPACITY:
/*
@@ -1539,7 +1540,6 @@ static struct scsi_host_template aac_driver_template = {
#else
.cmd_per_lun = AAC_NUM_IO_FIB,
#endif
- .use_clustering = ENABLE_CLUSTERING,
.emulated = 1,
.no_write_same = 1,
};
@@ -1559,7 +1559,7 @@ static void __aac_shutdown(struct aac_dev * aac)
struct fib *fib = &aac->fibs[i];
if (!(fib->hw_fib_va->header.XferState & cpu_to_le32(NoResponseExpected | Async)) &&
(fib->hw_fib_va->header.XferState & cpu_to_le32(ResponseExpected)))
- up(&fib->event_wait);
+ complete(&fib->event_wait);
}
kthread_stop(aac->thread);
aac->thread = NULL;
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 7a51ccfa8662..8377aec0649d 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -106,7 +106,7 @@ static irqreturn_t aac_src_intr_message(int irq, void *dev_id)
spin_lock_irqsave(&dev->sync_fib->event_lock, sflags);
if (dev->sync_fib->flags & FIB_CONTEXT_FLAG_WAIT) {
dev->management_fib_count--;
- up(&dev->sync_fib->event_wait);
+ complete(&dev->sync_fib->event_wait);
}
spin_unlock_irqrestore(&dev->sync_fib->event_lock,
sflags);
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index 223ef6f4e258..d37584403c33 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -3192,8 +3192,8 @@ static void asc_prt_driver_conf(struct seq_file *m, struct Scsi_Host *shost)
shost->sg_tablesize, shost->cmd_per_lun);
seq_printf(m,
- " unchecked_isa_dma %d, use_clustering %d\n",
- shost->unchecked_isa_dma, shost->use_clustering);
+ " unchecked_isa_dma %d\n",
+ shost->unchecked_isa_dma);
seq_printf(m,
" flags 0x%x, last_reset 0x%lx, jiffies 0x%lx, asc_n_io_port 0x%x\n",
@@ -10808,14 +10808,6 @@ static struct scsi_host_template advansys_template = {
* for non-ISA adapters.
*/
.unchecked_isa_dma = true,
- /*
- * All adapters controlled by this driver are capable of large
- * scatter-gather lists. According to the mid-level SCSI documentation
- * this obviates any performance gain provided by setting
- * 'use_clustering'. But empirically while CPU utilization is increased
- * by enabling clustering, I/O throughput increases as well.
- */
- .use_clustering = ENABLE_CLUSTERING,
};
static int advansys_wide_init_chip(struct Scsi_Host *shost)
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index 301b3cad15f8..97872838b983 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -2920,7 +2920,7 @@ static struct scsi_host_template aha152x_driver_template = {
.can_queue = 1,
.this_id = 7,
.sg_tablesize = SG_ALL,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.slave_alloc = aha152x_adjust_queue,
};
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 41add33e3f1f..ba7a5725be04 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -58,8 +58,15 @@ struct aha1542_hostdata {
int aha1542_last_mbi_used;
int aha1542_last_mbo_used;
struct scsi_cmnd *int_cmds[AHA1542_MAILBOXES];
- struct mailbox mb[2 * AHA1542_MAILBOXES];
- struct ccb ccb[AHA1542_MAILBOXES];
+ struct mailbox *mb;
+ dma_addr_t mb_handle;
+ struct ccb *ccb;
+ dma_addr_t ccb_handle;
+};
+
+struct aha1542_cmd {
+ struct chain *chain;
+ dma_addr_t chain_handle;
};
static inline void aha1542_intr_reset(u16 base)
@@ -233,6 +240,21 @@ static int aha1542_test_port(struct Scsi_Host *sh)
return 1;
}
+static void aha1542_free_cmd(struct scsi_cmnd *cmd)
+{
+ struct aha1542_cmd *acmd = scsi_cmd_priv(cmd);
+ struct device *dev = cmd->device->host->dma_dev;
+ size_t len = scsi_sg_count(cmd) * sizeof(struct chain);
+
+ if (acmd->chain) {
+ dma_unmap_single(dev, acmd->chain_handle, len, DMA_TO_DEVICE);
+ kfree(acmd->chain);
+ }
+
+ acmd->chain = NULL;
+ scsi_dma_unmap(cmd);
+}
+
static irqreturn_t aha1542_interrupt(int irq, void *dev_id)
{
struct Scsi_Host *sh = dev_id;
@@ -303,7 +325,7 @@ static irqreturn_t aha1542_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
};
- mbo = (scsi2int(mb[mbi].ccbptr) - (isa_virt_to_bus(&ccb[0]))) / sizeof(struct ccb);
+ mbo = (scsi2int(mb[mbi].ccbptr) - (unsigned long)aha1542->ccb_handle) / sizeof(struct ccb);
mbistatus = mb[mbi].status;
mb[mbi].status = 0;
aha1542->aha1542_last_mbi_used = mbi;
@@ -331,8 +353,7 @@ static irqreturn_t aha1542_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
my_done = tmp_cmd->scsi_done;
- kfree(tmp_cmd->host_scribble);
- tmp_cmd->host_scribble = NULL;
+ aha1542_free_cmd(tmp_cmd);
/* Fetch the sense data, and tuck it away, in the required slot. The
Adaptec automatically fetches it, and there is no guarantee that
we will still have it in the cdb when we come back */
@@ -369,6 +390,7 @@ static irqreturn_t aha1542_interrupt(int irq, void *dev_id)
static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
{
+ struct aha1542_cmd *acmd = scsi_cmd_priv(cmd);
struct aha1542_hostdata *aha1542 = shost_priv(sh);
u8 direction;
u8 target = cmd->device->id;
@@ -378,7 +400,6 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
int mbo, sg_count;
struct mailbox *mb = aha1542->mb;
struct ccb *ccb = aha1542->ccb;
- struct chain *cptr;
if (*cmd->cmnd == REQUEST_SENSE) {
/* Don't do the command - we have the sense data already */
@@ -398,15 +419,17 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
print_hex_dump_bytes("command: ", DUMP_PREFIX_NONE, cmd->cmnd, cmd->cmd_len);
}
#endif
- if (bufflen) { /* allocate memory before taking host_lock */
- sg_count = scsi_sg_count(cmd);
- cptr = kmalloc_array(sg_count, sizeof(*cptr),
- GFP_KERNEL | GFP_DMA);
- if (!cptr)
- return SCSI_MLQUEUE_HOST_BUSY;
- } else {
- sg_count = 0;
- cptr = NULL;
+ sg_count = scsi_dma_map(cmd);
+ if (sg_count) {
+ size_t len = sg_count * sizeof(struct chain);
+
+ acmd->chain = kmalloc(len, GFP_DMA);
+ if (!acmd->chain)
+ goto out_unmap;
+ acmd->chain_handle = dma_map_single(sh->dma_dev, acmd->chain,
+ len, DMA_TO_DEVICE);
+ if (dma_mapping_error(sh->dma_dev, acmd->chain_handle))
+ goto out_free_chain;
}
/* Use the outgoing mailboxes in a round-robin fashion, because this
@@ -437,7 +460,8 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
shost_printk(KERN_DEBUG, sh, "Sending command (%d %p)...", mbo, cmd->scsi_done);
#endif
- any2scsi(mb[mbo].ccbptr, isa_virt_to_bus(&ccb[mbo])); /* This gets trashed for some reason */
+ /* This gets trashed for some reason */
+ any2scsi(mb[mbo].ccbptr, aha1542->ccb_handle + mbo * sizeof(*ccb));
memset(&ccb[mbo], 0, sizeof(struct ccb));
@@ -456,21 +480,18 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
int i;
ccb[mbo].op = 2; /* SCSI Initiator Command w/scatter-gather */
- cmd->host_scribble = (void *)cptr;
scsi_for_each_sg(cmd, sg, sg_count, i) {
- any2scsi(cptr[i].dataptr, isa_page_to_bus(sg_page(sg))
- + sg->offset);
- any2scsi(cptr[i].datalen, sg->length);
+ any2scsi(acmd->chain[i].dataptr, sg_dma_address(sg));
+ any2scsi(acmd->chain[i].datalen, sg_dma_len(sg));
};
any2scsi(ccb[mbo].datalen, sg_count * sizeof(struct chain));
- any2scsi(ccb[mbo].dataptr, isa_virt_to_bus(cptr));
+ any2scsi(ccb[mbo].dataptr, acmd->chain_handle);
#ifdef DEBUG
- shost_printk(KERN_DEBUG, sh, "cptr %p: ", cptr);
- print_hex_dump_bytes("cptr: ", DUMP_PREFIX_NONE, cptr, 18);
+ shost_printk(KERN_DEBUG, sh, "cptr %p: ", acmd->chain);
+ print_hex_dump_bytes("cptr: ", DUMP_PREFIX_NONE, acmd->chain, 18);
#endif
} else {
ccb[mbo].op = 0; /* SCSI Initiator Command */
- cmd->host_scribble = NULL;
any2scsi(ccb[mbo].datalen, 0);
any2scsi(ccb[mbo].dataptr, 0);
};
@@ -488,24 +509,29 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
spin_unlock_irqrestore(sh->host_lock, flags);
return 0;
+out_free_chain:
+ kfree(acmd->chain);
+ acmd->chain = NULL;
+out_unmap:
+ scsi_dma_unmap(cmd);
+ return SCSI_MLQUEUE_HOST_BUSY;
}
/* Initialize mailboxes */
static void setup_mailboxes(struct Scsi_Host *sh)
{
struct aha1542_hostdata *aha1542 = shost_priv(sh);
- int i;
- struct mailbox *mb = aha1542->mb;
- struct ccb *ccb = aha1542->ccb;
-
u8 mb_cmd[5] = { CMD_MBINIT, AHA1542_MAILBOXES, 0, 0, 0};
+ int i;
for (i = 0; i < AHA1542_MAILBOXES; i++) {
- mb[i].status = mb[AHA1542_MAILBOXES + i].status = 0;
- any2scsi(mb[i].ccbptr, isa_virt_to_bus(&ccb[i]));
+ aha1542->mb[i].status = 0;
+ any2scsi(aha1542->mb[i].ccbptr,
+ aha1542->ccb_handle + i * sizeof(struct ccb));
+ aha1542->mb[AHA1542_MAILBOXES + i].status = 0;
};
aha1542_intr_reset(sh->io_port); /* reset interrupts, so they don't block */
- any2scsi((mb_cmd + 2), isa_virt_to_bus(mb));
+ any2scsi(mb_cmd + 2, aha1542->mb_handle);
if (aha1542_out(sh->io_port, mb_cmd, 5))
shost_printk(KERN_ERR, sh, "failed setting up mailboxes\n");
aha1542_intr_reset(sh->io_port);
@@ -739,11 +765,26 @@ static struct Scsi_Host *aha1542_hw_init(struct scsi_host_template *tpnt, struct
if (aha1542->bios_translation == BIOS_TRANSLATION_25563)
shost_printk(KERN_INFO, sh, "Using extended bios translation\n");
+ if (dma_set_mask_and_coherent(pdev, DMA_BIT_MASK(24)) < 0)
+ goto unregister;
+
+ aha1542->mb = dma_alloc_coherent(pdev,
+ AHA1542_MAILBOXES * 2 * sizeof(struct mailbox),
+ &aha1542->mb_handle, GFP_KERNEL);
+ if (!aha1542->mb)
+ goto unregister;
+
+ aha1542->ccb = dma_alloc_coherent(pdev,
+ AHA1542_MAILBOXES * sizeof(struct ccb),
+ &aha1542->ccb_handle, GFP_KERNEL);
+ if (!aha1542->ccb)
+ goto free_mb;
+
setup_mailboxes(sh);
if (request_irq(sh->irq, aha1542_interrupt, 0, "aha1542", sh)) {
shost_printk(KERN_ERR, sh, "Unable to allocate IRQ.\n");
- goto unregister;
+ goto free_ccb;
}
if (sh->dma_channel != 0xFF) {
if (request_dma(sh->dma_channel, "aha1542")) {
@@ -762,11 +803,18 @@ static struct Scsi_Host *aha1542_hw_init(struct scsi_host_template *tpnt, struct
scsi_scan_host(sh);
return sh;
+
free_dma:
if (sh->dma_channel != 0xff)
free_dma(sh->dma_channel);
free_irq:
free_irq(sh->irq, sh);
+free_ccb:
+ dma_free_coherent(pdev, AHA1542_MAILBOXES * sizeof(struct ccb),
+ aha1542->ccb, aha1542->ccb_handle);
+free_mb:
+ dma_free_coherent(pdev, AHA1542_MAILBOXES * 2 * sizeof(struct mailbox),
+ aha1542->mb, aha1542->mb_handle);
unregister:
scsi_host_put(sh);
release:
@@ -777,9 +825,16 @@ release:
static int aha1542_release(struct Scsi_Host *sh)
{
+ struct aha1542_hostdata *aha1542 = shost_priv(sh);
+ struct device *dev = sh->dma_dev;
+
scsi_remove_host(sh);
if (sh->dma_channel != 0xff)
free_dma(sh->dma_channel);
+ dma_free_coherent(dev, AHA1542_MAILBOXES * sizeof(struct ccb),
+ aha1542->ccb, aha1542->ccb_handle);
+ dma_free_coherent(dev, AHA1542_MAILBOXES * 2 * sizeof(struct mailbox),
+ aha1542->mb, aha1542->mb_handle);
if (sh->irq)
free_irq(sh->irq, sh);
if (sh->io_port && sh->n_io_port)
@@ -826,7 +881,8 @@ static int aha1542_dev_reset(struct scsi_cmnd *cmd)
aha1542->aha1542_last_mbo_used = mbo;
- any2scsi(mb[mbo].ccbptr, isa_virt_to_bus(&ccb[mbo])); /* This gets trashed for some reason */
+ /* This gets trashed for some reason */
+ any2scsi(mb[mbo].ccbptr, aha1542->ccb_handle + mbo * sizeof(*ccb));
memset(&ccb[mbo], 0, sizeof(struct ccb));
@@ -901,8 +957,7 @@ static int aha1542_reset(struct scsi_cmnd *cmd, u8 reset_cmd)
*/
continue;
}
- kfree(tmp_cmd->host_scribble);
- tmp_cmd->host_scribble = NULL;
+ aha1542_free_cmd(tmp_cmd);
aha1542->int_cmds[i] = NULL;
aha1542->mb[i].status = 0;
}
@@ -946,6 +1001,7 @@ static struct scsi_host_template driver_template = {
.module = THIS_MODULE,
.proc_name = "aha1542",
.name = "Adaptec 1542",
+ .cmd_size = sizeof(struct aha1542_cmd),
.queuecommand = aha1542_queuecommand,
.eh_device_reset_handler= aha1542_dev_reset,
.eh_bus_reset_handler = aha1542_bus_reset,
@@ -955,7 +1011,6 @@ static struct scsi_host_template driver_template = {
.this_id = 7,
.sg_tablesize = 16,
.unchecked_isa_dma = 1,
- .use_clustering = ENABLE_CLUSTERING,
};
static int aha1542_isa_match(struct device *pdev, unsigned int ndev)
diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c
index 786bf7f32c64..da4150c17781 100644
--- a/drivers/scsi/aha1740.c
+++ b/drivers/scsi/aha1740.c
@@ -545,7 +545,6 @@ static struct scsi_host_template aha1740_template = {
.can_queue = AHA1740_ECBS,
.this_id = 7,
.sg_tablesize = AHA1740_SCATTER,
- .use_clustering = ENABLE_CLUSTERING,
.eh_abort_handler = aha1740_eh_abort_handler,
};
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 2588b8f84ba0..57992519384e 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -920,7 +920,6 @@ struct scsi_host_template aic79xx_driver_template = {
.this_id = -1,
.max_sectors = 8192,
.cmd_per_lun = 2,
- .use_clustering = ENABLE_CLUSTERING,
.slave_alloc = ahd_linux_slave_alloc,
.slave_configure = ahd_linux_slave_configure,
.target_alloc = ahd_linux_target_alloc,
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index c6be3aeb302b..3c9c17450bb3 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -807,7 +807,6 @@ struct scsi_host_template aic7xxx_driver_template = {
.this_id = -1,
.max_sectors = 8192,
.cmd_per_lun = 2,
- .use_clustering = ENABLE_CLUSTERING,
.slave_alloc = ahc_linux_slave_alloc,
.slave_configure = ahc_linux_slave_configure,
.target_alloc = ahc_linux_target_alloc,
diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c
index 3b8ad55e59de..2bc7615193bd 100644
--- a/drivers/scsi/aic94xx/aic94xx_hwi.c
+++ b/drivers/scsi/aic94xx/aic94xx_hwi.c
@@ -1057,14 +1057,13 @@ static struct asd_ascb *asd_ascb_alloc(struct asd_ha_struct *asd_ha,
if (ascb) {
ascb->dma_scb.size = sizeof(struct scb);
- ascb->dma_scb.vaddr = dma_pool_alloc(asd_ha->scb_pool,
+ ascb->dma_scb.vaddr = dma_pool_zalloc(asd_ha->scb_pool,
gfp_flags,
&ascb->dma_scb.dma_handle);
if (!ascb->dma_scb.vaddr) {
kmem_cache_free(asd_ascb_cache, ascb);
return NULL;
}
- memset(ascb->dma_scb.vaddr, 0, sizeof(struct scb));
asd_init_ascb(asd_ha, ascb);
spin_lock_irqsave(&seq->tc_index_lock, flags);
diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
index 41c4d8abdd4a..f83f79b07b50 100644
--- a/drivers/scsi/aic94xx/aic94xx_init.c
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -68,7 +68,6 @@ static struct scsi_host_template aic94xx_sht = {
.this_id = -1,
.sg_tablesize = SG_ALL,
.max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.eh_device_reset_handler = sas_eh_device_reset_handler,
.eh_target_reset_handler = sas_eh_target_reset_handler,
.target_destroy = sas_target_destroy,
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index d4404eea24fb..0f6751b0a633 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -156,7 +156,6 @@ static struct scsi_host_template arcmsr_scsi_host_template = {
.sg_tablesize = ARCMSR_DEFAULT_SG_ENTRIES,
.max_sectors = ARCMSR_MAX_XFER_SECTORS_C,
.cmd_per_lun = ARCMSR_DEFAULT_CMD_PERLUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = arcmsr_host_attrs,
.no_write_same = 1,
};
@@ -903,9 +902,9 @@ static int arcmsr_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if(!host){
goto pci_disable_dev;
}
- error = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ error = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
if(error){
- error = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ error = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if(error){
printk(KERN_WARNING
"scsi%d: No suitable DMA mask available\n",
@@ -1049,9 +1048,9 @@ static int arcmsr_resume(struct pci_dev *pdev)
pr_warn("%s: pci_enable_device error\n", __func__);
return -ENODEV;
}
- error = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ error = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
if (error) {
- error = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ error = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (error) {
pr_warn("scsi%d: No suitable DMA mask available\n",
host->host_no);
diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c
index 421fe869a11e..d7509859dc00 100644
--- a/drivers/scsi/arm/acornscsi.c
+++ b/drivers/scsi/arm/acornscsi.c
@@ -2890,7 +2890,7 @@ static struct scsi_host_template acornscsi_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.proc_name = "acornscsi",
};
diff --git a/drivers/scsi/arm/arxescsi.c b/drivers/scsi/arm/arxescsi.c
index 3110736fd337..5e9dd9f34821 100644
--- a/drivers/scsi/arm/arxescsi.c
+++ b/drivers/scsi/arm/arxescsi.c
@@ -245,7 +245,7 @@ static struct scsi_host_template arxescsi_template = {
.can_queue = 0,
.this_id = 7,
.sg_tablesize = SG_ALL,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.proc_name = "arxescsi",
};
diff --git a/drivers/scsi/arm/cumana_1.c b/drivers/scsi/arm/cumana_1.c
index ae1d809904fb..e2d2a81d8e0b 100644
--- a/drivers/scsi/arm/cumana_1.c
+++ b/drivers/scsi/arm/cumana_1.c
@@ -221,10 +221,10 @@ static struct scsi_host_template cumanascsi_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING,
.proc_name = "CumanaSCSI-1",
.cmd_size = NCR5380_CMD_SIZE,
.max_sectors = 128,
+ .dma_boundary = PAGE_SIZE - 1,
};
static int cumanascsi1_probe(struct expansion_card *ec,
diff --git a/drivers/scsi/arm/cumana_2.c b/drivers/scsi/arm/cumana_2.c
index edce5f3cfdba..40afcbd8de61 100644
--- a/drivers/scsi/arm/cumana_2.c
+++ b/drivers/scsi/arm/cumana_2.c
@@ -367,7 +367,6 @@ static struct scsi_host_template cumanascsi2_template = {
.this_id = 7,
.sg_tablesize = SG_MAX_SEGMENTS,
.dma_boundary = IOMD_DMA_BOUNDARY,
- .use_clustering = DISABLE_CLUSTERING,
.proc_name = "cumanascsi2",
};
diff --git a/drivers/scsi/arm/eesox.c b/drivers/scsi/arm/eesox.c
index e93e047f4316..8f64c370a8a7 100644
--- a/drivers/scsi/arm/eesox.c
+++ b/drivers/scsi/arm/eesox.c
@@ -486,7 +486,6 @@ static struct scsi_host_template eesox_template = {
.this_id = 7,
.sg_tablesize = SG_MAX_SEGMENTS,
.dma_boundary = IOMD_DMA_BOUNDARY,
- .use_clustering = DISABLE_CLUSTERING,
.proc_name = "eesox",
};
diff --git a/drivers/scsi/arm/oak.c b/drivers/scsi/arm/oak.c
index 05b7f755499b..8f2efaab8d46 100644
--- a/drivers/scsi/arm/oak.c
+++ b/drivers/scsi/arm/oak.c
@@ -110,7 +110,7 @@ static struct scsi_host_template oakscsi_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.proc_name = "oakscsi",
.cmd_size = NCR5380_CMD_SIZE,
.max_sectors = 128,
diff --git a/drivers/scsi/arm/powertec.c b/drivers/scsi/arm/powertec.c
index 79aa88911b7f..759f95ba993c 100644
--- a/drivers/scsi/arm/powertec.c
+++ b/drivers/scsi/arm/powertec.c
@@ -294,7 +294,6 @@ static struct scsi_host_template powertecscsi_template = {
.sg_tablesize = SG_MAX_SEGMENTS,
.dma_boundary = IOMD_DMA_BOUNDARY,
.cmd_per_lun = 2,
- .use_clustering = ENABLE_CLUSTERING,
.proc_name = "powertec",
};
diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c
index 89f5154c40b6..a503dc50c4f8 100644
--- a/drivers/scsi/atari_scsi.c
+++ b/drivers/scsi/atari_scsi.c
@@ -714,7 +714,7 @@ static struct scsi_host_template atari_scsi_template = {
.eh_host_reset_handler = atari_scsi_host_reset,
.this_id = 7,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.cmd_size = NCR5380_CMD_SIZE,
};
diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c
index 802d15018ec0..1267200380f8 100644
--- a/drivers/scsi/atp870u.c
+++ b/drivers/scsi/atp870u.c
@@ -1681,7 +1681,6 @@ static struct scsi_host_template atp870u_template = {
.can_queue = qcnt /* can_queue */,
.this_id = 7 /* SCSI ID */,
.sg_tablesize = ATP870U_SCATTER /*SG_ALL*/ /*SG_NONE*/,
- .use_clustering = ENABLE_CLUSTERING,
.max_sectors = ATP870U_MAX_SECTORS,
};
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index effb6fc95af4..39f3820572b4 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -214,12 +214,6 @@ static char const *cqe_desc[] = {
"CXN_KILLED_IMM_DATA_RCVD"
};
-static int beiscsi_slave_configure(struct scsi_device *sdev)
-{
- blk_queue_max_segment_size(sdev->request_queue, 65536);
- return 0;
-}
-
static int beiscsi_eh_abort(struct scsi_cmnd *sc)
{
struct iscsi_task *abrt_task = (struct iscsi_task *)sc->SCp.ptr;
@@ -393,7 +387,6 @@ static struct scsi_host_template beiscsi_sht = {
.proc_name = DRV_NAME,
.queuecommand = iscsi_queuecommand,
.change_queue_depth = scsi_change_queue_depth,
- .slave_configure = beiscsi_slave_configure,
.target_alloc = iscsi_target_alloc,
.eh_timed_out = iscsi_eh_cmd_timed_out,
.eh_abort_handler = beiscsi_eh_abort,
@@ -404,8 +397,8 @@ static struct scsi_host_template beiscsi_sht = {
.can_queue = BE2_IO_DEPTH,
.this_id = -1,
.max_sectors = BEISCSI_MAX_SECTORS,
+ .max_segment_size = 65536,
.cmd_per_lun = BEISCSI_CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.vendor_id = SCSI_NL_VID_TYPE_PCI | BE_VENDOR_ID,
.track_queue_depth = 1,
};
diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c
index 16d3aeb0e572..9631877aba4f 100644
--- a/drivers/scsi/bfa/bfa_ioc.c
+++ b/drivers/scsi/bfa/bfa_ioc.c
@@ -3819,7 +3819,7 @@ bfa_sfp_scn(struct bfa_sfp_s *sfp, struct bfi_mbmsg_s *msg)
sfp->state = BFA_SFP_STATE_REMOVED;
sfp->data_valid = 0;
bfa_sfp_scn_aen_post(sfp, rsp);
- break;
+ break;
case BFA_SFP_SCN_FAILED:
sfp->state = BFA_SFP_STATE_FAILED;
sfp->data_valid = 0;
@@ -5763,7 +5763,7 @@ bfa_phy_intr(void *phyarg, struct bfi_mbmsg_s *msg)
(struct bfa_phy_stats_s *) phy->ubuf;
bfa_phy_ntoh32((u32 *)stats, (u32 *)phy->dbuf_kva,
sizeof(struct bfa_phy_stats_s));
- bfa_trc(phy, stats->status);
+ bfa_trc(phy, stats->status);
}
phy->status = status;
diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index 911efc98d1fd..42a0caf6740d 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -739,14 +739,10 @@ bfad_pci_init(struct pci_dev *pdev, struct bfad_s *bfad)
pci_set_master(pdev);
-
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) ||
- (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0)) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) ||
- (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) {
- printk(KERN_ERR "pci_set_dma_mask fail %p\n", pdev);
- goto out_release_region;
- }
+ if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) ||
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
+ printk(KERN_ERR "dma_set_mask_and_coherent fail %p\n", pdev);
+ goto out_release_region;
}
/* Enable PCIE Advanced Error Recovery (AER) if kernel supports */
@@ -1565,9 +1561,9 @@ bfad_pci_slot_reset(struct pci_dev *pdev)
pci_save_state(pdev);
pci_set_master(pdev);
- if (pci_set_dma_mask(bfad->pcidev, DMA_BIT_MASK(64)) != 0)
- if (pci_set_dma_mask(bfad->pcidev, DMA_BIT_MASK(32)) != 0)
- goto out_disable_device;
+ if (dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(64)) ||
+ dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(32)))
+ goto out_disable_device;
if (restart_bfa(bfad) == -1)
goto out_disable_device;
diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c
index c4a33317d344..394930cbaa13 100644
--- a/drivers/scsi/bfa/bfad_im.c
+++ b/drivers/scsi/bfa/bfad_im.c
@@ -817,7 +817,6 @@ struct scsi_host_template bfad_im_scsi_host_template = {
.this_id = -1,
.sg_tablesize = BFAD_IO_MAX_SGE,
.cmd_per_lun = 3,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = bfad_im_host_attrs,
.max_sectors = BFAD_MAX_SECTORS,
.vendor_id = BFA_PCI_VENDOR_ID_BROCADE,
@@ -840,7 +839,6 @@ struct scsi_host_template bfad_im_vport_template = {
.this_id = -1,
.sg_tablesize = BFAD_IO_MAX_SGE,
.cmd_per_lun = 3,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = bfad_im_vport_attrs,
.max_sectors = BFAD_MAX_SECTORS,
};
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index bcd30e2374f1..2e4e7159ebf9 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2970,7 +2970,6 @@ static struct scsi_host_template bnx2fc_shost_template = {
.change_queue_depth = scsi_change_queue_depth,
.this_id = -1,
.cmd_per_lun = 3,
- .use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = BNX2FC_MAX_BDS_PER_CMD,
.max_sectors = 1024,
.track_queue_depth = 1,
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index e9e669a6c2bc..91f5316aa3ab 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -1906,7 +1906,6 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session,
struct iscsi_task *task;
struct scsi_cmnd *sc;
int rc = 0;
- int cpu;
spin_lock(&session->back_lock);
task = iscsi_itt_to_task(bnx2i_conn->cls_conn->dd_data,
@@ -1917,14 +1916,9 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session,
}
sc = task->sc;
- if (!blk_rq_cpu_valid(sc->request))
- cpu = smp_processor_id();
- else
- cpu = sc->request->cpu;
-
spin_unlock(&session->back_lock);
- p = &per_cpu(bnx2i_percpu, cpu);
+ p = &per_cpu(bnx2i_percpu, blk_mq_rq_cpu(sc->request));
spin_lock(&p->p_work_lock);
if (unlikely(!p->iothread)) {
rc = -EINVAL;
@@ -2433,7 +2427,6 @@ static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba,
{
u32 cid_addr;
struct bnx2i_endpoint *ep;
- u32 cid_num;
ep = bnx2i_find_ep_in_ofld_list(hba, ofld_kcqe->iscsi_conn_id);
if (!ep) {
@@ -2468,7 +2461,6 @@ static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba,
} else {
ep->state = EP_STATE_OFLD_COMPL;
cid_addr = ofld_kcqe->iscsi_conn_context_id;
- cid_num = bnx2i_get_cid_num(ep);
ep->ep_cid = cid_addr;
ep->qp.ctx_base = NULL;
}
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
index de0a507577ef..69c75426c5eb 100644
--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -2263,7 +2263,6 @@ static struct scsi_host_template bnx2i_host_template = {
.max_sectors = 127,
.cmd_per_lun = 128,
.this_id = -1,
- .use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = ISCSI_MAX_BDS_PER_CMD,
.shost_attrs = bnx2i_dev_attributes,
.track_queue_depth = 1,
diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c
index 1a458ce08210..cf629380a981 100644
--- a/drivers/scsi/csiostor/csio_init.c
+++ b/drivers/scsi/csiostor/csio_init.c
@@ -255,7 +255,6 @@ static void
csio_hw_exit_workers(struct csio_hw *hw)
{
cancel_work_sync(&hw->evtq_work);
- flush_scheduled_work();
}
static int
@@ -646,7 +645,7 @@ csio_shost_init(struct csio_hw *hw, struct device *dev,
if (csio_lnode_init(ln, hw, pln))
goto err_shost_put;
- if (scsi_add_host(shost, dev))
+ if (scsi_add_host_with_dma(shost, dev, &hw->pdev->dev))
goto err_lnode_exit;
return ln;
diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c
index 8c15b7acb4b7..bc5547a62c00 100644
--- a/drivers/scsi/csiostor/csio_scsi.c
+++ b/drivers/scsi/csiostor/csio_scsi.c
@@ -1780,16 +1780,10 @@ csio_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmnd)
int nsge = 0;
int rv = SCSI_MLQUEUE_HOST_BUSY, nr;
int retval;
- int cpu;
struct csio_scsi_qset *sqset;
struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
- if (!blk_rq_cpu_valid(cmnd->request))
- cpu = smp_processor_id();
- else
- cpu = cmnd->request->cpu;
-
- sqset = &hw->sqset[ln->portid][cpu];
+ sqset = &hw->sqset[ln->portid][blk_mq_rq_cpu(cmnd->request)];
nr = fc_remote_port_chkready(rport);
if (nr) {
@@ -2280,7 +2274,6 @@ struct scsi_host_template csio_fcoe_shost_template = {
.this_id = -1,
.sg_tablesize = CSIO_SCSI_MAX_SGE,
.cmd_per_lun = CSIO_MAX_CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = csio_fcoe_lport_attrs,
.max_sectors = CSIO_MAX_SECTOR_SIZE,
};
@@ -2300,7 +2293,6 @@ struct scsi_host_template csio_fcoe_shost_vport_template = {
.this_id = -1,
.sg_tablesize = CSIO_SCSI_MAX_SGE,
.cmd_per_lun = CSIO_MAX_CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = csio_fcoe_vport_attrs,
.max_sectors = CSIO_MAX_SECTOR_SIZE,
};
diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
index bf07735275a4..8a20411699d9 100644
--- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
+++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
@@ -95,7 +95,7 @@ static struct scsi_host_template cxgb3i_host_template = {
.eh_device_reset_handler = iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
.target_alloc = iscsi_target_alloc,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.this_id = -1,
.track_queue_depth = 1,
};
diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig
index 594f593c8821..f36b76e8e12c 100644
--- a/drivers/scsi/cxgbi/cxgb4i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig
@@ -1,8 +1,8 @@
config SCSI_CXGB4_ISCSI
tristate "Chelsio T4 iSCSI support"
depends on PCI && INET && (IPV6 || IPV6=n)
- select NETDEVICES
- select ETHERNET
+ depends on THERMAL || !THERMAL
+ depends on ETHERNET
select NET_VENDOR_CHELSIO
select CHELSIO_T4
select CHELSIO_LIB
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 907dd8792a0a..49f8028ac524 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -113,7 +113,7 @@ static struct scsi_host_template cxgb4i_host_template = {
.eh_device_reset_handler = iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
.target_alloc = iscsi_target_alloc,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.this_id = -1,
.track_queue_depth = 1,
};
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 6637116529aa..bfa13e3b191c 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -3088,12 +3088,6 @@ static ssize_t hwq_mode_store(struct device *dev,
return -EINVAL;
}
- if ((mode == HWQ_MODE_TAG) && !shost_use_blk_mq(shost)) {
- dev_info(cfgdev, "SCSI-MQ is not enabled, use a different "
- "HWQ steering mode.\n");
- return -EINVAL;
- }
-
afu->hwq_mode = mode;
return count;
@@ -3180,7 +3174,6 @@ static struct scsi_host_template driver_template = {
.this_id = -1,
.sg_tablesize = 1, /* No scatter gather support */
.max_sectors = CXLFLASH_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = cxlflash_host_attrs,
.sdev_attrs = cxlflash_dev_attrs,
};
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
index 8c55ec6e1827..13fbb2eab842 100644
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -4631,7 +4631,7 @@ static struct scsi_host_template dc395x_driver_template = {
.cmd_per_lun = DC395x_MAX_CMD_PER_LUN,
.eh_abort_handler = dc395x_eh_abort,
.eh_bus_reset_handler = dc395x_eh_bus_reset,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
};
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 12dc7100bb4c..d7ac498ba35a 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -1071,28 +1071,29 @@ static void alua_check(struct scsi_device *sdev, bool force)
* Fail I/O to all paths not in state
* active/optimized or active/non-optimized.
*/
-static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
+static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
{
struct alua_dh_data *h = sdev->handler_data;
struct alua_port_group *pg;
unsigned char state = SCSI_ACCESS_STATE_OPTIMAL;
- int ret = BLKPREP_OK;
rcu_read_lock();
pg = rcu_dereference(h->pg);
if (pg)
state = pg->state;
rcu_read_unlock();
- if (state == SCSI_ACCESS_STATE_TRANSITIONING)
- ret = BLKPREP_DEFER;
- else if (state != SCSI_ACCESS_STATE_OPTIMAL &&
- state != SCSI_ACCESS_STATE_ACTIVE &&
- state != SCSI_ACCESS_STATE_LBA) {
- ret = BLKPREP_KILL;
+
+ switch (state) {
+ case SCSI_ACCESS_STATE_OPTIMAL:
+ case SCSI_ACCESS_STATE_ACTIVE:
+ case SCSI_ACCESS_STATE_LBA:
+ return BLK_STS_OK;
+ case SCSI_ACCESS_STATE_TRANSITIONING:
+ return BLK_STS_RESOURCE;
+ default:
req->rq_flags |= RQF_QUIET;
+ return BLK_STS_IOERR;
}
- return ret;
-
}
static void alua_rescan(struct scsi_device *sdev)
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index 95c47909a58f..bea8e13febb6 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -341,17 +341,17 @@ static int clariion_check_sense(struct scsi_device *sdev,
return SCSI_RETURN_NOT_HANDLED;
}
-static int clariion_prep_fn(struct scsi_device *sdev, struct request *req)
+static blk_status_t clariion_prep_fn(struct scsi_device *sdev,
+ struct request *req)
{
struct clariion_dh_data *h = sdev->handler_data;
- int ret = BLKPREP_OK;
if (h->lun_state != CLARIION_LUN_OWNED) {
- ret = BLKPREP_KILL;
req->rq_flags |= RQF_QUIET;
+ return BLK_STS_IOERR;
}
- return ret;
+ return BLK_STS_OK;
}
static int clariion_std_inquiry(struct scsi_device *sdev,
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index e65a0ebb4b54..80129b033855 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -172,17 +172,16 @@ retry:
return rc;
}
-static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req)
+static blk_status_t hp_sw_prep_fn(struct scsi_device *sdev, struct request *req)
{
struct hp_sw_dh_data *h = sdev->handler_data;
- int ret = BLKPREP_OK;
if (h->path_state != HP_SW_PATH_ACTIVE) {
- ret = BLKPREP_KILL;
req->rq_flags |= RQF_QUIET;
+ return BLK_STS_IOERR;
}
- return ret;
+ return BLK_STS_OK;
}
/*
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index d27fabae8ddd..65f1fe343c64 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -642,17 +642,16 @@ done:
return 0;
}
-static int rdac_prep_fn(struct scsi_device *sdev, struct request *req)
+static blk_status_t rdac_prep_fn(struct scsi_device *sdev, struct request *req)
{
struct rdac_dh_data *h = sdev->handler_data;
- int ret = BLKPREP_OK;
if (h->state != RDAC_STATE_ACTIVE) {
- ret = BLKPREP_KILL;
req->rq_flags |= RQF_QUIET;
+ return BLK_STS_IOERR;
}
- return ret;
+ return BLK_STS_OK;
}
static int rdac_check_sense(struct scsi_device *sdev,
diff --git a/drivers/scsi/dmx3191d.c b/drivers/scsi/dmx3191d.c
index 003c3d726238..8db1cc552932 100644
--- a/drivers/scsi/dmx3191d.c
+++ b/drivers/scsi/dmx3191d.c
@@ -63,7 +63,7 @@ static struct scsi_host_template dmx3191d_driver_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.cmd_size = NCR5380_CMD_SIZE,
};
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index 37de8fb186d7..70d1a18278af 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -934,15 +934,15 @@ static int adpt_install_hba(struct scsi_host_template* sht, struct pci_dev* pDev
* See if we should enable dma64 mode.
*/
if (sizeof(dma_addr_t) > 4 &&
- pci_set_dma_mask(pDev, DMA_BIT_MASK(64)) == 0) {
- if (dma_get_required_mask(&pDev->dev) > DMA_BIT_MASK(32))
- dma64 = 1;
- }
- if (!dma64 && pci_set_dma_mask(pDev, DMA_BIT_MASK(32)) != 0)
+ dma_get_required_mask(&pDev->dev) > DMA_BIT_MASK(32) &&
+ dma_set_mask(&pDev->dev, DMA_BIT_MASK(64)) == 0)
+ dma64 = 1;
+
+ if (!dma64 && dma_set_mask(&pDev->dev, DMA_BIT_MASK(32)) != 0)
return -EINVAL;
/* adapter only supports message blocks below 4GB */
- pci_set_consistent_dma_mask(pDev, DMA_BIT_MASK(32));
+ dma_set_coherent_mask(&pDev->dev, DMA_BIT_MASK(32));
base_addr0_phys = pci_resource_start(pDev,0);
hba_map0_area_size = pci_resource_len(pDev,0);
@@ -3569,7 +3569,6 @@ static struct scsi_host_template driver_template = {
.slave_configure = adpt_slave_configure,
.can_queue = MAX_TO_IOP_MESSAGES,
.this_id = 7,
- .use_clustering = ENABLE_CLUSTERING,
};
static int __init adpt_init(void)
diff --git a/drivers/scsi/esas2r/esas2r_init.c b/drivers/scsi/esas2r/esas2r_init.c
index bbe77db8938d..46b2c83ba21f 100644
--- a/drivers/scsi/esas2r/esas2r_init.c
+++ b/drivers/scsi/esas2r/esas2r_init.c
@@ -266,6 +266,7 @@ int esas2r_init_adapter(struct Scsi_Host *host, struct pci_dev *pcid,
int i;
void *next_uncached;
struct esas2r_request *first_request, *last_request;
+ bool dma64 = false;
if (index >= MAX_ADAPTERS) {
esas2r_log(ESAS2R_LOG_CRIT,
@@ -286,42 +287,20 @@ int esas2r_init_adapter(struct Scsi_Host *host, struct pci_dev *pcid,
a->pcid = pcid;
a->host = host;
- if (sizeof(dma_addr_t) > 4) {
- const uint64_t required_mask = dma_get_required_mask
- (&pcid->dev);
- if (required_mask > DMA_BIT_MASK(32)
- && !pci_set_dma_mask(pcid, DMA_BIT_MASK(64))
- && !pci_set_consistent_dma_mask(pcid,
- DMA_BIT_MASK(64))) {
- esas2r_log_dev(ESAS2R_LOG_INFO,
- &(a->pcid->dev),
- "64-bit PCI addressing enabled\n");
- } else if (!pci_set_dma_mask(pcid, DMA_BIT_MASK(32))
- && !pci_set_consistent_dma_mask(pcid,
- DMA_BIT_MASK(32))) {
- esas2r_log_dev(ESAS2R_LOG_INFO,
- &(a->pcid->dev),
- "32-bit PCI addressing enabled\n");
- } else {
- esas2r_log(ESAS2R_LOG_CRIT,
- "failed to set DMA mask");
- esas2r_kill_adapter(index);
- return 0;
- }
- } else {
- if (!pci_set_dma_mask(pcid, DMA_BIT_MASK(32))
- && !pci_set_consistent_dma_mask(pcid,
- DMA_BIT_MASK(32))) {
- esas2r_log_dev(ESAS2R_LOG_INFO,
- &(a->pcid->dev),
- "32-bit PCI addressing enabled\n");
- } else {
- esas2r_log(ESAS2R_LOG_CRIT,
- "failed to set DMA mask");
- esas2r_kill_adapter(index);
- return 0;
- }
+ if (sizeof(dma_addr_t) > 4 &&
+ dma_get_required_mask(&pcid->dev) > DMA_BIT_MASK(32) &&
+ !dma_set_mask_and_coherent(&pcid->dev, DMA_BIT_MASK(64)))
+ dma64 = true;
+
+ if (!dma64 && dma_set_mask_and_coherent(&pcid->dev, DMA_BIT_MASK(32))) {
+ esas2r_log(ESAS2R_LOG_CRIT, "failed to set DMA mask");
+ esas2r_kill_adapter(index);
+ return 0;
}
+
+ esas2r_log_dev(ESAS2R_LOG_INFO, &pcid->dev,
+ "%s-bit PCI addressing enabled\n", dma64 ? "64" : "32");
+
esas2r_adapters[index] = a;
sprintf(a->name, ESAS2R_DRVR_NAME "_%02d", index);
esas2r_debug("new adapter %p, name %s", a, a->name);
diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c
index c07118617d89..64397d441bae 100644
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c
@@ -250,7 +250,6 @@ static struct scsi_host_template driver_template = {
ESAS2R_DEFAULT_CMD_PER_LUN,
.present = 0,
.unchecked_isa_dma = 0,
- .use_clustering = ENABLE_CLUSTERING,
.emulated = 0,
.proc_name = ESAS2R_DRVR_NAME,
.change_queue_depth = scsi_change_queue_depth,
diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c
index ac7da9db7317..465df475f753 100644
--- a/drivers/scsi/esp_scsi.c
+++ b/drivers/scsi/esp_scsi.c
@@ -2676,7 +2676,6 @@ struct scsi_host_template scsi_esp_template = {
.can_queue = 7,
.this_id = 7,
.sg_tablesize = SG_ALL,
- .use_clustering = ENABLE_CLUSTERING,
.max_sectors = 0xffff,
.skip_settle_delay = 1,
};
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index f46b312d04bc..cd19be3f3405 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -286,7 +286,6 @@ static struct scsi_host_template fcoe_shost_template = {
.this_id = -1,
.cmd_per_lun = 3,
.can_queue = FCOE_MAX_OUTSTANDING_COMMANDS,
- .use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = SG_ALL,
.max_sectors = 0xffff,
.track_queue_depth = 1,
@@ -1670,7 +1669,6 @@ static void fcoe_recv_frame(struct sk_buff *skb)
struct fc_stats *stats;
struct fcoe_crc_eof crc_eof;
struct fc_frame *fp;
- struct fcoe_port *port;
struct fcoe_hdr *hp;
fr = fcoe_dev_from_skb(skb);
@@ -1688,7 +1686,6 @@ static void fcoe_recv_frame(struct sk_buff *skb)
skb_end_pointer(skb), skb->csum,
skb->dev ? skb->dev->name : "<NULL>");
- port = lport_priv(lport);
skb_linearize(skb); /* check for skb_is_nonlinear is within skb_linearize */
/*
@@ -1859,7 +1856,6 @@ static int fcoe_device_notification(struct notifier_block *notifier,
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
struct fcoe_ctlr *ctlr;
struct fcoe_interface *fcoe;
- struct fcoe_port *port;
struct fc_stats *stats;
u32 link_possible = 1;
u32 mfs;
@@ -1897,7 +1893,6 @@ static int fcoe_device_notification(struct notifier_block *notifier,
break;
case NETDEV_UNREGISTER:
list_del(&fcoe->list);
- port = lport_priv(ctlr->lp);
fcoe_vport_remove(lport);
mutex_lock(&fcoe_config_mutex);
fcoe_if_destroy(lport);
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index cc461fd7bef1..5b3534b0deda 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -115,7 +115,6 @@ static struct scsi_host_template fnic_host_template = {
.this_id = -1,
.cmd_per_lun = 3,
.can_queue = FNIC_DFLT_IO_REQ,
- .use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = FNIC_MAX_SG_DESC_CNT,
.max_sectors = 0xffff,
.shost_attrs = fnic_attrs,
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 96acfcecd540..cafbcfb85bfa 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -2274,7 +2274,7 @@ fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc)
return SCSI_NO_TAG;
sc->tag = sc->request->tag = dummy->tag;
- sc->request->special = sc;
+ sc->host_scribble = (unsigned char *)dummy;
return dummy->tag;
}
@@ -2286,7 +2286,7 @@ fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc)
static inline void
fnic_scsi_host_end_tag(struct fnic *fnic, struct scsi_cmnd *sc)
{
- struct request *dummy = sc->request->special;
+ struct request *dummy = (struct request *)sc->host_scribble;
blk_mq_free_request(dummy);
}
diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c
index 8271785bdb93..bf0fd2aeb92e 100644
--- a/drivers/scsi/fnic/fnic_trace.c
+++ b/drivers/scsi/fnic/fnic_trace.c
@@ -468,14 +468,13 @@ int fnic_trace_buf_init(void)
fnic_max_trace_entries = (trace_max_pages * PAGE_SIZE)/
FNIC_ENTRY_SIZE_BYTES;
- fnic_trace_buf_p = (unsigned long)vmalloc((trace_max_pages * PAGE_SIZE));
+ fnic_trace_buf_p = (unsigned long)vzalloc(trace_max_pages * PAGE_SIZE);
if (!fnic_trace_buf_p) {
printk(KERN_ERR PFX "Failed to allocate memory "
"for fnic_trace_buf_p\n");
err = -ENOMEM;
goto err_fnic_trace_buf_init;
}
- memset((void *)fnic_trace_buf_p, 0, (trace_max_pages * PAGE_SIZE));
fnic_trace_entries.page_offset =
vmalloc(array_size(fnic_max_trace_entries,
diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index fc538181f8df..9cdca0625498 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -700,7 +700,7 @@ static struct scsi_host_template driver_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.cmd_size = NCR5380_CMD_SIZE,
.max_sectors = 128,
};
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 16709735b546..194c294f9b6c 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -4680,7 +4680,6 @@ static struct scsi_host_template gdth_template = {
.sg_tablesize = GDTH_MAXSG,
.cmd_per_lun = GDTH_MAXC_P_L,
.unchecked_isa_dma = 1,
- .use_clustering = ENABLE_CLUSTERING,
.no_write_same = 1,
};
diff --git a/drivers/scsi/gvp11.c b/drivers/scsi/gvp11.c
index a27fc49ebd3a..d2acd0d826e2 100644
--- a/drivers/scsi/gvp11.c
+++ b/drivers/scsi/gvp11.c
@@ -184,7 +184,7 @@ static struct scsi_host_template gvp11_scsi_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING
+ .dma_boundary = PAGE_SIZE - 1,
};
static int check_wd33c93(struct gvp11_scsiregs *regs)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index 0ddb53c8a2e2..af291947a54d 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -69,6 +69,12 @@
#define HISI_SAS_SATA_PROTOCOL_FPDMA 0x8
#define HISI_SAS_SATA_PROTOCOL_ATAPI 0x10
+#define HISI_SAS_DIF_PROT_MASK (SHOST_DIF_TYPE1_PROTECTION | \
+ SHOST_DIF_TYPE2_PROTECTION | \
+ SHOST_DIF_TYPE3_PROTECTION)
+
+#define HISI_SAS_PROT_MASK (HISI_SAS_DIF_PROT_MASK)
+
struct hisi_hba;
enum {
@@ -211,7 +217,7 @@ struct hisi_sas_slot {
/* Do not reorder/change members after here */
void *buf;
dma_addr_t buf_dma;
- int idx;
+ u16 idx;
};
struct hisi_sas_hw {
@@ -268,6 +274,8 @@ struct hisi_hba {
struct pci_dev *pci_dev;
struct device *dev;
+ int prot_mask;
+
void __iomem *regs;
void __iomem *sgpio_regs;
struct regmap *ctrl;
@@ -322,6 +330,8 @@ struct hisi_hba {
unsigned long sata_dev_bitmap[BITS_TO_LONGS(HISI_SAS_MAX_DEVICES)];
struct work_struct rst_work;
u32 phy_state;
+ u32 intr_coal_ticks; /* Time of interrupt coalesce in us */
+ u32 intr_coal_count; /* Interrupt count to coalesce */
};
/* Generic HW DMA host memory structures */
@@ -468,7 +478,6 @@ extern int hisi_sas_remove(struct platform_device *pdev);
extern int hisi_sas_slave_configure(struct scsi_device *sdev);
extern int hisi_sas_scan_finished(struct Scsi_Host *shost, unsigned long time);
extern void hisi_sas_scan_start(struct Scsi_Host *shost);
-extern struct device_attribute *host_attrs[];
extern int hisi_sas_host_reset(struct Scsi_Host *shost, int reset_type);
extern void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy);
extern void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba,
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index b3f01d5b821b..eed7fc5b3389 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -296,42 +296,109 @@ static void hisi_sas_task_prep_abort(struct hisi_hba *hisi_hba,
device_id, abort_flag, tag_to_abort);
}
+static void hisi_sas_dma_unmap(struct hisi_hba *hisi_hba,
+ struct sas_task *task, int n_elem,
+ int n_elem_req, int n_elem_resp)
+{
+ struct device *dev = hisi_hba->dev;
+
+ if (!sas_protocol_ata(task->task_proto)) {
+ if (task->num_scatter) {
+ if (n_elem)
+ dma_unmap_sg(dev, task->scatter,
+ task->num_scatter,
+ task->data_dir);
+ } else if (task->task_proto & SAS_PROTOCOL_SMP) {
+ if (n_elem_req)
+ dma_unmap_sg(dev, &task->smp_task.smp_req,
+ 1, DMA_TO_DEVICE);
+ if (n_elem_resp)
+ dma_unmap_sg(dev, &task->smp_task.smp_resp,
+ 1, DMA_FROM_DEVICE);
+ }
+ }
+}
+
+static int hisi_sas_dma_map(struct hisi_hba *hisi_hba,
+ struct sas_task *task, int *n_elem,
+ int *n_elem_req, int *n_elem_resp)
+{
+ struct device *dev = hisi_hba->dev;
+ int rc;
+
+ if (sas_protocol_ata(task->task_proto)) {
+ *n_elem = task->num_scatter;
+ } else {
+ unsigned int req_len, resp_len;
+
+ if (task->num_scatter) {
+ *n_elem = dma_map_sg(dev, task->scatter,
+ task->num_scatter, task->data_dir);
+ if (!*n_elem) {
+ rc = -ENOMEM;
+ goto prep_out;
+ }
+ } else if (task->task_proto & SAS_PROTOCOL_SMP) {
+ *n_elem_req = dma_map_sg(dev, &task->smp_task.smp_req,
+ 1, DMA_TO_DEVICE);
+ if (!*n_elem_req) {
+ rc = -ENOMEM;
+ goto prep_out;
+ }
+ req_len = sg_dma_len(&task->smp_task.smp_req);
+ if (req_len & 0x3) {
+ rc = -EINVAL;
+ goto err_out_dma_unmap;
+ }
+ *n_elem_resp = dma_map_sg(dev, &task->smp_task.smp_resp,
+ 1, DMA_FROM_DEVICE);
+ if (!*n_elem_resp) {
+ rc = -ENOMEM;
+ goto err_out_dma_unmap;
+ }
+ resp_len = sg_dma_len(&task->smp_task.smp_resp);
+ if (resp_len & 0x3) {
+ rc = -EINVAL;
+ goto err_out_dma_unmap;
+ }
+ }
+ }
+
+ if (*n_elem > HISI_SAS_SGE_PAGE_CNT) {
+ dev_err(dev, "task prep: n_elem(%d) > HISI_SAS_SGE_PAGE_CNT",
+ *n_elem);
+ rc = -EINVAL;
+ goto err_out_dma_unmap;
+ }
+ return 0;
+
+err_out_dma_unmap:
+ /* It would be better to call dma_unmap_sg() here, but it's messy */
+ hisi_sas_dma_unmap(hisi_hba, task, *n_elem,
+ *n_elem_req, *n_elem_resp);
+prep_out:
+ return rc;
+}
+
static int hisi_sas_task_prep(struct sas_task *task,
struct hisi_sas_dq **dq_pointer,
bool is_tmf, struct hisi_sas_tmf_task *tmf,
int *pass)
{
struct domain_device *device = task->dev;
- struct hisi_hba *hisi_hba;
+ struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
struct hisi_sas_device *sas_dev = device->lldd_dev;
struct hisi_sas_port *port;
struct hisi_sas_slot *slot;
struct hisi_sas_cmd_hdr *cmd_hdr_base;
struct asd_sas_port *sas_port = device->port;
- struct device *dev;
+ struct device *dev = hisi_hba->dev;
int dlvry_queue_slot, dlvry_queue, rc, slot_idx;
int n_elem = 0, n_elem_req = 0, n_elem_resp = 0;
struct hisi_sas_dq *dq;
unsigned long flags;
int wr_q_index;
- if (!sas_port) {
- struct task_status_struct *ts = &task->task_status;
-
- ts->resp = SAS_TASK_UNDELIVERED;
- ts->stat = SAS_PHY_DOWN;
- /*
- * libsas will use dev->port, should
- * not call task_done for sata
- */
- if (device->dev_type != SAS_SATA_DEV)
- task->task_done(task);
- return -ECOMM;
- }
-
- hisi_hba = dev_to_hisi_hba(device);
- dev = hisi_hba->dev;
-
if (DEV_IS_GONE(sas_dev)) {
if (sas_dev)
dev_info(dev, "task prep: device %d not ready\n",
@@ -355,49 +422,10 @@ static int hisi_sas_task_prep(struct sas_task *task,
return -ECOMM;
}
- if (!sas_protocol_ata(task->task_proto)) {
- unsigned int req_len, resp_len;
-
- if (task->num_scatter) {
- n_elem = dma_map_sg(dev, task->scatter,
- task->num_scatter, task->data_dir);
- if (!n_elem) {
- rc = -ENOMEM;
- goto prep_out;
- }
- } else if (task->task_proto & SAS_PROTOCOL_SMP) {
- n_elem_req = dma_map_sg(dev, &task->smp_task.smp_req,
- 1, DMA_TO_DEVICE);
- if (!n_elem_req) {
- rc = -ENOMEM;
- goto prep_out;
- }
- req_len = sg_dma_len(&task->smp_task.smp_req);
- if (req_len & 0x3) {
- rc = -EINVAL;
- goto err_out_dma_unmap;
- }
- n_elem_resp = dma_map_sg(dev, &task->smp_task.smp_resp,
- 1, DMA_FROM_DEVICE);
- if (!n_elem_resp) {
- rc = -ENOMEM;
- goto err_out_dma_unmap;
- }
- resp_len = sg_dma_len(&task->smp_task.smp_resp);
- if (resp_len & 0x3) {
- rc = -EINVAL;
- goto err_out_dma_unmap;
- }
- }
- } else
- n_elem = task->num_scatter;
-
- if (n_elem > HISI_SAS_SGE_PAGE_CNT) {
- dev_err(dev, "task prep: n_elem(%d) > HISI_SAS_SGE_PAGE_CNT",
- n_elem);
- rc = -EINVAL;
- goto err_out_dma_unmap;
- }
+ rc = hisi_sas_dma_map(hisi_hba, task, &n_elem,
+ &n_elem_req, &n_elem_resp);
+ if (rc < 0)
+ goto prep_out;
if (hisi_hba->hw->slot_index_alloc)
rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device);
@@ -482,19 +510,8 @@ static int hisi_sas_task_prep(struct sas_task *task,
err_out_tag:
hisi_sas_slot_index_free(hisi_hba, slot_idx);
err_out_dma_unmap:
- if (!sas_protocol_ata(task->task_proto)) {
- if (task->num_scatter) {
- dma_unmap_sg(dev, task->scatter, task->num_scatter,
- task->data_dir);
- } else if (task->task_proto & SAS_PROTOCOL_SMP) {
- if (n_elem_req)
- dma_unmap_sg(dev, &task->smp_task.smp_req,
- 1, DMA_TO_DEVICE);
- if (n_elem_resp)
- dma_unmap_sg(dev, &task->smp_task.smp_resp,
- 1, DMA_FROM_DEVICE);
- }
- }
+ hisi_sas_dma_unmap(hisi_hba, task, n_elem,
+ n_elem_req, n_elem_resp);
prep_out:
dev_err(dev, "task prep: failed[%d]!\n", rc);
return rc;
@@ -506,10 +523,29 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags,
u32 rc;
u32 pass = 0;
unsigned long flags;
- struct hisi_hba *hisi_hba = dev_to_hisi_hba(task->dev);
- struct device *dev = hisi_hba->dev;
+ struct hisi_hba *hisi_hba;
+ struct device *dev;
+ struct domain_device *device = task->dev;
+ struct asd_sas_port *sas_port = device->port;
struct hisi_sas_dq *dq = NULL;
+ if (!sas_port) {
+ struct task_status_struct *ts = &task->task_status;
+
+ ts->resp = SAS_TASK_UNDELIVERED;
+ ts->stat = SAS_PHY_DOWN;
+ /*
+ * libsas will use dev->port, should
+ * not call task_done for sata
+ */
+ if (device->dev_type != SAS_SATA_DEV)
+ task->task_done(task);
+ return -ECOMM;
+ }
+
+ hisi_hba = dev_to_hisi_hba(device);
+ dev = hisi_hba->dev;
+
if (unlikely(test_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags))) {
if (in_softirq())
return -EINVAL;
@@ -1459,12 +1495,12 @@ static int hisi_sas_abort_task(struct sas_task *task)
if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) {
struct scsi_cmnd *cmnd = task->uldd_task;
struct hisi_sas_slot *slot = task->lldd_task;
- u32 tag = slot->idx;
+ u16 tag = slot->idx;
int rc2;
int_to_scsilun(cmnd->device->lun, &lun);
tmf_task.tmf = TMF_ABORT_TASK;
- tmf_task.tag_of_task_to_be_managed = cpu_to_le16(tag);
+ tmf_task.tag_of_task_to_be_managed = tag;
rc = hisi_sas_debug_issue_ssp_tmf(task->dev, lun.scsi_lun,
&tmf_task);
@@ -1718,7 +1754,7 @@ static int hisi_sas_query_task(struct sas_task *task)
int_to_scsilun(cmnd->device->lun, &lun);
tmf_task.tmf = TMF_QUERY_TASK;
- tmf_task.tag_of_task_to_be_managed = cpu_to_le16(tag);
+ tmf_task.tag_of_task_to_be_managed = tag;
rc = hisi_sas_debug_issue_ssp_tmf(device,
lun.scsi_lun,
@@ -1994,12 +2030,6 @@ EXPORT_SYMBOL_GPL(hisi_sas_kill_tasklets);
struct scsi_transport_template *hisi_sas_stt;
EXPORT_SYMBOL_GPL(hisi_sas_stt);
-struct device_attribute *host_attrs[] = {
- &dev_attr_phy_event_threshold,
- NULL,
-};
-EXPORT_SYMBOL_GPL(host_attrs);
-
static struct sas_domain_function_template hisi_sas_transport_ops = {
.lldd_dev_found = hisi_sas_dev_found,
.lldd_dev_gone = hisi_sas_dev_gone,
@@ -2380,7 +2410,6 @@ int hisi_sas_probe(struct platform_device *pdev,
shost->max_lun = ~0;
shost->max_channel = 1;
shost->max_cmd_len = 16;
- shost->sg_tablesize = min_t(u16, SG_ALL, HISI_SAS_SGE_PAGE_CNT);
if (hisi_hba->hw->slot_index_alloc) {
shost->can_queue = hisi_hba->hw->max_command_entries;
shost->cmd_per_lun = hisi_hba->hw->max_command_entries;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
index 8df822a4a1bd..28ab52a021cf 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
@@ -510,6 +510,7 @@ static void setup_itct_v1_hw(struct hisi_hba *hisi_hba,
struct hisi_sas_itct *itct = &hisi_hba->itct[device_id];
struct asd_sas_port *sas_port = device->port;
struct hisi_sas_port *port = to_hisi_sas_port(sas_port);
+ u64 sas_addr;
memset(itct, 0, sizeof(*itct));
@@ -534,8 +535,8 @@ static void setup_itct_v1_hw(struct hisi_hba *hisi_hba,
itct->qw0 = cpu_to_le64(qw0);
/* qw1 */
- memcpy(&itct->sas_addr, device->sas_addr, SAS_ADDR_SIZE);
- itct->sas_addr = __swab64(itct->sas_addr);
+ memcpy(&sas_addr, device->sas_addr, SAS_ADDR_SIZE);
+ itct->sas_addr = cpu_to_le64(__swab64(sas_addr));
/* qw2 */
itct->qw2 = cpu_to_le64((500ULL << ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
@@ -561,7 +562,7 @@ static void clear_itct_v1_hw(struct hisi_hba *hisi_hba,
reg_val &= ~CFG_AGING_TIME_ITCT_REL_MSK;
hisi_sas_write32(hisi_hba, CFG_AGING_TIME, reg_val);
- qw0 = cpu_to_le64(itct->qw0);
+ qw0 = le64_to_cpu(itct->qw0);
qw0 &= ~ITCT_HDR_VALID_MSK;
itct->qw0 = cpu_to_le64(qw0);
}
@@ -1100,7 +1101,7 @@ static void slot_err_v1_hw(struct hisi_hba *hisi_hba,
case SAS_PROTOCOL_SSP:
{
int error = -1;
- u32 dma_err_type = cpu_to_le32(err_record->dma_err_type);
+ u32 dma_err_type = le32_to_cpu(err_record->dma_err_type);
u32 dma_tx_err_type = ((dma_err_type &
ERR_HDR_DMA_TX_ERR_TYPE_MSK)) >>
ERR_HDR_DMA_TX_ERR_TYPE_OFF;
@@ -1108,9 +1109,9 @@ static void slot_err_v1_hw(struct hisi_hba *hisi_hba,
ERR_HDR_DMA_RX_ERR_TYPE_MSK)) >>
ERR_HDR_DMA_RX_ERR_TYPE_OFF;
u32 trans_tx_fail_type =
- cpu_to_le32(err_record->trans_tx_fail_type);
+ le32_to_cpu(err_record->trans_tx_fail_type);
u32 trans_rx_fail_type =
- cpu_to_le32(err_record->trans_rx_fail_type);
+ le32_to_cpu(err_record->trans_rx_fail_type);
if (dma_tx_err_type) {
/* dma tx err */
@@ -1558,7 +1559,7 @@ static irqreturn_t cq_interrupt_v1_hw(int irq, void *p)
u32 cmplt_hdr_data;
complete_hdr = &complete_queue[rd_point];
- cmplt_hdr_data = cpu_to_le32(complete_hdr->data);
+ cmplt_hdr_data = le32_to_cpu(complete_hdr->data);
idx = (cmplt_hdr_data & CMPLT_HDR_IPTT_MSK) >>
CMPLT_HDR_IPTT_OFF;
slot = &hisi_hba->slot_info[idx];
@@ -1797,6 +1798,11 @@ static int hisi_sas_v1_init(struct hisi_hba *hisi_hba)
return 0;
}
+static struct device_attribute *host_attrs_v1_hw[] = {
+ &dev_attr_phy_event_threshold,
+ NULL
+};
+
static struct scsi_host_template sht_v1_hw = {
.name = DRV_NAME,
.module = THIS_MODULE,
@@ -1808,14 +1814,13 @@ static struct scsi_host_template sht_v1_hw = {
.change_queue_depth = sas_change_queue_depth,
.bios_param = sas_bios_param,
.this_id = -1,
- .sg_tablesize = SG_ALL,
+ .sg_tablesize = HISI_SAS_SGE_PAGE_CNT,
.max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.eh_device_reset_handler = sas_eh_device_reset_handler,
.eh_target_reset_handler = sas_eh_target_reset_handler,
.target_destroy = sas_target_destroy,
.ioctl = sas_ioctl,
- .shost_attrs = host_attrs,
+ .shost_attrs = host_attrs_v1_hw,
};
static const struct hisi_sas_hw hisi_sas_v1_hw = {
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index 77a85ead483e..c8ebff3ba559 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -934,6 +934,7 @@ static void setup_itct_v2_hw(struct hisi_hba *hisi_hba,
struct domain_device *parent_dev = device->parent;
struct asd_sas_port *sas_port = device->port;
struct hisi_sas_port *port = to_hisi_sas_port(sas_port);
+ u64 sas_addr;
memset(itct, 0, sizeof(*itct));
@@ -966,8 +967,8 @@ static void setup_itct_v2_hw(struct hisi_hba *hisi_hba,
itct->qw0 = cpu_to_le64(qw0);
/* qw1 */
- memcpy(&itct->sas_addr, device->sas_addr, SAS_ADDR_SIZE);
- itct->sas_addr = __swab64(itct->sas_addr);
+ memcpy(&sas_addr, device->sas_addr, SAS_ADDR_SIZE);
+ itct->sas_addr = cpu_to_le64(__swab64(sas_addr));
/* qw2 */
if (!dev_is_sata(device))
@@ -2044,11 +2045,11 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba,
struct task_status_struct *ts = &task->task_status;
struct hisi_sas_err_record_v2 *err_record =
hisi_sas_status_buf_addr_mem(slot);
- u32 trans_tx_fail_type = cpu_to_le32(err_record->trans_tx_fail_type);
- u32 trans_rx_fail_type = cpu_to_le32(err_record->trans_rx_fail_type);
- u16 dma_tx_err_type = cpu_to_le16(err_record->dma_tx_err_type);
- u16 sipc_rx_err_type = cpu_to_le16(err_record->sipc_rx_err_type);
- u32 dma_rx_err_type = cpu_to_le32(err_record->dma_rx_err_type);
+ u32 trans_tx_fail_type = le32_to_cpu(err_record->trans_tx_fail_type);
+ u32 trans_rx_fail_type = le32_to_cpu(err_record->trans_rx_fail_type);
+ u16 dma_tx_err_type = le16_to_cpu(err_record->dma_tx_err_type);
+ u16 sipc_rx_err_type = le16_to_cpu(err_record->sipc_rx_err_type);
+ u32 dma_rx_err_type = le32_to_cpu(err_record->dma_rx_err_type);
int error = -1;
if (err_phase == 1) {
@@ -2059,8 +2060,7 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba,
trans_tx_fail_type);
} else if (err_phase == 2) {
/* error in RX phase, the priority is: DW1 > DW3 > DW2 */
- error = parse_trans_rx_err_code_v2_hw(
- trans_rx_fail_type);
+ error = parse_trans_rx_err_code_v2_hw(trans_rx_fail_type);
if (error == -1) {
error = parse_dma_rx_err_code_v2_hw(
dma_rx_err_type);
@@ -2358,6 +2358,7 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
&complete_queue[slot->cmplt_queue_slot];
unsigned long flags;
bool is_internal = slot->is_internal;
+ u32 dw0;
if (unlikely(!task || !task->lldd_task || !task->dev))
return -EINVAL;
@@ -2382,8 +2383,9 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
}
/* Use SAS+TMF status codes */
- switch ((complete_hdr->dw0 & CMPLT_HDR_ABORT_STAT_MSK)
- >> CMPLT_HDR_ABORT_STAT_OFF) {
+ dw0 = le32_to_cpu(complete_hdr->dw0);
+ switch ((dw0 & CMPLT_HDR_ABORT_STAT_MSK) >>
+ CMPLT_HDR_ABORT_STAT_OFF) {
case STAT_IO_ABORTED:
/* this io has been aborted by abort command */
ts->stat = SAS_ABORTED_TASK;
@@ -2408,9 +2410,8 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
break;
}
- if ((complete_hdr->dw0 & CMPLT_HDR_ERX_MSK) &&
- (!(complete_hdr->dw0 & CMPLT_HDR_RSPNS_XFRD_MSK))) {
- u32 err_phase = (complete_hdr->dw0 & CMPLT_HDR_ERR_PHASE_MSK)
+ if ((dw0 & CMPLT_HDR_ERX_MSK) && (!(dw0 & CMPLT_HDR_RSPNS_XFRD_MSK))) {
+ u32 err_phase = (dw0 & CMPLT_HDR_ERR_PHASE_MSK)
>> CMPLT_HDR_ERR_PHASE_OFF;
u32 *error_info = hisi_sas_status_buf_addr_mem(slot);
@@ -2526,22 +2527,23 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba,
struct hisi_sas_tmf_task *tmf = slot->tmf;
u8 *buf_cmd;
int has_data = 0, hdr_tag = 0;
- u32 dw1 = 0, dw2 = 0;
+ u32 dw0, dw1 = 0, dw2 = 0;
/* create header */
/* dw0 */
- hdr->dw0 = cpu_to_le32(port->id << CMD_HDR_PORT_OFF);
+ dw0 = port->id << CMD_HDR_PORT_OFF;
if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type))
- hdr->dw0 |= cpu_to_le32(3 << CMD_HDR_CMD_OFF);
+ dw0 |= 3 << CMD_HDR_CMD_OFF;
else
- hdr->dw0 |= cpu_to_le32(4 << CMD_HDR_CMD_OFF);
+ dw0 |= 4 << CMD_HDR_CMD_OFF;
if (tmf && tmf->force_phy) {
- hdr->dw0 |= CMD_HDR_FORCE_PHY_MSK;
- hdr->dw0 |= cpu_to_le32((1 << tmf->phy_id)
- << CMD_HDR_PHY_ID_OFF);
+ dw0 |= CMD_HDR_FORCE_PHY_MSK;
+ dw0 |= (1 << tmf->phy_id) << CMD_HDR_PHY_ID_OFF;
}
+ hdr->dw0 = cpu_to_le32(dw0);
+
/* dw1 */
switch (task->data_dir) {
case DMA_TO_DEVICE:
@@ -3152,20 +3154,24 @@ static void cq_tasklet_v2_hw(unsigned long val)
/* Check for NCQ completion */
if (complete_hdr->act) {
- u32 act_tmp = complete_hdr->act;
+ u32 act_tmp = le32_to_cpu(complete_hdr->act);
int ncq_tag_count = ffs(act_tmp);
+ u32 dw1 = le32_to_cpu(complete_hdr->dw1);
- dev_id = (complete_hdr->dw1 & CMPLT_HDR_DEV_ID_MSK) >>
+ dev_id = (dw1 & CMPLT_HDR_DEV_ID_MSK) >>
CMPLT_HDR_DEV_ID_OFF;
itct = &hisi_hba->itct[dev_id];
/* The NCQ tags are held in the itct header */
while (ncq_tag_count) {
- __le64 *ncq_tag = &itct->qw4_15[0];
+ __le64 *_ncq_tag = &itct->qw4_15[0], __ncq_tag;
+ u64 ncq_tag;
- ncq_tag_count -= 1;
- iptt = (ncq_tag[ncq_tag_count / 5]
- >> (ncq_tag_count % 5) * 12) & 0xfff;
+ ncq_tag_count--;
+ __ncq_tag = _ncq_tag[ncq_tag_count / 5];
+ ncq_tag = le64_to_cpu(__ncq_tag);
+ iptt = (ncq_tag >> (ncq_tag_count % 5) * 12) &
+ 0xfff;
slot = &hisi_hba->slot_info[iptt];
slot->cmplt_queue_slot = rd_point;
@@ -3176,7 +3182,9 @@ static void cq_tasklet_v2_hw(unsigned long val)
ncq_tag_count = ffs(act_tmp);
}
} else {
- iptt = (complete_hdr->dw1) & CMPLT_HDR_IPTT_MSK;
+ u32 dw1 = le32_to_cpu(complete_hdr->dw1);
+
+ iptt = dw1 & CMPLT_HDR_IPTT_MSK;
slot = &hisi_hba->slot_info[iptt];
slot->cmplt_queue_slot = rd_point;
slot->cmplt_queue = queue;
@@ -3552,6 +3560,11 @@ static void wait_cmds_complete_timeout_v2_hw(struct hisi_hba *hisi_hba,
dev_dbg(dev, "wait commands complete %dms\n", time);
}
+static struct device_attribute *host_attrs_v2_hw[] = {
+ &dev_attr_phy_event_threshold,
+ NULL
+};
+
static struct scsi_host_template sht_v2_hw = {
.name = DRV_NAME,
.module = THIS_MODULE,
@@ -3563,14 +3576,13 @@ static struct scsi_host_template sht_v2_hw = {
.change_queue_depth = sas_change_queue_depth,
.bios_param = sas_bios_param,
.this_id = -1,
- .sg_tablesize = SG_ALL,
+ .sg_tablesize = HISI_SAS_SGE_PAGE_CNT,
.max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.eh_device_reset_handler = sas_eh_device_reset_handler,
.eh_target_reset_handler = sas_eh_target_reset_handler,
.target_destroy = sas_target_destroy,
.ioctl = sas_ioctl,
- .shost_attrs = host_attrs,
+ .shost_attrs = host_attrs_v2_hw,
};
static const struct hisi_sas_hw hisi_sas_v2_hw = {
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index a369450a1fa7..e2420a810e99 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -42,6 +42,7 @@
#define MAX_CON_TIME_LIMIT_TIME 0xa4
#define BUS_INACTIVE_LIMIT_TIME 0xa8
#define REJECT_TO_OPEN_LIMIT_TIME 0xac
+#define CQ_INT_CONVERGE_EN 0xb0
#define CFG_AGING_TIME 0xbc
#define HGC_DFX_CFG2 0xc0
#define CFG_ABT_SET_QUERY_IPTT 0xd4
@@ -126,6 +127,8 @@
#define PHY_CTRL (PORT_BASE + 0x14)
#define PHY_CTRL_RESET_OFF 0
#define PHY_CTRL_RESET_MSK (0x1 << PHY_CTRL_RESET_OFF)
+#define CMD_HDR_PIR_OFF 8
+#define CMD_HDR_PIR_MSK (0x1 << CMD_HDR_PIR_OFF)
#define SL_CFG (PORT_BASE + 0x84)
#define AIP_LIMIT (PORT_BASE + 0x90)
#define SL_CONTROL (PORT_BASE + 0x94)
@@ -332,6 +335,16 @@
#define ITCT_HDR_RTOLT_OFF 48
#define ITCT_HDR_RTOLT_MSK (0xffffULL << ITCT_HDR_RTOLT_OFF)
+struct hisi_sas_protect_iu_v3_hw {
+ u32 dw0;
+ u32 lbrtcv;
+ u32 lbrtgv;
+ u32 dw3;
+ u32 dw4;
+ u32 dw5;
+ u32 rsv;
+};
+
struct hisi_sas_complete_v3_hdr {
__le32 dw0;
__le32 dw1;
@@ -371,6 +384,28 @@ struct hisi_sas_err_record_v3 {
((fis.command == ATA_CMD_DEV_RESET) && \
((fis.control & ATA_SRST) != 0)))
+#define T10_INSRT_EN_OFF 0
+#define T10_INSRT_EN_MSK (1 << T10_INSRT_EN_OFF)
+#define T10_RMV_EN_OFF 1
+#define T10_RMV_EN_MSK (1 << T10_RMV_EN_OFF)
+#define T10_RPLC_EN_OFF 2
+#define T10_RPLC_EN_MSK (1 << T10_RPLC_EN_OFF)
+#define T10_CHK_EN_OFF 3
+#define T10_CHK_EN_MSK (1 << T10_CHK_EN_OFF)
+#define INCR_LBRT_OFF 5
+#define INCR_LBRT_MSK (1 << INCR_LBRT_OFF)
+#define USR_DATA_BLOCK_SZ_OFF 20
+#define USR_DATA_BLOCK_SZ_MSK (0x3 << USR_DATA_BLOCK_SZ_OFF)
+#define T10_CHK_MSK_OFF 16
+
+static bool hisi_sas_intr_conv;
+MODULE_PARM_DESC(intr_conv, "interrupt converge enable (0-1)");
+
+/* permit overriding the host protection capabilities mask (EEDP/T10 PI) */
+static int prot_mask;
+module_param(prot_mask, int, 0);
+MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=0x0 ");
+
static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off)
{
void __iomem *regs = hisi_hba->regs + off;
@@ -436,6 +471,8 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba)
hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x1);
hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, 0x1);
hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, 0x1);
+ hisi_sas_write32(hisi_hba, CQ_INT_CONVERGE_EN,
+ hisi_sas_intr_conv);
hisi_sas_write32(hisi_hba, OQ_INT_SRC, 0xffff);
hisi_sas_write32(hisi_hba, ENT_INT_SRC1, 0xffffffff);
hisi_sas_write32(hisi_hba, ENT_INT_SRC2, 0xffffffff);
@@ -494,7 +531,7 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba)
hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_OOB_RESTART_MSK, 0x1);
hisi_sas_phy_write32(hisi_hba, i, STP_LINK_TIMER, 0x7f7a120);
hisi_sas_phy_write32(hisi_hba, i, CON_CFG_DRIVER, 0x2a0a01);
-
+ hisi_sas_phy_write32(hisi_hba, i, SAS_SSP_CON_TIMER_CFG, 0x32);
/* used for 12G negotiate */
hisi_sas_phy_write32(hisi_hba, i, COARSETUNE_TIME, 0x1e);
hisi_sas_phy_write32(hisi_hba, i, AIP_LIMIT, 0x2ffff);
@@ -622,6 +659,7 @@ static void setup_itct_v3_hw(struct hisi_hba *hisi_hba,
struct domain_device *parent_dev = device->parent;
struct asd_sas_port *sas_port = device->port;
struct hisi_sas_port *port = to_hisi_sas_port(sas_port);
+ u64 sas_addr;
memset(itct, 0, sizeof(*itct));
@@ -654,8 +692,8 @@ static void setup_itct_v3_hw(struct hisi_hba *hisi_hba,
itct->qw0 = cpu_to_le64(qw0);
/* qw1 */
- memcpy(&itct->sas_addr, device->sas_addr, SAS_ADDR_SIZE);
- itct->sas_addr = __swab64(itct->sas_addr);
+ memcpy(&sas_addr, device->sas_addr, SAS_ADDR_SIZE);
+ itct->sas_addr = cpu_to_le64(__swab64(sas_addr));
/* qw2 */
if (!dev_is_sata(device))
@@ -932,6 +970,58 @@ static void prep_prd_sge_v3_hw(struct hisi_hba *hisi_hba,
hdr->sg_len = cpu_to_le32(n_elem << CMD_HDR_DATA_SGL_LEN_OFF);
}
+static u32 get_prot_chk_msk_v3_hw(struct scsi_cmnd *scsi_cmnd)
+{
+ unsigned char prot_flags = scsi_cmnd->prot_flags;
+
+ if (prot_flags & SCSI_PROT_TRANSFER_PI) {
+ if (prot_flags & SCSI_PROT_REF_CHECK)
+ return 0xc << 16;
+ return 0xfc << 16;
+ }
+ return 0;
+}
+
+static void fill_prot_v3_hw(struct scsi_cmnd *scsi_cmnd,
+ struct hisi_sas_protect_iu_v3_hw *prot)
+{
+ unsigned char prot_op = scsi_get_prot_op(scsi_cmnd);
+ unsigned int interval = scsi_prot_interval(scsi_cmnd);
+ u32 lbrt_chk_val = t10_pi_ref_tag(scsi_cmnd->request);
+
+ switch (prot_op) {
+ case SCSI_PROT_READ_STRIP:
+ prot->dw0 |= (T10_RMV_EN_MSK | T10_CHK_EN_MSK);
+ prot->lbrtcv = lbrt_chk_val;
+ prot->dw4 |= get_prot_chk_msk_v3_hw(scsi_cmnd);
+ break;
+ case SCSI_PROT_WRITE_INSERT:
+ prot->dw0 |= T10_INSRT_EN_MSK;
+ prot->lbrtgv = lbrt_chk_val;
+ break;
+ default:
+ WARN(1, "prot_op(0x%x) is not valid\n", prot_op);
+ break;
+ }
+
+ switch (interval) {
+ case 512:
+ break;
+ case 4096:
+ prot->dw0 |= (0x1 << USR_DATA_BLOCK_SZ_OFF);
+ break;
+ case 520:
+ prot->dw0 |= (0x2 << USR_DATA_BLOCK_SZ_OFF);
+ break;
+ default:
+ WARN(1, "protection interval (0x%x) invalid\n",
+ interval);
+ break;
+ }
+
+ prot->dw0 |= INCR_LBRT_MSK;
+}
+
static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba,
struct hisi_sas_slot *slot)
{
@@ -943,9 +1033,10 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba,
struct sas_ssp_task *ssp_task = &task->ssp_task;
struct scsi_cmnd *scsi_cmnd = ssp_task->cmd;
struct hisi_sas_tmf_task *tmf = slot->tmf;
+ unsigned char prot_op = scsi_get_prot_op(scsi_cmnd);
int has_data = 0, priority = !!tmf;
u8 *buf_cmd;
- u32 dw1 = 0, dw2 = 0;
+ u32 dw1 = 0, dw2 = 0, len = 0;
hdr->dw0 = cpu_to_le32((1 << CMD_HDR_RESP_REPORT_OFF) |
(2 << CMD_HDR_TLR_CTRL_OFF) |
@@ -975,7 +1066,6 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba,
/* map itct entry */
dw1 |= sas_dev->device_id << CMD_HDR_DEV_ID_OFF;
- hdr->dw1 = cpu_to_le32(dw1);
dw2 = (((sizeof(struct ssp_command_iu) + sizeof(struct ssp_frame_hdr)
+ 3) / 4) << CMD_HDR_CFL_OFF) |
@@ -988,7 +1078,6 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba,
prep_prd_sge_v3_hw(hisi_hba, slot, hdr, task->scatter,
slot->n_elem);
- hdr->data_transfer_len = cpu_to_le32(task->total_xfer_len);
hdr->cmd_table_addr = cpu_to_le64(hisi_sas_cmd_hdr_addr_dma(slot));
hdr->sts_buffer_addr = cpu_to_le64(hisi_sas_status_buf_addr_dma(slot));
@@ -1013,6 +1102,38 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba,
break;
}
}
+
+ if (has_data && (prot_op != SCSI_PROT_NORMAL)) {
+ struct hisi_sas_protect_iu_v3_hw prot;
+ u8 *buf_cmd_prot;
+
+ hdr->dw7 |= cpu_to_le32(1 << CMD_HDR_ADDR_MODE_SEL_OFF);
+ dw1 |= CMD_HDR_PIR_MSK;
+ buf_cmd_prot = hisi_sas_cmd_hdr_addr_mem(slot) +
+ sizeof(struct ssp_frame_hdr) +
+ sizeof(struct ssp_command_iu);
+
+ memset(&prot, 0, sizeof(struct hisi_sas_protect_iu_v3_hw));
+ fill_prot_v3_hw(scsi_cmnd, &prot);
+ memcpy(buf_cmd_prot, &prot,
+ sizeof(struct hisi_sas_protect_iu_v3_hw));
+
+ /*
+ * For READ, we need length of info read to memory, while for
+ * WRITE we need length of data written to the disk.
+ */
+ if (prot_op == SCSI_PROT_WRITE_INSERT) {
+ unsigned int interval = scsi_prot_interval(scsi_cmnd);
+ unsigned int ilog2_interval = ilog2(interval);
+
+ len = (task->total_xfer_len >> ilog2_interval) * 8;
+ }
+
+ }
+
+ hdr->dw1 = cpu_to_le32(dw1);
+
+ hdr->data_transfer_len = cpu_to_le32(task->total_xfer_len + len);
}
static void prep_smp_v3_hw(struct hisi_hba *hisi_hba,
@@ -1584,15 +1705,16 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
&complete_queue[slot->cmplt_queue_slot];
struct hisi_sas_err_record_v3 *record =
hisi_sas_status_buf_addr_mem(slot);
- u32 dma_rx_err_type = record->dma_rx_err_type;
- u32 trans_tx_fail_type = record->trans_tx_fail_type;
+ u32 dma_rx_err_type = le32_to_cpu(record->dma_rx_err_type);
+ u32 trans_tx_fail_type = le32_to_cpu(record->trans_tx_fail_type);
+ u32 dw3 = le32_to_cpu(complete_hdr->dw3);
switch (task->task_proto) {
case SAS_PROTOCOL_SSP:
if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
ts->residual = trans_tx_fail_type;
ts->stat = SAS_DATA_UNDERRUN;
- } else if (complete_hdr->dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) {
+ } else if (dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) {
ts->stat = SAS_QUEUE_FULL;
slot->abort = 1;
} else {
@@ -1606,7 +1728,7 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
ts->residual = trans_tx_fail_type;
ts->stat = SAS_DATA_UNDERRUN;
- } else if (complete_hdr->dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) {
+ } else if (dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) {
ts->stat = SAS_PHY_DOWN;
slot->abort = 1;
} else {
@@ -1639,6 +1761,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
&complete_queue[slot->cmplt_queue_slot];
unsigned long flags;
bool is_internal = slot->is_internal;
+ u32 dw0, dw1, dw3;
if (unlikely(!task || !task->lldd_task || !task->dev))
return -EINVAL;
@@ -1662,11 +1785,14 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
goto out;
}
+ dw0 = le32_to_cpu(complete_hdr->dw0);
+ dw1 = le32_to_cpu(complete_hdr->dw1);
+ dw3 = le32_to_cpu(complete_hdr->dw3);
+
/*
* Use SAS+TMF status codes
*/
- switch ((complete_hdr->dw0 & CMPLT_HDR_ABORT_STAT_MSK)
- >> CMPLT_HDR_ABORT_STAT_OFF) {
+ switch ((dw0 & CMPLT_HDR_ABORT_STAT_MSK) >> CMPLT_HDR_ABORT_STAT_OFF) {
case STAT_IO_ABORTED:
/* this IO has been aborted by abort command */
ts->stat = SAS_ABORTED_TASK;
@@ -1689,7 +1815,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
}
/* check for erroneous completion */
- if ((complete_hdr->dw0 & CMPLT_HDR_CMPLT_MSK) == 0x3) {
+ if ((dw0 & CMPLT_HDR_CMPLT_MSK) == 0x3) {
u32 *error_info = hisi_sas_status_buf_addr_mem(slot);
slot_err_v3_hw(hisi_hba, task, slot);
@@ -1698,8 +1824,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
"CQ hdr: 0x%x 0x%x 0x%x 0x%x "
"Error info: 0x%x 0x%x 0x%x 0x%x\n",
slot->idx, task, sas_dev->device_id,
- complete_hdr->dw0, complete_hdr->dw1,
- complete_hdr->act, complete_hdr->dw3,
+ dw0, dw1, complete_hdr->act, dw3,
error_info[0], error_info[1],
error_info[2], error_info[3]);
if (unlikely(slot->abort))
@@ -1797,11 +1922,13 @@ static void cq_tasklet_v3_hw(unsigned long val)
while (rd_point != wr_point) {
struct hisi_sas_complete_v3_hdr *complete_hdr;
struct device *dev = hisi_hba->dev;
+ u32 dw1;
int iptt;
complete_hdr = &complete_queue[rd_point];
+ dw1 = le32_to_cpu(complete_hdr->dw1);
- iptt = (complete_hdr->dw1) & CMPLT_HDR_IPTT_MSK;
+ iptt = dw1 & CMPLT_HDR_IPTT_MSK;
if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) {
slot = &hisi_hba->slot_info[iptt];
slot->cmplt_queue_slot = rd_point;
@@ -1878,10 +2005,12 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
for (i = 0; i < hisi_hba->queue_count; i++) {
struct hisi_sas_cq *cq = &hisi_hba->cq[i];
struct tasklet_struct *t = &cq->tasklet;
+ int nr = hisi_sas_intr_conv ? 16 : 16 + i;
+ unsigned long irqflags = hisi_sas_intr_conv ? IRQF_SHARED : 0;
- rc = devm_request_irq(dev, pci_irq_vector(pdev, i+16),
- cq_interrupt_v3_hw, 0,
- DRV_NAME " cq", cq);
+ rc = devm_request_irq(dev, pci_irq_vector(pdev, nr),
+ cq_interrupt_v3_hw, irqflags,
+ DRV_NAME " cq", cq);
if (rc) {
dev_err(dev,
"could not request cq%d interrupt, rc=%d\n",
@@ -1898,8 +2027,9 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
free_cq_irqs:
for (k = 0; k < i; k++) {
struct hisi_sas_cq *cq = &hisi_hba->cq[k];
+ int nr = hisi_sas_intr_conv ? 16 : 16 + k;
- free_irq(pci_irq_vector(pdev, k+16), cq);
+ free_irq(pci_irq_vector(pdev, nr), cq);
}
free_irq(pci_irq_vector(pdev, 11), hisi_hba);
free_chnl_interrupt:
@@ -2089,6 +2219,119 @@ static void wait_cmds_complete_timeout_v3_hw(struct hisi_hba *hisi_hba,
dev_dbg(dev, "wait commands complete %dms\n", time);
}
+static ssize_t intr_conv_v3_hw_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%u\n", hisi_sas_intr_conv);
+}
+static DEVICE_ATTR_RO(intr_conv_v3_hw);
+
+static void config_intr_coal_v3_hw(struct hisi_hba *hisi_hba)
+{
+ /* config those registers between enable and disable PHYs */
+ hisi_sas_stop_phys(hisi_hba);
+
+ if (hisi_hba->intr_coal_ticks == 0 ||
+ hisi_hba->intr_coal_count == 0) {
+ hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x1);
+ hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, 0x1);
+ hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, 0x1);
+ } else {
+ hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x3);
+ hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME,
+ hisi_hba->intr_coal_ticks);
+ hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT,
+ hisi_hba->intr_coal_count);
+ }
+ phys_init_v3_hw(hisi_hba);
+}
+
+static ssize_t intr_coal_ticks_v3_hw_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct Scsi_Host *shost = class_to_shost(dev);
+ struct hisi_hba *hisi_hba = shost_priv(shost);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ hisi_hba->intr_coal_ticks);
+}
+
+static ssize_t intr_coal_ticks_v3_hw_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct Scsi_Host *shost = class_to_shost(dev);
+ struct hisi_hba *hisi_hba = shost_priv(shost);
+ u32 intr_coal_ticks;
+ int ret;
+
+ ret = kstrtou32(buf, 10, &intr_coal_ticks);
+ if (ret) {
+ dev_err(dev, "Input data of interrupt coalesce unmatch\n");
+ return -EINVAL;
+ }
+
+ if (intr_coal_ticks >= BIT(24)) {
+ dev_err(dev, "intr_coal_ticks must be less than 2^24!\n");
+ return -EINVAL;
+ }
+
+ hisi_hba->intr_coal_ticks = intr_coal_ticks;
+
+ config_intr_coal_v3_hw(hisi_hba);
+
+ return count;
+}
+static DEVICE_ATTR_RW(intr_coal_ticks_v3_hw);
+
+static ssize_t intr_coal_count_v3_hw_show(struct device *dev,
+ struct device_attribute
+ *attr, char *buf)
+{
+ struct Scsi_Host *shost = class_to_shost(dev);
+ struct hisi_hba *hisi_hba = shost_priv(shost);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ hisi_hba->intr_coal_count);
+}
+
+static ssize_t intr_coal_count_v3_hw_store(struct device *dev,
+ struct device_attribute
+ *attr, const char *buf, size_t count)
+{
+ struct Scsi_Host *shost = class_to_shost(dev);
+ struct hisi_hba *hisi_hba = shost_priv(shost);
+ u32 intr_coal_count;
+ int ret;
+
+ ret = kstrtou32(buf, 10, &intr_coal_count);
+ if (ret) {
+ dev_err(dev, "Input data of interrupt coalesce unmatch\n");
+ return -EINVAL;
+ }
+
+ if (intr_coal_count >= BIT(8)) {
+ dev_err(dev, "intr_coal_count must be less than 2^8!\n");
+ return -EINVAL;
+ }
+
+ hisi_hba->intr_coal_count = intr_coal_count;
+
+ config_intr_coal_v3_hw(hisi_hba);
+
+ return count;
+}
+static DEVICE_ATTR_RW(intr_coal_count_v3_hw);
+
+static struct device_attribute *host_attrs_v3_hw[] = {
+ &dev_attr_phy_event_threshold,
+ &dev_attr_intr_conv_v3_hw,
+ &dev_attr_intr_coal_ticks_v3_hw,
+ &dev_attr_intr_coal_count_v3_hw,
+ NULL
+};
+
static struct scsi_host_template sht_v3_hw = {
.name = DRV_NAME,
.module = THIS_MODULE,
@@ -2100,14 +2343,13 @@ static struct scsi_host_template sht_v3_hw = {
.change_queue_depth = sas_change_queue_depth,
.bios_param = sas_bios_param,
.this_id = -1,
- .sg_tablesize = SG_ALL,
+ .sg_tablesize = HISI_SAS_SGE_PAGE_CNT,
.max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.eh_device_reset_handler = sas_eh_device_reset_handler,
.eh_target_reset_handler = sas_eh_target_reset_handler,
.target_destroy = sas_target_destroy,
.ioctl = sas_ioctl,
- .shost_attrs = host_attrs,
+ .shost_attrs = host_attrs_v3_hw,
.tag_alloc_policy = BLK_TAG_ALLOC_RR,
};
@@ -2161,6 +2403,12 @@ hisi_sas_shost_alloc_pci(struct pci_dev *pdev)
hisi_hba->shost = shost;
SHOST_TO_SAS_HA(shost) = &hisi_hba->sha;
+ if (prot_mask & ~HISI_SAS_PROT_MASK)
+ dev_err(dev, "unsupported protection mask 0x%x, using default (0x0)\n",
+ prot_mask);
+ else
+ hisi_hba->prot_mask = prot_mask;
+
timer_setup(&hisi_hba->timer, NULL, 0);
if (hisi_sas_get_fw_info(hisi_hba) < 0)
@@ -2199,14 +2447,11 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
goto err_out_disable_device;
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) ||
- (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0)) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) ||
- (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) {
- dev_err(dev, "No usable DMA addressing method\n");
- rc = -EIO;
- goto err_out_regions;
- }
+ if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) ||
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
+ dev_err(dev, "No usable DMA addressing method\n");
+ rc = -EIO;
+ goto err_out_regions;
}
shost = hisi_sas_shost_alloc_pci(pdev);
@@ -2245,7 +2490,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
shost->max_lun = ~0;
shost->max_channel = 1;
shost->max_cmd_len = 16;
- shost->sg_tablesize = min_t(u16, SG_ALL, HISI_SAS_SGE_PAGE_CNT);
shost->can_queue = hisi_hba->hw->max_command_entries -
HISI_SAS_RESERVED_IPTT_CNT;
shost->cmd_per_lun = hisi_hba->hw->max_command_entries -
@@ -2275,6 +2519,12 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
goto err_out_register_ha;
+ if (hisi_hba->prot_mask) {
+ dev_info(dev, "Registering for DIF/DIX prot_mask=0x%x\n",
+ prot_mask);
+ scsi_host_set_prot(hisi_hba->shost, prot_mask);
+ }
+
scsi_scan_host(shost);
return 0;
@@ -2301,8 +2551,9 @@ hisi_sas_v3_destroy_irqs(struct pci_dev *pdev, struct hisi_hba *hisi_hba)
free_irq(pci_irq_vector(pdev, 11), hisi_hba);
for (i = 0; i < hisi_hba->queue_count; i++) {
struct hisi_sas_cq *cq = &hisi_hba->cq[i];
+ int nr = hisi_sas_intr_conv ? 16 : 16 + i;
- free_irq(pci_irq_vector(pdev, i+16), cq);
+ free_irq(pci_irq_vector(pdev, nr), cq);
}
pci_free_irq_vectors(pdev);
}
@@ -2529,7 +2780,7 @@ static int hisi_sas_v3_suspend(struct pci_dev *pdev, pm_message_t state)
struct hisi_hba *hisi_hba = sha->lldd_ha;
struct device *dev = hisi_hba->dev;
struct Scsi_Host *shost = hisi_hba->shost;
- u32 device_state;
+ pci_power_t device_state;
int rc;
if (!pdev->pm_cap) {
@@ -2575,7 +2826,7 @@ static int hisi_sas_v3_resume(struct pci_dev *pdev)
struct Scsi_Host *shost = hisi_hba->shost;
struct device *dev = hisi_hba->dev;
unsigned int rc;
- u32 device_state = pdev->current_state;
+ pci_power_t device_state = pdev->current_state;
dev_warn(dev, "resuming from operating state [D%d]\n",
device_state);
@@ -2624,6 +2875,7 @@ static struct pci_driver sas_v3_pci_driver = {
};
module_pci_driver(sas_v3_pci_driver);
+module_param_named(intr_conv, hisi_sas_intr_conv, bool, 0444);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("John Garry <john.garry@huawei.com>");
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index ea4b0bb0c1cd..eaf329db3973 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -222,18 +222,9 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
if (error)
goto fail;
- if (shost_use_blk_mq(shost)) {
- error = scsi_mq_setup_tags(shost);
- if (error)
- goto fail;
- } else {
- shost->bqt = blk_init_tags(shost->can_queue,
- shost->hostt->tag_alloc_policy);
- if (!shost->bqt) {
- error = -ENOMEM;
- goto fail;
- }
- }
+ error = scsi_mq_setup_tags(shost);
+ if (error)
+ goto fail;
if (!shost->shost_gendev.parent)
shost->shost_gendev.parent = dev ? dev : &platform_bus;
@@ -309,8 +300,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
pm_runtime_disable(&shost->shost_gendev);
pm_runtime_set_suspended(&shost->shost_gendev);
pm_runtime_put_noidle(&shost->shost_gendev);
- if (shost_use_blk_mq(shost))
- scsi_mq_destroy_tags(shost);
+ scsi_mq_destroy_tags(shost);
fail:
return error;
}
@@ -344,13 +334,8 @@ static void scsi_host_dev_release(struct device *dev)
kfree(dev_name(&shost->shost_dev));
}
- if (shost_use_blk_mq(shost)) {
- if (shost->tag_set.tags)
- scsi_mq_destroy_tags(shost);
- } else {
- if (shost->bqt)
- blk_free_tags(shost->bqt);
- }
+ if (shost->tag_set.tags)
+ scsi_mq_destroy_tags(shost);
kfree(shost->shost_data);
@@ -431,7 +416,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
shost->sg_prot_tablesize = sht->sg_prot_tablesize;
shost->cmd_per_lun = sht->cmd_per_lun;
shost->unchecked_isa_dma = sht->unchecked_isa_dma;
- shost->use_clustering = sht->use_clustering;
shost->no_write_same = sht->no_write_same;
if (shost_eh_deadline == -1 || !sht->eh_host_reset_handler)
@@ -464,6 +448,11 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
else
shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS;
+ if (sht->max_segment_size)
+ shost->max_segment_size = sht->max_segment_size;
+ else
+ shost->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+
/*
* assume a 4GB boundary, if not set
*/
@@ -472,8 +461,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
else
shost->dma_boundary = 0xffffffff;
- shost->use_blk_mq = scsi_use_blk_mq || shost->hostt->force_blk_mq;
-
device_initialize(&shost->shost_gendev);
dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
shost->shost_gendev.bus = &scsi_bus_type;
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index c9cccf35e9d7..ff67ef5d5347 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -965,7 +965,6 @@ static struct scsi_host_template hpsa_driver_template = {
.scan_finished = hpsa_scan_finished,
.change_queue_depth = hpsa_change_queue_depth,
.this_id = -1,
- .use_clustering = ENABLE_CLUSTERING,
.eh_device_reset_handler = hpsa_eh_device_reset_handler,
.ioctl = hpsa_ioctl,
.slave_alloc = hpsa_slave_alloc,
@@ -4663,6 +4662,7 @@ static int fixup_ioaccel_cdb(u8 *cdb, int *cdb_len)
case WRITE_6:
case WRITE_12:
is_write = 1;
+ /* fall through */
case READ_6:
case READ_12:
if (*cdb_len == 6) {
@@ -5093,6 +5093,7 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
switch (cmd->cmnd[0]) {
case WRITE_6:
is_write = 1;
+ /* fall through */
case READ_6:
first_block = (((cmd->cmnd[1] & 0x1F) << 16) |
(cmd->cmnd[2] << 8) |
@@ -5103,6 +5104,7 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
break;
case WRITE_10:
is_write = 1;
+ /* fall through */
case READ_10:
first_block =
(((u64) cmd->cmnd[2]) << 24) |
@@ -5115,6 +5117,7 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
break;
case WRITE_12:
is_write = 1;
+ /* fall through */
case READ_12:
first_block =
(((u64) cmd->cmnd[2]) << 24) |
@@ -5129,6 +5132,7 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
break;
case WRITE_16:
is_write = 1;
+ /* fall through */
case READ_16:
first_block =
(((u64) cmd->cmnd[2]) << 56) |
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index 2fad7f03aa02..3eedfd4f8f57 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -1180,7 +1180,6 @@ static struct scsi_host_template driver_template = {
.eh_host_reset_handler = hptiop_reset,
.info = hptiop_info,
.emulated = 0,
- .use_clustering = ENABLE_CLUSTERING,
.proc_name = driver_name,
.shost_attrs = hptiop_attrs,
.slave_configure = hptiop_slave_config,
@@ -1309,11 +1308,11 @@ static int hptiop_probe(struct pci_dev *pcidev, const struct pci_device_id *id)
/* Enable 64bit DMA if possible */
iop_ops = (struct hptiop_adapter_ops *)id->driver_data;
- if (pci_set_dma_mask(pcidev, DMA_BIT_MASK(iop_ops->hw_dma_bit_mask))) {
- if (pci_set_dma_mask(pcidev, DMA_BIT_MASK(32))) {
- printk(KERN_ERR "hptiop: fail to set dma_mask\n");
- goto disable_pci_device;
- }
+ if (dma_set_mask(&pcidev->dev,
+ DMA_BIT_MASK(iop_ops->hw_dma_bit_mask)) ||
+ dma_set_mask(&pcidev->dev, DMA_BIT_MASK(32))) {
+ printk(KERN_ERR "hptiop: fail to set dma_mask\n");
+ goto disable_pci_device;
}
if (pci_request_regions(pcidev, driver_name)) {
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index b64ca977825d..dbaa4f131433 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3100,7 +3100,6 @@ static struct scsi_host_template driver_template = {
.this_id = -1,
.sg_tablesize = SG_ALL,
.max_sectors = IBMVFC_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = ibmvfc_attrs,
.track_queue_depth = 1,
};
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 9df8a1a2299c..1135e74646e2 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -2079,7 +2079,6 @@ static struct scsi_host_template driver_template = {
.can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT,
.this_id = -1,
.sg_tablesize = SG_ALL,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = ibmvscsi_attrs,
};
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index e63aadd10dfd..cc9cae469c4b 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -3695,11 +3695,6 @@ static int ibmvscsis_get_system_info(void)
return 0;
}
-static char *ibmvscsis_get_fabric_name(void)
-{
- return "ibmvscsis";
-}
-
static char *ibmvscsis_get_fabric_wwn(struct se_portal_group *se_tpg)
{
struct ibmvscsis_tport *tport =
@@ -4044,9 +4039,8 @@ static struct configfs_attribute *ibmvscsis_tpg_attrs[] = {
static const struct target_core_fabric_ops ibmvscsis_ops = {
.module = THIS_MODULE,
- .name = "ibmvscsis",
+ .fabric_name = "ibmvscsis",
.max_data_sg_nents = MAX_TXU / PAGE_SIZE,
- .get_fabric_name = ibmvscsis_get_fabric_name,
.tpg_get_wwn = ibmvscsis_get_fabric_wwn,
.tpg_get_tag = ibmvscsis_get_tag,
.tpg_get_default_depth = ibmvscsis_get_default_depth,
diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c
index 8c6627bc8a39..cea7f502e8ca 100644
--- a/drivers/scsi/imm.c
+++ b/drivers/scsi/imm.c
@@ -1110,7 +1110,6 @@ static struct scsi_host_template imm_template = {
.bios_param = imm_biosparam,
.this_id = 7,
.sg_tablesize = SG_ALL,
- .use_clustering = ENABLE_CLUSTERING,
.can_queue = 1,
.slave_alloc = imm_adjust_queue,
};
diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c
index 7a91cf3ff173..eb2778b5c81b 100644
--- a/drivers/scsi/initio.c
+++ b/drivers/scsi/initio.c
@@ -2817,7 +2817,6 @@ static struct scsi_host_template initio_template = {
.can_queue = MAX_TARGETS * i91u_MAXQUEUE,
.this_id = 1,
.sg_tablesize = SG_ALL,
- .use_clustering = ENABLE_CLUSTERING,
};
static int initio_probe_one(struct pci_dev *pdev,
@@ -2840,7 +2839,7 @@ static int initio_probe_one(struct pci_dev *pdev,
reg = 0;
bios_seg = (bios_seg << 8) + ((u16) ((reg & 0xFF00) >> 8));
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
+ if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
printk(KERN_WARNING "i91u: Could not set 32 bit DMA mask\n");
error = -ENODEV;
goto out_disable_device;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 271990bc065b..d1b4025a4503 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6754,7 +6754,6 @@ static struct scsi_host_template driver_template = {
.sg_tablesize = IPR_MAX_SGLIST,
.max_sectors = IPR_IOA_MAX_SECTORS,
.cmd_per_lun = IPR_MAX_CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = ipr_ioa_attrs,
.sdev_attrs = ipr_dev_attrs,
.proc_name = IPR_NAME,
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index ee8a1ecd58fd..e8bc8d328bab 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -365,7 +365,6 @@ static struct scsi_host_template ips_driver_template = {
.this_id = -1,
.sg_tablesize = IPS_MAX_SG,
.cmd_per_lun = 3,
- .use_clustering = ENABLE_CLUSTERING,
.no_write_same = 1,
};
@@ -1801,13 +1800,13 @@ ips_fill_scb_sg_single(ips_ha_t * ha, dma_addr_t busaddr,
}
if (IPS_USE_ENH_SGLIST(ha)) {
scb->sg_list.enh_list[indx].address_lo =
- cpu_to_le32(pci_dma_lo32(busaddr));
+ cpu_to_le32(lower_32_bits(busaddr));
scb->sg_list.enh_list[indx].address_hi =
- cpu_to_le32(pci_dma_hi32(busaddr));
+ cpu_to_le32(upper_32_bits(busaddr));
scb->sg_list.enh_list[indx].length = cpu_to_le32(e_len);
} else {
scb->sg_list.std_list[indx].address =
- cpu_to_le32(pci_dma_lo32(busaddr));
+ cpu_to_le32(lower_32_bits(busaddr));
scb->sg_list.std_list[indx].length = cpu_to_le32(e_len);
}
@@ -6678,7 +6677,6 @@ ips_register_scsi(int index)
sh->sg_tablesize = sh->hostt->sg_tablesize;
sh->can_queue = sh->hostt->can_queue;
sh->cmd_per_lun = sh->hostt->cmd_per_lun;
- sh->use_clustering = sh->hostt->use_clustering;
sh->max_sectors = 128;
sh->max_id = ha->ntargets;
@@ -6926,7 +6924,7 @@ ips_init_phase1(struct pci_dev *pci_dev, int *indexPtr)
* it! Also, don't use 64bit addressing if dma addresses
* are guaranteed to be < 4G.
*/
- if (IPS_ENABLE_DMA64 && IPS_HAS_ENH_SGLIST(ha) &&
+ if (sizeof(dma_addr_t) > 4 && IPS_HAS_ENH_SGLIST(ha) &&
!dma_set_mask(&ha->pcidev->dev, DMA_BIT_MASK(64))) {
(ha)->flags |= IPS_HA_ENH_SG;
} else {
diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h
index db546171e97f..6c0678fb9a67 100644
--- a/drivers/scsi/ips.h
+++ b/drivers/scsi/ips.h
@@ -96,15 +96,6 @@
#define __iomem
#endif
- #define pci_dma_hi32(a) ((a >> 16) >> 16)
- #define pci_dma_lo32(a) (a & 0xffffffff)
-
- #if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G)
- #define IPS_ENABLE_DMA64 (1)
- #else
- #define IPS_ENABLE_DMA64 (0)
- #endif
-
/*
* Adapter address map equates
*/
diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c
index 08c7b1e25fe4..68b90c4f79a3 100644
--- a/drivers/scsi/isci/init.c
+++ b/drivers/scsi/isci/init.c
@@ -163,7 +163,6 @@ static struct scsi_host_template isci_sht = {
.this_id = -1,
.sg_tablesize = SG_ALL,
.max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.eh_abort_handler = sas_eh_abort_handler,
.eh_device_reset_handler = sas_eh_device_reset_handler,
.eh_target_reset_handler = sas_eh_target_reset_handler,
@@ -304,21 +303,10 @@ static int isci_pci_init(struct pci_dev *pdev)
pci_set_master(pdev);
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err)
- return err;
- }
-
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err)
- return err;
- }
-
- return 0;
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err)
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ return err;
}
static int num_controllers(struct pci_dev *pdev)
diff --git a/drivers/scsi/isci/phy.c b/drivers/scsi/isci/phy.c
index 1deca8c5a94f..7f9b3f20e5e4 100644
--- a/drivers/scsi/isci/phy.c
+++ b/drivers/scsi/isci/phy.c
@@ -778,6 +778,7 @@ enum sci_status sci_phy_event_handler(struct isci_phy *iphy, u32 event_code)
break;
case SCU_EVENT_LINK_FAILURE:
scu_link_layer_set_txcomsas_timeout(iphy, SCU_SAS_LINK_LAYER_TXCOMSAS_NEGTIME_DEFAULT);
+ /* fall through */
case SCU_EVENT_HARD_RESET_RECEIVED:
/* Start the oob/sn state machine over again */
sci_change_state(&iphy->sm, SCI_PHY_STARTING);
diff --git a/drivers/scsi/isci/remote_device.c b/drivers/scsi/isci/remote_device.c
index cc51f38b116d..9d29edb9f590 100644
--- a/drivers/scsi/isci/remote_device.c
+++ b/drivers/scsi/isci/remote_device.c
@@ -310,7 +310,7 @@ static void isci_remote_device_not_ready(struct isci_host *ihost,
/* Kill all outstanding requests for the device. */
sci_remote_device_terminate_requests(idev);
- /* Fall through into the default case... */
+ /* Fall through - into the default case... */
default:
clear_bit(IDEV_IO_READY, &idev->flags);
break;
@@ -593,7 +593,7 @@ enum sci_status sci_remote_device_event_handler(struct isci_remote_device *idev,
break;
}
- /* Else, fall through and treat as unhandled... */
+ /* fall through - and treat as unhandled... */
default:
dev_dbg(scirdev_to_dev(idev),
"%s: device: %p event code: %x: %s\n",
diff --git a/drivers/scsi/isci/remote_node_context.c b/drivers/scsi/isci/remote_node_context.c
index e3f2a5359d71..474a43460963 100644
--- a/drivers/scsi/isci/remote_node_context.c
+++ b/drivers/scsi/isci/remote_node_context.c
@@ -601,9 +601,9 @@ enum sci_status sci_remote_node_context_suspend(
__func__, sci_rnc);
return SCI_FAILURE_INVALID_STATE;
}
- /* Fall through and handle like SCI_RNC_POSTING */
+ /* Fall through - and handle like SCI_RNC_POSTING */
case SCI_RNC_RESUMING:
- /* Fall through and handle like SCI_RNC_POSTING */
+ /* Fall through - and handle like SCI_RNC_POSTING */
case SCI_RNC_POSTING:
/* Set the destination state to AWAIT - this signals the
* entry into the SCI_RNC_READY state that a suspension
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index 2f151708b59a..1b18cf55167e 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -894,7 +894,7 @@ sci_io_request_terminate(struct isci_request *ireq)
* and don't wait for the task response.
*/
sci_change_state(&ireq->sm, SCI_REQ_ABORTING);
- /* Fall through and handle like ABORTING... */
+ /* Fall through - and handle like ABORTING... */
case SCI_REQ_ABORTING:
if (!isci_remote_device_is_safe_to_abort(ireq->target_device))
set_bit(IREQ_PENDING_ABORT, &ireq->flags);
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 23354f206533..cae6368ebb98 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -44,6 +44,7 @@
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>
+#include <trace/events/iscsi.h>
#include "iscsi_tcp.h"
@@ -72,6 +73,9 @@ MODULE_PARM_DESC(debug_iscsi_tcp, "Turn on debugging for iscsi_tcp module "
iscsi_conn_printk(KERN_INFO, _conn, \
"%s " dbg_fmt, \
__func__, ##arg); \
+ iscsi_dbg_trace(trace_iscsi_dbg_sw_tcp, \
+ &(_conn)->cls_conn->dev, \
+ "%s " dbg_fmt, __func__, ##arg);\
} while (0);
@@ -980,7 +984,7 @@ static struct scsi_host_template iscsi_sw_tcp_sht = {
.eh_abort_handler = iscsi_eh_abort,
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.slave_alloc = iscsi_sw_tcp_slave_alloc,
.slave_configure = iscsi_sw_tcp_slave_configure,
.target_alloc = iscsi_target_alloc,
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 1e1c0f1b9e69..9192a1d9dec6 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -860,7 +860,6 @@ static void fc_rport_enter_flogi(struct fc_rport_priv *rdata)
static void fc_rport_recv_flogi_req(struct fc_lport *lport,
struct fc_frame *rx_fp)
{
- struct fc_disc *disc;
struct fc_els_flogi *flp;
struct fc_rport_priv *rdata;
struct fc_frame *fp = rx_fp;
@@ -871,7 +870,6 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
FC_RPORT_ID_DBG(lport, sid, "Received FLOGI request\n");
- disc = &lport->disc;
if (!lport->point_to_multipoint) {
rjt_data.reason = ELS_RJT_UNSUP;
rjt_data.explan = ELS_EXPL_NONE;
@@ -1724,6 +1722,7 @@ static void fc_rport_recv_els_req(struct fc_lport *lport, struct fc_frame *fp)
kref_put(&rdata->kref, fc_rport_destroy);
goto busy;
}
+ /* fall through */
default:
FC_RPORT_DBG(rdata,
"Reject ELS 0x%02x while in state %s\n",
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index f78d2e5c1471..b8d325ce8754 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -40,6 +40,7 @@
#include <scsi/scsi_transport.h>
#include <scsi/scsi_transport_iscsi.h>
#include <scsi/libiscsi.h>
+#include <trace/events/iscsi.h>
static int iscsi_dbg_lib_conn;
module_param_named(debug_libiscsi_conn, iscsi_dbg_lib_conn, int,
@@ -68,6 +69,9 @@ MODULE_PARM_DESC(debug_libiscsi_eh,
iscsi_conn_printk(KERN_INFO, _conn, \
"%s " dbg_fmt, \
__func__, ##arg); \
+ iscsi_dbg_trace(trace_iscsi_dbg_conn, \
+ &(_conn)->cls_conn->dev, \
+ "%s " dbg_fmt, __func__, ##arg);\
} while (0);
#define ISCSI_DBG_SESSION(_session, dbg_fmt, arg...) \
@@ -76,6 +80,9 @@ MODULE_PARM_DESC(debug_libiscsi_eh,
iscsi_session_printk(KERN_INFO, _session, \
"%s " dbg_fmt, \
__func__, ##arg); \
+ iscsi_dbg_trace(trace_iscsi_dbg_session, \
+ &(_session)->cls_session->dev, \
+ "%s " dbg_fmt, __func__, ##arg); \
} while (0);
#define ISCSI_DBG_EH(_session, dbg_fmt, arg...) \
@@ -84,6 +91,9 @@ MODULE_PARM_DESC(debug_libiscsi_eh,
iscsi_session_printk(KERN_INFO, _session, \
"%s " dbg_fmt, \
__func__, ##arg); \
+ iscsi_dbg_trace(trace_iscsi_dbg_eh, \
+ &(_session)->cls_session->dev, \
+ "%s " dbg_fmt, __func__, ##arg); \
} while (0);
inline void iscsi_conn_queue_work(struct iscsi_conn *conn)
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 4fcb9e65be57..8a6b1b3f8277 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -43,6 +43,7 @@
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>
+#include <trace/events/iscsi.h>
#include "iscsi_tcp.h"
@@ -65,6 +66,9 @@ MODULE_PARM_DESC(debug_libiscsi_tcp, "Turn on debugging for libiscsi_tcp "
iscsi_conn_printk(KERN_INFO, _conn, \
"%s " dbg_fmt, \
__func__, ##arg); \
+ iscsi_dbg_trace(trace_iscsi_dbg_tcp, \
+ &(_conn)->cls_conn->dev, \
+ "%s " dbg_fmt, __func__, ##arg);\
} while (0);
static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
diff --git a/drivers/scsi/libsas/Makefile b/drivers/scsi/libsas/Makefile
index 2e70140f70c3..5d51520c6f23 100644
--- a/drivers/scsi/libsas/Makefile
+++ b/drivers/scsi/libsas/Makefile
@@ -26,10 +26,11 @@ libsas-y += sas_init.o \
sas_phy.o \
sas_port.o \
sas_event.o \
- sas_dump.o \
sas_discover.o \
sas_expander.o \
sas_scsi_host.o \
sas_task.o
libsas-$(CONFIG_SCSI_SAS_ATA) += sas_ata.o
libsas-$(CONFIG_SCSI_SAS_HOST_SMP) += sas_host_smp.o
+
+ccflags-y := -DDEBUG
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 4f6cdf53e913..6f93fee2b21b 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -75,8 +75,8 @@ static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts)
case SAS_OPEN_TO:
case SAS_OPEN_REJECT:
- SAS_DPRINTK("%s: Saw error %d. What to do?\n",
- __func__, ts->stat);
+ pr_warn("%s: Saw error %d. What to do?\n",
+ __func__, ts->stat);
return AC_ERR_OTHER;
case SAM_STAT_CHECK_CONDITION:
@@ -151,8 +151,7 @@ static void sas_ata_task_done(struct sas_task *task)
} else {
ac = sas_to_ata_err(stat);
if (ac) {
- SAS_DPRINTK("%s: SAS error %x\n", __func__,
- stat->stat);
+ pr_warn("%s: SAS error %x\n", __func__, stat->stat);
/* We saw a SAS error. Send a vague error. */
if (!link->sactive) {
qc->err_mask = ac;
@@ -237,7 +236,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
ret = i->dft->lldd_execute_task(task, GFP_ATOMIC);
if (ret) {
- SAS_DPRINTK("lldd_execute_task returned: %d\n", ret);
+ pr_debug("lldd_execute_task returned: %d\n", ret);
if (qc->scsicmd)
ASSIGN_SAS_TASK(qc->scsicmd, NULL);
@@ -282,9 +281,9 @@ int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy)
res = sas_get_report_phy_sata(dev->parent, phy->phy_id,
&dev->sata_dev.rps_resp);
if (res) {
- SAS_DPRINTK("report phy sata to %016llx:0x%x returned "
- "0x%x\n", SAS_ADDR(dev->parent->sas_addr),
- phy->phy_id, res);
+ pr_debug("report phy sata to %016llx:0x%x returned 0x%x\n",
+ SAS_ADDR(dev->parent->sas_addr),
+ phy->phy_id, res);
return res;
}
memcpy(dev->frame_rcvd, &dev->sata_dev.rps_resp.rps.fis,
@@ -375,7 +374,7 @@ static int sas_ata_printk(const char *level, const struct domain_device *ddev,
vaf.fmt = fmt;
vaf.va = &args;
- r = printk("%ssas: ata%u: %s: %pV",
+ r = printk("%s" SAS_FMT "ata%u: %s: %pV",
level, ap->print_id, dev_name(dev), &vaf);
va_end(args);
@@ -431,8 +430,7 @@ static void sas_ata_internal_abort(struct sas_task *task)
if (task->task_state_flags & SAS_TASK_STATE_ABORTED ||
task->task_state_flags & SAS_TASK_STATE_DONE) {
spin_unlock_irqrestore(&task->task_state_lock, flags);
- SAS_DPRINTK("%s: Task %p already finished.\n", __func__,
- task);
+ pr_debug("%s: Task %p already finished.\n", __func__, task);
goto out;
}
task->task_state_flags |= SAS_TASK_STATE_ABORTED;
@@ -452,7 +450,7 @@ static void sas_ata_internal_abort(struct sas_task *task)
* aborted ata tasks, otherwise we (likely) leak the sas task
* here
*/
- SAS_DPRINTK("%s: Task %p leaked.\n", __func__, task);
+ pr_warn("%s: Task %p leaked.\n", __func__, task);
if (!(task->task_state_flags & SAS_TASK_STATE_DONE))
task->task_state_flags &= ~SAS_TASK_STATE_ABORTED;
@@ -558,7 +556,7 @@ int sas_ata_init(struct domain_device *found_dev)
ata_host = kzalloc(sizeof(*ata_host), GFP_KERNEL);
if (!ata_host) {
- SAS_DPRINTK("ata host alloc failed.\n");
+ pr_err("ata host alloc failed.\n");
return -ENOMEM;
}
@@ -566,7 +564,7 @@ int sas_ata_init(struct domain_device *found_dev)
ap = ata_sas_port_alloc(ata_host, &sata_port_info, shost);
if (!ap) {
- SAS_DPRINTK("ata_sas_port_alloc failed.\n");
+ pr_err("ata_sas_port_alloc failed.\n");
rc = -ENODEV;
goto free_host;
}
@@ -601,12 +599,7 @@ void sas_ata_task_abort(struct sas_task *task)
/* Bounce SCSI-initiated commands to the SCSI EH */
if (qc->scsicmd) {
- struct request_queue *q = qc->scsicmd->device->request_queue;
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
blk_abort_request(qc->scsicmd->request);
- spin_unlock_irqrestore(q->queue_lock, flags);
return;
}
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index dde433aa59c2..726ada9b8c79 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -128,7 +128,7 @@ static int sas_get_port_device(struct asd_sas_port *port)
SAS_FANOUT_EXPANDER_DEVICE);
break;
default:
- printk("ERROR: Unidentified device type %d\n", dev->dev_type);
+ pr_warn("ERROR: Unidentified device type %d\n", dev->dev_type);
rphy = NULL;
break;
}
@@ -186,10 +186,9 @@ int sas_notify_lldd_dev_found(struct domain_device *dev)
res = i->dft->lldd_dev_found(dev);
if (res) {
- printk("sas: driver on pcidev %s cannot handle "
- "device %llx, error:%d\n",
- dev_name(sas_ha->dev),
- SAS_ADDR(dev->sas_addr), res);
+ pr_warn("driver on host %s cannot handle device %llx, error:%d\n",
+ dev_name(sas_ha->dev),
+ SAS_ADDR(dev->sas_addr), res);
}
set_bit(SAS_DEV_FOUND, &dev->state);
kref_get(&dev->kref);
@@ -456,8 +455,8 @@ static void sas_discover_domain(struct work_struct *work)
return;
dev = port->port_dev;
- SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id,
- task_pid_nr(current));
+ pr_debug("DOING DISCOVERY on port %d, pid:%d\n", port->id,
+ task_pid_nr(current));
switch (dev->dev_type) {
case SAS_END_DEVICE:
@@ -473,12 +472,12 @@ static void sas_discover_domain(struct work_struct *work)
error = sas_discover_sata(dev);
break;
#else
- SAS_DPRINTK("ATA device seen but CONFIG_SCSI_SAS_ATA=N so cannot attach\n");
+ pr_notice("ATA device seen but CONFIG_SCSI_SAS_ATA=N so cannot attach\n");
/* Fall through */
#endif
default:
error = -ENXIO;
- SAS_DPRINTK("unhandled device %d\n", dev->dev_type);
+ pr_err("unhandled device %d\n", dev->dev_type);
break;
}
@@ -495,8 +494,8 @@ static void sas_discover_domain(struct work_struct *work)
sas_probe_devices(port);
- SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id,
- task_pid_nr(current), error);
+ pr_debug("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id,
+ task_pid_nr(current), error);
}
static void sas_revalidate_domain(struct work_struct *work)
@@ -510,22 +509,22 @@ static void sas_revalidate_domain(struct work_struct *work)
/* prevent revalidation from finding sata links in recovery */
mutex_lock(&ha->disco_mutex);
if (test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state)) {
- SAS_DPRINTK("REVALIDATION DEFERRED on port %d, pid:%d\n",
- port->id, task_pid_nr(current));
+ pr_debug("REVALIDATION DEFERRED on port %d, pid:%d\n",
+ port->id, task_pid_nr(current));
goto out;
}
clear_bit(DISCE_REVALIDATE_DOMAIN, &port->disc.pending);
- SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id,
- task_pid_nr(current));
+ pr_debug("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id,
+ task_pid_nr(current));
if (ddev && (ddev->dev_type == SAS_FANOUT_EXPANDER_DEVICE ||
ddev->dev_type == SAS_EDGE_EXPANDER_DEVICE))
res = sas_ex_revalidate_domain(ddev);
- SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n",
- port->id, task_pid_nr(current), res);
+ pr_debug("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n",
+ port->id, task_pid_nr(current), res);
out:
mutex_unlock(&ha->disco_mutex);
diff --git a/drivers/scsi/libsas/sas_dump.c b/drivers/scsi/libsas/sas_dump.c
deleted file mode 100644
index 7e5d262e7a7d..000000000000
--- a/drivers/scsi/libsas/sas_dump.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Serial Attached SCSI (SAS) Dump/Debugging routines
- *
- * Copyright (C) 2005 Adaptec, Inc. All rights reserved.
- * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
- *
- * This file is licensed under GPLv2.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- */
-
-#include "sas_dump.h"
-
-static const char *sas_porte_str[] = {
- [0] = "PORTE_BYTES_DMAED",
- [1] = "PORTE_BROADCAST_RCVD",
- [2] = "PORTE_LINK_RESET_ERR",
- [3] = "PORTE_TIMER_EVENT",
- [4] = "PORTE_HARD_RESET",
-};
-
-static const char *sas_phye_str[] = {
- [0] = "PHYE_LOSS_OF_SIGNAL",
- [1] = "PHYE_OOB_DONE",
- [2] = "PHYE_OOB_ERROR",
- [3] = "PHYE_SPINUP_HOLD",
- [4] = "PHYE_RESUME_TIMEOUT",
-};
-
-void sas_dprint_porte(int phyid, enum port_event pe)
-{
- SAS_DPRINTK("phy%d: port event: %s\n", phyid, sas_porte_str[pe]);
-}
-void sas_dprint_phye(int phyid, enum phy_event pe)
-{
- SAS_DPRINTK("phy%d: phy event: %s\n", phyid, sas_phye_str[pe]);
-}
-
-void sas_dump_port(struct asd_sas_port *port)
-{
- SAS_DPRINTK("port%d: class:0x%x\n", port->id, port->class);
- SAS_DPRINTK("port%d: sas_addr:%llx\n", port->id,
- SAS_ADDR(port->sas_addr));
- SAS_DPRINTK("port%d: attached_sas_addr:%llx\n", port->id,
- SAS_ADDR(port->attached_sas_addr));
- SAS_DPRINTK("port%d: iproto:0x%x\n", port->id, port->iproto);
- SAS_DPRINTK("port%d: tproto:0x%x\n", port->id, port->tproto);
- SAS_DPRINTK("port%d: oob_mode:0x%x\n", port->id, port->oob_mode);
- SAS_DPRINTK("port%d: num_phys:%d\n", port->id, port->num_phys);
-}
diff --git a/drivers/scsi/libsas/sas_dump.h b/drivers/scsi/libsas/sas_dump.h
deleted file mode 100644
index 6aaee6b0fcdb..000000000000
--- a/drivers/scsi/libsas/sas_dump.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Serial Attached SCSI (SAS) Dump/Debugging routines header file
- *
- * Copyright (C) 2005 Adaptec, Inc. All rights reserved.
- * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
- *
- * This file is licensed under GPLv2.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- */
-
-#include "sas_internal.h"
-
-void sas_dprint_porte(int phyid, enum port_event pe);
-void sas_dprint_phye(int phyid, enum phy_event pe);
-void sas_dump_port(struct asd_sas_port *port);
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index ae923eb6de95..b1e0f7d2b396 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -25,7 +25,6 @@
#include <linux/export.h>
#include <scsi/scsi_host.h>
#include "sas_internal.h"
-#include "sas_dump.h"
int sas_queue_work(struct sas_ha_struct *ha, struct sas_work *sw)
{
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 0d1f72752ca2..17eb4185f29d 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -99,17 +99,17 @@ static int smp_execute_task_sg(struct domain_device *dev,
if (res) {
del_timer(&task->slow_task->timer);
- SAS_DPRINTK("executing SMP task failed:%d\n", res);
+ pr_notice("executing SMP task failed:%d\n", res);
break;
}
wait_for_completion(&task->slow_task->completion);
res = -ECOMM;
if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
- SAS_DPRINTK("smp task timed out or aborted\n");
+ pr_notice("smp task timed out or aborted\n");
i->dft->lldd_abort_task(task);
if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
- SAS_DPRINTK("SMP task aborted and not done\n");
+ pr_notice("SMP task aborted and not done\n");
break;
}
}
@@ -134,11 +134,11 @@ static int smp_execute_task_sg(struct domain_device *dev,
task->task_status.stat == SAS_DEVICE_UNKNOWN)
break;
else {
- SAS_DPRINTK("%s: task to dev %016llx response: 0x%x "
- "status 0x%x\n", __func__,
- SAS_ADDR(dev->sas_addr),
- task->task_status.resp,
- task->task_status.stat);
+ pr_notice("%s: task to dev %016llx response: 0x%x status 0x%x\n",
+ __func__,
+ SAS_ADDR(dev->sas_addr),
+ task->task_status.resp,
+ task->task_status.stat);
sas_free_task(task);
task = NULL;
}
@@ -347,11 +347,11 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, void *rsp)
if (test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state))
set_bit(DISCE_REVALIDATE_DOMAIN, &dev->port->disc.pending);
- SAS_DPRINTK("%sex %016llx phy%02d:%c:%X attached: %016llx (%s)\n",
- test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state) ? "ata: " : "",
- SAS_ADDR(dev->sas_addr), phy->phy_id,
- sas_route_char(dev, phy), phy->linkrate,
- SAS_ADDR(phy->attached_sas_addr), type);
+ pr_debug("%sex %016llx phy%02d:%c:%X attached: %016llx (%s)\n",
+ test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state) ? "ata: " : "",
+ SAS_ADDR(dev->sas_addr), phy->phy_id,
+ sas_route_char(dev, phy), phy->linkrate,
+ SAS_ADDR(phy->attached_sas_addr), type);
}
/* check if we have an existing attached ata device on this expander phy */
@@ -393,7 +393,7 @@ static int sas_ex_phy_discover_helper(struct domain_device *dev, u8 *disc_req,
return res;
dr = &((struct smp_resp *)disc_resp)->disc;
if (memcmp(dev->sas_addr, dr->attached_sas_addr, SAS_ADDR_SIZE) == 0) {
- sas_printk("Found loopback topology, just ignore it!\n");
+ pr_notice("Found loopback topology, just ignore it!\n");
return 0;
}
sas_set_ex_phy(dev, single, disc_resp);
@@ -500,12 +500,12 @@ static int sas_ex_general(struct domain_device *dev)
RG_RESP_SIZE);
if (res) {
- SAS_DPRINTK("RG to ex %016llx failed:0x%x\n",
- SAS_ADDR(dev->sas_addr), res);
+ pr_notice("RG to ex %016llx failed:0x%x\n",
+ SAS_ADDR(dev->sas_addr), res);
goto out;
} else if (rg_resp->result != SMP_RESP_FUNC_ACC) {
- SAS_DPRINTK("RG:ex %016llx returned SMP result:0x%x\n",
- SAS_ADDR(dev->sas_addr), rg_resp->result);
+ pr_debug("RG:ex %016llx returned SMP result:0x%x\n",
+ SAS_ADDR(dev->sas_addr), rg_resp->result);
res = rg_resp->result;
goto out;
}
@@ -513,8 +513,8 @@ static int sas_ex_general(struct domain_device *dev)
ex_assign_report_general(dev, rg_resp);
if (dev->ex_dev.configuring) {
- SAS_DPRINTK("RG: ex %llx self-configuring...\n",
- SAS_ADDR(dev->sas_addr));
+ pr_debug("RG: ex %llx self-configuring...\n",
+ SAS_ADDR(dev->sas_addr));
schedule_timeout_interruptible(5*HZ);
} else
break;
@@ -568,12 +568,12 @@ static int sas_ex_manuf_info(struct domain_device *dev)
res = smp_execute_task(dev, mi_req, MI_REQ_SIZE, mi_resp,MI_RESP_SIZE);
if (res) {
- SAS_DPRINTK("MI: ex %016llx failed:0x%x\n",
- SAS_ADDR(dev->sas_addr), res);
+ pr_notice("MI: ex %016llx failed:0x%x\n",
+ SAS_ADDR(dev->sas_addr), res);
goto out;
} else if (mi_resp[2] != SMP_RESP_FUNC_ACC) {
- SAS_DPRINTK("MI ex %016llx returned SMP result:0x%x\n",
- SAS_ADDR(dev->sas_addr), mi_resp[2]);
+ pr_debug("MI ex %016llx returned SMP result:0x%x\n",
+ SAS_ADDR(dev->sas_addr), mi_resp[2]);
goto out;
}
@@ -836,10 +836,9 @@ static struct domain_device *sas_ex_discover_end_dev(
res = sas_discover_sata(child);
if (res) {
- SAS_DPRINTK("sas_discover_sata() for device %16llx at "
- "%016llx:0x%x returned 0x%x\n",
- SAS_ADDR(child->sas_addr),
- SAS_ADDR(parent->sas_addr), phy_id, res);
+ pr_notice("sas_discover_sata() for device %16llx at %016llx:0x%x returned 0x%x\n",
+ SAS_ADDR(child->sas_addr),
+ SAS_ADDR(parent->sas_addr), phy_id, res);
goto out_list_del;
}
} else
@@ -861,16 +860,15 @@ static struct domain_device *sas_ex_discover_end_dev(
res = sas_discover_end_dev(child);
if (res) {
- SAS_DPRINTK("sas_discover_end_dev() for device %16llx "
- "at %016llx:0x%x returned 0x%x\n",
- SAS_ADDR(child->sas_addr),
- SAS_ADDR(parent->sas_addr), phy_id, res);
+ pr_notice("sas_discover_end_dev() for device %16llx at %016llx:0x%x returned 0x%x\n",
+ SAS_ADDR(child->sas_addr),
+ SAS_ADDR(parent->sas_addr), phy_id, res);
goto out_list_del;
}
} else {
- SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
- phy->attached_tproto, SAS_ADDR(parent->sas_addr),
- phy_id);
+ pr_notice("target proto 0x%x at %016llx:0x%x not handled\n",
+ phy->attached_tproto, SAS_ADDR(parent->sas_addr),
+ phy_id);
goto out_free;
}
@@ -927,11 +925,10 @@ static struct domain_device *sas_ex_discover_expander(
int res;
if (phy->routing_attr == DIRECT_ROUTING) {
- SAS_DPRINTK("ex %016llx:0x%x:D <--> ex %016llx:0x%x is not "
- "allowed\n",
- SAS_ADDR(parent->sas_addr), phy_id,
- SAS_ADDR(phy->attached_sas_addr),
- phy->attached_phy_id);
+ pr_warn("ex %016llx:0x%x:D <--> ex %016llx:0x%x is not allowed\n",
+ SAS_ADDR(parent->sas_addr), phy_id,
+ SAS_ADDR(phy->attached_sas_addr),
+ phy->attached_phy_id);
return NULL;
}
child = sas_alloc_device();
@@ -1038,25 +1035,24 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
ex_phy->attached_dev_type != SAS_FANOUT_EXPANDER_DEVICE &&
ex_phy->attached_dev_type != SAS_EDGE_EXPANDER_DEVICE &&
ex_phy->attached_dev_type != SAS_SATA_PENDING) {
- SAS_DPRINTK("unknown device type(0x%x) attached to ex %016llx "
- "phy 0x%x\n", ex_phy->attached_dev_type,
- SAS_ADDR(dev->sas_addr),
- phy_id);
+ pr_warn("unknown device type(0x%x) attached to ex %016llx phy 0x%x\n",
+ ex_phy->attached_dev_type,
+ SAS_ADDR(dev->sas_addr),
+ phy_id);
return 0;
}
res = sas_configure_routing(dev, ex_phy->attached_sas_addr);
if (res) {
- SAS_DPRINTK("configure routing for dev %016llx "
- "reported 0x%x. Forgotten\n",
- SAS_ADDR(ex_phy->attached_sas_addr), res);
+ pr_notice("configure routing for dev %016llx reported 0x%x. Forgotten\n",
+ SAS_ADDR(ex_phy->attached_sas_addr), res);
sas_disable_routing(dev, ex_phy->attached_sas_addr);
return res;
}
if (sas_ex_join_wide_port(dev, phy_id)) {
- SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n",
- phy_id, SAS_ADDR(ex_phy->attached_sas_addr));
+ pr_debug("Attaching ex phy%d to wide port %016llx\n",
+ phy_id, SAS_ADDR(ex_phy->attached_sas_addr));
return res;
}
@@ -1067,12 +1063,11 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
break;
case SAS_FANOUT_EXPANDER_DEVICE:
if (SAS_ADDR(dev->port->disc.fanout_sas_addr)) {
- SAS_DPRINTK("second fanout expander %016llx phy 0x%x "
- "attached to ex %016llx phy 0x%x\n",
- SAS_ADDR(ex_phy->attached_sas_addr),
- ex_phy->attached_phy_id,
- SAS_ADDR(dev->sas_addr),
- phy_id);
+ pr_debug("second fanout expander %016llx phy 0x%x attached to ex %016llx phy 0x%x\n",
+ SAS_ADDR(ex_phy->attached_sas_addr),
+ ex_phy->attached_phy_id,
+ SAS_ADDR(dev->sas_addr),
+ phy_id);
sas_ex_disable_phy(dev, phy_id);
break;
} else
@@ -1101,9 +1096,8 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
SAS_ADDR(child->sas_addr)) {
ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED;
if (sas_ex_join_wide_port(dev, i))
- SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n",
- i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr));
-
+ pr_debug("Attaching ex phy%d to wide port %016llx\n",
+ i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr));
}
}
}
@@ -1154,13 +1148,11 @@ static int sas_check_level_subtractive_boundary(struct domain_device *dev)
if (sas_find_sub_addr(child, s2) &&
(SAS_ADDR(sub_addr) != SAS_ADDR(s2))) {
- SAS_DPRINTK("ex %016llx->%016llx-?->%016llx "
- "diverges from subtractive "
- "boundary %016llx\n",
- SAS_ADDR(dev->sas_addr),
- SAS_ADDR(child->sas_addr),
- SAS_ADDR(s2),
- SAS_ADDR(sub_addr));
+ pr_notice("ex %016llx->%016llx-?->%016llx diverges from subtractive boundary %016llx\n",
+ SAS_ADDR(dev->sas_addr),
+ SAS_ADDR(child->sas_addr),
+ SAS_ADDR(s2),
+ SAS_ADDR(sub_addr));
sas_ex_disable_port(child, s2);
}
@@ -1239,12 +1231,10 @@ static int sas_check_ex_subtractive_boundary(struct domain_device *dev)
else if (SAS_ADDR(sub_sas_addr) !=
SAS_ADDR(phy->attached_sas_addr)) {
- SAS_DPRINTK("ex %016llx phy 0x%x "
- "diverges(%016llx) on subtractive "
- "boundary(%016llx). Disabled\n",
- SAS_ADDR(dev->sas_addr), i,
- SAS_ADDR(phy->attached_sas_addr),
- SAS_ADDR(sub_sas_addr));
+ pr_notice("ex %016llx phy 0x%x diverges(%016llx) on subtractive boundary(%016llx). Disabled\n",
+ SAS_ADDR(dev->sas_addr), i,
+ SAS_ADDR(phy->attached_sas_addr),
+ SAS_ADDR(sub_sas_addr));
sas_ex_disable_phy(dev, i);
}
}
@@ -1262,19 +1252,17 @@ static void sas_print_parent_topology_bug(struct domain_device *child,
};
struct domain_device *parent = child->parent;
- sas_printk("%s ex %016llx phy 0x%x <--> %s ex %016llx "
- "phy 0x%x has %c:%c routing link!\n",
-
- ex_type[parent->dev_type],
- SAS_ADDR(parent->sas_addr),
- parent_phy->phy_id,
+ pr_notice("%s ex %016llx phy 0x%x <--> %s ex %016llx phy 0x%x has %c:%c routing link!\n",
+ ex_type[parent->dev_type],
+ SAS_ADDR(parent->sas_addr),
+ parent_phy->phy_id,
- ex_type[child->dev_type],
- SAS_ADDR(child->sas_addr),
- child_phy->phy_id,
+ ex_type[child->dev_type],
+ SAS_ADDR(child->sas_addr),
+ child_phy->phy_id,
- sas_route_char(parent, parent_phy),
- sas_route_char(child, child_phy));
+ sas_route_char(parent, parent_phy),
+ sas_route_char(child, child_phy));
}
static int sas_check_eeds(struct domain_device *child,
@@ -1286,13 +1274,12 @@ static int sas_check_eeds(struct domain_device *child,
if (SAS_ADDR(parent->port->disc.fanout_sas_addr) != 0) {
res = -ENODEV;
- SAS_DPRINTK("edge ex %016llx phy S:0x%x <--> edge ex %016llx "
- "phy S:0x%x, while there is a fanout ex %016llx\n",
- SAS_ADDR(parent->sas_addr),
- parent_phy->phy_id,
- SAS_ADDR(child->sas_addr),
- child_phy->phy_id,
- SAS_ADDR(parent->port->disc.fanout_sas_addr));
+ pr_warn("edge ex %016llx phy S:0x%x <--> edge ex %016llx phy S:0x%x, while there is a fanout ex %016llx\n",
+ SAS_ADDR(parent->sas_addr),
+ parent_phy->phy_id,
+ SAS_ADDR(child->sas_addr),
+ child_phy->phy_id,
+ SAS_ADDR(parent->port->disc.fanout_sas_addr));
} else if (SAS_ADDR(parent->port->disc.eeds_a) == 0) {
memcpy(parent->port->disc.eeds_a, parent->sas_addr,
SAS_ADDR_SIZE);
@@ -1310,12 +1297,11 @@ static int sas_check_eeds(struct domain_device *child,
;
else {
res = -ENODEV;
- SAS_DPRINTK("edge ex %016llx phy 0x%x <--> edge ex %016llx "
- "phy 0x%x link forms a third EEDS!\n",
- SAS_ADDR(parent->sas_addr),
- parent_phy->phy_id,
- SAS_ADDR(child->sas_addr),
- child_phy->phy_id);
+ pr_warn("edge ex %016llx phy 0x%x <--> edge ex %016llx phy 0x%x link forms a third EEDS!\n",
+ SAS_ADDR(parent->sas_addr),
+ parent_phy->phy_id,
+ SAS_ADDR(child->sas_addr),
+ child_phy->phy_id);
}
return res;
@@ -1429,14 +1415,13 @@ static int sas_configure_present(struct domain_device *dev, int phy_id,
goto out;
res = rri_resp[2];
if (res == SMP_RESP_NO_INDEX) {
- SAS_DPRINTK("overflow of indexes: dev %016llx "
- "phy 0x%x index 0x%x\n",
- SAS_ADDR(dev->sas_addr), phy_id, i);
+ pr_warn("overflow of indexes: dev %016llx phy 0x%x index 0x%x\n",
+ SAS_ADDR(dev->sas_addr), phy_id, i);
goto out;
} else if (res != SMP_RESP_FUNC_ACC) {
- SAS_DPRINTK("%s: dev %016llx phy 0x%x index 0x%x "
- "result 0x%x\n", __func__,
- SAS_ADDR(dev->sas_addr), phy_id, i, res);
+ pr_notice("%s: dev %016llx phy 0x%x index 0x%x result 0x%x\n",
+ __func__, SAS_ADDR(dev->sas_addr), phy_id,
+ i, res);
goto out;
}
if (SAS_ADDR(sas_addr) != 0) {
@@ -1500,9 +1485,8 @@ static int sas_configure_set(struct domain_device *dev, int phy_id,
goto out;
res = cri_resp[2];
if (res == SMP_RESP_NO_INDEX) {
- SAS_DPRINTK("overflow of indexes: dev %016llx phy 0x%x "
- "index 0x%x\n",
- SAS_ADDR(dev->sas_addr), phy_id, index);
+ pr_warn("overflow of indexes: dev %016llx phy 0x%x index 0x%x\n",
+ SAS_ADDR(dev->sas_addr), phy_id, index);
}
out:
kfree(cri_req);
@@ -1549,8 +1533,8 @@ static int sas_configure_parent(struct domain_device *parent,
}
if (ex_parent->conf_route_table == 0) {
- SAS_DPRINTK("ex %016llx has self-configuring routing table\n",
- SAS_ADDR(parent->sas_addr));
+ pr_debug("ex %016llx has self-configuring routing table\n",
+ SAS_ADDR(parent->sas_addr));
return 0;
}
@@ -1611,8 +1595,8 @@ static int sas_discover_expander(struct domain_device *dev)
res = sas_expander_discover(dev);
if (res) {
- SAS_DPRINTK("expander %016llx discovery failed(0x%x)\n",
- SAS_ADDR(dev->sas_addr), res);
+ pr_warn("expander %016llx discovery failed(0x%x)\n",
+ SAS_ADDR(dev->sas_addr), res);
goto out_err;
}
@@ -1856,10 +1840,10 @@ static int sas_find_bcast_dev(struct domain_device *dev,
if (phy_id != -1) {
*src_dev = dev;
ex->ex_change_count = ex_change_count;
- SAS_DPRINTK("Expander phy change count has changed\n");
+ pr_info("Expander phy change count has changed\n");
return res;
} else
- SAS_DPRINTK("Expander phys DID NOT change\n");
+ pr_info("Expander phys DID NOT change\n");
}
list_for_each_entry(ch, &ex->children, siblings) {
if (ch->dev_type == SAS_EDGE_EXPANDER_DEVICE || ch->dev_type == SAS_FANOUT_EXPANDER_DEVICE) {
@@ -1969,8 +1953,8 @@ static int sas_discover_new(struct domain_device *dev, int phy_id)
struct domain_device *child;
int res;
- SAS_DPRINTK("ex %016llx phy%d new device attached\n",
- SAS_ADDR(dev->sas_addr), phy_id);
+ pr_debug("ex %016llx phy%d new device attached\n",
+ SAS_ADDR(dev->sas_addr), phy_id);
res = sas_ex_phy_discover(dev, phy_id);
if (res)
return res;
@@ -2048,15 +2032,15 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, bool last)
if (ata_dev && phy->attached_dev_type == SAS_SATA_PENDING)
action = ", needs recovery";
- SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter%s\n",
- SAS_ADDR(dev->sas_addr), phy_id, action);
+ pr_debug("ex %016llx phy 0x%x broadcast flutter%s\n",
+ SAS_ADDR(dev->sas_addr), phy_id, action);
return res;
}
/* we always have to delete the old device when we went here */
- SAS_DPRINTK("ex %016llx phy 0x%x replace %016llx\n",
- SAS_ADDR(dev->sas_addr), phy_id,
- SAS_ADDR(phy->attached_sas_addr));
+ pr_info("ex %016llx phy 0x%x replace %016llx\n",
+ SAS_ADDR(dev->sas_addr), phy_id,
+ SAS_ADDR(phy->attached_sas_addr));
sas_unregister_devs_sas_addr(dev, phy_id, last);
return sas_discover_new(dev, phy_id);
@@ -2084,8 +2068,8 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id)
int i;
bool last = true; /* is this the last phy of the port */
- SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n",
- SAS_ADDR(dev->sas_addr), phy_id);
+ pr_debug("ex %016llx phy%d originated BROADCAST(CHANGE)\n",
+ SAS_ADDR(dev->sas_addr), phy_id);
if (SAS_ADDR(changed_phy->attached_sas_addr) != 0) {
for (i = 0; i < ex->num_phys; i++) {
@@ -2095,8 +2079,8 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id)
continue;
if (SAS_ADDR(phy->attached_sas_addr) ==
SAS_ADDR(changed_phy->attached_sas_addr)) {
- SAS_DPRINTK("phy%d part of wide port with "
- "phy%d\n", phy_id, i);
+ pr_debug("phy%d part of wide port with phy%d\n",
+ phy_id, i);
last = false;
break;
}
@@ -2154,23 +2138,23 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
case SAS_FANOUT_EXPANDER_DEVICE:
break;
default:
- printk("%s: can we send a smp request to a device?\n",
+ pr_err("%s: can we send a smp request to a device?\n",
__func__);
goto out;
}
dev = sas_find_dev_by_rphy(rphy);
if (!dev) {
- printk("%s: fail to find a domain_device?\n", __func__);
+ pr_err("%s: fail to find a domain_device?\n", __func__);
goto out;
}
/* do we need to support multiple segments? */
if (job->request_payload.sg_cnt > 1 ||
job->reply_payload.sg_cnt > 1) {
- printk("%s: multiple segments req %u, rsp %u\n",
- __func__, job->request_payload.payload_len,
- job->reply_payload.payload_len);
+ pr_info("%s: multiple segments req %u, rsp %u\n",
+ __func__, job->request_payload.payload_len,
+ job->reply_payload.payload_len);
goto out;
}
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index ede0af78144f..221340ee8651 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -128,19 +128,19 @@ int sas_register_ha(struct sas_ha_struct *sas_ha)
error = sas_register_phys(sas_ha);
if (error) {
- printk(KERN_NOTICE "couldn't register sas phys:%d\n", error);
+ pr_notice("couldn't register sas phys:%d\n", error);
return error;
}
error = sas_register_ports(sas_ha);
if (error) {
- printk(KERN_NOTICE "couldn't register sas ports:%d\n", error);
+ pr_notice("couldn't register sas ports:%d\n", error);
goto Undo_phys;
}
error = sas_init_events(sas_ha);
if (error) {
- printk(KERN_NOTICE "couldn't start event thread:%d\n", error);
+ pr_notice("couldn't start event thread:%d\n", error);
goto Undo_ports;
}
@@ -623,8 +623,8 @@ struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
if (atomic_read(&phy->event_nr) > phy->ha->event_thres) {
if (i->dft->lldd_control_phy) {
if (cmpxchg(&phy->in_shutdown, 0, 1) == 0) {
- sas_printk("The phy%02d bursting events, shut it down.\n",
- phy->id);
+ pr_notice("The phy%02d bursting events, shut it down.\n",
+ phy->id);
sas_notify_phy_event(phy, PHYE_SHUTDOWN);
}
} else {
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 50e12d662ffe..2cdb981cf476 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -32,9 +32,13 @@
#include <scsi/libsas.h>
#include <scsi/sas_ata.h>
-#define sas_printk(fmt, ...) printk(KERN_NOTICE "sas: " fmt, ## __VA_ARGS__)
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define SAS_FMT "sas: "
-#define SAS_DPRINTK(fmt, ...) printk(KERN_DEBUG "sas: " fmt, ## __VA_ARGS__)
+#define pr_fmt(fmt) SAS_FMT fmt
#define TO_SAS_TASK(_scsi_cmd) ((void *)(_scsi_cmd)->host_scribble)
#define ASSIGN_SAS_TASK(_sc, _t) do { (_sc)->host_scribble = (void *) _t; } while (0)
@@ -120,10 +124,10 @@ static inline void sas_smp_host_handler(struct bsg_job *job,
static inline void sas_fail_probe(struct domain_device *dev, const char *func, int err)
{
- SAS_DPRINTK("%s: for %s device %16llx returned %d\n",
- func, dev->parent ? "exp-attached" :
- "direct-attached",
- SAS_ADDR(dev->sas_addr), err);
+ pr_warn("%s: for %s device %16llx returned %d\n",
+ func, dev->parent ? "exp-attached" :
+ "direct-attached",
+ SAS_ADDR(dev->sas_addr), err);
sas_unregister_dev(dev->port, dev);
}
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
index bf3e1b979ca6..0374243c85d0 100644
--- a/drivers/scsi/libsas/sas_phy.c
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -122,11 +122,11 @@ static void sas_phye_shutdown(struct work_struct *work)
phy->enabled = 0;
ret = i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE, NULL);
if (ret)
- sas_printk("lldd disable phy%02d returned %d\n",
- phy->id, ret);
+ pr_notice("lldd disable phy%02d returned %d\n",
+ phy->id, ret);
} else
- sas_printk("phy%02d is not enabled, cannot shutdown\n",
- phy->id);
+ pr_notice("phy%02d is not enabled, cannot shutdown\n",
+ phy->id);
}
/* ---------- Phy class registration ---------- */
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index fad23dd39114..03fe479359b6 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -110,9 +110,9 @@ static void sas_form_port(struct asd_sas_phy *phy)
wake_up(&sas_ha->eh_wait_q);
return;
} else {
- SAS_DPRINTK("%s: phy%d belongs to port%d already(%d)!\n",
- __func__, phy->id, phy->port->id,
- phy->port->num_phys);
+ pr_info("%s: phy%d belongs to port%d already(%d)!\n",
+ __func__, phy->id, phy->port->id,
+ phy->port->num_phys);
return;
}
}
@@ -125,8 +125,8 @@ static void sas_form_port(struct asd_sas_phy *phy)
if (*(u64 *) port->sas_addr &&
phy_is_wideport_member(port, phy) && port->num_phys > 0) {
/* wide port */
- SAS_DPRINTK("phy%d matched wide port%d\n", phy->id,
- port->id);
+ pr_debug("phy%d matched wide port%d\n", phy->id,
+ port->id);
break;
}
spin_unlock(&port->phy_list_lock);
@@ -147,8 +147,7 @@ static void sas_form_port(struct asd_sas_phy *phy)
}
if (i >= sas_ha->num_phys) {
- printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n",
- __func__);
+ pr_err("%s: couldn't find a free port, bug?\n", __func__);
spin_unlock_irqrestore(&sas_ha->phy_port_lock, flags);
return;
}
@@ -180,10 +179,10 @@ static void sas_form_port(struct asd_sas_phy *phy)
}
sas_port_add_phy(port->port, phy->phy);
- SAS_DPRINTK("%s added to %s, phy_mask:0x%x (%16llx)\n",
- dev_name(&phy->phy->dev), dev_name(&port->port->dev),
- port->phy_mask,
- SAS_ADDR(port->attached_sas_addr));
+ pr_debug("%s added to %s, phy_mask:0x%x (%16llx)\n",
+ dev_name(&phy->phy->dev), dev_name(&port->port->dev),
+ port->phy_mask,
+ SAS_ADDR(port->attached_sas_addr));
if (port->port_dev)
port->port_dev->pathways = port->num_phys;
@@ -279,7 +278,7 @@ void sas_porte_broadcast_rcvd(struct work_struct *work)
prim = phy->sas_prim;
spin_unlock_irqrestore(&phy->sas_prim_lock, flags);
- SAS_DPRINTK("broadcast received: %d\n", prim);
+ pr_debug("broadcast received: %d\n", prim);
sas_discover_event(phy->port, DISCE_REVALIDATE_DOMAIN);
if (phy->port)
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 33229348dcb6..c43a00a9d819 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -93,9 +93,8 @@ static void sas_end_task(struct scsi_cmnd *sc, struct sas_task *task)
hs = DID_ERROR;
break;
case SAS_PROTO_RESPONSE:
- SAS_DPRINTK("LLDD:%s sent SAS_PROTO_RESP for an SSP "
- "task; please report this\n",
- task->dev->port->ha->sas_ha_name);
+ pr_notice("LLDD:%s sent SAS_PROTO_RESP for an SSP task; please report this\n",
+ task->dev->port->ha->sas_ha_name);
break;
case SAS_ABORTED_TASK:
hs = DID_ABORT;
@@ -132,12 +131,12 @@ static void sas_scsi_task_done(struct sas_task *task)
if (unlikely(!task)) {
/* task will be completed by the error handler */
- SAS_DPRINTK("task done but aborted\n");
+ pr_debug("task done but aborted\n");
return;
}
if (unlikely(!sc)) {
- SAS_DPRINTK("task_done called with non existing SCSI cmnd!\n");
+ pr_debug("task_done called with non existing SCSI cmnd!\n");
sas_free_task(task);
return;
}
@@ -208,7 +207,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
return 0;
out_free_task:
- SAS_DPRINTK("lldd_execute_task returned: %d\n", res);
+ pr_debug("lldd_execute_task returned: %d\n", res);
ASSIGN_SAS_TASK(cmd, NULL);
sas_free_task(task);
if (res == -SAS_QUEUE_FULL)
@@ -301,40 +300,38 @@ static enum task_disposition sas_scsi_find_task(struct sas_task *task)
to_sas_internal(task->dev->port->ha->core.shost->transportt);
for (i = 0; i < 5; i++) {
- SAS_DPRINTK("%s: aborting task 0x%p\n", __func__, task);
+ pr_notice("%s: aborting task 0x%p\n", __func__, task);
res = si->dft->lldd_abort_task(task);
spin_lock_irqsave(&task->task_state_lock, flags);
if (task->task_state_flags & SAS_TASK_STATE_DONE) {
spin_unlock_irqrestore(&task->task_state_lock, flags);
- SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
- task);
+ pr_debug("%s: task 0x%p is done\n", __func__, task);
return TASK_IS_DONE;
}
spin_unlock_irqrestore(&task->task_state_lock, flags);
if (res == TMF_RESP_FUNC_COMPLETE) {
- SAS_DPRINTK("%s: task 0x%p is aborted\n",
- __func__, task);
+ pr_notice("%s: task 0x%p is aborted\n",
+ __func__, task);
return TASK_IS_ABORTED;
} else if (si->dft->lldd_query_task) {
- SAS_DPRINTK("%s: querying task 0x%p\n",
- __func__, task);
+ pr_notice("%s: querying task 0x%p\n", __func__, task);
res = si->dft->lldd_query_task(task);
switch (res) {
case TMF_RESP_FUNC_SUCC:
- SAS_DPRINTK("%s: task 0x%p at LU\n",
- __func__, task);
+ pr_notice("%s: task 0x%p at LU\n", __func__,
+ task);
return TASK_IS_AT_LU;
case TMF_RESP_FUNC_COMPLETE:
- SAS_DPRINTK("%s: task 0x%p not at LU\n",
- __func__, task);
+ pr_notice("%s: task 0x%p not at LU\n",
+ __func__, task);
return TASK_IS_NOT_AT_LU;
case TMF_RESP_FUNC_FAILED:
- SAS_DPRINTK("%s: task 0x%p failed to abort\n",
- __func__, task);
- return TASK_ABORT_FAILED;
- }
+ pr_notice("%s: task 0x%p failed to abort\n",
+ __func__, task);
+ return TASK_ABORT_FAILED;
+ }
}
}
@@ -350,9 +347,9 @@ static int sas_recover_lu(struct domain_device *dev, struct scsi_cmnd *cmd)
int_to_scsilun(cmd->device->lun, &lun);
- SAS_DPRINTK("eh: device %llx LUN %llx has the task\n",
- SAS_ADDR(dev->sas_addr),
- cmd->device->lun);
+ pr_notice("eh: device %llx LUN %llx has the task\n",
+ SAS_ADDR(dev->sas_addr),
+ cmd->device->lun);
if (i->dft->lldd_abort_task_set)
res = i->dft->lldd_abort_task_set(dev, lun.scsi_lun);
@@ -376,8 +373,8 @@ static int sas_recover_I_T(struct domain_device *dev)
struct sas_internal *i =
to_sas_internal(dev->port->ha->core.shost->transportt);
- SAS_DPRINTK("I_T nexus reset for dev %016llx\n",
- SAS_ADDR(dev->sas_addr));
+ pr_notice("I_T nexus reset for dev %016llx\n",
+ SAS_ADDR(dev->sas_addr));
if (i->dft->lldd_I_T_nexus_reset)
res = i->dft->lldd_I_T_nexus_reset(dev);
@@ -471,9 +468,9 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type,
return SUCCESS;
}
- SAS_DPRINTK("%s reset of %s failed\n",
- reset_type == SAS_DEV_LU_RESET ? "LUN" : "Bus",
- dev_name(&dev->rphy->dev));
+ pr_warn("%s reset of %s failed\n",
+ reset_type == SAS_DEV_LU_RESET ? "LUN" : "Bus",
+ dev_name(&dev->rphy->dev));
return FAILED;
}
@@ -501,7 +498,7 @@ int sas_eh_abort_handler(struct scsi_cmnd *cmd)
if (task)
res = i->dft->lldd_abort_task(task);
else
- SAS_DPRINTK("no task to abort\n");
+ pr_notice("no task to abort\n");
if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE)
return SUCCESS;
@@ -612,34 +609,33 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
spin_unlock_irqrestore(&task->task_state_lock, flags);
if (need_reset) {
- SAS_DPRINTK("%s: task 0x%p requests reset\n",
- __func__, task);
+ pr_notice("%s: task 0x%p requests reset\n",
+ __func__, task);
goto reset;
}
- SAS_DPRINTK("trying to find task 0x%p\n", task);
+ pr_debug("trying to find task 0x%p\n", task);
res = sas_scsi_find_task(task);
switch (res) {
case TASK_IS_DONE:
- SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
+ pr_notice("%s: task 0x%p is done\n", __func__,
task);
sas_eh_finish_cmd(cmd);
continue;
case TASK_IS_ABORTED:
- SAS_DPRINTK("%s: task 0x%p is aborted\n",
- __func__, task);
+ pr_notice("%s: task 0x%p is aborted\n",
+ __func__, task);
sas_eh_finish_cmd(cmd);
continue;
case TASK_IS_AT_LU:
- SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
+ pr_info("task 0x%p is at LU: lu recover\n", task);
reset:
tmf_resp = sas_recover_lu(task->dev, cmd);
if (tmf_resp == TMF_RESP_FUNC_COMPLETE) {
- SAS_DPRINTK("dev %016llx LU %llx is "
- "recovered\n",
- SAS_ADDR(task->dev),
- cmd->device->lun);
+ pr_notice("dev %016llx LU %llx is recovered\n",
+ SAS_ADDR(task->dev),
+ cmd->device->lun);
sas_eh_finish_cmd(cmd);
sas_scsi_clear_queue_lu(work_q, cmd);
goto Again;
@@ -647,14 +643,14 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
/* fallthrough */
case TASK_IS_NOT_AT_LU:
case TASK_ABORT_FAILED:
- SAS_DPRINTK("task 0x%p is not at LU: I_T recover\n",
- task);
+ pr_notice("task 0x%p is not at LU: I_T recover\n",
+ task);
tmf_resp = sas_recover_I_T(task->dev);
if (tmf_resp == TMF_RESP_FUNC_COMPLETE ||
tmf_resp == -ENODEV) {
struct domain_device *dev = task->dev;
- SAS_DPRINTK("I_T %016llx recovered\n",
- SAS_ADDR(task->dev->sas_addr));
+ pr_notice("I_T %016llx recovered\n",
+ SAS_ADDR(task->dev->sas_addr));
sas_eh_finish_cmd(cmd);
sas_scsi_clear_queue_I_T(work_q, dev);
goto Again;
@@ -663,12 +659,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
try_to_reset_cmd_device(cmd);
if (i->dft->lldd_clear_nexus_port) {
struct asd_sas_port *port = task->dev->port;
- SAS_DPRINTK("clearing nexus for port:%d\n",
- port->id);
+ pr_debug("clearing nexus for port:%d\n",
+ port->id);
res = i->dft->lldd_clear_nexus_port(port);
if (res == TMF_RESP_FUNC_COMPLETE) {
- SAS_DPRINTK("clear nexus port:%d "
- "succeeded\n", port->id);
+ pr_notice("clear nexus port:%d succeeded\n",
+ port->id);
sas_eh_finish_cmd(cmd);
sas_scsi_clear_queue_port(work_q,
port);
@@ -676,11 +672,10 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
}
}
if (i->dft->lldd_clear_nexus_ha) {
- SAS_DPRINTK("clear nexus ha\n");
+ pr_debug("clear nexus ha\n");
res = i->dft->lldd_clear_nexus_ha(ha);
if (res == TMF_RESP_FUNC_COMPLETE) {
- SAS_DPRINTK("clear nexus ha "
- "succeeded\n");
+ pr_notice("clear nexus ha succeeded\n");
sas_eh_finish_cmd(cmd);
goto clear_q;
}
@@ -689,10 +684,9 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
* of effort could recover from errors. Quite
* possibly the HA just disappeared.
*/
- SAS_DPRINTK("error from device %llx, LUN %llx "
- "couldn't be recovered in any way\n",
- SAS_ADDR(task->dev->sas_addr),
- cmd->device->lun);
+ pr_err("error from device %llx, LUN %llx couldn't be recovered in any way\n",
+ SAS_ADDR(task->dev->sas_addr),
+ cmd->device->lun);
sas_eh_finish_cmd(cmd);
goto clear_q;
@@ -704,7 +698,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
return;
clear_q:
- SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__);
+ pr_debug("--- Exit %s -- clear_q\n", __func__);
list_for_each_entry_safe(cmd, n, work_q, eh_entry)
sas_eh_finish_cmd(cmd);
goto out;
@@ -758,8 +752,8 @@ retry:
list_splice_init(&shost->eh_cmd_q, &eh_work_q);
spin_unlock_irq(shost->host_lock);
- SAS_DPRINTK("Enter %s busy: %d failed: %d\n",
- __func__, scsi_host_busy(shost), shost->host_failed);
+ pr_notice("Enter %s busy: %d failed: %d\n",
+ __func__, scsi_host_busy(shost), shost->host_failed);
/*
* Deal with commands that still have SAS tasks (i.e. they didn't
* complete via the normal sas_task completion mechanism),
@@ -800,9 +794,9 @@ out:
if (retry)
goto retry;
- SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n",
- __func__, scsi_host_busy(shost),
- shost->host_failed, tries);
+ pr_notice("--- Exit %s: busy: %d failed: %d tries: %d\n",
+ __func__, scsi_host_busy(shost),
+ shost->host_failed, tries);
}
int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
@@ -875,9 +869,8 @@ int sas_slave_configure(struct scsi_device *scsi_dev)
if (scsi_dev->tagged_supported) {
scsi_change_queue_depth(scsi_dev, SAS_DEF_QD);
} else {
- SAS_DPRINTK("device %llx, LUN %llx doesn't support "
- "TCQ\n", SAS_ADDR(dev->sas_addr),
- scsi_dev->lun);
+ pr_notice("device %llx, LUN %llx doesn't support TCQ\n",
+ SAS_ADDR(dev->sas_addr), scsi_dev->lun);
scsi_change_queue_depth(scsi_dev, 1);
}
@@ -930,16 +923,10 @@ void sas_task_abort(struct sas_task *task)
return;
}
- if (dev_is_sata(task->dev)) {
+ if (dev_is_sata(task->dev))
sas_ata_task_abort(task);
- } else {
- struct request_queue *q = sc->device->request_queue;
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
+ else
blk_abort_request(sc->request);
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
}
void sas_target_destroy(struct scsi_target *starget)
diff --git a/drivers/scsi/libsas/sas_task.c b/drivers/scsi/libsas/sas_task.c
index a78e5bd3e514..c3b9befad4e6 100644
--- a/drivers/scsi/libsas/sas_task.c
+++ b/drivers/scsi/libsas/sas_task.c
@@ -1,3 +1,6 @@
+
+#include "sas_internal.h"
+
#include <linux/kernel.h>
#include <linux/export.h>
#include <scsi/sas.h>
@@ -23,11 +26,8 @@ void sas_ssp_task_response(struct device *dev, struct sas_task *task,
memcpy(tstat->buf, iu->sense_data, tstat->buf_valid_size);
if (iu->status != SAM_STAT_CHECK_CONDITION)
- dev_printk(KERN_WARNING, dev,
- "dev %llx sent sense data, but "
- "stat(%x) is not CHECK CONDITION\n",
- SAS_ADDR(task->dev->sas_addr),
- iu->status);
+ dev_warn(dev, "dev %llx sent sense data, but stat(%x) is not CHECK CONDITION\n",
+ SAS_ADDR(task->dev->sas_addr), iu->status);
}
else
/* when datapres contains corrupt/unknown value... */
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index c1eb2b00ca7f..ebdfe5b26937 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -335,6 +335,18 @@ enum hba_state {
LPFC_HBA_ERROR = -1
};
+struct lpfc_trunk_link_state {
+ enum hba_state state;
+ uint8_t fault;
+};
+
+struct lpfc_trunk_link {
+ struct lpfc_trunk_link_state link0,
+ link1,
+ link2,
+ link3;
+};
+
struct lpfc_vport {
struct lpfc_hba *phba;
struct list_head listentry;
@@ -490,6 +502,7 @@ struct lpfc_vport {
struct nvme_fc_local_port *localport;
uint8_t nvmei_support; /* driver supports NVME Initiator */
uint32_t last_fcp_wqidx;
+ uint32_t rcv_flogi_cnt; /* How many unsol FLOGIs ACK'd. */
};
struct hbq_s {
@@ -683,6 +696,7 @@ struct lpfc_hba {
uint32_t iocb_cmd_size;
uint32_t iocb_rsp_size;
+ struct lpfc_trunk_link trunk_link;
enum hba_state link_state;
uint32_t link_flag; /* link state flags */
#define LS_LOOPBACK_MODE 0x1 /* NPort is in Loopback mode */
@@ -717,6 +731,7 @@ struct lpfc_hba {
* capability
*/
#define HBA_NVME_IOQ_FLUSH 0x80000 /* NVME IO queues flushed. */
+#define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */
uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
struct lpfc_dmabuf slim2p;
@@ -783,6 +798,7 @@ struct lpfc_hba {
#define LPFC_FCF_PRIORITY 2 /* Priority fcf failover */
uint32_t cfg_fcf_failover_policy;
uint32_t cfg_fcp_io_sched;
+ uint32_t cfg_ns_query;
uint32_t cfg_fcp2_no_tgt_reset;
uint32_t cfg_cr_delay;
uint32_t cfg_cr_count;
@@ -989,7 +1005,8 @@ struct lpfc_hba {
spinlock_t port_list_lock; /* lock for port_list mutations */
struct lpfc_vport *pport; /* physical lpfc_vport pointer */
uint16_t max_vpi; /* Maximum virtual nports */
-#define LPFC_MAX_VPI 0xFFFF /* Max number of VPI supported */
+#define LPFC_MAX_VPI 0xFF /* Max number VPI supported 0 - 0xff */
+#define LPFC_MAX_VPORTS 0x100 /* Max vports per port, with pport */
uint16_t max_vports; /*
* For IOV HBAs max_vpi can change
* after a reset. max_vports is max
@@ -1111,6 +1128,10 @@ struct lpfc_hba {
uint16_t vlan_id;
struct list_head fcf_conn_rec_list;
+ bool defer_flogi_acc_flag;
+ uint16_t defer_flogi_acc_rx_id;
+ uint16_t defer_flogi_acc_ox_id;
+
spinlock_t ct_ev_lock; /* synchronize access to ct_ev_waiters */
struct list_head ct_ev_waiters;
struct unsol_rcv_ct_ctx ct_ctx[LPFC_CT_CTX_MAX];
@@ -1262,6 +1283,12 @@ lpfc_sli_read_hs(struct lpfc_hba *phba)
static inline struct lpfc_sli_ring *
lpfc_phba_elsring(struct lpfc_hba *phba)
{
+ /* Return NULL if sli_rev has become invalid due to bad fw */
+ if (phba->sli_rev != LPFC_SLI_REV4 &&
+ phba->sli_rev != LPFC_SLI_REV3 &&
+ phba->sli_rev != LPFC_SLI_REV2)
+ return NULL;
+
if (phba->sli_rev == LPFC_SLI_REV4) {
if (phba->sli4_hba.els_wq)
return phba->sli4_hba.els_wq->pring;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index dda7f450b96d..4bae72cbf3f6 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -883,6 +883,42 @@ lpfc_link_state_show(struct device *dev, struct device_attribute *attr,
}
}
+ if ((phba->sli_rev == LPFC_SLI_REV4) &&
+ ((bf_get(lpfc_sli_intf_if_type,
+ &phba->sli4_hba.sli_intf) ==
+ LPFC_SLI_INTF_IF_TYPE_6))) {
+ struct lpfc_trunk_link link = phba->trunk_link;
+
+ if (bf_get(lpfc_conf_trunk_port0, &phba->sli4_hba))
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "Trunk port 0: Link %s %s\n",
+ (link.link0.state == LPFC_LINK_UP) ?
+ "Up" : "Down. ",
+ trunk_errmsg[link.link0.fault]);
+
+ if (bf_get(lpfc_conf_trunk_port1, &phba->sli4_hba))
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "Trunk port 1: Link %s %s\n",
+ (link.link1.state == LPFC_LINK_UP) ?
+ "Up" : "Down. ",
+ trunk_errmsg[link.link1.fault]);
+
+ if (bf_get(lpfc_conf_trunk_port2, &phba->sli4_hba))
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "Trunk port 2: Link %s %s\n",
+ (link.link2.state == LPFC_LINK_UP) ?
+ "Up" : "Down. ",
+ trunk_errmsg[link.link2.fault]);
+
+ if (bf_get(lpfc_conf_trunk_port3, &phba->sli4_hba))
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "Trunk port 3: Link %s %s\n",
+ (link.link3.state == LPFC_LINK_UP) ?
+ "Up" : "Down. ",
+ trunk_errmsg[link.link3.fault]);
+
+ }
+
return len;
}
@@ -1156,6 +1192,82 @@ out:
}
/**
+ * lpfc_reset_pci_bus - resets PCI bridge controller's secondary bus of an HBA
+ * @phba: lpfc_hba pointer.
+ *
+ * Description:
+ * Issues a PCI secondary bus reset for the phba->pcidev.
+ *
+ * Notes:
+ * First walks the bus_list to ensure only PCI devices with Emulex
+ * vendor id, device ids that support hot reset, only one occurrence
+ * of function 0, and all ports on the bus are in offline mode to ensure the
+ * hot reset only affects one valid HBA.
+ *
+ * Returns:
+ * -ENOTSUPP, cfg_enable_hba_reset must be of value 2
+ * -ENODEV, NULL ptr to pcidev
+ * -EBADSLT, detected invalid device
+ * -EBUSY, port is not in offline state
+ * 0, successful
+ */
+int
+lpfc_reset_pci_bus(struct lpfc_hba *phba)
+{
+ struct pci_dev *pdev = phba->pcidev;
+ struct Scsi_Host *shost = NULL;
+ struct lpfc_hba *phba_other = NULL;
+ struct pci_dev *ptr = NULL;
+ int res;
+
+ if (phba->cfg_enable_hba_reset != 2)
+ return -ENOTSUPP;
+
+ if (!pdev) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "8345 pdev NULL!\n");
+ return -ENODEV;
+ }
+
+ res = lpfc_check_pci_resettable(phba);
+ if (res)
+ return res;
+
+ /* Walk the list of devices on the pci_dev's bus */
+ list_for_each_entry(ptr, &pdev->bus->devices, bus_list) {
+ /* Check port is offline */
+ shost = pci_get_drvdata(ptr);
+ if (shost) {
+ phba_other =
+ ((struct lpfc_vport *)shost->hostdata)->phba;
+ if (!(phba_other->pport->fc_flag & FC_OFFLINE_MODE)) {
+ lpfc_printf_log(phba_other, KERN_INFO, LOG_INIT,
+ "8349 WWPN = 0x%02x%02x%02x%02x"
+ "%02x%02x%02x%02x is not "
+ "offline!\n",
+ phba_other->wwpn[0],
+ phba_other->wwpn[1],
+ phba_other->wwpn[2],
+ phba_other->wwpn[3],
+ phba_other->wwpn[4],
+ phba_other->wwpn[5],
+ phba_other->wwpn[6],
+ phba_other->wwpn[7]);
+ return -EBUSY;
+ }
+ }
+ }
+
+ /* Issue PCI bus reset */
+ res = pci_reset_bus(pdev);
+ if (res) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "8350 PCI reset bus failed: %d\n", res);
+ }
+
+ return res;
+}
+
+/**
* lpfc_selective_reset - Offline then onlines the port
* @phba: lpfc_hba pointer.
*
@@ -1322,7 +1434,7 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode)
return -EACCES;
if ((phba->sli_rev < LPFC_SLI_REV4) ||
- (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) !=
+ (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) <
LPFC_SLI_INTF_IF_TYPE_2))
return -EPERM;
@@ -1430,6 +1542,66 @@ lpfc_nport_evt_cnt_show(struct device *dev, struct device_attribute *attr,
return snprintf(buf, PAGE_SIZE, "%d\n", phba->nport_event_cnt);
}
+int
+lpfc_set_trunking(struct lpfc_hba *phba, char *buff_out)
+{
+ LPFC_MBOXQ_t *mbox = NULL;
+ unsigned long val = 0;
+ char *pval = 0;
+ int rc = 0;
+
+ if (!strncmp("enable", buff_out,
+ strlen("enable"))) {
+ pval = buff_out + strlen("enable") + 1;
+ rc = kstrtoul(pval, 0, &val);
+ if (rc)
+ return rc; /* Invalid number */
+ } else if (!strncmp("disable", buff_out,
+ strlen("disable"))) {
+ val = 0;
+ } else {
+ return -EINVAL; /* Invalid command */
+ }
+
+ switch (val) {
+ case 0:
+ val = 0x0; /* Disable */
+ break;
+ case 2:
+ val = 0x1; /* Enable two port trunk */
+ break;
+ case 4:
+ val = 0x2; /* Enable four port trunk */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+ "0070 Set trunk mode with val %ld ", val);
+
+ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (!mbox)
+ return -ENOMEM;
+
+ lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+ LPFC_MBOX_OPCODE_FCOE_FC_SET_TRUNK_MODE,
+ 12, LPFC_SLI4_MBX_EMBED);
+
+ bf_set(lpfc_mbx_set_trunk_mode,
+ &mbox->u.mqe.un.set_trunk_mode,
+ val);
+ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+ if (rc)
+ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+ "0071 Set trunk mode failed with status: %d",
+ rc);
+ if (rc != MBX_TIMEOUT)
+ mempool_free(mbox, phba->mbox_mem_pool);
+
+ return 0;
+}
+
/**
* lpfc_board_mode_show - Return the state of the board
* @dev: class device that is converted into a Scsi_host.
@@ -1522,6 +1694,11 @@ lpfc_board_mode_store(struct device *dev, struct device_attribute *attr,
status = lpfc_sli4_pdev_reg_request(phba, LPFC_FW_RESET);
else if (strncmp(buf, "dv_reset", sizeof("dv_reset") - 1) == 0)
status = lpfc_sli4_pdev_reg_request(phba, LPFC_DV_RESET);
+ else if (strncmp(buf, "pci_bus_reset", sizeof("pci_bus_reset") - 1)
+ == 0)
+ status = lpfc_reset_pci_bus(phba);
+ else if (strncmp(buf, "trunk", sizeof("trunk") - 1) == 0)
+ status = lpfc_set_trunking(phba, (char *)buf + sizeof("trunk"));
else
status = -EINVAL;
@@ -1590,7 +1767,7 @@ lpfc_get_hba_info(struct lpfc_hba *phba,
pmb = &pmboxq->u.mb;
pmb->mbxCommand = MBX_READ_CONFIG;
pmb->mbxOwner = OWN_HOST;
- pmboxq->context1 = NULL;
+ pmboxq->ctx_buf = NULL;
if (phba->pport->fc_flag & FC_OFFLINE_MODE)
rc = MBX_NOT_FINISHED;
@@ -1622,6 +1799,9 @@ lpfc_get_hba_info(struct lpfc_hba *phba,
max_vpi = (bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) > 0) ?
(bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) - 1) : 0;
+ /* Limit the max we support */
+ if (max_vpi > LPFC_MAX_VPI)
+ max_vpi = LPFC_MAX_VPI;
if (mvpi)
*mvpi = max_vpi;
if (avpi)
@@ -1637,8 +1817,13 @@ lpfc_get_hba_info(struct lpfc_hba *phba,
*axri = pmb->un.varRdConfig.avail_xri;
if (mvpi)
*mvpi = pmb->un.varRdConfig.max_vpi;
- if (avpi)
- *avpi = pmb->un.varRdConfig.avail_vpi;
+ if (avpi) {
+ /* avail_vpi is only valid if link is up and ready */
+ if (phba->link_state == LPFC_HBA_READY)
+ *avpi = pmb->un.varRdConfig.avail_vpi;
+ else
+ *avpi = pmb->un.varRdConfig.max_vpi;
+ }
}
mempool_free(pmboxq, phba->mbox_mem_pool);
@@ -3831,8 +4016,9 @@ lpfc_topology_store(struct device *dev, struct device_attribute *attr,
val);
return -EINVAL;
}
- if (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC &&
- val == 4) {
+ if ((phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC ||
+ phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC) &&
+ val == 4) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
"3114 Loop mode not supported\n");
return -EINVAL;
@@ -4254,7 +4440,7 @@ lpfc_link_speed_store(struct device *dev, struct device_attribute *attr,
uint32_t prev_val, if_type;
if_type = bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf);
- if (if_type == LPFC_SLI_INTF_IF_TYPE_2 &&
+ if (if_type >= LPFC_SLI_INTF_IF_TYPE_2 &&
phba->hba_flag & HBA_FORCED_LINK_SPEED)
return -EPERM;
@@ -5070,6 +5256,18 @@ LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_ROUND_ROBIN,
"issuing commands [0] - Round Robin, [1] - Current CPU");
/*
+ * lpfc_ns_query: Determine algrithmn for NameServer queries after RSCN
+ * range is [0,1]. Default value is 0.
+ * For [0], GID_FT is used for NameServer queries after RSCN (default)
+ * For [1], GID_PT is used for NameServer queries after RSCN
+ *
+ */
+LPFC_ATTR_RW(ns_query, LPFC_NS_QUERY_GID_FT,
+ LPFC_NS_QUERY_GID_FT, LPFC_NS_QUERY_GID_PT,
+ "Determine algorithm NameServer queries after RSCN "
+ "[0] - GID_FT, [1] - GID_PT");
+
+/*
# lpfc_fcp2_no_tgt_reset: Determine bus reset behavior
# range is [0,1]. Default value is 0.
# For [0], bus reset issues target reset to ALL devices
@@ -5257,9 +5455,10 @@ LPFC_ATTR_R(nvme_io_channel,
# lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware.
# 0 = HBA resets disabled
# 1 = HBA resets enabled (default)
-# Value range is [0,1]. Default value is 1.
+# 2 = HBA reset via PCI bus reset enabled
+# Value range is [0,2]. Default value is 1.
*/
-LPFC_ATTR_R(enable_hba_reset, 1, 0, 1, "Enable HBA resets from the driver.");
+LPFC_ATTR_RW(enable_hba_reset, 1, 0, 2, "Enable HBA resets from the driver.");
/*
# lpfc_enable_hba_heartbeat: Disable HBA heartbeat timer..
@@ -5514,6 +5713,7 @@ struct device_attribute *lpfc_hba_attrs[] = {
&dev_attr_lpfc_scan_down,
&dev_attr_lpfc_link_speed,
&dev_attr_lpfc_fcp_io_sched,
+ &dev_attr_lpfc_ns_query,
&dev_attr_lpfc_fcp2_no_tgt_reset,
&dev_attr_lpfc_cr_delay,
&dev_attr_lpfc_cr_count,
@@ -6006,6 +6206,9 @@ lpfc_get_host_speed(struct Scsi_Host *shost)
case LPFC_LINK_SPEED_64GHZ:
fc_host_speed(shost) = FC_PORTSPEED_64GBIT;
break;
+ case LPFC_LINK_SPEED_128GHZ:
+ fc_host_speed(shost) = FC_PORTSPEED_128GBIT;
+ break;
default:
fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
break;
@@ -6105,7 +6308,7 @@ lpfc_get_stats(struct Scsi_Host *shost)
pmb = &pmboxq->u.mb;
pmb->mbxCommand = MBX_READ_STATUS;
pmb->mbxOwner = OWN_HOST;
- pmboxq->context1 = NULL;
+ pmboxq->ctx_buf = NULL;
pmboxq->vport = vport;
if (vport->fc_flag & FC_OFFLINE_MODE)
@@ -6137,7 +6340,7 @@ lpfc_get_stats(struct Scsi_Host *shost)
memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t));
pmb->mbxCommand = MBX_READ_LNK_STAT;
pmb->mbxOwner = OWN_HOST;
- pmboxq->context1 = NULL;
+ pmboxq->ctx_buf = NULL;
pmboxq->vport = vport;
if (vport->fc_flag & FC_OFFLINE_MODE)
@@ -6217,7 +6420,7 @@ lpfc_reset_stats(struct Scsi_Host *shost)
pmb->mbxCommand = MBX_READ_STATUS;
pmb->mbxOwner = OWN_HOST;
pmb->un.varWords[0] = 0x1; /* reset request */
- pmboxq->context1 = NULL;
+ pmboxq->ctx_buf = NULL;
pmboxq->vport = vport;
if ((vport->fc_flag & FC_OFFLINE_MODE) ||
@@ -6235,7 +6438,7 @@ lpfc_reset_stats(struct Scsi_Host *shost)
memset(pmboxq, 0, sizeof(LPFC_MBOXQ_t));
pmb->mbxCommand = MBX_READ_LNK_STAT;
pmb->mbxOwner = OWN_HOST;
- pmboxq->context1 = NULL;
+ pmboxq->ctx_buf = NULL;
pmboxq->vport = vport;
if ((vport->fc_flag & FC_OFFLINE_MODE) ||
@@ -6564,6 +6767,7 @@ void
lpfc_get_cfgparam(struct lpfc_hba *phba)
{
lpfc_fcp_io_sched_init(phba, lpfc_fcp_io_sched);
+ lpfc_ns_query_init(phba, lpfc_ns_query);
lpfc_fcp2_no_tgt_reset_init(phba, lpfc_fcp2_no_tgt_reset);
lpfc_cr_delay_init(phba, lpfc_cr_delay);
lpfc_cr_count_init(phba, lpfc_cr_count);
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 7bd7ae86bed5..8698af86485d 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -2222,7 +2222,7 @@ lpfc_bsg_diag_loopback_mode(struct bsg_job *job)
if (phba->sli_rev < LPFC_SLI_REV4)
rc = lpfc_sli3_bsg_diag_loopback_mode(phba, job);
- else if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
+ else if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >=
LPFC_SLI_INTF_IF_TYPE_2)
rc = lpfc_sli4_bsg_diag_loopback_mode(phba, job);
else
@@ -2262,7 +2262,7 @@ lpfc_sli4_bsg_diag_mode_end(struct bsg_job *job)
if (phba->sli_rev < LPFC_SLI_REV4)
return -ENODEV;
- if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) !=
+ if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) <
LPFC_SLI_INTF_IF_TYPE_2)
return -ENODEV;
@@ -2354,7 +2354,7 @@ lpfc_sli4_bsg_link_diag_test(struct bsg_job *job)
rc = -ENODEV;
goto job_error;
}
- if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) !=
+ if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) <
LPFC_SLI_INTF_IF_TYPE_2) {
rc = -ENODEV;
goto job_error;
@@ -2501,9 +2501,9 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi)
return -ENOMEM;
}
- dmabuff = (struct lpfc_dmabuf *) mbox->context1;
- mbox->context1 = NULL;
- mbox->context2 = NULL;
+ dmabuff = (struct lpfc_dmabuf *)mbox->ctx_buf;
+ mbox->ctx_buf = NULL;
+ mbox->ctx_ndlp = NULL;
status = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
if ((status != MBX_SUCCESS) || (mbox->u.mb.mbxStatus)) {
@@ -3388,7 +3388,7 @@ lpfc_bsg_issue_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
unsigned long flags;
uint8_t *pmb, *pmb_buf;
- dd_data = pmboxq->context1;
+ dd_data = pmboxq->ctx_ndlp;
/*
* The outgoing buffer is readily referred from the dma buffer,
@@ -3573,7 +3573,7 @@ lpfc_bsg_issue_mbox_ext_handle_job(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
struct lpfc_sli_config_mbox *sli_cfg_mbx;
uint8_t *pmbx;
- dd_data = pmboxq->context1;
+ dd_data = pmboxq->ctx_buf;
/* Determine if job has been aborted */
spin_lock_irqsave(&phba->ct_ev_lock, flags);
@@ -3960,7 +3960,7 @@ lpfc_bsg_sli_cfg_read_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_read_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->context1 = dd_data;
+ pmboxq->ctx_buf = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -4131,7 +4131,7 @@ lpfc_bsg_sli_cfg_write_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->context1 = dd_data;
+ pmboxq->ctx_buf = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -4476,7 +4476,7 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->context1 = dd_data;
+ pmboxq->ctx_buf = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -4761,7 +4761,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job,
if (mbox_req->inExtWLen || mbox_req->outExtWLen) {
from = pmbx;
ext = from + sizeof(MAILBOX_t);
- pmboxq->context2 = ext;
+ pmboxq->ctx_buf = ext;
pmboxq->in_ext_byte_len =
mbox_req->inExtWLen * sizeof(uint32_t);
pmboxq->out_ext_byte_len =
@@ -4889,7 +4889,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_mbox_cmpl;
/* setup context field to pass wait_queue pointer to wake function */
- pmboxq->context1 = dd_data;
+ pmboxq->ctx_ndlp = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -5348,7 +5348,7 @@ lpfc_bsg_get_ras_config(struct bsg_job *job)
sizeof(struct fc_bsg_request) +
sizeof(struct lpfc_bsg_ras_req)) {
lpfc_printf_log(phba, KERN_ERR, LOG_LIBDFC,
- "6181 Received RAS_LOG request "
+ "6192 FW_LOG request received "
"below minimum size\n");
rc = -EINVAL;
goto ras_job_error;
@@ -5356,7 +5356,7 @@ lpfc_bsg_get_ras_config(struct bsg_job *job)
/* Check FW log status */
rc = lpfc_check_fwlog_support(phba);
- if (rc == -EACCES || rc == -EPERM)
+ if (rc)
goto ras_job_error;
ras_reply = (struct lpfc_bsg_get_ras_config_reply *)
@@ -5381,25 +5381,6 @@ ras_job_error:
}
/**
- * lpfc_ras_stop_fwlog: Disable FW logging by the adapter
- * @phba: Pointer to HBA context object.
- *
- * Disable FW logging into host memory on the adapter. To
- * be done before reading logs from the host memory.
- **/
-static void
-lpfc_ras_stop_fwlog(struct lpfc_hba *phba)
-{
- struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog;
-
- ras_fwlog->ras_active = false;
-
- /* Disable FW logging to host memory */
- writel(LPFC_CTL_PDEV_CTL_DDL_RAS,
- phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET);
-}
-
-/**
* lpfc_bsg_set_ras_config: Set FW logging parameters
* @job: fc_bsg_job to handle
*
@@ -5416,7 +5397,7 @@ lpfc_bsg_set_ras_config(struct bsg_job *job)
struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog;
struct fc_bsg_reply *bsg_reply = job->reply;
uint8_t action = 0, log_level = 0;
- int rc = 0;
+ int rc = 0, action_status = 0;
if (job->request_len <
sizeof(struct fc_bsg_request) +
@@ -5430,7 +5411,7 @@ lpfc_bsg_set_ras_config(struct bsg_job *job)
/* Check FW log status */
rc = lpfc_check_fwlog_support(phba);
- if (rc == -EACCES || rc == -EPERM)
+ if (rc)
goto ras_job_error;
ras_req = (struct lpfc_bsg_set_ras_config_req *)
@@ -5449,16 +5430,25 @@ lpfc_bsg_set_ras_config(struct bsg_job *job)
lpfc_ras_stop_fwlog(phba);
} else {
/*action = LPFC_RASACTION_START_LOGGING*/
- if (ras_fwlog->ras_active == true) {
- rc = -EINPROGRESS;
- goto ras_job_error;
- }
+
+ /* Even though FW-logging is active re-initialize
+ * FW-logging with new log-level. Return status
+ * "Logging already Running" to caller.
+ **/
+ if (ras_fwlog->ras_active)
+ action_status = -EINPROGRESS;
/* Enable logging */
rc = lpfc_sli4_ras_fwlog_init(phba, log_level,
LPFC_RAS_ENABLE_LOGGING);
- if (rc)
+ if (rc) {
rc = -EINVAL;
+ goto ras_job_error;
+ }
+
+ /* Check if FW-logging is re-initialized */
+ if (action_status == -EINPROGRESS)
+ rc = action_status;
}
ras_job_error:
/* make error code available to userspace */
@@ -5487,12 +5477,11 @@ lpfc_bsg_get_ras_lwpd(struct bsg_job *job)
struct lpfc_hba *phba = vport->phba;
struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog;
struct fc_bsg_reply *bsg_reply = job->reply;
- uint32_t lwpd_offset = 0;
- uint64_t wrap_value = 0;
+ u32 *lwpd_ptr = NULL;
int rc = 0;
rc = lpfc_check_fwlog_support(phba);
- if (rc == -EACCES || rc == -EPERM)
+ if (rc)
goto ras_job_error;
if (job->request_len <
@@ -5508,11 +5497,19 @@ lpfc_bsg_get_ras_lwpd(struct bsg_job *job)
ras_reply = (struct lpfc_bsg_get_ras_lwpd *)
bsg_reply->reply_data.vendor_reply.vendor_rsp;
- lwpd_offset = *((uint32_t *)ras_fwlog->lwpd.virt) & 0xffffffff;
- ras_reply->offset = be32_to_cpu(lwpd_offset);
+ if (!ras_fwlog->lwpd.virt) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_LIBDFC,
+ "6193 Restart FW Logging\n");
+ rc = -EINVAL;
+ goto ras_job_error;
+ }
+
+ /* Get lwpd offset */
+ lwpd_ptr = (uint32_t *)(ras_fwlog->lwpd.virt);
+ ras_reply->offset = be32_to_cpu(*lwpd_ptr & 0xffffffff);
- wrap_value = *((uint64_t *)ras_fwlog->lwpd.virt);
- ras_reply->wrap_count = be32_to_cpu((wrap_value >> 32) & 0xffffffff);
+ /* Get wrap count */
+ ras_reply->wrap_count = be32_to_cpu(*(++lwpd_ptr) & 0xffffffff);
ras_job_error:
/* make error code available to userspace */
@@ -5539,9 +5536,8 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job)
struct fc_bsg_request *bsg_request = job->request;
struct fc_bsg_reply *bsg_reply = job->reply;
struct lpfc_bsg_get_fwlog_req *ras_req;
- uint32_t rd_offset, rd_index, offset, pending_wlen;
- uint32_t boundary = 0, align_len = 0, write_len = 0;
- void *dest, *src, *fwlog_buff;
+ u32 rd_offset, rd_index, offset;
+ void *src, *fwlog_buff;
struct lpfc_ras_fwlog *ras_fwlog = NULL;
struct lpfc_dmabuf *dmabuf, *next;
int rc = 0;
@@ -5549,7 +5545,7 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job)
ras_fwlog = &phba->ras_fwlog;
rc = lpfc_check_fwlog_support(phba);
- if (rc == -EACCES || rc == -EPERM)
+ if (rc)
goto ras_job_error;
/* Logging to be stopped before reading */
@@ -5581,8 +5577,6 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job)
rd_index = (rd_offset / LPFC_RAS_MAX_ENTRY_SIZE);
offset = (rd_offset % LPFC_RAS_MAX_ENTRY_SIZE);
- pending_wlen = ras_req->read_size;
- dest = fwlog_buff;
list_for_each_entry_safe(dmabuf, next,
&ras_fwlog->fwlog_buff_list, list) {
@@ -5590,29 +5584,9 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job)
if (dmabuf->buffer_tag < rd_index)
continue;
- /* Align read to buffer size */
- if (offset) {
- boundary = ((dmabuf->buffer_tag + 1) *
- LPFC_RAS_MAX_ENTRY_SIZE);
-
- align_len = (boundary - offset);
- write_len = min_t(u32, align_len,
- LPFC_RAS_MAX_ENTRY_SIZE);
- } else {
- write_len = min_t(u32, pending_wlen,
- LPFC_RAS_MAX_ENTRY_SIZE);
- align_len = 0;
- boundary = 0;
- }
src = dmabuf->virt + offset;
- memcpy(dest, src, write_len);
-
- pending_wlen -= write_len;
- if (!pending_wlen)
- break;
-
- dest += write_len;
- offset = (offset + write_len) % LPFC_RAS_MAX_ENTRY_SIZE;
+ memcpy(fwlog_buff, src, ras_req->read_size);
+ break;
}
bsg_reply->reply_payload_rcv_len =
@@ -5629,6 +5603,77 @@ ras_job_error:
return rc;
}
+static int
+lpfc_get_trunk_info(struct bsg_job *job)
+{
+ struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job));
+ struct lpfc_hba *phba = vport->phba;
+ struct fc_bsg_reply *bsg_reply = job->reply;
+ struct lpfc_trunk_info *event_reply;
+ int rc = 0;
+
+ if (job->request_len <
+ sizeof(struct fc_bsg_request) + sizeof(struct get_trunk_info_req)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_LIBDFC,
+ "2744 Received GET TRUNK _INFO request below "
+ "minimum size\n");
+ rc = -EINVAL;
+ goto job_error;
+ }
+
+ event_reply = (struct lpfc_trunk_info *)
+ bsg_reply->reply_data.vendor_reply.vendor_rsp;
+
+ if (job->reply_len <
+ sizeof(struct fc_bsg_request) + sizeof(struct lpfc_trunk_info)) {
+ lpfc_printf_log(phba, KERN_WARNING, LOG_LIBDFC,
+ "2728 Received GET TRUNK _INFO reply below "
+ "minimum size\n");
+ rc = -EINVAL;
+ goto job_error;
+ }
+ if (event_reply == NULL) {
+ rc = -EINVAL;
+ goto job_error;
+ }
+
+ bsg_bf_set(lpfc_trunk_info_link_status, event_reply,
+ (phba->link_state >= LPFC_LINK_UP) ? 1 : 0);
+
+ bsg_bf_set(lpfc_trunk_info_trunk_active0, event_reply,
+ (phba->trunk_link.link0.state == LPFC_LINK_UP) ? 1 : 0);
+
+ bsg_bf_set(lpfc_trunk_info_trunk_active1, event_reply,
+ (phba->trunk_link.link1.state == LPFC_LINK_UP) ? 1 : 0);
+
+ bsg_bf_set(lpfc_trunk_info_trunk_active2, event_reply,
+ (phba->trunk_link.link2.state == LPFC_LINK_UP) ? 1 : 0);
+
+ bsg_bf_set(lpfc_trunk_info_trunk_active3, event_reply,
+ (phba->trunk_link.link3.state == LPFC_LINK_UP) ? 1 : 0);
+
+ bsg_bf_set(lpfc_trunk_info_trunk_config0, event_reply,
+ bf_get(lpfc_conf_trunk_port0, &phba->sli4_hba));
+
+ bsg_bf_set(lpfc_trunk_info_trunk_config1, event_reply,
+ bf_get(lpfc_conf_trunk_port1, &phba->sli4_hba));
+
+ bsg_bf_set(lpfc_trunk_info_trunk_config2, event_reply,
+ bf_get(lpfc_conf_trunk_port2, &phba->sli4_hba));
+
+ bsg_bf_set(lpfc_trunk_info_trunk_config3, event_reply,
+ bf_get(lpfc_conf_trunk_port3, &phba->sli4_hba));
+
+ event_reply->port_speed = phba->sli4_hba.link_state.speed / 1000;
+ event_reply->logical_speed =
+ phba->sli4_hba.link_state.logical_speed / 100;
+job_error:
+ bsg_reply->result = rc;
+ bsg_job_done(job, bsg_reply->result,
+ bsg_reply->reply_payload_rcv_len);
+ return rc;
+
+}
/**
* lpfc_bsg_hst_vendor - process a vendor-specific fc_bsg_job
@@ -5689,6 +5734,9 @@ lpfc_bsg_hst_vendor(struct bsg_job *job)
case LPFC_BSG_VENDOR_RAS_SET_CONFIG:
rc = lpfc_bsg_set_ras_config(job);
break;
+ case LPFC_BSG_VENDOR_GET_TRUNK_INFO:
+ rc = lpfc_get_trunk_info(job);
+ break;
default:
rc = -EINVAL;
bsg_reply->reply_payload_rcv_len = 0;
diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h
index 820323f1139b..9151824beea4 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.h
+++ b/drivers/scsi/lpfc/lpfc_bsg.h
@@ -42,6 +42,7 @@
#define LPFC_BSG_VENDOR_RAS_GET_FWLOG 17
#define LPFC_BSG_VENDOR_RAS_GET_CONFIG 18
#define LPFC_BSG_VENDOR_RAS_SET_CONFIG 19
+#define LPFC_BSG_VENDOR_GET_TRUNK_INFO 20
struct set_ct_event {
uint32_t command;
@@ -331,6 +332,43 @@ struct lpfc_bsg_get_ras_config_reply {
uint32_t log_buff_sz;
};
+struct lpfc_trunk_info {
+ uint32_t word0;
+#define lpfc_trunk_info_link_status_SHIFT 0
+#define lpfc_trunk_info_link_status_MASK 1
+#define lpfc_trunk_info_link_status_WORD word0
+#define lpfc_trunk_info_trunk_active0_SHIFT 8
+#define lpfc_trunk_info_trunk_active0_MASK 1
+#define lpfc_trunk_info_trunk_active0_WORD word0
+#define lpfc_trunk_info_trunk_active1_SHIFT 9
+#define lpfc_trunk_info_trunk_active1_MASK 1
+#define lpfc_trunk_info_trunk_active1_WORD word0
+#define lpfc_trunk_info_trunk_active2_SHIFT 10
+#define lpfc_trunk_info_trunk_active2_MASK 1
+#define lpfc_trunk_info_trunk_active2_WORD word0
+#define lpfc_trunk_info_trunk_active3_SHIFT 11
+#define lpfc_trunk_info_trunk_active3_MASK 1
+#define lpfc_trunk_info_trunk_active3_WORD word0
+#define lpfc_trunk_info_trunk_config0_SHIFT 12
+#define lpfc_trunk_info_trunk_config0_MASK 1
+#define lpfc_trunk_info_trunk_config0_WORD word0
+#define lpfc_trunk_info_trunk_config1_SHIFT 13
+#define lpfc_trunk_info_trunk_config1_MASK 1
+#define lpfc_trunk_info_trunk_config1_WORD word0
+#define lpfc_trunk_info_trunk_config2_SHIFT 14
+#define lpfc_trunk_info_trunk_config2_MASK 1
+#define lpfc_trunk_info_trunk_config2_WORD word0
+#define lpfc_trunk_info_trunk_config3_SHIFT 15
+#define lpfc_trunk_info_trunk_config3_MASK 1
+#define lpfc_trunk_info_trunk_config3_WORD word0
+ uint16_t port_speed;
+ uint16_t logical_speed;
+ uint32_t reserved3;
+};
+
+struct get_trunk_info_req {
+ uint32_t command;
+};
/* driver only */
#define SLI_CONFIG_NOT_HANDLED 0
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index e01136507780..39f3fa988732 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -74,7 +74,6 @@ void lpfc_mbx_cmpl_read_topology(struct lpfc_hba *, LPFC_MBOXQ_t *);
void lpfc_init_vpi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
void lpfc_cancel_all_vport_retry_delay_timer(struct lpfc_hba *);
void lpfc_retry_pport_discovery(struct lpfc_hba *);
-void lpfc_release_rpi(struct lpfc_hba *, struct lpfc_vport *, uint16_t);
int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt);
void lpfc_free_iocb_list(struct lpfc_hba *phba);
int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
@@ -175,6 +174,7 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *);
void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
struct lpfc_iocbq *);
int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *);
+int lpfc_issue_gidpt(struct lpfc_vport *vport);
int lpfc_issue_gidft(struct lpfc_vport *vport);
int lpfc_get_gidft_type(struct lpfc_vport *vport, struct lpfc_iocbq *iocbq);
int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t);
@@ -380,8 +380,10 @@ void lpfc_nvmet_buf_free(struct lpfc_hba *phba, void *virtp, dma_addr_t dma);
void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *);
void lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp);
+int lpfc_link_reset(struct lpfc_vport *vport);
/* Function prototypes. */
+int lpfc_check_pci_resettable(const struct lpfc_hba *phba);
const char* lpfc_info(struct Scsi_Host *);
int lpfc_scan_finished(struct Scsi_Host *, unsigned long);
@@ -550,6 +552,7 @@ void lpfc_sli4_ras_init(struct lpfc_hba *phba);
void lpfc_sli4_ras_setup(struct lpfc_hba *phba);
int lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, uint32_t fwlog_level,
uint32_t fwlog_enable);
+void lpfc_ras_stop_fwlog(struct lpfc_hba *phba);
int lpfc_check_fwlog_support(struct lpfc_hba *phba);
/* NVME interfaces. */
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 789ad1502534..552da8bf43e4 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -540,7 +540,17 @@ lpfc_ns_rsp_audit_did(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
struct lpfc_hba *phba = vport->phba;
struct lpfc_nodelist *ndlp = NULL;
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ char *str;
+ if (phba->cfg_ns_query == LPFC_NS_QUERY_GID_FT)
+ str = "GID_FT";
+ else
+ str = "GID_PT";
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "6430 Process %s rsp for %08x type %x %s %s\n",
+ str, Did, fc4_type,
+ (fc4_type == FC_TYPE_FCP) ? "FCP" : " ",
+ (fc4_type == FC_TYPE_NVME) ? "NVME" : " ");
/*
* To conserve rpi's, filter out addresses for other
* vports on the same physical HBAs.
@@ -832,6 +842,198 @@ out:
}
static void
+lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ struct lpfc_iocbq *rspiocb)
+{
+ struct lpfc_vport *vport = cmdiocb->vport;
+ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ IOCB_t *irsp;
+ struct lpfc_dmabuf *outp;
+ struct lpfc_dmabuf *inp;
+ struct lpfc_sli_ct_request *CTrsp;
+ struct lpfc_sli_ct_request *CTreq;
+ struct lpfc_nodelist *ndlp;
+ int rc;
+
+ /* First save ndlp, before we overwrite it */
+ ndlp = cmdiocb->context_un.ndlp;
+
+ /* we pass cmdiocb to state machine which needs rspiocb as well */
+ cmdiocb->context_un.rsp_iocb = rspiocb;
+ inp = (struct lpfc_dmabuf *)cmdiocb->context1;
+ outp = (struct lpfc_dmabuf *)cmdiocb->context2;
+ irsp = &rspiocb->iocb;
+
+ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
+ "GID_PT cmpl: status:x%x/x%x rtry:%d",
+ irsp->ulpStatus, irsp->un.ulpWord[4],
+ vport->fc_ns_retry);
+
+ /* Don't bother processing response if vport is being torn down. */
+ if (vport->load_flag & FC_UNLOADING) {
+ if (vport->fc_flag & FC_RSCN_MODE)
+ lpfc_els_flush_rscn(vport);
+ goto out;
+ }
+
+ if (lpfc_els_chk_latt(vport)) {
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "4108 Link event during NS query\n");
+ if (vport->fc_flag & FC_RSCN_MODE)
+ lpfc_els_flush_rscn(vport);
+ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
+ goto out;
+ }
+ if (lpfc_error_lost_link(irsp)) {
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "4101 NS query failed due to link event\n");
+ if (vport->fc_flag & FC_RSCN_MODE)
+ lpfc_els_flush_rscn(vport);
+ goto out;
+ }
+
+ spin_lock_irq(shost->host_lock);
+ if (vport->fc_flag & FC_RSCN_DEFERRED) {
+ vport->fc_flag &= ~FC_RSCN_DEFERRED;
+ spin_unlock_irq(shost->host_lock);
+
+ /* This is a GID_PT completing so the gidft_inp counter was
+ * incremented before the GID_PT was issued to the wire.
+ */
+ vport->gidft_inp--;
+
+ /*
+ * Skip processing the NS response
+ * Re-issue the NS cmd
+ */
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+ "4102 Process Deferred RSCN Data: x%x x%x\n",
+ vport->fc_flag, vport->fc_rscn_id_cnt);
+ lpfc_els_handle_rscn(vport);
+
+ goto out;
+ }
+ spin_unlock_irq(shost->host_lock);
+
+ if (irsp->ulpStatus) {
+ /* Check for retry */
+ if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) {
+ if (irsp->ulpStatus != IOSTAT_LOCAL_REJECT ||
+ (irsp->un.ulpWord[4] & IOERR_PARAM_MASK) !=
+ IOERR_NO_RESOURCES)
+ vport->fc_ns_retry++;
+
+ /* CT command is being retried */
+ vport->gidft_inp--;
+ rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_PT,
+ vport->fc_ns_retry, GID_PT_N_PORT);
+ if (rc == 0)
+ goto out;
+ }
+ if (vport->fc_flag & FC_RSCN_MODE)
+ lpfc_els_flush_rscn(vport);
+ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+ "4103 GID_FT Query error: 0x%x 0x%x\n",
+ irsp->ulpStatus, vport->fc_ns_retry);
+ } else {
+ /* Good status, continue checking */
+ CTreq = (struct lpfc_sli_ct_request *)inp->virt;
+ CTrsp = (struct lpfc_sli_ct_request *)outp->virt;
+ if (CTrsp->CommandResponse.bits.CmdRsp ==
+ cpu_to_be16(SLI_CT_RESPONSE_FS_ACC)) {
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "4105 NameServer Rsp Data: x%x x%x\n",
+ vport->fc_flag,
+ CTreq->un.gid.Fc4Type);
+
+ lpfc_ns_rsp(vport,
+ outp,
+ CTreq->un.gid.Fc4Type,
+ (uint32_t)(irsp->un.genreq64.bdl.bdeSize));
+ } else if (CTrsp->CommandResponse.bits.CmdRsp ==
+ be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) {
+ /* NameServer Rsp Error */
+ if ((CTrsp->ReasonCode == SLI_CT_UNABLE_TO_PERFORM_REQ)
+ && (CTrsp->Explanation == SLI_CT_NO_FC4_TYPES)) {
+ lpfc_printf_vlog(
+ vport, KERN_INFO, LOG_DISCOVERY,
+ "4106 No NameServer Entries "
+ "Data: x%x x%x x%x x%x\n",
+ CTrsp->CommandResponse.bits.CmdRsp,
+ (uint32_t)CTrsp->ReasonCode,
+ (uint32_t)CTrsp->Explanation,
+ vport->fc_flag);
+
+ lpfc_debugfs_disc_trc(
+ vport, LPFC_DISC_TRC_CT,
+ "GID_PT no entry cmd:x%x rsn:x%x exp:x%x",
+ (uint32_t)CTrsp->CommandResponse.bits.CmdRsp,
+ (uint32_t)CTrsp->ReasonCode,
+ (uint32_t)CTrsp->Explanation);
+ } else {
+ lpfc_printf_vlog(
+ vport, KERN_INFO, LOG_DISCOVERY,
+ "4107 NameServer Rsp Error "
+ "Data: x%x x%x x%x x%x\n",
+ CTrsp->CommandResponse.bits.CmdRsp,
+ (uint32_t)CTrsp->ReasonCode,
+ (uint32_t)CTrsp->Explanation,
+ vport->fc_flag);
+
+ lpfc_debugfs_disc_trc(
+ vport, LPFC_DISC_TRC_CT,
+ "GID_PT rsp err1 cmd:x%x rsn:x%x exp:x%x",
+ (uint32_t)CTrsp->CommandResponse.bits.CmdRsp,
+ (uint32_t)CTrsp->ReasonCode,
+ (uint32_t)CTrsp->Explanation);
+ }
+ } else {
+ /* NameServer Rsp Error */
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY,
+ "4109 NameServer Rsp Error "
+ "Data: x%x x%x x%x x%x\n",
+ CTrsp->CommandResponse.bits.CmdRsp,
+ (uint32_t)CTrsp->ReasonCode,
+ (uint32_t)CTrsp->Explanation,
+ vport->fc_flag);
+
+ lpfc_debugfs_disc_trc(
+ vport, LPFC_DISC_TRC_CT,
+ "GID_PT rsp err2 cmd:x%x rsn:x%x exp:x%x",
+ (uint32_t)CTrsp->CommandResponse.bits.CmdRsp,
+ (uint32_t)CTrsp->ReasonCode,
+ (uint32_t)CTrsp->Explanation);
+ }
+ vport->gidft_inp--;
+ }
+ /* Link up / RSCN discovery */
+ if ((vport->num_disc_nodes == 0) &&
+ (vport->gidft_inp == 0)) {
+ /*
+ * The driver has cycled through all Nports in the RSCN payload.
+ * Complete the handling by cleaning up and marking the
+ * current driver state.
+ */
+ if (vport->port_state >= LPFC_DISC_AUTH) {
+ if (vport->fc_flag & FC_RSCN_MODE) {
+ lpfc_els_flush_rscn(vport);
+ spin_lock_irq(shost->host_lock);
+ vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */
+ spin_unlock_irq(shost->host_lock);
+ } else {
+ lpfc_els_flush_rscn(vport);
+ }
+ }
+
+ lpfc_disc_start(vport);
+ }
+out:
+ cmdiocb->context_un.ndlp = ndlp; /* Now restore ndlp for free */
+ lpfc_ct_free_iocb(phba, cmdiocb);
+}
+
+static void
lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
struct lpfc_iocbq *rspiocb)
{
@@ -857,6 +1059,13 @@ lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
CTrsp = (struct lpfc_sli_ct_request *) outp->virt;
fbits = CTrsp->un.gff_acc.fbits[FCP_TYPE_FEATURE_OFFSET];
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "6431 Process GFF_ID rsp for %08x "
+ "fbits %02x %s %s\n",
+ did, fbits,
+ (fbits & FC4_FEATURE_INIT) ? "Initiator" : " ",
+ (fbits & FC4_FEATURE_TARGET) ? "Target" : " ");
+
if (CTrsp->CommandResponse.bits.CmdRsp ==
be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) {
if ((fbits & FC4_FEATURE_INIT) &&
@@ -979,9 +1188,15 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
CTrsp = (struct lpfc_sli_ct_request *)outp->virt;
fc4_data_0 = be32_to_cpu(CTrsp->un.gft_acc.fc4_types[0]);
fc4_data_1 = be32_to_cpu(CTrsp->un.gft_acc.fc4_types[1]);
+
lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
- "3062 DID x%06x GFT Wd0 x%08x Wd1 x%08x\n",
- did, fc4_data_0, fc4_data_1);
+ "6432 Process GFT_ID rsp for %08x "
+ "Data %08x %08x %s %s\n",
+ did, fc4_data_0, fc4_data_1,
+ (fc4_data_0 & LPFC_FC4_TYPE_BITMASK) ?
+ "FCP" : " ",
+ (fc4_data_1 & LPFC_FC4_TYPE_BITMASK) ?
+ "NVME" : " ");
ndlp = lpfc_findnode_did(vport, did);
if (ndlp) {
@@ -1312,6 +1527,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
struct ulp_bde64 *bpl;
void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
struct lpfc_iocbq *) = NULL;
+ uint32_t *ptr;
uint32_t rsp_size = 1024;
size_t size;
int rc = 0;
@@ -1365,6 +1581,8 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
bpl->tus.f.bdeFlags = 0;
if (cmdcode == SLI_CTNS_GID_FT)
bpl->tus.f.bdeSize = GID_REQUEST_SZ;
+ else if (cmdcode == SLI_CTNS_GID_PT)
+ bpl->tus.f.bdeSize = GID_REQUEST_SZ;
else if (cmdcode == SLI_CTNS_GFF_ID)
bpl->tus.f.bdeSize = GFF_REQUEST_SZ;
else if (cmdcode == SLI_CTNS_GFT_ID)
@@ -1405,6 +1623,18 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
rsp_size = FC_MAX_NS_RSP;
break;
+ case SLI_CTNS_GID_PT:
+ CtReq->CommandResponse.bits.CmdRsp =
+ cpu_to_be16(SLI_CTNS_GID_PT);
+ CtReq->un.gid.PortType = context;
+
+ if (vport->port_state < LPFC_NS_QRY)
+ vport->port_state = LPFC_NS_QRY;
+ lpfc_set_disctmo(vport);
+ cmpl = lpfc_cmpl_ct_cmd_gid_pt;
+ rsp_size = FC_MAX_NS_RSP;
+ break;
+
case SLI_CTNS_GFF_ID:
CtReq->CommandResponse.bits.CmdRsp =
cpu_to_be16(SLI_CTNS_GFF_ID);
@@ -1436,8 +1666,18 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
*/
if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
(phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME))
- CtReq->un.rft.rsvd[0] = cpu_to_be32(0x00000100);
+ CtReq->un.rft.rsvd[0] =
+ cpu_to_be32(LPFC_FC4_TYPE_BITMASK);
+ ptr = (uint32_t *)CtReq;
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "6433 Issue RFT (%s %s): %08x %08x %08x %08x "
+ "%08x %08x %08x %08x\n",
+ CtReq->un.rft.fcpReg ? "FCP" : " ",
+ CtReq->un.rft.rsvd[0] ? "NVME" : " ",
+ *ptr, *(ptr + 1), *(ptr + 2), *(ptr + 3),
+ *(ptr + 4), *(ptr + 5),
+ *(ptr + 6), *(ptr + 7));
cmpl = lpfc_cmpl_ct_cmd_rft_id;
break;
@@ -1512,6 +1752,14 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
else
goto ns_cmd_free_bmpvirt;
+ ptr = (uint32_t *)CtReq;
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "6434 Issue RFF (%s): %08x %08x %08x %08x "
+ "%08x %08x %08x %08x\n",
+ (context == FC_TYPE_NVME) ? "NVME" : "FCP",
+ *ptr, *(ptr + 1), *(ptr + 2), *(ptr + 3),
+ *(ptr + 4), *(ptr + 5),
+ *(ptr + 6), *(ptr + 7));
cmpl = lpfc_cmpl_ct_cmd_rff_id;
break;
}
@@ -1758,7 +2006,7 @@ lpfc_fdmi_hba_attr_manufacturer(struct lpfc_vport *vport,
memset(ae, 0, 256);
strncpy(ae->un.AttrString,
- "Emulex Corporation",
+ "Broadcom Inc.",
sizeof(ae->un.AttrString));
len = strnlen(ae->un.AttrString,
sizeof(ae->un.AttrString));
@@ -2134,6 +2382,8 @@ lpfc_fdmi_port_attr_support_speed(struct lpfc_vport *vport,
ae->un.AttrInt = 0;
if (!(phba->hba_flag & HBA_FCOE_MODE)) {
+ if (phba->lmt & LMT_128Gb)
+ ae->un.AttrInt |= HBA_PORTSPEED_128GFC;
if (phba->lmt & LMT_64Gb)
ae->un.AttrInt |= HBA_PORTSPEED_64GFC;
if (phba->lmt & LMT_32Gb)
@@ -2210,6 +2460,9 @@ lpfc_fdmi_port_attr_speed(struct lpfc_vport *vport,
case LPFC_LINK_SPEED_64GHZ:
ae->un.AttrInt = HBA_PORTSPEED_64GFC;
break;
+ case LPFC_LINK_SPEED_128GHZ:
+ ae->un.AttrInt = HBA_PORTSPEED_128GFC;
+ break;
default:
ae->un.AttrInt = HBA_PORTSPEED_UNKNOWN;
break;
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 34d311a7dbef..a58f0b3f03a9 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -645,6 +645,8 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
i, ndlp->cmd_qdepth);
outio += i;
}
+ len += snprintf(buf + len, size - len, "defer:%x ",
+ ndlp->nlp_defer_did);
len += snprintf(buf+len, size-len, "\n");
}
spin_unlock_irq(shost->host_lock);
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index 28e2b60fc5c0..1c89c9f314fa 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -138,6 +138,7 @@ struct lpfc_nodelist {
uint32_t nvme_fb_size; /* NVME target's supported byte cnt */
#define NVME_FB_BIT_SHIFT 9 /* PRLI Rsp first burst in 512B units. */
+ uint32_t nlp_defer_did;
};
struct lpfc_node_rrq {
struct list_head list;
@@ -165,6 +166,7 @@ struct lpfc_node_rrq {
#define NLP_ELS_SND_MASK 0x000007e0 /* sent ELS request for this entry */
#define NLP_NVMET_RECOV 0x00001000 /* NVMET auditing node for recovery. */
#define NLP_FCP_PRLI_RJT 0x00002000 /* Rport does not support FCP PRLI. */
+#define NLP_UNREG_INP 0x00008000 /* UNREG_RPI cmd is in progress */
#define NLP_DEFER_RM 0x00010000 /* Remove this ndlp if no longer used */
#define NLP_DELAY_TMO 0x00020000 /* delay timeout is running for node */
#define NLP_NPR_2B_DISC 0x00040000 /* node is included in num_disc_nodes */
@@ -293,4 +295,4 @@ struct lpfc_node_rrq {
#define NLP_EVT_DEVICE_RM 0xb /* Device not found in NS / ALPAmap */
#define NLP_EVT_DEVICE_RECOVERY 0xc /* Device existence unknown */
#define NLP_EVT_MAX_EVENT 0xd
-
+#define NLP_EVT_NOTHING_PENDING 0xff
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index f1c1faa74b46..b3a4789468c3 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -242,6 +242,8 @@ lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp,
icmd->ulpCommand = CMD_ELS_REQUEST64_CR;
if (elscmd == ELS_CMD_FLOGI)
icmd->ulpTimeout = FF_DEF_RATOV * 2;
+ else if (elscmd == ELS_CMD_LOGO)
+ icmd->ulpTimeout = phba->fc_ratov;
else
icmd->ulpTimeout = phba->fc_ratov * 2;
} else {
@@ -313,20 +315,20 @@ lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp,
/* Xmit ELS command <elsCmd> to remote NPORT <did> */
lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
"0116 Xmit ELS command x%x to remote "
- "NPORT x%x I/O tag: x%x, port state:x%x"
- " fc_flag:x%x\n",
+ "NPORT x%x I/O tag: x%x, port state:x%x "
+ "rpi x%x fc_flag:x%x\n",
elscmd, did, elsiocb->iotag,
- vport->port_state,
+ vport->port_state, ndlp->nlp_rpi,
vport->fc_flag);
} else {
/* Xmit ELS response <elsCmd> to remote NPORT <did> */
lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
"0117 Xmit ELS response x%x to remote "
"NPORT x%x I/O tag: x%x, size: x%x "
- "port_state x%x fc_flag x%x\n",
+ "port_state x%x rpi x%x fc_flag x%x\n",
elscmd, ndlp->nlp_DID, elsiocb->iotag,
cmdSize, vport->port_state,
- vport->fc_flag);
+ ndlp->nlp_rpi, vport->fc_flag);
}
return elsiocb;
@@ -413,7 +415,7 @@ lpfc_issue_fabric_reglogin(struct lpfc_vport *vport)
/* increment the reference count on ndlp to hold reference
* for the callback routine.
*/
- mbox->context2 = lpfc_nlp_get(ndlp);
+ mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
@@ -428,7 +430,7 @@ fail_issue_reg_login:
* for the failed mbox command.
*/
lpfc_nlp_put(ndlp);
- mp = (struct lpfc_dmabuf *) mbox->context1;
+ mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
fail_free_mbox:
@@ -502,7 +504,7 @@ lpfc_issue_reg_vfi(struct lpfc_vport *vport)
mboxq->mbox_cmpl = lpfc_mbx_cmpl_reg_vfi;
mboxq->vport = vport;
- mboxq->context1 = dmabuf;
+ mboxq->ctx_buf = dmabuf;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
rc = -ENXIO;
@@ -1055,9 +1057,9 @@ stop_rr_fcf_flogi:
goto flogifail;
lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
- "0150 FLOGI failure Status:x%x/x%x TMO:x%x\n",
+ "0150 FLOGI failure Status:x%x/x%x xri x%x TMO:x%x\n",
irsp->ulpStatus, irsp->un.ulpWord[4],
- irsp->ulpTimeout);
+ cmdiocb->sli4_xritag, irsp->ulpTimeout);
/* FLOGI failed, so there is no fabric */
spin_lock_irq(shost->host_lock);
@@ -1111,7 +1113,8 @@ stop_rr_fcf_flogi:
/* FLOGI completes successfully */
lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
"0101 FLOGI completes successfully, I/O tag:x%x, "
- "Data: x%x x%x x%x x%x x%x x%x\n", cmdiocb->iotag,
+ "xri x%x Data: x%x x%x x%x x%x x%x %x\n",
+ cmdiocb->iotag, cmdiocb->sli4_xritag,
irsp->un.ulpWord[4], sp->cmn.e_d_tov,
sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution,
vport->port_state, vport->fc_flag);
@@ -1155,6 +1158,7 @@ stop_rr_fcf_flogi:
phba->fcf.fcf_flag &= ~FCF_DISCOVERY;
phba->hba_flag &= ~(FCF_RR_INPROG | HBA_DEVLOSS_TMO);
spin_unlock_irq(&phba->hbalock);
+ phba->fcf.fcf_redisc_attempted = 0; /* reset */
goto out;
}
if (!rc) {
@@ -1169,6 +1173,7 @@ stop_rr_fcf_flogi:
phba->fcf.fcf_flag &= ~FCF_DISCOVERY;
phba->hba_flag &= ~(FCF_RR_INPROG | HBA_DEVLOSS_TMO);
spin_unlock_irq(&phba->hbalock);
+ phba->fcf.fcf_redisc_attempted = 0; /* reset */
goto out;
}
}
@@ -1229,9 +1234,10 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
struct serv_parm *sp;
IOCB_t *icmd;
struct lpfc_iocbq *elsiocb;
+ struct lpfc_iocbq defer_flogi_acc;
uint8_t *pcmd;
uint16_t cmdsize;
- uint32_t tmo;
+ uint32_t tmo, did;
int rc;
cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
@@ -1303,6 +1309,35 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
phba->sli3_options, 0, 0);
rc = lpfc_issue_fabric_iocb(phba, elsiocb);
+
+ phba->hba_flag |= HBA_FLOGI_ISSUED;
+
+ /* Check for a deferred FLOGI ACC condition */
+ if (phba->defer_flogi_acc_flag) {
+ did = vport->fc_myDID;
+ vport->fc_myDID = Fabric_DID;
+
+ memset(&defer_flogi_acc, 0, sizeof(struct lpfc_iocbq));
+
+ defer_flogi_acc.iocb.ulpContext = phba->defer_flogi_acc_rx_id;
+ defer_flogi_acc.iocb.unsli3.rcvsli3.ox_id =
+ phba->defer_flogi_acc_ox_id;
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+ "3354 Xmit deferred FLOGI ACC: rx_id: x%x,"
+ " ox_id: x%x, hba_flag x%x\n",
+ phba->defer_flogi_acc_rx_id,
+ phba->defer_flogi_acc_ox_id, phba->hba_flag);
+
+ /* Send deferred FLOGI ACC */
+ lpfc_els_rsp_acc(vport, ELS_CMD_FLOGI, &defer_flogi_acc,
+ ndlp, NULL);
+
+ phba->defer_flogi_acc_flag = false;
+
+ vport->fc_myDID = did;
+ }
+
if (rc == IOCB_ERROR) {
lpfc_els_free_iocb(phba, elsiocb);
return 1;
@@ -1338,6 +1373,8 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba)
Fabric_DID);
pring = lpfc_phba_elsring(phba);
+ if (unlikely(!pring))
+ return -EIO;
/*
* Check the txcmplq for an iocb that matches the nport the driver is
@@ -1531,7 +1568,9 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
struct serv_parm *sp;
uint8_t name[sizeof(struct lpfc_name)];
uint32_t rc, keepDID = 0, keep_nlp_flag = 0;
+ uint32_t keep_new_nlp_flag = 0;
uint16_t keep_nlp_state;
+ u32 keep_nlp_fc4_type = 0;
struct lpfc_nvme_rport *keep_nrport = NULL;
int put_node;
int put_rport;
@@ -1551,8 +1590,10 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
*/
new_ndlp = lpfc_findnode_wwpn(vport, &sp->portName);
+ /* return immediately if the WWPN matches ndlp */
if (new_ndlp == ndlp && NLP_CHK_NODE_ACT(new_ndlp))
return ndlp;
+
if (phba->sli_rev == LPFC_SLI_REV4) {
active_rrqs_xri_bitmap = mempool_alloc(phba->active_rrq_pool,
GFP_KERNEL);
@@ -1561,9 +1602,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
phba->cfg_rrq_xri_bitmap_sz);
}
- lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
- "3178 PLOGI confirm: ndlp %p x%x: new_ndlp %p\n",
- ndlp, ndlp->nlp_DID, new_ndlp);
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_NODE,
+ "3178 PLOGI confirm: ndlp x%x x%x x%x: "
+ "new_ndlp x%x x%x x%x\n",
+ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_fc4_type,
+ (new_ndlp ? new_ndlp->nlp_DID : 0),
+ (new_ndlp ? new_ndlp->nlp_flag : 0),
+ (new_ndlp ? new_ndlp->nlp_fc4_type : 0));
if (!new_ndlp) {
rc = memcmp(&ndlp->nlp_portname, name,
@@ -1612,6 +1657,16 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
phba->cfg_rrq_xri_bitmap_sz);
}
+ /* At this point in this routine, we know new_ndlp will be
+ * returned. however, any previous GID_FTs that were done
+ * would have updated nlp_fc4_type in ndlp, so we must ensure
+ * new_ndlp has the right value.
+ */
+ if (vport->fc_flag & FC_FABRIC) {
+ keep_nlp_fc4_type = new_ndlp->nlp_fc4_type;
+ new_ndlp->nlp_fc4_type = ndlp->nlp_fc4_type;
+ }
+
lpfc_unreg_rpi(vport, new_ndlp);
new_ndlp->nlp_DID = ndlp->nlp_DID;
new_ndlp->nlp_prev_state = ndlp->nlp_prev_state;
@@ -1621,9 +1676,36 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
phba->cfg_rrq_xri_bitmap_sz);
spin_lock_irq(shost->host_lock);
- keep_nlp_flag = new_ndlp->nlp_flag;
+ keep_new_nlp_flag = new_ndlp->nlp_flag;
+ keep_nlp_flag = ndlp->nlp_flag;
new_ndlp->nlp_flag = ndlp->nlp_flag;
- ndlp->nlp_flag = keep_nlp_flag;
+
+ /* if new_ndlp had NLP_UNREG_INP set, keep it */
+ if (keep_new_nlp_flag & NLP_UNREG_INP)
+ new_ndlp->nlp_flag |= NLP_UNREG_INP;
+ else
+ new_ndlp->nlp_flag &= ~NLP_UNREG_INP;
+
+ /* if new_ndlp had NLP_RPI_REGISTERED set, keep it */
+ if (keep_new_nlp_flag & NLP_RPI_REGISTERED)
+ new_ndlp->nlp_flag |= NLP_RPI_REGISTERED;
+ else
+ new_ndlp->nlp_flag &= ~NLP_RPI_REGISTERED;
+
+ ndlp->nlp_flag = keep_new_nlp_flag;
+
+ /* if ndlp had NLP_UNREG_INP set, keep it */
+ if (keep_nlp_flag & NLP_UNREG_INP)
+ ndlp->nlp_flag |= NLP_UNREG_INP;
+ else
+ ndlp->nlp_flag &= ~NLP_UNREG_INP;
+
+ /* if ndlp had NLP_RPI_REGISTERED set, keep it */
+ if (keep_nlp_flag & NLP_RPI_REGISTERED)
+ ndlp->nlp_flag |= NLP_RPI_REGISTERED;
+ else
+ ndlp->nlp_flag &= ~NLP_RPI_REGISTERED;
+
spin_unlock_irq(shost->host_lock);
/* Set nlp_states accordingly */
@@ -1661,7 +1743,6 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
if (ndlp->nrport) {
ndlp->nrport = NULL;
lpfc_nlp_put(ndlp);
- new_ndlp->nlp_fc4_type = ndlp->nlp_fc4_type;
}
/* We shall actually free the ndlp with both nlp_DID and
@@ -1674,7 +1755,10 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
spin_unlock_irq(&phba->ndlp_lock);
}
- /* Two ndlps cannot have the same did on the nodelist */
+ /* Two ndlps cannot have the same did on the nodelist.
+ * Note: for this case, ndlp has a NULL WWPN so setting
+ * the nlp_fc4_type isn't required.
+ */
ndlp->nlp_DID = keepDID;
lpfc_nlp_set_state(vport, ndlp, keep_nlp_state);
if (phba->sli_rev == LPFC_SLI_REV4 &&
@@ -1693,8 +1777,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
lpfc_unreg_rpi(vport, ndlp);
- /* Two ndlps cannot have the same did */
+ /* Two ndlps cannot have the same did and the fc4
+ * type must be transferred because the ndlp is in
+ * flight.
+ */
ndlp->nlp_DID = keepDID;
+ ndlp->nlp_fc4_type = keep_nlp_fc4_type;
+
if (phba->sli_rev == LPFC_SLI_REV4 &&
active_rrqs_xri_bitmap)
memcpy(ndlp->active_rrqs_xri_bitmap,
@@ -1735,6 +1824,12 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
active_rrqs_xri_bitmap)
mempool_free(active_rrqs_xri_bitmap,
phba->active_rrq_pool);
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_NODE,
+ "3173 PLOGI confirm exit: new_ndlp x%x x%x x%x\n",
+ new_ndlp->nlp_DID, new_ndlp->nlp_flag,
+ new_ndlp->nlp_fc4_type);
+
return new_ndlp;
}
@@ -1895,7 +1990,7 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC);
ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
spin_unlock_irq(shost->host_lock);
- rc = 0;
+ rc = 0;
/* PLOGI completes to NPort <nlp_DID> */
lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
@@ -2002,8 +2097,29 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry)
int ret;
ndlp = lpfc_findnode_did(vport, did);
- if (ndlp && !NLP_CHK_NODE_ACT(ndlp))
- ndlp = NULL;
+
+ if (ndlp) {
+ /* Defer the processing of the issue PLOGI until after the
+ * outstanding UNREG_RPI mbox command completes, unless we
+ * are going offline. This logic does not apply for Fabric DIDs
+ */
+ if ((ndlp->nlp_flag & NLP_UNREG_INP) &&
+ ((ndlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) &&
+ !(vport->fc_flag & FC_OFFLINE_MODE)) {
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "4110 Issue PLOGI x%x deferred "
+ "on NPort x%x rpi x%x Data: %p\n",
+ ndlp->nlp_defer_did, ndlp->nlp_DID,
+ ndlp->nlp_rpi, ndlp);
+
+ /* We can only defer 1st PLOGI */
+ if (ndlp->nlp_defer_did == NLP_EVT_NOTHING_PENDING)
+ ndlp->nlp_defer_did = did;
+ return 0;
+ }
+ if (!NLP_CHK_NODE_ACT(ndlp))
+ ndlp = NULL;
+ }
/* If ndlp is not NULL, we will bump the reference count on it */
cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
@@ -2137,7 +2253,7 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
else
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_CMPL_PRLI);
- } else
+ } else {
/* Good status, call state machine. However, if another
* PRLI is outstanding, don't call the state machine
* because final disposition to Mapped or Unmapped is
@@ -2145,6 +2261,7 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
*/
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_CMPL_PRLI);
+ }
out:
lpfc_els_free_iocb(phba, cmdiocb);
@@ -2203,7 +2320,7 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR);
ndlp->nlp_type &= ~(NLP_NVME_TARGET | NLP_NVME_INITIATOR);
ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE;
- ndlp->nlp_flag &= ~NLP_FIRSTBURST;
+ ndlp->nlp_flag &= ~(NLP_FIRSTBURST | NLP_NPR_2B_DISC);
ndlp->nvme_fb_size = 0;
send_next_prli:
@@ -2682,16 +2799,15 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
goto out;
}
+ /* The LOGO will not be retried on failure. A LOGO was
+ * issued to the remote rport and a ACC or RJT or no Answer are
+ * all acceptable. Note the failure and move forward with
+ * discovery. The PLOGI will retry.
+ */
if (irsp->ulpStatus) {
- /* Check for retry */
- if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
- /* ELS command is being retried */
- skip_recovery = 1;
- goto out;
- }
/* LOGO failed */
lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
- "2756 LOGO failure DID:%06X Status:x%x/x%x\n",
+ "2756 LOGO failure, No Retry DID:%06X Status:x%x/x%x\n",
ndlp->nlp_DID, irsp->ulpStatus,
irsp->un.ulpWord[4]);
/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
@@ -2737,7 +2853,8 @@ out:
* For any other port type, the rpi is unregistered as an implicit
* LOGO.
*/
- if ((ndlp->nlp_type & NLP_FCP_TARGET) && (skip_recovery == 0)) {
+ if (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_NVME_TARGET) &&
+ skip_recovery == 0) {
lpfc_cancel_retry_delay_tmo(vport, ndlp);
spin_lock_irqsave(shost->host_lock, flags);
ndlp->nlp_flag |= NLP_NPR_2B_DISC;
@@ -2770,6 +2887,8 @@ out:
* will be stored into the context1 field of the IOCB for the completion
* callback function to the LOGO ELS command.
*
+ * Callers of this routine are expected to unregister the RPI first
+ *
* Return code
* 0 - successfully issued logo
* 1 - failed to issue logo
@@ -2811,22 +2930,6 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
"Issue LOGO: did:x%x",
ndlp->nlp_DID, 0, 0);
- /*
- * If we are issuing a LOGO, we may try to recover the remote NPort
- * by issuing a PLOGI later. Even though we issue ELS cmds by the
- * VPI, if we have a valid RPI, and that RPI gets unreg'ed while
- * that ELS command is in-flight, the HBA returns a IOERR_INVALID_RPI
- * for that ELS cmd. To avoid this situation, lets get rid of the
- * RPI right now, before any ELS cmds are sent.
- */
- spin_lock_irq(shost->host_lock);
- ndlp->nlp_flag |= NLP_ISSUE_LOGO;
- spin_unlock_irq(shost->host_lock);
- if (lpfc_unreg_rpi(vport, ndlp)) {
- lpfc_els_free_iocb(phba, elsiocb);
- return 0;
- }
-
phba->fc_stat.elsXmitLOGO++;
elsiocb->iocb_cmpl = lpfc_cmpl_els_logo;
spin_lock_irq(shost->host_lock);
@@ -2834,7 +2937,6 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
ndlp->nlp_flag &= ~NLP_ISSUE_LOGO;
spin_unlock_irq(shost->host_lock);
rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
-
if (rc == IOCB_ERROR) {
spin_lock_irq(shost->host_lock);
ndlp->nlp_flag &= ~NLP_LOGO_SND;
@@ -2842,6 +2944,11 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
lpfc_els_free_iocb(phba, elsiocb);
return 1;
}
+
+ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+ spin_unlock_irq(shost->host_lock);
+ lpfc_nlp_set_state(vport, ndlp, NLP_STE_LOGO_ISSUE);
return 0;
}
@@ -3250,6 +3357,62 @@ lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp)
}
/**
+ * lpfc_link_reset - Issue link reset
+ * @vport: pointer to a virtual N_Port data structure.
+ *
+ * This routine performs link reset by sending INIT_LINK mailbox command.
+ * For SLI-3 adapter, link attention interrupt is enabled before issuing
+ * INIT_LINK mailbox command.
+ *
+ * Return code
+ * 0 - Link reset initiated successfully
+ * 1 - Failed to initiate link reset
+ **/
+int
+lpfc_link_reset(struct lpfc_vport *vport)
+{
+ struct lpfc_hba *phba = vport->phba;
+ LPFC_MBOXQ_t *mbox;
+ uint32_t control;
+ int rc;
+
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+ "2851 Attempt link reset\n");
+ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (!mbox) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+ "2852 Failed to allocate mbox memory");
+ return 1;
+ }
+
+ /* Enable Link attention interrupts */
+ if (phba->sli_rev <= LPFC_SLI_REV3) {
+ spin_lock_irq(&phba->hbalock);
+ phba->sli.sli_flag |= LPFC_PROCESS_LA;
+ control = readl(phba->HCregaddr);
+ control |= HC_LAINT_ENA;
+ writel(control, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+ spin_unlock_irq(&phba->hbalock);
+ }
+
+ lpfc_init_link(phba, mbox, phba->cfg_topology,
+ phba->cfg_link_speed);
+ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ mbox->vport = vport;
+ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
+ if ((rc != MBX_BUSY) && (rc != MBX_SUCCESS)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+ "2853 Failed to issue INIT_LINK "
+ "mbox command, rc:x%x\n", rc);
+ mempool_free(mbox, phba->mbox_mem_pool);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
* lpfc_els_retry - Make retry decision on an els command iocb
* @phba: pointer to lpfc hba data structure.
* @cmdiocb: pointer to lpfc command iocb data structure.
@@ -3285,6 +3448,7 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
int logerr = 0;
uint32_t cmd = 0;
uint32_t did;
+ int link_reset = 0, rc;
/* Note: context2 may be 0 for internal driver abort
@@ -3366,7 +3530,6 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
retry = 1;
break;
- case IOERR_SEQUENCE_TIMEOUT:
case IOERR_INVALID_RPI:
if (cmd == ELS_CMD_PLOGI &&
did == NameServer_DID) {
@@ -3377,6 +3540,18 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
}
retry = 1;
break;
+
+ case IOERR_SEQUENCE_TIMEOUT:
+ if (cmd == ELS_CMD_PLOGI &&
+ did == NameServer_DID &&
+ (cmdiocb->retry + 1) == maxretry) {
+ /* Reset the Link */
+ link_reset = 1;
+ break;
+ }
+ retry = 1;
+ delay = 100;
+ break;
}
break;
@@ -3533,6 +3708,19 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
break;
}
+ if (link_reset) {
+ rc = lpfc_link_reset(vport);
+ if (rc) {
+ /* Do not give up. Retry PLOGI one more time and attempt
+ * link reset if PLOGI fails again.
+ */
+ retry = 1;
+ delay = 100;
+ goto out_retry;
+ }
+ return 1;
+ }
+
if (did == FDMI_DID)
retry = 1;
@@ -3895,11 +4083,11 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
void
lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
+ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
- pmb->context1 = NULL;
- pmb->context2 = NULL;
+ pmb->ctx_buf = NULL;
+ pmb->ctx_ndlp = NULL;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
@@ -3975,7 +4163,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
/* Check to see if link went down during discovery */
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || lpfc_els_chk_latt(vport)) {
if (mbox) {
- mp = (struct lpfc_dmabuf *) mbox->context1;
+ mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
if (mp) {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
@@ -4019,7 +4207,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
"Data: x%x x%x x%x\n",
ndlp->nlp_DID, ndlp->nlp_state,
ndlp->nlp_rpi, ndlp->nlp_flag);
- mp = mbox->context1;
+ mp = mbox->ctx_buf;
if (mp) {
lpfc_mbuf_free(phba, mp->virt,
mp->phys);
@@ -4032,7 +4220,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
/* Increment reference count to ndlp to hold the
* reference to ndlp for the callback function.
*/
- mbox->context2 = lpfc_nlp_get(ndlp);
+ mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
mbox->vport = vport;
if (ndlp->nlp_flag & NLP_RM_DFLT_RPI) {
mbox->mbox_flag |= LPFC_MBX_IMED_UNREG;
@@ -4086,7 +4274,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
}
}
}
- mp = (struct lpfc_dmabuf *) mbox->context1;
+ mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
if (mp) {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
@@ -4272,14 +4460,6 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag,
default:
return 1;
}
- /* Xmit ELS ACC response tag <ulpIoTag> */
- lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
- "0128 Xmit ELS ACC response tag x%x, XRI: x%x, "
- "DID: x%x, nlp_flag: x%x nlp_state: x%x RPI: x%x "
- "fc_flag x%x\n",
- elsiocb->iotag, elsiocb->iocb.ulpContext,
- ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
- ndlp->nlp_rpi, vport->fc_flag);
if (ndlp->nlp_flag & NLP_LOGO_ACC) {
spin_lock_irq(shost->host_lock);
if (!(ndlp->nlp_flag & NLP_RPI_REGISTERED ||
@@ -4448,6 +4628,15 @@ lpfc_els_rsp_adisc_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
lpfc_els_free_iocb(phba, elsiocb);
return 1;
}
+
+ /* Xmit ELS ACC response tag <ulpIoTag> */
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+ "0128 Xmit ELS ACC response Status: x%x, IoTag: x%x, "
+ "XRI: x%x, DID: x%x, nlp_flag: x%x nlp_state: x%x "
+ "RPI: x%x, fc_flag x%x\n",
+ rc, elsiocb->iotag, elsiocb->sli4_xritag,
+ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
+ ndlp->nlp_rpi, vport->fc_flag);
return 0;
}
@@ -5281,6 +5470,8 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba)
desc->info.port_speed.speed = cpu_to_be16(rdp_speed);
+ if (phba->lmt & LMT_128Gb)
+ rdp_cap |= RDP_PS_128GB;
if (phba->lmt & LMT_64Gb)
rdp_cap |= RDP_PS_64GB;
if (phba->lmt & LMT_32Gb)
@@ -5499,7 +5690,7 @@ lpfc_get_rdp_info(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context)
goto prep_mbox_fail;
mbox->vport = rdp_context->ndlp->vport;
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a0;
- mbox->context2 = (struct lpfc_rdp_context *) rdp_context;
+ mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED)
goto issue_mbox_fail;
@@ -5542,7 +5733,7 @@ lpfc_els_rcv_rdp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
struct ls_rjt stat;
if (phba->sli_rev < LPFC_SLI_REV4 ||
- bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) !=
+ bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) <
LPFC_SLI_INTF_IF_TYPE_2) {
rjt_err = LSRJT_UNABLE_TPC;
rjt_expl = LSEXP_REQ_UNSUPPORTED;
@@ -5624,10 +5815,10 @@ lpfc_els_lcb_rsp(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
int rc;
mb = &pmb->u.mb;
- lcb_context = (struct lpfc_lcb_context *)pmb->context1;
+ lcb_context = (struct lpfc_lcb_context *)pmb->ctx_ndlp;
ndlp = lcb_context->ndlp;
- pmb->context1 = NULL;
- pmb->context2 = NULL;
+ pmb->ctx_ndlp = NULL;
+ pmb->ctx_buf = NULL;
shdr = (union lpfc_sli4_cfg_shdr *)
&pmb->u.mqe.un.beacon_config.header.cfg_shdr;
@@ -5701,6 +5892,9 @@ error:
stat = (struct ls_rjt *)(pcmd + sizeof(uint32_t));
stat->un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ if (shdr_add_status == ADD_STATUS_OPERATION_ALREADY_ACTIVE)
+ stat->un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS;
+
elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
phba->fc_stat.elsXmitLSRJT++;
rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
@@ -5731,7 +5925,7 @@ lpfc_sli4_set_beacon(struct lpfc_vport *vport,
lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
LPFC_MBOX_OPCODE_SET_BEACON_CONFIG, len,
LPFC_SLI4_MBX_EMBED);
- mbox->context1 = (void *)lcb_context;
+ mbox->ctx_ndlp = (void *)lcb_context;
mbox->vport = phba->pport;
mbox->mbox_cmpl = lpfc_els_lcb_rsp;
bf_set(lpfc_mbx_set_beacon_port_num, &mbox->u.mqe.un.beacon_config,
@@ -6011,6 +6205,19 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport)
if (vport->phba->nvmet_support)
continue;
+ /* If we are in the process of doing discovery on this
+ * NPort, let it continue on its own.
+ */
+ switch (ndlp->nlp_state) {
+ case NLP_STE_PLOGI_ISSUE:
+ case NLP_STE_ADISC_ISSUE:
+ case NLP_STE_REG_LOGIN_ISSUE:
+ case NLP_STE_PRLI_ISSUE:
+ case NLP_STE_LOGO_ISSUE:
+ continue;
+ }
+
+
lpfc_disc_state_machine(vport, ndlp, NULL,
NLP_EVT_DEVICE_RECOVERY);
lpfc_cancel_retry_delay_tmo(vport, ndlp);
@@ -6272,6 +6479,7 @@ int
lpfc_els_handle_rscn(struct lpfc_vport *vport)
{
struct lpfc_nodelist *ndlp;
+ struct lpfc_hba *phba = vport->phba;
/* Ignore RSCN if the port is being torn down. */
if (vport->load_flag & FC_UNLOADING) {
@@ -6300,8 +6508,15 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport)
* flush the RSCN. Otherwise, the outstanding requests
* need to complete.
*/
- if (lpfc_issue_gidft(vport) > 0)
+ if (phba->cfg_ns_query == LPFC_NS_QUERY_GID_FT) {
+ if (lpfc_issue_gidft(vport) > 0)
+ return 1;
+ } else if (phba->cfg_ns_query == LPFC_NS_QUERY_GID_PT) {
+ if (lpfc_issue_gidpt(vport) > 0)
+ return 1;
+ } else {
return 1;
+ }
} else {
/* Nameserver login in question. Revalidate. */
if (ndlp) {
@@ -6455,6 +6670,11 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
port_state = vport->port_state;
vport->fc_flag |= FC_PT2PT;
vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
+
+ /* Acking an unsol FLOGI. Count 1 for link bounce
+ * work-around.
+ */
+ vport->rcv_flogi_cnt++;
spin_unlock_irq(shost->host_lock);
lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
"3311 Rcv Flogi PS x%x new PS x%x "
@@ -6472,6 +6692,25 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
memcpy(&phba->fc_fabparam, sp, sizeof(struct serv_parm));
+ /* Defer ACC response until AFTER we issue a FLOGI */
+ if (!(phba->hba_flag & HBA_FLOGI_ISSUED)) {
+ phba->defer_flogi_acc_rx_id = cmdiocb->iocb.ulpContext;
+ phba->defer_flogi_acc_ox_id =
+ cmdiocb->iocb.unsli3.rcvsli3.ox_id;
+
+ vport->fc_myDID = did;
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+ "3344 Deferring FLOGI ACC: rx_id: x%x,"
+ " ox_id: x%x, hba_flag x%x\n",
+ phba->defer_flogi_acc_rx_id,
+ phba->defer_flogi_acc_ox_id, phba->hba_flag);
+
+ phba->defer_flogi_acc_flag = true;
+
+ return 0;
+ }
+
/* Send back ACC */
lpfc_els_rsp_acc(vport, ELS_CMD_FLOGI, cmdiocb, ndlp, NULL);
@@ -6644,11 +6883,11 @@ lpfc_els_rsp_rls_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
mb = &pmb->u.mb;
- ndlp = (struct lpfc_nodelist *) pmb->context2;
- rxid = (uint16_t) ((unsigned long)(pmb->context1) & 0xffff);
- oxid = (uint16_t) (((unsigned long)(pmb->context1) >> 16) & 0xffff);
- pmb->context1 = NULL;
- pmb->context2 = NULL;
+ ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ rxid = (uint16_t)((unsigned long)(pmb->ctx_buf) & 0xffff);
+ oxid = (uint16_t)(((unsigned long)(pmb->ctx_buf) >> 16) & 0xffff);
+ pmb->ctx_buf = NULL;
+ pmb->ctx_ndlp = NULL;
if (mb->mbxStatus) {
mempool_free(pmb, phba->mbox_mem_pool);
@@ -6732,11 +6971,11 @@ lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
mb = &pmb->u.mb;
- ndlp = (struct lpfc_nodelist *) pmb->context2;
- rxid = (uint16_t) ((unsigned long)(pmb->context1) & 0xffff);
- oxid = (uint16_t) (((unsigned long)(pmb->context1) >> 16) & 0xffff);
- pmb->context1 = NULL;
- pmb->context2 = NULL;
+ ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ rxid = (uint16_t)((unsigned long)(pmb->ctx_buf) & 0xffff);
+ oxid = (uint16_t)(((unsigned long)(pmb->ctx_buf) >> 16) & 0xffff);
+ pmb->ctx_ndlp = NULL;
+ pmb->ctx_buf = NULL;
if (mb->mbxStatus) {
mempool_free(pmb, phba->mbox_mem_pool);
@@ -6827,10 +7066,10 @@ lpfc_els_rcv_rls(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC);
if (mbox) {
lpfc_read_lnk_stat(phba, mbox);
- mbox->context1 = (void *)((unsigned long)
+ mbox->ctx_buf = (void *)((unsigned long)
((cmdiocb->iocb.unsli3.rcvsli3.ox_id << 16) |
cmdiocb->iocb.ulpContext)); /* rx_id */
- mbox->context2 = lpfc_nlp_get(ndlp);
+ mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
mbox->vport = vport;
mbox->mbox_cmpl = lpfc_els_rsp_rls_acc;
if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT)
@@ -6990,10 +7229,10 @@ lpfc_els_rcv_rps(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC);
if (mbox) {
lpfc_read_lnk_stat(phba, mbox);
- mbox->context1 = (void *)((unsigned long)
+ mbox->ctx_buf = (void *)((unsigned long)
((cmdiocb->iocb.unsli3.rcvsli3.ox_id << 16) |
cmdiocb->iocb.ulpContext)); /* rx_id */
- mbox->context2 = lpfc_nlp_get(ndlp);
+ mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
mbox->vport = vport;
mbox->mbox_cmpl = lpfc_els_rsp_rps_acc;
if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT)
@@ -7852,8 +8091,9 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
struct ls_rjt stat;
uint32_t *payload;
uint32_t cmd, did, newnode;
- uint8_t rjt_exp, rjt_err = 0;
+ uint8_t rjt_exp, rjt_err = 0, init_link = 0;
IOCB_t *icmd = &elsiocb->iocb;
+ LPFC_MBOXQ_t *mbox;
if (!vport || !(elsiocb->context2))
goto dropit;
@@ -7940,9 +8180,10 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
cmd, did, vport->port_state, vport->fc_flag,
vport->fc_myDID, vport->fc_prevDID);
- /* reject till our FLOGI completes */
+ /* reject till our FLOGI completes or PLOGI assigned DID via PT2PT */
if ((vport->port_state < LPFC_FABRIC_CFG_LINK) &&
- (cmd != ELS_CMD_FLOGI)) {
+ (cmd != ELS_CMD_FLOGI) &&
+ !((cmd == ELS_CMD_PLOGI) && (vport->fc_flag & FC_PT2PT))) {
rjt_err = LSRJT_LOGICAL_BSY;
rjt_exp = LSEXP_NOTHING_MORE;
goto lsrjt;
@@ -8002,6 +8243,19 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
did, vport->port_state, ndlp->nlp_flag);
phba->fc_stat.elsRcvFLOGI++;
+
+ /* If the driver believes fabric discovery is done and is ready,
+ * bounce the link. There is some descrepancy.
+ */
+ if (vport->port_state >= LPFC_LOCAL_CFG_LINK &&
+ vport->fc_flag & FC_PT2PT &&
+ vport->rcv_flogi_cnt >= 1) {
+ rjt_err = LSRJT_LOGICAL_BSY;
+ rjt_exp = LSEXP_NOTHING_MORE;
+ init_link++;
+ goto lsrjt;
+ }
+
lpfc_els_rcv_flogi(vport, elsiocb, ndlp);
if (newnode)
lpfc_nlp_put(ndlp);
@@ -8230,6 +8484,27 @@ lsrjt:
lpfc_nlp_put(elsiocb->context1);
elsiocb->context1 = NULL;
+
+ /* Special case. Driver received an unsolicited command that
+ * unsupportable given the driver's current state. Reset the
+ * link and start over.
+ */
+ if (init_link) {
+ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (!mbox)
+ return;
+ lpfc_linkdown(phba);
+ lpfc_init_link(phba, mbox,
+ phba->cfg_topology,
+ phba->cfg_link_speed);
+ mbox->u.mb.un.varInitLnk.lipsr_AL_PA = 0;
+ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ mbox->vport = vport;
+ if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) ==
+ MBX_NOT_FINISHED)
+ mempool_free(mbox, phba->mbox_mem_pool);
+ }
+
return;
dropit:
@@ -8453,7 +8728,7 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
+ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
MAILBOX_t *mb = &pmb->u.mb;
int rc;
@@ -8571,7 +8846,7 @@ lpfc_register_new_vport(struct lpfc_hba *phba, struct lpfc_vport *vport,
if (mbox) {
lpfc_reg_vpi(vport, mbox);
mbox->vport = vport;
- mbox->context2 = lpfc_nlp_get(ndlp);
+ mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
mbox->mbox_cmpl = lpfc_cmpl_reg_new_vport;
if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT)
== MBX_NOT_FINISHED) {
@@ -9505,7 +9780,8 @@ lpfc_sli_abts_recover_port(struct lpfc_vport *vport,
"rport in state 0x%x\n", ndlp->nlp_state);
return;
}
- lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ lpfc_printf_log(phba, KERN_ERR,
+ LOG_ELS | LOG_FCP_ERROR | LOG_NVME_IOERR,
"3094 Start rport recovery on shost id 0x%x "
"fc_id 0x%06x vpi 0x%x rpi 0x%x state 0x%x "
"flags 0x%x\n",
@@ -9518,8 +9794,8 @@ lpfc_sli_abts_recover_port(struct lpfc_vport *vport,
*/
spin_lock_irqsave(shost->host_lock, flags);
ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE;
+ ndlp->nlp_flag |= NLP_ISSUE_LOGO;
spin_unlock_irqrestore(shost->host_lock, flags);
- lpfc_issue_els_logo(vport, ndlp, 0);
- lpfc_nlp_set_state(vport, ndlp, NLP_STE_LOGO_ISSUE);
+ lpfc_unreg_rpi(vport, ndlp);
}
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index f4deb862efc6..b183b882d506 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -888,17 +888,31 @@ lpfc_linkdown(struct lpfc_hba *phba)
LPFC_MBOXQ_t *mb;
int i;
- if (phba->link_state == LPFC_LINK_DOWN)
+ if (phba->link_state == LPFC_LINK_DOWN) {
+ if (phba->sli4_hba.conf_trunk) {
+ phba->trunk_link.link0.state = 0;
+ phba->trunk_link.link1.state = 0;
+ phba->trunk_link.link2.state = 0;
+ phba->trunk_link.link3.state = 0;
+ }
return 0;
-
+ }
/* Block all SCSI stack I/Os */
lpfc_scsi_dev_block(phba);
+ phba->defer_flogi_acc_flag = false;
+
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag &= ~(FCF_AVAILABLE | FCF_SCAN_DONE);
spin_unlock_irq(&phba->hbalock);
if (phba->link_state > LPFC_LINK_DOWN) {
phba->link_state = LPFC_LINK_DOWN;
+ if (phba->sli4_hba.conf_trunk) {
+ phba->trunk_link.link0.state = 0;
+ phba->trunk_link.link1.state = 0;
+ phba->trunk_link.link2.state = 0;
+ phba->trunk_link.link3.state = 0;
+ }
spin_lock_irq(shost->host_lock);
phba->pport->fc_flag &= ~FC_LBIT;
spin_unlock_irq(shost->host_lock);
@@ -947,6 +961,7 @@ lpfc_linkdown(struct lpfc_hba *phba)
}
spin_lock_irq(shost->host_lock);
phba->pport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI);
+ phba->pport->rcv_flogi_cnt = 0;
spin_unlock_irq(shost->host_lock);
}
return 0;
@@ -1018,6 +1033,7 @@ lpfc_linkup(struct lpfc_hba *phba)
{
struct lpfc_vport **vports;
int i;
+ struct Scsi_Host *shost = lpfc_shost_from_vport(phba->pport);
phba->link_state = LPFC_LINK_UP;
@@ -1031,6 +1047,18 @@ lpfc_linkup(struct lpfc_hba *phba)
lpfc_linkup_port(vports[i]);
lpfc_destroy_vport_work_array(phba, vports);
+ /* Clear the pport flogi counter in case the link down was
+ * absorbed without an ACQE. No lock here - in worker thread
+ * and discovery is synchronized.
+ */
+ spin_lock_irq(shost->host_lock);
+ phba->pport->rcv_flogi_cnt = 0;
+ spin_unlock_irq(shost->host_lock);
+
+ /* reinitialize initial FLOGI flag */
+ phba->hba_flag &= ~(HBA_FLOGI_ISSUED);
+ phba->defer_flogi_acc_flag = false;
+
return 0;
}
@@ -1992,6 +2020,26 @@ int lpfc_sli4_fcf_rr_next_proc(struct lpfc_vport *vport, uint16_t fcf_index)
"failover and change port state:x%x/x%x\n",
phba->pport->port_state, LPFC_VPORT_UNKNOWN);
phba->pport->port_state = LPFC_VPORT_UNKNOWN;
+
+ if (!phba->fcf.fcf_redisc_attempted) {
+ lpfc_unregister_fcf(phba);
+
+ rc = lpfc_sli4_redisc_fcf_table(phba);
+ if (!rc) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_FIP,
+ "3195 Rediscover FCF table\n");
+ phba->fcf.fcf_redisc_attempted = 1;
+ lpfc_sli4_clear_fcf_rr_bmask(phba);
+ } else {
+ lpfc_printf_log(phba, KERN_WARNING, LOG_FIP,
+ "3196 Rediscover FCF table "
+ "failed. Status:x%x\n", rc);
+ }
+ } else {
+ lpfc_printf_log(phba, KERN_WARNING, LOG_FIP,
+ "3197 Already rediscover FCF table "
+ "attempted. No more retry\n");
+ }
goto stop_flogi_current_fcf;
} else {
lpfc_printf_log(phba, KERN_INFO, LOG_FIP | LOG_ELS,
@@ -2915,7 +2963,7 @@ lpfc_start_fdiscs(struct lpfc_hba *phba)
void
lpfc_mbx_cmpl_reg_vfi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
{
- struct lpfc_dmabuf *dmabuf = mboxq->context1;
+ struct lpfc_dmabuf *dmabuf = mboxq->ctx_buf;
struct lpfc_vport *vport = mboxq->vport;
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
@@ -3008,7 +3056,7 @@ static void
lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1;
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
struct lpfc_vport *vport = pmb->vport;
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
struct serv_parm *sp = &vport->fc_sparam;
@@ -3052,7 +3100,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
return;
out:
- pmb->context1 = NULL;
+ pmb->ctx_buf = NULL;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
lpfc_issue_clear_la(phba, vport);
@@ -3085,6 +3133,7 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
case LPFC_LINK_SPEED_16GHZ:
case LPFC_LINK_SPEED_32GHZ:
case LPFC_LINK_SPEED_64GHZ:
+ case LPFC_LINK_SPEED_128GHZ:
break;
default:
phba->fc_linkspeed = LPFC_LINK_SPEED_UNKNOWN;
@@ -3190,7 +3239,7 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
sparam_mbox->mbox_cmpl = lpfc_mbx_cmpl_read_sparam;
rc = lpfc_sli_issue_mbox(phba, sparam_mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
- mp = (struct lpfc_dmabuf *) sparam_mbox->context1;
+ mp = (struct lpfc_dmabuf *)sparam_mbox->ctx_buf;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
mempool_free(sparam_mbox, phba->mbox_mem_pool);
@@ -3319,7 +3368,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
struct lpfc_mbx_read_top *la;
struct lpfc_sli_ring *pring;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
uint8_t attn_type;
/* Unblock ELS traffic */
@@ -3476,12 +3525,12 @@ void
lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
+ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
- pmb->context1 = NULL;
- pmb->context2 = NULL;
+ pmb->ctx_buf = NULL;
+ pmb->ctx_ndlp = NULL;
lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
"0002 rpi:%x DID:%x flg:%x %d map:%x %p\n",
@@ -3689,8 +3738,8 @@ lpfc_create_static_vport(struct lpfc_hba *phba)
vport_buff = (uint8_t *) vport_info;
do {
/* free dma buffer from previous round */
- if (pmb->context1) {
- mp = (struct lpfc_dmabuf *)pmb->context1;
+ if (pmb->ctx_buf) {
+ mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
}
@@ -3712,7 +3761,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba)
if (phba->sli_rev == LPFC_SLI_REV4) {
byte_count = pmb->u.mqe.un.mb_words[5];
- mp = (struct lpfc_dmabuf *)pmb->context1;
+ mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
if (byte_count > sizeof(struct static_vport_info) -
offset)
byte_count = sizeof(struct static_vport_info)
@@ -3777,8 +3826,8 @@ lpfc_create_static_vport(struct lpfc_hba *phba)
out:
kfree(vport_info);
if (mbx_wait_rc != MBX_TIMEOUT) {
- if (pmb->context1) {
- mp = (struct lpfc_dmabuf *)pmb->context1;
+ if (pmb->ctx_buf) {
+ mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
}
@@ -3799,13 +3848,13 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
struct lpfc_nodelist *ndlp;
struct Scsi_Host *shost;
- ndlp = (struct lpfc_nodelist *) pmb->context2;
- pmb->context1 = NULL;
- pmb->context2 = NULL;
+ ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ pmb->ctx_ndlp = NULL;
+ pmb->ctx_buf = NULL;
if (mb->mbxStatus) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
@@ -3913,6 +3962,35 @@ lpfc_issue_gidft(struct lpfc_vport *vport)
return vport->gidft_inp;
}
+/**
+ * lpfc_issue_gidpt - issue a GID_PT for all N_Ports
+ * @vport: The virtual port for which this call is being executed.
+ *
+ * This routine will issue a GID_PT to get a list of all N_Ports
+ *
+ * Return value :
+ * 0 - Failure to issue a GID_PT
+ * 1 - GID_PT issued
+ **/
+int
+lpfc_issue_gidpt(struct lpfc_vport *vport)
+{
+ /* Good status, issue CT Request to NameServer */
+ if (lpfc_ns_cmd(vport, SLI_CTNS_GID_PT, 0, GID_PT_N_PORT)) {
+ /* Cannot issue NameServer FCP Query, so finish up
+ * discovery
+ */
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_SLI,
+ "0606 %s Port TYPE %x %s\n",
+ "Failed to issue GID_PT to ",
+ GID_PT_N_PORT,
+ "Finishing discovery.");
+ return 0;
+ }
+ vport->gidft_inp++;
+ return 1;
+}
+
/*
* This routine handles processing a NameServer REG_LOGIN mailbox
* command upon completion. It is setup in the LPFC_MBOXQ
@@ -3923,12 +4001,12 @@ void
lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
+ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
struct lpfc_vport *vport = pmb->vport;
- pmb->context1 = NULL;
- pmb->context2 = NULL;
+ pmb->ctx_buf = NULL;
+ pmb->ctx_ndlp = NULL;
vport->gidft_inp = 0;
if (mb->mbxStatus) {
@@ -4385,6 +4463,7 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
NLP_INT_NODE_ACT(ndlp);
atomic_set(&ndlp->cmd_pending, 0);
ndlp->cmd_qdepth = vport->cfg_tgt_queue_depth;
+ ndlp->nlp_defer_did = NLP_EVT_NOTHING_PENDING;
}
struct lpfc_nodelist *
@@ -4392,10 +4471,11 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
int state)
{
struct lpfc_hba *phba = vport->phba;
- uint32_t did;
+ uint32_t did, flag;
unsigned long flags;
unsigned long *active_rrqs_xri_bitmap = NULL;
int rpi = LPFC_RPI_ALLOC_ERROR;
+ uint32_t defer_did = 0;
if (!ndlp)
return NULL;
@@ -4428,16 +4508,23 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
goto free_rpi;
}
- /* Keep the original DID */
+ /* First preserve the orginal DID, xri_bitmap and some flags */
did = ndlp->nlp_DID;
+ flag = (ndlp->nlp_flag & NLP_UNREG_INP);
+ if (flag & NLP_UNREG_INP)
+ defer_did = ndlp->nlp_defer_did;
if (phba->sli_rev == LPFC_SLI_REV4)
active_rrqs_xri_bitmap = ndlp->active_rrqs_xri_bitmap;
- /* re-initialize ndlp except of ndlp linked list pointer */
+ /* Zero ndlp except of ndlp linked list pointer */
memset((((char *)ndlp) + sizeof (struct list_head)), 0,
sizeof (struct lpfc_nodelist) - sizeof (struct list_head));
- lpfc_initialize_node(vport, ndlp, did);
+ /* Next reinitialize and restore saved objects */
+ lpfc_initialize_node(vport, ndlp, did);
+ ndlp->nlp_flag |= flag;
+ if (flag & NLP_UNREG_INP)
+ ndlp->nlp_defer_did = defer_did;
if (phba->sli_rev == LPFC_SLI_REV4)
ndlp->active_rrqs_xri_bitmap = active_rrqs_xri_bitmap;
@@ -4697,11 +4784,27 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
struct lpfc_vport *vport = pmb->vport;
struct lpfc_nodelist *ndlp;
- ndlp = (struct lpfc_nodelist *)(pmb->context1);
+ ndlp = (struct lpfc_nodelist *)(pmb->ctx_ndlp);
if (!ndlp)
return;
lpfc_issue_els_logo(vport, ndlp, 0);
mempool_free(pmb, phba->mbox_mem_pool);
+
+ /* Check to see if there are any deferred events to process */
+ if ((ndlp->nlp_flag & NLP_UNREG_INP) &&
+ (ndlp->nlp_defer_did != NLP_EVT_NOTHING_PENDING)) {
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "1434 UNREG cmpl deferred logo x%x "
+ "on NPort x%x Data: x%x %p\n",
+ ndlp->nlp_rpi, ndlp->nlp_DID,
+ ndlp->nlp_defer_did, ndlp);
+
+ ndlp->nlp_flag &= ~NLP_UNREG_INP;
+ ndlp->nlp_defer_did = NLP_EVT_NOTHING_PENDING;
+ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ } else {
+ ndlp->nlp_flag &= ~NLP_UNREG_INP;
+ }
}
/*
@@ -4730,6 +4833,21 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
"did x%x\n",
ndlp->nlp_rpi, ndlp->nlp_flag,
ndlp->nlp_DID);
+
+ /* If there is already an UNREG in progress for this ndlp,
+ * no need to queue up another one.
+ */
+ if (ndlp->nlp_flag & NLP_UNREG_INP) {
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "1436 unreg_rpi SKIP UNREG x%x on "
+ "NPort x%x deferred x%x flg x%x "
+ "Data: %p\n",
+ ndlp->nlp_rpi, ndlp->nlp_DID,
+ ndlp->nlp_defer_did,
+ ndlp->nlp_flag, ndlp);
+ goto out;
+ }
+
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
if (mbox) {
/* SLI4 ports require the physical rpi value. */
@@ -4740,26 +4858,38 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
lpfc_unreg_login(phba, vport->vpi, rpi, mbox);
mbox->vport = vport;
if (ndlp->nlp_flag & NLP_ISSUE_LOGO) {
- mbox->context1 = ndlp;
+ mbox->ctx_ndlp = ndlp;
mbox->mbox_cmpl = lpfc_nlp_logo_unreg;
} else {
if (phba->sli_rev == LPFC_SLI_REV4 &&
(!(vport->load_flag & FC_UNLOADING)) &&
(bf_get(lpfc_sli_intf_if_type,
- &phba->sli4_hba.sli_intf) ==
+ &phba->sli4_hba.sli_intf) >=
LPFC_SLI_INTF_IF_TYPE_2) &&
(kref_read(&ndlp->kref) > 0)) {
- mbox->context1 = lpfc_nlp_get(ndlp);
+ mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
mbox->mbox_cmpl =
lpfc_sli4_unreg_rpi_cmpl_clr;
/*
* accept PLOGIs after unreg_rpi_cmpl
*/
acc_plogi = 0;
- } else
+ } else {
+ mbox->ctx_ndlp = ndlp;
mbox->mbox_cmpl =
lpfc_sli_def_mbox_cmpl;
+ }
}
+ if (((ndlp->nlp_DID & Fabric_DID_MASK) !=
+ Fabric_DID_MASK) &&
+ (!(vport->fc_flag & FC_OFFLINE_MODE)))
+ ndlp->nlp_flag |= NLP_UNREG_INP;
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "1433 unreg_rpi UNREG x%x on "
+ "NPort x%x deferred flg x%x Data:%p\n",
+ ndlp->nlp_rpi, ndlp->nlp_DID,
+ ndlp->nlp_flag, ndlp);
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
@@ -4768,7 +4898,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
}
}
lpfc_no_rpi(phba, ndlp);
-
+out:
if (phba->sli_rev != LPFC_SLI_REV4)
ndlp->nlp_rpi = 0;
ndlp->nlp_flag &= ~NLP_RPI_REGISTERED;
@@ -4836,7 +4966,7 @@ lpfc_unreg_all_rpis(struct lpfc_vport *vport)
mbox);
mbox->vport = vport;
mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
- mbox->context1 = NULL;
+ mbox->ctx_ndlp = NULL;
rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
if (rc != MBX_TIMEOUT)
mempool_free(mbox, phba->mbox_mem_pool);
@@ -4861,7 +4991,7 @@ lpfc_unreg_default_rpis(struct lpfc_vport *vport)
mbox);
mbox->vport = vport;
mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
- mbox->context1 = NULL;
+ mbox->ctx_ndlp = NULL;
rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
if (rc != MBX_TIMEOUT)
mempool_free(mbox, phba->mbox_mem_pool);
@@ -4915,8 +5045,8 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
if ((mb = phba->sli.mbox_active)) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
!(mb->mbox_flag & LPFC_MBX_IMED_UNREG) &&
- (ndlp == (struct lpfc_nodelist *) mb->context2)) {
- mb->context2 = NULL;
+ (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ mb->ctx_ndlp = NULL;
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
}
}
@@ -4926,18 +5056,18 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
list_for_each_entry(mb, &phba->sli.mboxq_cmpl, list) {
if ((mb->u.mb.mbxCommand != MBX_REG_LOGIN64) ||
(mb->mbox_flag & LPFC_MBX_IMED_UNREG) ||
- (ndlp != (struct lpfc_nodelist *) mb->context2))
+ (ndlp != (struct lpfc_nodelist *)mb->ctx_ndlp))
continue;
- mb->context2 = NULL;
+ mb->ctx_ndlp = NULL;
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
}
list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
!(mb->mbox_flag & LPFC_MBX_IMED_UNREG) &&
- (ndlp == (struct lpfc_nodelist *) mb->context2)) {
- mp = (struct lpfc_dmabuf *) (mb->context1);
+ (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ mp = (struct lpfc_dmabuf *)(mb->ctx_buf);
if (mp) {
__lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
@@ -5007,7 +5137,7 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
mbox->mbox_flag |= LPFC_MBX_IMED_UNREG;
mbox->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi;
mbox->vport = vport;
- mbox->context2 = ndlp;
+ mbox->ctx_ndlp = ndlp;
rc =lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
mempool_free(mbox, phba->mbox_mem_pool);
@@ -5772,12 +5902,12 @@ void
lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
+ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
struct lpfc_vport *vport = pmb->vport;
- pmb->context1 = NULL;
- pmb->context2 = NULL;
+ pmb->ctx_buf = NULL;
+ pmb->ctx_ndlp = NULL;
if (phba->sli_rev < LPFC_SLI_REV4)
ndlp->nlp_rpi = mb->un.varWords[0];
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 009aa0eee040..ec1227018913 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -115,6 +115,7 @@ struct lpfc_sli_ct_request {
uint32_t PortID;
struct gid {
uint8_t PortType; /* for GID_PT requests */
+#define GID_PT_N_PORT 1
uint8_t DomainScope;
uint8_t AreaScope;
uint8_t Fc4Type; /* for GID_FT requests */
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index bbd0a57e953f..c15b9b6fb840 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -197,6 +197,10 @@ struct lpfc_sli_intf {
#define LPFC_FCP_SCHED_ROUND_ROBIN 0
#define LPFC_FCP_SCHED_BY_CPU 1
+/* Algrithmns for NameServer Query after RSCN */
+#define LPFC_NS_QUERY_GID_FT 0
+#define LPFC_NS_QUERY_GID_PT 1
+
/* Delay Multiplier constant */
#define LPFC_DMULT_CONST 651042
#define LPFC_DMULT_MAX 1023
@@ -1031,6 +1035,7 @@ struct mbox_header {
#define LPFC_MBOX_OPCODE_FCOE_SET_FCLINK_SETTINGS 0x21
#define LPFC_MBOX_OPCODE_FCOE_LINK_DIAG_STATE 0x22
#define LPFC_MBOX_OPCODE_FCOE_LINK_DIAG_LOOPBACK 0x23
+#define LPFC_MBOX_OPCODE_FCOE_FC_SET_TRUNK_MODE 0x42
/* Low level Opcodes */
#define LPFC_MBOX_OPCODE_SET_DIAG_LOG_OPTION 0x37
@@ -2777,6 +2782,9 @@ struct lpfc_mbx_read_config {
#define lpfc_mbx_rd_conf_lnk_ldv_SHIFT 8
#define lpfc_mbx_rd_conf_lnk_ldv_MASK 0x00000001
#define lpfc_mbx_rd_conf_lnk_ldv_WORD word2
+#define lpfc_mbx_rd_conf_trunk_SHIFT 12
+#define lpfc_mbx_rd_conf_trunk_MASK 0x0000000F
+#define lpfc_mbx_rd_conf_trunk_WORD word2
#define lpfc_mbx_rd_conf_topology_SHIFT 24
#define lpfc_mbx_rd_conf_topology_MASK 0x000000FF
#define lpfc_mbx_rd_conf_topology_WORD word2
@@ -3512,6 +3520,15 @@ struct lpfc_mbx_set_host_data {
uint8_t data[LPFC_HOST_OS_DRIVER_VERSION_SIZE];
};
+struct lpfc_mbx_set_trunk_mode {
+ struct mbox_header header;
+ uint32_t word0;
+#define lpfc_mbx_set_trunk_mode_WORD word0
+#define lpfc_mbx_set_trunk_mode_SHIFT 0
+#define lpfc_mbx_set_trunk_mode_MASK 0xFF
+ uint32_t word1;
+ uint32_t word2;
+};
struct lpfc_mbx_get_sli4_parameters {
struct mbox_header header;
@@ -3833,6 +3850,9 @@ struct lpfc_mbx_wr_object {
#define lpfc_wr_object_eof_SHIFT 31
#define lpfc_wr_object_eof_MASK 0x00000001
#define lpfc_wr_object_eof_WORD word4
+#define lpfc_wr_object_eas_SHIFT 29
+#define lpfc_wr_object_eas_MASK 0x00000001
+#define lpfc_wr_object_eas_WORD word4
#define lpfc_wr_object_write_length_SHIFT 0
#define lpfc_wr_object_write_length_MASK 0x00FFFFFF
#define lpfc_wr_object_write_length_WORD word4
@@ -3843,6 +3863,15 @@ struct lpfc_mbx_wr_object {
} request;
struct {
uint32_t actual_write_length;
+ uint32_t word5;
+#define lpfc_wr_object_change_status_SHIFT 0
+#define lpfc_wr_object_change_status_MASK 0x000000FF
+#define lpfc_wr_object_change_status_WORD word5
+#define LPFC_CHANGE_STATUS_NO_RESET_NEEDED 0x00
+#define LPFC_CHANGE_STATUS_PHYS_DEV_RESET 0x01
+#define LPFC_CHANGE_STATUS_FW_RESET 0x02
+#define LPFC_CHANGE_STATUS_PORT_MIGRATION 0x04
+#define LPFC_CHANGE_STATUS_PCI_RESET 0x05
} response;
} u;
};
@@ -3911,6 +3940,7 @@ struct lpfc_mqe {
struct lpfc_mbx_set_feature set_feature;
struct lpfc_mbx_memory_dump_type3 mem_dump_type3;
struct lpfc_mbx_set_host_data set_host_data;
+ struct lpfc_mbx_set_trunk_mode set_trunk_mode;
struct lpfc_mbx_nop nop;
struct lpfc_mbx_set_ras_fwlog ras_fwlog;
} un;
@@ -4047,6 +4077,23 @@ struct lpfc_acqe_grp5 {
uint32_t trailer;
};
+static char *const trunk_errmsg[] = { /* map errcode */
+ "", /* There is no such error code at index 0*/
+ "link negotiated speed does not match existing"
+ " trunk - link was \"low\" speed",
+ "link negotiated speed does not match"
+ " existing trunk - link was \"middle\" speed",
+ "link negotiated speed does not match existing"
+ " trunk - link was \"high\" speed",
+ "Attached to non-trunking port - F_Port",
+ "Attached to non-trunking port - N_Port",
+ "FLOGI response timeout",
+ "non-FLOGI frame received",
+ "Invalid FLOGI response",
+ "Trunking initialization protocol",
+ "Trunk peer device mismatch",
+};
+
struct lpfc_acqe_fc_la {
uint32_t word0;
#define lpfc_acqe_fc_la_speed_SHIFT 24
@@ -4080,6 +4127,7 @@ struct lpfc_acqe_fc_la {
#define LPFC_FC_LA_TYPE_MDS_LINK_DOWN 0x4
#define LPFC_FC_LA_TYPE_MDS_LOOPBACK 0x5
#define LPFC_FC_LA_TYPE_UNEXP_WWPN 0x6
+#define LPFC_FC_LA_TYPE_TRUNKING_EVENT 0x7
#define lpfc_acqe_fc_la_port_type_SHIFT 6
#define lpfc_acqe_fc_la_port_type_MASK 0x00000003
#define lpfc_acqe_fc_la_port_type_WORD word0
@@ -4088,6 +4136,32 @@ struct lpfc_acqe_fc_la {
#define lpfc_acqe_fc_la_port_number_SHIFT 0
#define lpfc_acqe_fc_la_port_number_MASK 0x0000003F
#define lpfc_acqe_fc_la_port_number_WORD word0
+
+/* Attention Type is 0x07 (Trunking Event) word0 */
+#define lpfc_acqe_fc_la_trunk_link_status_port0_SHIFT 16
+#define lpfc_acqe_fc_la_trunk_link_status_port0_MASK 0x0000001
+#define lpfc_acqe_fc_la_trunk_link_status_port0_WORD word0
+#define lpfc_acqe_fc_la_trunk_link_status_port1_SHIFT 17
+#define lpfc_acqe_fc_la_trunk_link_status_port1_MASK 0x0000001
+#define lpfc_acqe_fc_la_trunk_link_status_port1_WORD word0
+#define lpfc_acqe_fc_la_trunk_link_status_port2_SHIFT 18
+#define lpfc_acqe_fc_la_trunk_link_status_port2_MASK 0x0000001
+#define lpfc_acqe_fc_la_trunk_link_status_port2_WORD word0
+#define lpfc_acqe_fc_la_trunk_link_status_port3_SHIFT 19
+#define lpfc_acqe_fc_la_trunk_link_status_port3_MASK 0x0000001
+#define lpfc_acqe_fc_la_trunk_link_status_port3_WORD word0
+#define lpfc_acqe_fc_la_trunk_config_port0_SHIFT 20
+#define lpfc_acqe_fc_la_trunk_config_port0_MASK 0x0000001
+#define lpfc_acqe_fc_la_trunk_config_port0_WORD word0
+#define lpfc_acqe_fc_la_trunk_config_port1_SHIFT 21
+#define lpfc_acqe_fc_la_trunk_config_port1_MASK 0x0000001
+#define lpfc_acqe_fc_la_trunk_config_port1_WORD word0
+#define lpfc_acqe_fc_la_trunk_config_port2_SHIFT 22
+#define lpfc_acqe_fc_la_trunk_config_port2_MASK 0x0000001
+#define lpfc_acqe_fc_la_trunk_config_port2_WORD word0
+#define lpfc_acqe_fc_la_trunk_config_port3_SHIFT 23
+#define lpfc_acqe_fc_la_trunk_config_port3_MASK 0x0000001
+#define lpfc_acqe_fc_la_trunk_config_port3_WORD word0
uint32_t word1;
#define lpfc_acqe_fc_la_llink_spd_SHIFT 16
#define lpfc_acqe_fc_la_llink_spd_MASK 0x0000FFFF
@@ -4095,6 +4169,12 @@ struct lpfc_acqe_fc_la {
#define lpfc_acqe_fc_la_fault_SHIFT 0
#define lpfc_acqe_fc_la_fault_MASK 0x000000FF
#define lpfc_acqe_fc_la_fault_WORD word1
+#define lpfc_acqe_fc_la_trunk_fault_SHIFT 0
+#define lpfc_acqe_fc_la_trunk_fault_MASK 0x0000000F
+#define lpfc_acqe_fc_la_trunk_fault_WORD word1
+#define lpfc_acqe_fc_la_trunk_linkmask_SHIFT 4
+#define lpfc_acqe_fc_la_trunk_linkmask_MASK 0x000000F
+#define lpfc_acqe_fc_la_trunk_linkmask_WORD word1
#define LPFC_FC_LA_FAULT_NONE 0x0
#define LPFC_FC_LA_FAULT_LOCAL 0x1
#define LPFC_FC_LA_FAULT_REMOTE 0x2
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 68d62d55a3a5..c1c36812c3d2 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -447,19 +447,19 @@ lpfc_config_port_post(struct lpfc_hba *phba)
"READ_SPARM mbxStatus x%x\n",
mb->mbxCommand, mb->mbxStatus);
phba->link_state = LPFC_HBA_ERROR;
- mp = (struct lpfc_dmabuf *) pmb->context1;
+ mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
mempool_free(pmb, phba->mbox_mem_pool);
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
return -EIO;
}
- mp = (struct lpfc_dmabuf *) pmb->context1;
+ mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm));
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
- pmb->context1 = NULL;
+ pmb->ctx_buf = NULL;
lpfc_update_vport_wwn(vport);
/* Update the fc_host data structures with new wwn. */
@@ -1801,7 +1801,12 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action,
lpfc_offline(phba);
/* release interrupt for possible resource change */
lpfc_sli4_disable_intr(phba);
- lpfc_sli_brdrestart(phba);
+ rc = lpfc_sli_brdrestart(phba);
+ if (rc) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "6309 Failed to restart board\n");
+ return rc;
+ }
/* request and enable interrupt */
intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode);
if (intr_mode == LPFC_INTR_ERROR) {
@@ -4106,6 +4111,32 @@ finished:
return stat;
}
+void lpfc_host_supported_speeds_set(struct Scsi_Host *shost)
+{
+ struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
+ struct lpfc_hba *phba = vport->phba;
+
+ fc_host_supported_speeds(shost) = 0;
+ if (phba->lmt & LMT_128Gb)
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_128GBIT;
+ if (phba->lmt & LMT_64Gb)
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_64GBIT;
+ if (phba->lmt & LMT_32Gb)
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_32GBIT;
+ if (phba->lmt & LMT_16Gb)
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_16GBIT;
+ if (phba->lmt & LMT_10Gb)
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_10GBIT;
+ if (phba->lmt & LMT_8Gb)
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_8GBIT;
+ if (phba->lmt & LMT_4Gb)
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_4GBIT;
+ if (phba->lmt & LMT_2Gb)
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_2GBIT;
+ if (phba->lmt & LMT_1Gb)
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_1GBIT;
+}
+
/**
* lpfc_host_attrib_init - Initialize SCSI host attributes on a FC port
* @shost: pointer to SCSI host data structure.
@@ -4133,23 +4164,7 @@ void lpfc_host_attrib_init(struct Scsi_Host *shost)
lpfc_vport_symbolic_node_name(vport, fc_host_symbolic_name(shost),
sizeof fc_host_symbolic_name(shost));
- fc_host_supported_speeds(shost) = 0;
- if (phba->lmt & LMT_64Gb)
- fc_host_supported_speeds(shost) |= FC_PORTSPEED_64GBIT;
- if (phba->lmt & LMT_32Gb)
- fc_host_supported_speeds(shost) |= FC_PORTSPEED_32GBIT;
- if (phba->lmt & LMT_16Gb)
- fc_host_supported_speeds(shost) |= FC_PORTSPEED_16GBIT;
- if (phba->lmt & LMT_10Gb)
- fc_host_supported_speeds(shost) |= FC_PORTSPEED_10GBIT;
- if (phba->lmt & LMT_8Gb)
- fc_host_supported_speeds(shost) |= FC_PORTSPEED_8GBIT;
- if (phba->lmt & LMT_4Gb)
- fc_host_supported_speeds(shost) |= FC_PORTSPEED_4GBIT;
- if (phba->lmt & LMT_2Gb)
- fc_host_supported_speeds(shost) |= FC_PORTSPEED_2GBIT;
- if (phba->lmt & LMT_1Gb)
- fc_host_supported_speeds(shost) |= FC_PORTSPEED_1GBIT;
+ lpfc_host_supported_speeds_set(shost);
fc_host_maxframe_size(shost) =
(((uint32_t) vport->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) |
@@ -4467,6 +4482,9 @@ lpfc_sli4_port_speed_parse(struct lpfc_hba *phba, uint32_t evt_code,
case LPFC_FC_LA_SPEED_64G:
port_speed = 64000;
break;
+ case LPFC_FC_LA_SPEED_128G:
+ port_speed = 128000;
+ break;
default:
port_speed = 0;
}
@@ -4609,6 +4627,136 @@ out_free_pmb:
}
/**
+ * lpfc_async_link_speed_to_read_top - Parse async evt link speed code to read
+ * topology.
+ * @phba: pointer to lpfc hba data structure.
+ * @evt_code: asynchronous event code.
+ * @speed_code: asynchronous event link speed code.
+ *
+ * This routine is to parse the giving SLI4 async event link speed code into
+ * value of Read topology link speed.
+ *
+ * Return: link speed in terms of Read topology.
+ **/
+static uint8_t
+lpfc_async_link_speed_to_read_top(struct lpfc_hba *phba, uint8_t speed_code)
+{
+ uint8_t port_speed;
+
+ switch (speed_code) {
+ case LPFC_FC_LA_SPEED_1G:
+ port_speed = LPFC_LINK_SPEED_1GHZ;
+ break;
+ case LPFC_FC_LA_SPEED_2G:
+ port_speed = LPFC_LINK_SPEED_2GHZ;
+ break;
+ case LPFC_FC_LA_SPEED_4G:
+ port_speed = LPFC_LINK_SPEED_4GHZ;
+ break;
+ case LPFC_FC_LA_SPEED_8G:
+ port_speed = LPFC_LINK_SPEED_8GHZ;
+ break;
+ case LPFC_FC_LA_SPEED_16G:
+ port_speed = LPFC_LINK_SPEED_16GHZ;
+ break;
+ case LPFC_FC_LA_SPEED_32G:
+ port_speed = LPFC_LINK_SPEED_32GHZ;
+ break;
+ case LPFC_FC_LA_SPEED_64G:
+ port_speed = LPFC_LINK_SPEED_64GHZ;
+ break;
+ case LPFC_FC_LA_SPEED_128G:
+ port_speed = LPFC_LINK_SPEED_128GHZ;
+ break;
+ case LPFC_FC_LA_SPEED_256G:
+ port_speed = LPFC_LINK_SPEED_256GHZ;
+ break;
+ default:
+ port_speed = 0;
+ break;
+ }
+
+ return port_speed;
+}
+
+#define trunk_link_status(__idx)\
+ bf_get(lpfc_acqe_fc_la_trunk_config_port##__idx, acqe_fc) ?\
+ ((phba->trunk_link.link##__idx.state == LPFC_LINK_UP) ?\
+ "Link up" : "Link down") : "NA"
+/* Did port __idx reported an error */
+#define trunk_port_fault(__idx)\
+ bf_get(lpfc_acqe_fc_la_trunk_config_port##__idx, acqe_fc) ?\
+ (port_fault & (1 << __idx) ? "YES" : "NO") : "NA"
+
+static void
+lpfc_update_trunk_link_status(struct lpfc_hba *phba,
+ struct lpfc_acqe_fc_la *acqe_fc)
+{
+ uint8_t port_fault = bf_get(lpfc_acqe_fc_la_trunk_linkmask, acqe_fc);
+ uint8_t err = bf_get(lpfc_acqe_fc_la_trunk_fault, acqe_fc);
+
+ phba->sli4_hba.link_state.speed =
+ lpfc_sli4_port_speed_parse(phba, LPFC_TRAILER_CODE_FC,
+ bf_get(lpfc_acqe_fc_la_speed, acqe_fc));
+
+ phba->sli4_hba.link_state.logical_speed =
+ bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc);
+ /* We got FC link speed, convert to fc_linkspeed (READ_TOPOLOGY) */
+ phba->fc_linkspeed =
+ lpfc_async_link_speed_to_read_top(
+ phba,
+ bf_get(lpfc_acqe_fc_la_speed, acqe_fc));
+
+ if (bf_get(lpfc_acqe_fc_la_trunk_config_port0, acqe_fc)) {
+ phba->trunk_link.link0.state =
+ bf_get(lpfc_acqe_fc_la_trunk_link_status_port0, acqe_fc)
+ ? LPFC_LINK_UP : LPFC_LINK_DOWN;
+ phba->trunk_link.link0.fault = port_fault & 0x1 ? err : 0;
+ }
+ if (bf_get(lpfc_acqe_fc_la_trunk_config_port1, acqe_fc)) {
+ phba->trunk_link.link1.state =
+ bf_get(lpfc_acqe_fc_la_trunk_link_status_port1, acqe_fc)
+ ? LPFC_LINK_UP : LPFC_LINK_DOWN;
+ phba->trunk_link.link1.fault = port_fault & 0x2 ? err : 0;
+ }
+ if (bf_get(lpfc_acqe_fc_la_trunk_config_port2, acqe_fc)) {
+ phba->trunk_link.link2.state =
+ bf_get(lpfc_acqe_fc_la_trunk_link_status_port2, acqe_fc)
+ ? LPFC_LINK_UP : LPFC_LINK_DOWN;
+ phba->trunk_link.link2.fault = port_fault & 0x4 ? err : 0;
+ }
+ if (bf_get(lpfc_acqe_fc_la_trunk_config_port3, acqe_fc)) {
+ phba->trunk_link.link3.state =
+ bf_get(lpfc_acqe_fc_la_trunk_link_status_port3, acqe_fc)
+ ? LPFC_LINK_UP : LPFC_LINK_DOWN;
+ phba->trunk_link.link3.fault = port_fault & 0x8 ? err : 0;
+ }
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "2910 Async FC Trunking Event - Speed:%d\n"
+ "\tLogical speed:%d "
+ "port0: %s port1: %s port2: %s port3: %s\n",
+ phba->sli4_hba.link_state.speed,
+ phba->sli4_hba.link_state.logical_speed,
+ trunk_link_status(0), trunk_link_status(1),
+ trunk_link_status(2), trunk_link_status(3));
+
+ if (port_fault)
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "3202 trunk error:0x%x (%s) seen on port0:%s "
+ /*
+ * SLI-4: We have only 0xA error codes
+ * defined as of now. print an appropriate
+ * message in case driver needs to be updated.
+ */
+ "port1:%s port2:%s port3:%s\n", err, err > 0xA ?
+ "UNDEFINED. update driver." : trunk_errmsg[err],
+ trunk_port_fault(0), trunk_port_fault(1),
+ trunk_port_fault(2), trunk_port_fault(3));
+}
+
+
+/**
* lpfc_sli4_async_fc_evt - Process the asynchronous FC link event
* @phba: pointer to lpfc hba data structure.
* @acqe_fc: pointer to the async fc completion queue entry.
@@ -4633,6 +4781,13 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
bf_get(lpfc_trailer_type, acqe_fc));
return;
}
+
+ if (bf_get(lpfc_acqe_fc_la_att_type, acqe_fc) ==
+ LPFC_FC_LA_TYPE_TRUNKING_EVENT) {
+ lpfc_update_trunk_link_status(phba, acqe_fc);
+ return;
+ }
+
/* Keep the link status for extra SLI4 state machine reference */
phba->sli4_hba.link_state.speed =
lpfc_sli4_port_speed_parse(phba, LPFC_TRAILER_CODE_FC,
@@ -4762,6 +4917,8 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli)
struct temp_event temp_event_data;
struct lpfc_acqe_misconfigured_event *misconfigured;
struct Scsi_Host *shost;
+ struct lpfc_vport **vports;
+ int rc, i;
evt_type = bf_get(lpfc_trailer_type, acqe_sli);
@@ -4887,6 +5044,25 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli)
sprintf(message, "Unknown event status x%02x", status);
break;
}
+
+ /* Issue READ_CONFIG mbox command to refresh supported speeds */
+ rc = lpfc_sli4_read_config(phba);
+ if (rc) {
+ phba->lmt = 0;
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "3194 Unable to retrieve supported "
+ "speeds, rc = 0x%x\n", rc);
+ }
+ vports = lpfc_create_vport_work_array(phba);
+ if (vports != NULL) {
+ for (i = 0; i <= phba->max_vports && vports[i] != NULL;
+ i++) {
+ shost = lpfc_shost_from_vport(vports[i]);
+ lpfc_host_supported_speeds_set(shost);
+ }
+ }
+ lpfc_destroy_vport_work_array(phba, vports);
+
phba->sli4_hba.lnk_info.optic_state = status;
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
"3176 Port Name %c %s\n", port_name, message);
@@ -5044,7 +5220,7 @@ lpfc_sli4_async_fip_evt(struct lpfc_hba *phba,
break;
}
/* If fast FCF failover rescan event is pending, do nothing */
- if (phba->fcf.fcf_flag & FCF_REDISC_EVT) {
+ if (phba->fcf.fcf_flag & (FCF_REDISC_EVT | FCF_REDISC_PEND)) {
spin_unlock_irq(&phba->hbalock);
break;
}
@@ -7181,26 +7357,19 @@ lpfc_post_init_setup(struct lpfc_hba *phba)
static int
lpfc_sli_pci_mem_setup(struct lpfc_hba *phba)
{
- struct pci_dev *pdev;
+ struct pci_dev *pdev = phba->pcidev;
unsigned long bar0map_len, bar2map_len;
int i, hbq_count;
void *ptr;
int error = -ENODEV;
- /* Obtain PCI device reference */
- if (!phba->pcidev)
+ if (!pdev)
return error;
- else
- pdev = phba->pcidev;
/* Set the device DMA mask size */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0
- || pci_set_consistent_dma_mask(pdev,DMA_BIT_MASK(64)) != 0) {
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0
- || pci_set_consistent_dma_mask(pdev,DMA_BIT_MASK(32)) != 0) {
- return error;
- }
- }
+ if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) ||
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+ return error;
/* Get the bus address of Bar0 and Bar2 and the number of bytes
* required by each mapping.
@@ -7779,6 +7948,8 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
phba->sli4_hba.bbscn_params.word0 = rd_config->word8;
}
+ phba->sli4_hba.conf_trunk =
+ bf_get(lpfc_mbx_rd_conf_trunk, rd_config);
phba->sli4_hba.extents_in_use =
bf_get(lpfc_mbx_rd_conf_extnts_inuse, rd_config);
phba->sli4_hba.max_cfg_param.max_xri =
@@ -7787,6 +7958,9 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
bf_get(lpfc_mbx_rd_conf_xri_base, rd_config);
phba->sli4_hba.max_cfg_param.max_vpi =
bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config);
+ /* Limit the max we support */
+ if (phba->sli4_hba.max_cfg_param.max_vpi > LPFC_MAX_VPORTS)
+ phba->sli4_hba.max_cfg_param.max_vpi = LPFC_MAX_VPORTS;
phba->sli4_hba.max_cfg_param.vpi_base =
bf_get(lpfc_mbx_rd_conf_vpi_base, rd_config);
phba->sli4_hba.max_cfg_param.max_rpi =
@@ -9562,25 +9736,18 @@ out:
static int
lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
{
- struct pci_dev *pdev;
+ struct pci_dev *pdev = phba->pcidev;
unsigned long bar0map_len, bar1map_len, bar2map_len;
int error = -ENODEV;
uint32_t if_type;
- /* Obtain PCI device reference */
- if (!phba->pcidev)
+ if (!pdev)
return error;
- else
- pdev = phba->pcidev;
/* Set the device DMA mask size */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0
- || pci_set_consistent_dma_mask(pdev,DMA_BIT_MASK(64)) != 0) {
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0
- || pci_set_consistent_dma_mask(pdev,DMA_BIT_MASK(32)) != 0) {
- return error;
- }
- }
+ if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) ||
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+ return error;
/*
* The BARs and register set definitions and offset locations are
@@ -10523,12 +10690,7 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
kthread_stop(phba->worker_thread);
/* Disable FW logging to host memory */
- writel(LPFC_CTL_PDEV_CTL_DDL_RAS,
- phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET);
-
- /* Free RAS DMA memory */
- if (phba->ras_fwlog.ras_enabled == true)
- lpfc_sli4_ras_dma_free(phba);
+ lpfc_ras_stop_fwlog(phba);
/* Unset the queues shared with the hardware then release all
* allocated resources.
@@ -10539,6 +10701,10 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
/* Reset SLI4 HBA FCoE function */
lpfc_pci_function_reset(phba);
+ /* Free RAS DMA memory */
+ if (phba->ras_fwlog.ras_enabled)
+ lpfc_sli4_ras_dma_free(phba);
+
/* Stop the SLI4 device port */
phba->pport->work_port_events = 0;
}
@@ -12476,7 +12642,8 @@ lpfc_sli4_ras_init(struct lpfc_hba *phba)
case PCI_DEVICE_ID_LANCER_G6_FC:
case PCI_DEVICE_ID_LANCER_G7_FC:
phba->ras_fwlog.ras_hwsupport = true;
- if (phba->cfg_ras_fwlog_func == PCI_FUNC(phba->pcidev->devfn))
+ if (phba->cfg_ras_fwlog_func == PCI_FUNC(phba->pcidev->devfn) &&
+ phba->cfg_ras_fwlog_buffsize)
phba->ras_fwlog.ras_enabled = true;
else
phba->ras_fwlog.ras_enabled = false;
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index deb094fdbb79..f6a5083a621e 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -94,7 +94,7 @@ lpfc_dump_static_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb,
memset(mp->virt, 0, LPFC_BPL_SIZE);
INIT_LIST_HEAD(&mp->list);
/* save address for completion */
- pmb->context1 = (uint8_t *)mp;
+ pmb->ctx_buf = (uint8_t *)mp;
mb->un.varWords[3] = putPaddrLow(mp->phys);
mb->un.varWords[4] = putPaddrHigh(mp->phys);
mb->un.varDmp.sli4_length = sizeof(struct static_vport_info);
@@ -139,7 +139,7 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset,
void *ctx;
mb = &pmb->u.mb;
- ctx = pmb->context2;
+ ctx = pmb->ctx_buf;
/* Setup to dump VPD region */
memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
@@ -151,7 +151,7 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset,
mb->un.varDmp.word_cnt = (DMP_RSP_SIZE / sizeof (uint32_t));
mb->un.varDmp.co = 0;
mb->un.varDmp.resp_offset = 0;
- pmb->context2 = ctx;
+ pmb->ctx_buf = ctx;
mb->mbxOwner = OWN_HOST;
return;
}
@@ -172,7 +172,7 @@ lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
mb = &pmb->u.mb;
/* Save context so that we can restore after memset */
- ctx = pmb->context2;
+ ctx = pmb->ctx_buf;
/* Setup to dump VPD region */
memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
@@ -186,7 +186,7 @@ lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
mb->un.varDmp.word_cnt = WAKE_UP_PARMS_WORD_SIZE;
mb->un.varDmp.co = 0;
mb->un.varDmp.resp_offset = 0;
- pmb->context2 = ctx;
+ pmb->ctx_buf = ctx;
return;
}
@@ -304,7 +304,7 @@ lpfc_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb,
/* Save address for later completion and set the owner to host so that
* the FW knows this mailbox is available for processing.
*/
- pmb->context1 = (uint8_t *)mp;
+ pmb->ctx_buf = (uint8_t *)mp;
mb->mbxOwner = OWN_HOST;
return (0);
}
@@ -513,9 +513,9 @@ lpfc_init_link(struct lpfc_hba * phba,
break;
}
- if (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC &&
- mb->un.varInitLnk.link_flags & FLAGS_TOPOLOGY_MODE_LOOP) {
- /* Failover is not tried for Lancer G6 */
+ if ((phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC ||
+ phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC) &&
+ mb->un.varInitLnk.link_flags & FLAGS_TOPOLOGY_MODE_LOOP) {
mb->un.varInitLnk.link_flags = FLAGS_TOPOLOGY_MODE_PT_PT;
phba->cfg_topology = FLAGS_TOPOLOGY_MODE_PT_PT;
}
@@ -631,7 +631,7 @@ lpfc_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, int vpi)
mb->un.varRdSparm.vpi = phba->vpi_ids[vpi];
/* save address for completion */
- pmb->context1 = mp;
+ pmb->ctx_buf = mp;
return (0);
}
@@ -783,7 +783,7 @@ lpfc_reg_rpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t did,
memcpy(sparam, param, sizeof (struct serv_parm));
/* save address for completion */
- pmb->context1 = (uint8_t *) mp;
+ pmb->ctx_buf = (uint8_t *)mp;
mb->mbxCommand = MBX_REG_LOGIN64;
mb->un.varRegLogin.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm);
@@ -858,7 +858,7 @@ lpfc_sli4_unreg_all_rpis(struct lpfc_vport *vport)
mbox->u.mb.un.varUnregLogin.rsvd1 = 0x4000;
mbox->vport = vport;
mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
- mbox->context1 = NULL;
+ mbox->ctx_ndlp = NULL;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED)
mempool_free(mbox, phba->mbox_mem_pool);
@@ -2288,7 +2288,7 @@ lpfc_sli4_dump_cfg_rg23(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
INIT_LIST_HEAD(&mp->list);
/* save address for completion */
- mbox->context1 = (uint8_t *) mp;
+ mbox->ctx_buf = (uint8_t *)mp;
mb->mbxCommand = MBX_DUMP_MEMORY;
mb->un.varDmp.type = DMP_NV_PARAMS;
@@ -2305,7 +2305,7 @@ lpfc_mbx_cmpl_rdp_link_stat(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
MAILBOX_t *mb;
int rc = FAILURE;
struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mboxq->context2);
+ (struct lpfc_rdp_context *)(mboxq->ctx_ndlp);
mb = &mboxq->u.mb;
if (mb->mbxStatus)
@@ -2323,9 +2323,9 @@ mbx_failed:
static void
lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
{
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) mbox->context1;
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mbox->context2);
+ (struct lpfc_rdp_context *)(mbox->ctx_ndlp);
if (bf_get(lpfc_mqe_status, &mbox->u.mqe))
goto error_mbuf_free;
@@ -2341,7 +2341,7 @@ lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
lpfc_read_lnk_stat(phba, mbox);
mbox->vport = rdp_context->ndlp->vport;
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_link_stat;
- mbox->context2 = (struct lpfc_rdp_context *) rdp_context;
+ mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED)
goto error_cmd_free;
@@ -2359,9 +2359,9 @@ void
lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
{
int rc;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (mbox->context1);
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mbox->context2);
+ (struct lpfc_rdp_context *)(mbox->ctx_ndlp);
if (bf_get(lpfc_mqe_status, &mbox->u.mqe))
goto error;
@@ -2375,7 +2375,7 @@ lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
INIT_LIST_HEAD(&mp->list);
/* save address for completion */
- mbox->context1 = mp;
+ mbox->ctx_buf = mp;
mbox->vport = rdp_context->ndlp->vport;
bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_DUMP_MEMORY);
@@ -2391,7 +2391,7 @@ lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a2;
- mbox->context2 = (struct lpfc_rdp_context *) rdp_context;
+ mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED)
goto error;
@@ -2436,7 +2436,7 @@ lpfc_sli4_dump_page_a0(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_DUMP_MEMORY);
/* save address for completion */
- mbox->context1 = mp;
+ mbox->ctx_buf = mp;
bf_set(lpfc_mbx_memory_dump_type3_type,
&mbox->u.mqe.un.mem_dump_type3, DMP_LMSD);
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 9c22a2c93462..66191fa35f63 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -330,7 +330,7 @@ lpfc_mem_free_all(struct lpfc_hba *phba)
/* Free memory used in mailbox queue back to mailbox memory pool */
list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) {
- mp = (struct lpfc_dmabuf *) (mbox->context1);
+ mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
if (mp) {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
@@ -340,7 +340,7 @@ lpfc_mem_free_all(struct lpfc_hba *phba)
}
/* Free memory used in mailbox cmpl list back to mailbox memory pool */
list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq_cmpl, list) {
- mp = (struct lpfc_dmabuf *) (mbox->context1);
+ mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
if (mp) {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
@@ -354,7 +354,7 @@ lpfc_mem_free_all(struct lpfc_hba *phba)
spin_unlock_irq(&phba->hbalock);
if (psli->mbox_active) {
mbox = psli->mbox_active;
- mp = (struct lpfc_dmabuf *) (mbox->context1);
+ mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
if (mp) {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 269808e8480f..96bc3789a166 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -467,7 +467,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
*/
mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
/*
- * mbox->context2 = lpfc_nlp_get(ndlp) deferred until mailbox
+ * mbox->ctx_ndlp = lpfc_nlp_get(ndlp) deferred until mailbox
* command issued in lpfc_cmpl_els_acc().
*/
mbox->vport = vport;
@@ -535,8 +535,8 @@ lpfc_mbx_cmpl_resume_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
struct lpfc_nodelist *ndlp;
uint32_t cmd;
- elsiocb = (struct lpfc_iocbq *)mboxq->context1;
- ndlp = (struct lpfc_nodelist *) mboxq->context2;
+ elsiocb = (struct lpfc_iocbq *)mboxq->ctx_buf;
+ ndlp = (struct lpfc_nodelist *)mboxq->ctx_ndlp;
vport = mboxq->vport;
cmd = elsiocb->drvrTimeout;
@@ -836,7 +836,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
if (!(ndlp->nlp_flag & NLP_RPI_REGISTERED)) {
+ spin_lock_irq(shost->host_lock);
ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+ spin_unlock_irq(shost->host_lock);
return 0;
}
@@ -851,7 +853,10 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
return 1;
}
}
+
+ spin_lock_irq(shost->host_lock);
ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+ spin_unlock_irq(shost->host_lock);
lpfc_unreg_rpi(vport, ndlp);
return 0;
}
@@ -866,13 +871,26 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
* to release a rpi.
**/
void
-lpfc_release_rpi(struct lpfc_hba *phba,
- struct lpfc_vport *vport,
- uint16_t rpi)
+lpfc_release_rpi(struct lpfc_hba *phba, struct lpfc_vport *vport,
+ struct lpfc_nodelist *ndlp, uint16_t rpi)
{
LPFC_MBOXQ_t *pmb;
int rc;
+ /* If there is already an UNREG in progress for this ndlp,
+ * no need to queue up another one.
+ */
+ if (ndlp->nlp_flag & NLP_UNREG_INP) {
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "1435 release_rpi SKIP UNREG x%x on "
+ "NPort x%x deferred x%x flg x%x "
+ "Data: %p\n",
+ ndlp->nlp_rpi, ndlp->nlp_DID,
+ ndlp->nlp_defer_did,
+ ndlp->nlp_flag, ndlp);
+ return;
+ }
+
pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
GFP_KERNEL);
if (!pmb)
@@ -881,6 +899,18 @@ lpfc_release_rpi(struct lpfc_hba *phba,
else {
lpfc_unreg_login(phba, vport->vpi, rpi, pmb);
pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ pmb->vport = vport;
+ pmb->ctx_ndlp = ndlp;
+
+ if (((ndlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) &&
+ (!(vport->fc_flag & FC_OFFLINE_MODE)))
+ ndlp->nlp_flag |= NLP_UNREG_INP;
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+ "1437 release_rpi UNREG x%x "
+ "on NPort x%x flg x%x\n",
+ ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag);
+
rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED)
mempool_free(pmb, phba->mbox_mem_pool);
@@ -901,7 +931,7 @@ lpfc_disc_illegal(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
(evt == NLP_EVT_CMPL_REG_LOGIN) &&
(!pmb->u.mb.mbxStatus)) {
rpi = pmb->u.mb.un.varWords[0];
- lpfc_release_rpi(phba, vport, rpi);
+ lpfc_release_rpi(phba, vport, ndlp, rpi);
}
lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY,
"0271 Illegal State Transition: node x%x "
@@ -1253,7 +1283,7 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport,
ndlp->nlp_flag |= NLP_REG_LOGIN_SEND;
mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
}
- mbox->context2 = lpfc_nlp_get(ndlp);
+ mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
mbox->vport = vport;
if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT)
!= MBX_NOT_FINISHED) {
@@ -1267,7 +1297,7 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport,
* command
*/
lpfc_nlp_put(ndlp);
- mp = (struct lpfc_dmabuf *) mbox->context1;
+ mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
mempool_free(mbox, phba->mbox_mem_pool);
@@ -1329,7 +1359,7 @@ lpfc_cmpl_reglogin_plogi_issue(struct lpfc_vport *vport,
if (!(phba->pport->load_flag & FC_UNLOADING) &&
!mb->mbxStatus) {
rpi = pmb->u.mb.un.varWords[0];
- lpfc_release_rpi(phba, vport, rpi);
+ lpfc_release_rpi(phba, vport, ndlp, rpi);
}
return ndlp->nlp_state;
}
@@ -1636,10 +1666,10 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
/* cleanup any ndlp on mbox q waiting for reglogin cmpl */
if ((mb = phba->sli.mbox_active)) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
- (ndlp == (struct lpfc_nodelist *) mb->context2)) {
+ (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
lpfc_nlp_put(ndlp);
- mb->context2 = NULL;
+ mb->ctx_ndlp = NULL;
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
}
}
@@ -1647,8 +1677,8 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
spin_lock_irq(&phba->hbalock);
list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
- (ndlp == (struct lpfc_nodelist *) mb->context2)) {
- mp = (struct lpfc_dmabuf *) (mb->context1);
+ (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ mp = (struct lpfc_dmabuf *)(mb->ctx_buf);
if (mp) {
__lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
@@ -1770,9 +1800,16 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
ndlp->nlp_fc4_type |= NLP_FC4_FCP;
} else if (ndlp->nlp_fc4_type == 0) {
- rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID,
- 0, ndlp->nlp_DID);
- return ndlp->nlp_state;
+ /* If we are only configured for FCP, the driver
+ * should just issue PRLI for FCP. Otherwise issue
+ * GFT_ID to determine if remote port supports NVME.
+ */
+ if (phba->cfg_enable_fc4_type != LPFC_ENABLE_FCP) {
+ rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID,
+ 0, ndlp->nlp_DID);
+ return ndlp->nlp_state;
+ }
+ ndlp->nlp_fc4_type = NLP_FC4_FCP;
}
ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
@@ -2863,8 +2900,9 @@ lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
/* DSM in event <evt> on NPort <nlp_DID> in state <cur_state> */
lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
"0211 DSM in event x%x on NPort x%x in "
- "state %d Data: x%x\n",
- evt, ndlp->nlp_DID, cur_state, ndlp->nlp_flag);
+ "state %d rpi x%x Data: x%x x%x\n",
+ evt, ndlp->nlp_DID, cur_state, ndlp->nlp_rpi,
+ ndlp->nlp_flag, ndlp->nlp_fc4_type);
lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM,
"DSM in: evt:%d ste:%d did:x%x",
@@ -2876,8 +2914,9 @@ lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
/* DSM out state <rc> on NPort <nlp_DID> */
if (got_ndlp) {
lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
- "0212 DSM out state %d on NPort x%x Data: x%x\n",
- rc, ndlp->nlp_DID, ndlp->nlp_flag);
+ "0212 DSM out state %d on NPort x%x "
+ "rpi x%x Data: x%x\n",
+ rc, ndlp->nlp_DID, ndlp->nlp_rpi, ndlp->nlp_flag);
lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM,
"DSM out: ste:%d did:x%x flg:x%x",
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index ba831def9301..4c66b19e6199 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1855,7 +1855,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG);
/* word 7 */
- bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0);
bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX);
bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com,
nvmereq_wqe->iocb.ulpClass);
@@ -1870,7 +1869,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
abts_buf->iotag);
/* word 10 */
- bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, nvmereq_wqe->hba_wqidx);
bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1);
bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 4fa6703a9ec9..b4f1a840b3b4 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -2734,6 +2734,7 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba,
int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction;
int prot_group_type = 0;
int fcpdl;
+ struct lpfc_vport *vport = phba->pport;
/*
* Start the lpfc command prep by bumping the bpl beyond fcp_cmnd
@@ -2839,6 +2840,14 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba,
*/
iocb_cmd->un.fcpi.fcpi_parm = fcpdl;
+ /*
+ * For First burst, we may need to adjust the initial transfer
+ * length for DIF
+ */
+ if (iocb_cmd->un.fcpi.fcpi_XRdy &&
+ (fcpdl < vport->cfg_first_burst_size))
+ iocb_cmd->un.fcpi.fcpi_XRdy = fcpdl;
+
return 0;
err:
if (lpfc_cmd->seg_cnt)
@@ -3403,6 +3412,7 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction;
int prot_group_type = 0;
int fcpdl;
+ struct lpfc_vport *vport = phba->pport;
/*
* Start the lpfc command prep by bumping the sgl beyond fcp_cmnd
@@ -3519,6 +3529,14 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
iocb_cmd->un.fcpi.fcpi_parm = fcpdl;
/*
+ * For First burst, we may need to adjust the initial transfer
+ * length for DIF
+ */
+ if (iocb_cmd->un.fcpi.fcpi_XRdy &&
+ (fcpdl < vport->cfg_first_burst_size))
+ iocb_cmd->un.fcpi.fcpi_XRdy = fcpdl;
+
+ /*
* If the OAS driver feature is enabled and the lun is enabled for
* OAS, set the oas iocb related flags.
*/
@@ -3914,7 +3932,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
uint32_t tag;
uint16_t hwq;
- if (cmnd && shost_use_blk_mq(cmnd->device->host)) {
+ if (cmnd) {
tag = blk_mq_unique_tag(cmnd->request);
hwq = blk_mq_unique_tag_to_hwq(tag);
@@ -4163,7 +4181,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
/* If pCmd was set to NULL from abort path, do not call scsi_done */
if (xchg(&lpfc_cmd->pCmd, NULL) == NULL) {
lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
- "0711 FCP cmd already NULL, sid: 0x%06x, "
+ "5688 FCP cmd already NULL, sid: 0x%06x, "
"did: 0x%06x, oxid: 0x%04x\n",
vport->fc_myDID,
(pnode) ? pnode->nlp_DID : 0,
@@ -4442,6 +4460,66 @@ lpfc_tskmgmt_def_cmpl(struct lpfc_hba *phba,
}
/**
+ * lpfc_check_pci_resettable - Walks list of devices on pci_dev's bus to check
+ * if issuing a pci_bus_reset is possibly unsafe
+ * @phba: lpfc_hba pointer.
+ *
+ * Description:
+ * Walks the bus_list to ensure only PCI devices with Emulex
+ * vendor id, device ids that support hot reset, and only one occurrence
+ * of function 0.
+ *
+ * Returns:
+ * -EBADSLT, detected invalid device
+ * 0, successful
+ */
+int
+lpfc_check_pci_resettable(const struct lpfc_hba *phba)
+{
+ const struct pci_dev *pdev = phba->pcidev;
+ struct pci_dev *ptr = NULL;
+ u8 counter = 0;
+
+ /* Walk the list of devices on the pci_dev's bus */
+ list_for_each_entry(ptr, &pdev->bus->devices, bus_list) {
+ /* Check for Emulex Vendor ID */
+ if (ptr->vendor != PCI_VENDOR_ID_EMULEX) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "8346 Non-Emulex vendor found: "
+ "0x%04x\n", ptr->vendor);
+ return -EBADSLT;
+ }
+
+ /* Check for valid Emulex Device ID */
+ switch (ptr->device) {
+ case PCI_DEVICE_ID_LANCER_FC:
+ case PCI_DEVICE_ID_LANCER_G6_FC:
+ case PCI_DEVICE_ID_LANCER_G7_FC:
+ break;
+ default:
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "8347 Invalid device found: "
+ "0x%04x\n", ptr->device);
+ return -EBADSLT;
+ }
+
+ /* Check for only one function 0 ID to ensure only one HBA on
+ * secondary bus
+ */
+ if (ptr->devfn == 0) {
+ if (++counter > 1) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "8348 More than one device on "
+ "secondary bus found\n");
+ return -EBADSLT;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
* lpfc_info - Info entry point of scsi_host_template data structure
* @host: The scsi host for which this call is being executed.
*
@@ -4455,32 +4533,53 @@ lpfc_info(struct Scsi_Host *host)
{
struct lpfc_vport *vport = (struct lpfc_vport *) host->hostdata;
struct lpfc_hba *phba = vport->phba;
- int len, link_speed = 0;
- static char lpfcinfobuf[384];
+ int link_speed = 0;
+ static char lpfcinfobuf[384];
+ char tmp[384] = {0};
- memset(lpfcinfobuf,0,384);
+ memset(lpfcinfobuf, 0, sizeof(lpfcinfobuf));
if (phba && phba->pcidev){
- strncpy(lpfcinfobuf, phba->ModelDesc, 256);
- len = strlen(lpfcinfobuf);
- snprintf(lpfcinfobuf + len,
- 384-len,
- " on PCI bus %02x device %02x irq %d",
- phba->pcidev->bus->number,
- phba->pcidev->devfn,
- phba->pcidev->irq);
- len = strlen(lpfcinfobuf);
+ /* Model Description */
+ scnprintf(tmp, sizeof(tmp), phba->ModelDesc);
+ if (strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf)) >=
+ sizeof(lpfcinfobuf))
+ goto buffer_done;
+
+ /* PCI Info */
+ scnprintf(tmp, sizeof(tmp),
+ " on PCI bus %02x device %02x irq %d",
+ phba->pcidev->bus->number, phba->pcidev->devfn,
+ phba->pcidev->irq);
+ if (strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf)) >=
+ sizeof(lpfcinfobuf))
+ goto buffer_done;
+
+ /* Port Number */
if (phba->Port[0]) {
- snprintf(lpfcinfobuf + len,
- 384-len,
- " port %s",
- phba->Port);
+ scnprintf(tmp, sizeof(tmp), " port %s", phba->Port);
+ if (strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf)) >=
+ sizeof(lpfcinfobuf))
+ goto buffer_done;
}
- len = strlen(lpfcinfobuf);
+
+ /* Link Speed */
link_speed = lpfc_sli_port_speed_get(phba);
- if (link_speed != 0)
- snprintf(lpfcinfobuf + len, 384-len,
- " Logical Link Speed: %d Mbps", link_speed);
+ if (link_speed != 0) {
+ scnprintf(tmp, sizeof(tmp),
+ " Logical Link Speed: %d Mbps", link_speed);
+ if (strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf)) >=
+ sizeof(lpfcinfobuf))
+ goto buffer_done;
+ }
+
+ /* PCI resettable */
+ if (!lpfc_check_pci_resettable(phba)) {
+ scnprintf(tmp, sizeof(tmp), " PCI resettable");
+ strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf));
+ }
}
+
+buffer_done:
return lpfcinfobuf;
}
@@ -6036,7 +6135,6 @@ struct scsi_host_template lpfc_template_nvme = {
.this_id = -1,
.sg_tablesize = 1,
.cmd_per_lun = 1,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = lpfc_hba_attrs,
.max_sectors = 0xFFFF,
.vendor_id = LPFC_NL_VENDOR_ID,
@@ -6061,7 +6159,6 @@ struct scsi_host_template lpfc_template_no_hr = {
.this_id = -1,
.sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT,
.cmd_per_lun = LPFC_CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = lpfc_hba_attrs,
.max_sectors = 0xFFFF,
.vendor_id = LPFC_NL_VENDOR_ID,
@@ -6088,7 +6185,6 @@ struct scsi_host_template lpfc_template = {
.this_id = -1,
.sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT,
.cmd_per_lun = LPFC_CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = lpfc_hba_attrs,
.max_sectors = 0xFFFF,
.vendor_id = LPFC_NL_VENDOR_ID,
@@ -6113,7 +6209,6 @@ struct scsi_host_template lpfc_vport_template = {
.this_id = -1,
.sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT,
.cmd_per_lun = LPFC_CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = lpfc_vport_attrs,
.max_sectors = 0xFFFF,
.change_queue_depth = scsi_change_queue_depth,
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index cc99859774ff..b759b089432c 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -194,6 +194,10 @@ struct lpfc_scsi_buf {
#define NO_MORE_OAS_LUN -1
#define NOT_OAS_ENABLED_LUN NO_MORE_OAS_LUN
+#ifndef FC_PORTSPEED_128GBIT
+#define FC_PORTSPEED_128GBIT 0x2000
+#endif
+
#define TXRDY_PAYLOAD_LEN 12
int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index b9e5cd79931a..30734caf77e1 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -2456,7 +2456,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
uint16_t rpi, vpi;
int rc;
- mp = (struct lpfc_dmabuf *) (pmb->context1);
+ mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
if (mp) {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
@@ -2491,9 +2491,35 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
}
if (pmb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)pmb->context2;
+ ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
lpfc_nlp_put(ndlp);
- pmb->context2 = NULL;
+ pmb->ctx_buf = NULL;
+ pmb->ctx_ndlp = NULL;
+ }
+
+ if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) {
+ ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+
+ /* Check to see if there are any deferred events to process */
+ if (ndlp) {
+ lpfc_printf_vlog(
+ vport,
+ KERN_INFO, LOG_MBOX | LOG_DISCOVERY,
+ "1438 UNREG cmpl deferred mbox x%x "
+ "on NPort x%x Data: x%x x%x %p\n",
+ ndlp->nlp_rpi, ndlp->nlp_DID,
+ ndlp->nlp_flag, ndlp->nlp_defer_did, ndlp);
+
+ if ((ndlp->nlp_flag & NLP_UNREG_INP) &&
+ (ndlp->nlp_defer_did != NLP_EVT_NOTHING_PENDING)) {
+ ndlp->nlp_flag &= ~NLP_UNREG_INP;
+ ndlp->nlp_defer_did = NLP_EVT_NOTHING_PENDING;
+ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ } else {
+ ndlp->nlp_flag &= ~NLP_UNREG_INP;
+ }
+ }
+ pmb->ctx_ndlp = NULL;
}
/* Check security permission status on INIT_LINK mailbox command */
@@ -2527,21 +2553,46 @@ lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
struct lpfc_vport *vport = pmb->vport;
struct lpfc_nodelist *ndlp;
- ndlp = pmb->context1;
+ ndlp = pmb->ctx_ndlp;
if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) {
if (phba->sli_rev == LPFC_SLI_REV4 &&
(bf_get(lpfc_sli_intf_if_type,
&phba->sli4_hba.sli_intf) >=
LPFC_SLI_INTF_IF_TYPE_2)) {
if (ndlp) {
- lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
- "0010 UNREG_LOGIN vpi:%x "
- "rpi:%x DID:%x map:%x %p\n",
- vport->vpi, ndlp->nlp_rpi,
- ndlp->nlp_DID,
- ndlp->nlp_usg_map, ndlp);
+ lpfc_printf_vlog(
+ vport, KERN_INFO, LOG_MBOX | LOG_SLI,
+ "0010 UNREG_LOGIN vpi:%x "
+ "rpi:%x DID:%x defer x%x flg x%x "
+ "map:%x %p\n",
+ vport->vpi, ndlp->nlp_rpi,
+ ndlp->nlp_DID, ndlp->nlp_defer_did,
+ ndlp->nlp_flag,
+ ndlp->nlp_usg_map, ndlp);
ndlp->nlp_flag &= ~NLP_LOGO_ACC;
lpfc_nlp_put(ndlp);
+
+ /* Check to see if there are any deferred
+ * events to process
+ */
+ if ((ndlp->nlp_flag & NLP_UNREG_INP) &&
+ (ndlp->nlp_defer_did !=
+ NLP_EVT_NOTHING_PENDING)) {
+ lpfc_printf_vlog(
+ vport, KERN_INFO, LOG_DISCOVERY,
+ "4111 UNREG cmpl deferred "
+ "clr x%x on "
+ "NPort x%x Data: x%x %p\n",
+ ndlp->nlp_rpi, ndlp->nlp_DID,
+ ndlp->nlp_defer_did, ndlp);
+ ndlp->nlp_flag &= ~NLP_UNREG_INP;
+ ndlp->nlp_defer_did =
+ NLP_EVT_NOTHING_PENDING;
+ lpfc_issue_els_plogi(
+ vport, ndlp->nlp_DID, 0);
+ } else {
+ ndlp->nlp_flag &= ~NLP_UNREG_INP;
+ }
}
}
}
@@ -4640,6 +4691,8 @@ lpfc_sli_brdrestart_s4(struct lpfc_hba *phba)
hba_aer_enabled = phba->hba_flag & HBA_AER_ENABLED;
rc = lpfc_sli4_brdreset(phba);
+ if (rc)
+ return rc;
spin_lock_irq(&phba->hbalock);
phba->pport->stopped = 0;
@@ -5228,7 +5281,7 @@ lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba)
goto out_free_mboxq;
}
- mp = (struct lpfc_dmabuf *) mboxq->context1;
+ mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
@@ -6148,6 +6201,25 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox,
}
/**
+ * lpfc_ras_stop_fwlog: Disable FW logging by the adapter
+ * @phba: Pointer to HBA context object.
+ *
+ * Disable FW logging into host memory on the adapter. To
+ * be done before reading logs from the host memory.
+ **/
+void
+lpfc_ras_stop_fwlog(struct lpfc_hba *phba)
+{
+ struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog;
+
+ ras_fwlog->ras_active = false;
+
+ /* Disable FW logging to host memory */
+ writel(LPFC_CTL_PDEV_CTL_DDL_RAS,
+ phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET);
+}
+
+/**
* lpfc_sli4_ras_dma_free - Free memory allocated for FW logging.
* @phba: Pointer to HBA context object.
*
@@ -6211,7 +6283,7 @@ lpfc_sli4_ras_dma_alloc(struct lpfc_hba *phba,
&ras_fwlog->lwpd.phys,
GFP_KERNEL);
if (!ras_fwlog->lwpd.virt) {
- lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"6185 LWPD Memory Alloc Failed\n");
return -ENOMEM;
@@ -6228,7 +6300,7 @@ lpfc_sli4_ras_dma_alloc(struct lpfc_hba *phba,
goto free_mem;
}
- dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev,
+ dmabuf->virt = dma_zalloc_coherent(&phba->pcidev->dev,
LPFC_RAS_MAX_ENTRY_SIZE,
&dmabuf->phys,
GFP_KERNEL);
@@ -6239,7 +6311,6 @@ lpfc_sli4_ras_dma_alloc(struct lpfc_hba *phba,
"6187 DMA Alloc Failed FW logging");
goto free_mem;
}
- memset(dmabuf->virt, 0, LPFC_RAS_MAX_ENTRY_SIZE);
dmabuf->buffer_tag = i;
list_add_tail(&dmabuf->list, &ras_fwlog->fwlog_buff_list);
}
@@ -6274,11 +6345,13 @@ lpfc_sli4_ras_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
if (mb->mbxStatus != MBX_SUCCESS || shdr_status) {
- lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
+ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
"6188 FW LOG mailbox "
"completed with status x%x add_status x%x,"
" mbx status x%x\n",
shdr_status, shdr_add_status, mb->mbxStatus);
+
+ ras_fwlog->ras_hwsupport = false;
goto disable_ras;
}
@@ -6326,7 +6399,7 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba,
rc = lpfc_sli4_ras_dma_alloc(phba, fwlog_entry_count);
if (rc) {
lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
- "6189 RAS FW Log Support Not Enabled");
+ "6189 FW Log Memory Allocation Failed");
return rc;
}
}
@@ -6334,7 +6407,7 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba,
/* Setup Mailbox command */
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
if (!mbox) {
- lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"6190 RAS MBX Alloc Failed");
rc = -ENOMEM;
goto mem_free;
@@ -6379,8 +6452,8 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba,
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
- lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
- "6191 RAS Mailbox failed. "
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "6191 FW-Log Mailbox failed. "
"status %d mbxStatus : x%x", rc,
bf_get(lpfc_mqe_status, &mbox->u.mqe));
mempool_free(mbox, phba->mbox_mem_pool);
@@ -7348,7 +7421,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
mboxq->vport = vport;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
- mp = (struct lpfc_dmabuf *) mboxq->context1;
+ mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
if (rc == MBX_SUCCESS) {
memcpy(&vport->fc_sparam, mp->virt, sizeof(struct serv_parm));
rc = 0;
@@ -7360,7 +7433,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
*/
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
- mboxq->context1 = NULL;
+ mboxq->ctx_buf = NULL;
if (unlikely(rc)) {
lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
"0382 READ_SPARAM command failed "
@@ -7635,7 +7708,18 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
*/
spin_lock_irq(&phba->hbalock);
phba->link_state = LPFC_LINK_DOWN;
+
+ /* Check if physical ports are trunked */
+ if (bf_get(lpfc_conf_trunk_port0, &phba->sli4_hba))
+ phba->trunk_link.link0.state = LPFC_LINK_DOWN;
+ if (bf_get(lpfc_conf_trunk_port1, &phba->sli4_hba))
+ phba->trunk_link.link1.state = LPFC_LINK_DOWN;
+ if (bf_get(lpfc_conf_trunk_port2, &phba->sli4_hba))
+ phba->trunk_link.link2.state = LPFC_LINK_DOWN;
+ if (bf_get(lpfc_conf_trunk_port3, &phba->sli4_hba))
+ phba->trunk_link.link3.state = LPFC_LINK_DOWN;
spin_unlock_irq(&phba->hbalock);
+
if (!(phba->hba_flag & HBA_FCOE_MODE) &&
(phba->hba_flag & LINK_DISABLED)) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_SLI,
@@ -8119,10 +8203,10 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
}
/* Copy the mailbox extension data */
- if (pmbox->in_ext_byte_len && pmbox->context2) {
- lpfc_sli_pcimem_bcopy(pmbox->context2,
- (uint8_t *)phba->mbox_ext,
- pmbox->in_ext_byte_len);
+ if (pmbox->in_ext_byte_len && pmbox->ctx_buf) {
+ lpfc_sli_pcimem_bcopy(pmbox->ctx_buf,
+ (uint8_t *)phba->mbox_ext,
+ pmbox->in_ext_byte_len);
}
/* Copy command data to host SLIM area */
lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE);
@@ -8133,10 +8217,10 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
= MAILBOX_HBA_EXT_OFFSET;
/* Copy the mailbox extension data */
- if (pmbox->in_ext_byte_len && pmbox->context2)
+ if (pmbox->in_ext_byte_len && pmbox->ctx_buf)
lpfc_memcpy_to_slim(phba->MBslimaddr +
MAILBOX_HBA_EXT_OFFSET,
- pmbox->context2, pmbox->in_ext_byte_len);
+ pmbox->ctx_buf, pmbox->in_ext_byte_len);
if (mbx->mbxCommand == MBX_CONFIG_PORT)
/* copy command data into host mbox for cmpl */
@@ -8259,9 +8343,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
lpfc_sli_pcimem_bcopy(phba->mbox, mbx,
MAILBOX_CMD_SIZE);
/* Copy the mailbox extension data */
- if (pmbox->out_ext_byte_len && pmbox->context2) {
+ if (pmbox->out_ext_byte_len && pmbox->ctx_buf) {
lpfc_sli_pcimem_bcopy(phba->mbox_ext,
- pmbox->context2,
+ pmbox->ctx_buf,
pmbox->out_ext_byte_len);
}
} else {
@@ -8269,8 +8353,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
lpfc_memcpy_from_slim(mbx, phba->MBslimaddr,
MAILBOX_CMD_SIZE);
/* Copy the mailbox extension data */
- if (pmbox->out_ext_byte_len && pmbox->context2) {
- lpfc_memcpy_from_slim(pmbox->context2,
+ if (pmbox->out_ext_byte_len && pmbox->ctx_buf) {
+ lpfc_memcpy_from_slim(
+ pmbox->ctx_buf,
phba->MBslimaddr +
MAILBOX_HBA_EXT_OFFSET,
pmbox->out_ext_byte_len);
@@ -11265,19 +11350,12 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
/* Complete prepping the abort wqe and issue to the FW. */
abts_wqe = &abtsiocbp->wqe;
- bf_set(abort_cmd_ia, &abts_wqe->abort_cmd, 0);
- bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG);
-
- /* Explicitly set reserved fields to zero.*/
- abts_wqe->abort_cmd.rsrvd4 = 0;
- abts_wqe->abort_cmd.rsrvd5 = 0;
- /* WQE Common - word 6. Context is XRI tag. Set 0. */
- bf_set(wqe_xri_tag, &abts_wqe->abort_cmd.wqe_com, 0);
- bf_set(wqe_ctxt_tag, &abts_wqe->abort_cmd.wqe_com, 0);
+ /* Clear any stale WQE contents */
+ memset(abts_wqe, 0, sizeof(union lpfc_wqe));
+ bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG);
/* word 7 */
- bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0);
bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX);
bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com,
cmdiocb->iocb.ulpClass);
@@ -11292,7 +11370,6 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
abtsiocbp->iotag);
/* word 10 */
- bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, cmdiocb->hba_wqidx);
bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1);
bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE);
@@ -12545,10 +12622,10 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id)
lpfc_sli_pcimem_bcopy(mbox, pmbox,
MAILBOX_CMD_SIZE);
if (pmb->out_ext_byte_len &&
- pmb->context2)
+ pmb->ctx_buf)
lpfc_sli_pcimem_bcopy(
phba->mbox_ext,
- pmb->context2,
+ pmb->ctx_buf,
pmb->out_ext_byte_len);
}
if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) {
@@ -12563,9 +12640,9 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id)
if (!pmbox->mbxStatus) {
mp = (struct lpfc_dmabuf *)
- (pmb->context1);
+ (pmb->ctx_buf);
ndlp = (struct lpfc_nodelist *)
- pmb->context2;
+ pmb->ctx_ndlp;
/* Reg_LOGIN of dflt RPI was
* successful. new lets get
@@ -12578,8 +12655,8 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id)
pmb);
pmb->mbox_cmpl =
lpfc_mbx_cmpl_dflt_rpi;
- pmb->context1 = mp;
- pmb->context2 = ndlp;
+ pmb->ctx_buf = mp;
+ pmb->ctx_ndlp = ndlp;
pmb->vport = vport;
rc = lpfc_sli_issue_mbox(phba,
pmb,
@@ -13185,16 +13262,16 @@ lpfc_sli4_sp_handle_mbox_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe)
mcqe_status,
pmbox->un.varWords[0], 0);
if (mcqe_status == MB_CQE_STATUS_SUCCESS) {
- mp = (struct lpfc_dmabuf *)(pmb->context1);
- ndlp = (struct lpfc_nodelist *)pmb->context2;
+ mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
+ ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
/* Reg_LOGIN of dflt RPI was successful. Now lets get
* RID of the PPI using the same mbox buffer.
*/
lpfc_unreg_login(phba, vport->vpi,
pmbox->un.varWords[0], pmb);
pmb->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi;
- pmb->context1 = mp;
- pmb->context2 = ndlp;
+ pmb->ctx_buf = mp;
+ pmb->ctx_ndlp = ndlp;
pmb->vport = vport;
rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
if (rc != MBX_BUSY)
@@ -13413,6 +13490,8 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
return workposted;
}
+#define FC_RCTL_MDS_DIAGS 0xF4
+
/**
* lpfc_sli4_sp_handle_rcqe - Process a receive-queue completion queue entry
* @phba: Pointer to HBA context object.
@@ -13426,6 +13505,7 @@ static bool
lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
{
bool workposted = false;
+ struct fc_frame_header *fc_hdr;
struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq;
struct lpfc_queue *drq = phba->sli4_hba.dat_rq;
struct lpfc_nvmet_tgtport *tgtp;
@@ -13462,7 +13542,17 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
hrq->RQ_buf_posted--;
memcpy(&dma_buf->cq_event.cqe.rcqe_cmpl, rcqe, sizeof(*rcqe));
- /* save off the frame for the word thread to process */
+ fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt;
+
+ if (fc_hdr->fh_r_ctl == FC_RCTL_MDS_DIAGS ||
+ fc_hdr->fh_r_ctl == FC_RCTL_DD_UNSOL_DATA) {
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ /* Handle MDS Loopback frames */
+ lpfc_sli4_handle_mds_loopback(phba->pport, dma_buf);
+ break;
+ }
+
+ /* save off the frame for the work thread to process */
list_add_tail(&dma_buf->cq_event.list,
&phba->sli4_hba.sp_queue_event);
/* Frame received */
@@ -14501,7 +14591,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
hw_page_size))/hw_page_size;
/* If needed, Adjust page count to match the max the adapter supports */
- if (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt)
+ if (phba->sli4_hba.pc_sli4_params.wqpcnt &&
+ (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt))
queue->page_count = phba->sli4_hba.pc_sli4_params.wqpcnt;
INIT_LIST_HEAD(&queue->list);
@@ -14669,7 +14760,8 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
mbox->vport = phba->pport;
mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
- mbox->context1 = NULL;
+ mbox->ctx_buf = NULL;
+ mbox->ctx_ndlp = NULL;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
shdr = (union lpfc_sli4_cfg_shdr *) &eq_delay->header.cfg_shdr;
shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
@@ -14789,7 +14881,8 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
}
mbox->vport = phba->pport;
mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
- mbox->context1 = NULL;
+ mbox->ctx_buf = NULL;
+ mbox->ctx_ndlp = NULL;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
@@ -16863,8 +16956,6 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
struct fc_vft_header *fc_vft_hdr;
uint32_t *header = (uint32_t *) fc_hdr;
-#define FC_RCTL_MDS_DIAGS 0xF4
-
switch (fc_hdr->fh_r_ctl) {
case FC_RCTL_DD_UNCAT: /* uncategorized information */
case FC_RCTL_DD_SOL_DATA: /* solicited data */
@@ -16903,15 +16994,12 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
goto drop;
}
-#define FC_TYPE_VENDOR_UNIQUE 0xFF
-
switch (fc_hdr->fh_type) {
case FC_TYPE_BLS:
case FC_TYPE_ELS:
case FC_TYPE_FCP:
case FC_TYPE_CT:
case FC_TYPE_NVME:
- case FC_TYPE_VENDOR_UNIQUE:
break;
case FC_TYPE_IP:
case FC_TYPE_ILS:
@@ -17741,6 +17829,7 @@ lpfc_sli4_mds_loopback_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
dma_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys);
kfree(pcmd);
lpfc_sli_release_iocbq(phba, cmdiocb);
+ lpfc_drain_txq(phba);
}
static void
@@ -17754,14 +17843,23 @@ lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
struct lpfc_dmabuf *pcmd = NULL;
uint32_t frame_len;
int rc;
+ unsigned long iflags;
fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
frame_len = bf_get(lpfc_rcqe_length, &dmabuf->cq_event.cqe.rcqe_cmpl);
/* Send the received frame back */
iocbq = lpfc_sli_get_iocbq(phba);
- if (!iocbq)
- goto exit;
+ if (!iocbq) {
+ /* Queue cq event and wakeup worker thread to process it */
+ spin_lock_irqsave(&phba->hbalock, iflags);
+ list_add_tail(&dmabuf->cq_event.list,
+ &phba->sli4_hba.sp_queue_event);
+ phba->hba_flag |= HBA_SP_QUEUE_EVT;
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ lpfc_worker_wake_up(phba);
+ return;
+ }
/* Allocate buffer for command payload */
pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
@@ -17846,6 +17944,14 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba,
/* Process each received buffer */
fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+ if (fc_hdr->fh_r_ctl == FC_RCTL_MDS_DIAGS ||
+ fc_hdr->fh_r_ctl == FC_RCTL_DD_UNSOL_DATA) {
+ vport = phba->pport;
+ /* Handle MDS Loopback frames */
+ lpfc_sli4_handle_mds_loopback(vport, dmabuf);
+ return;
+ }
+
/* check to see if this a valid type of frame */
if (lpfc_fc_frame_check(phba, fc_hdr)) {
lpfc_in_buf_free(phba, &dmabuf->dbuf);
@@ -17860,13 +17966,6 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba,
fcfi = bf_get(lpfc_rcqe_fcf_id,
&dmabuf->cq_event.cqe.rcqe_cmpl);
- if (fc_hdr->fh_r_ctl == 0xF4 && fc_hdr->fh_type == 0xFF) {
- vport = phba->pport;
- /* Handle MDS Loopback frames */
- lpfc_sli4_handle_mds_loopback(vport, dmabuf);
- return;
- }
-
/* d_id this frame is directed to */
did = sli4_did_from_fc_hdr(fc_hdr);
@@ -18207,8 +18306,8 @@ lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp,
lpfc_resume_rpi(mboxq, ndlp);
if (cmpl) {
mboxq->mbox_cmpl = cmpl;
- mboxq->context1 = arg;
- mboxq->context2 = ndlp;
+ mboxq->ctx_buf = arg;
+ mboxq->ctx_ndlp = ndlp;
} else
mboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
mboxq->vport = ndlp->vport;
@@ -18711,15 +18810,8 @@ next_priority:
goto initial_priority;
lpfc_printf_log(phba, KERN_WARNING, LOG_FIP,
"2844 No roundrobin failover FCF available\n");
- if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX)
- return LPFC_FCOE_FCF_NEXT_NONE;
- else {
- lpfc_printf_log(phba, KERN_WARNING, LOG_FIP,
- "3063 Only FCF available idx %d, flag %x\n",
- next_fcf_index,
- phba->fcf.fcf_pri[next_fcf_index].fcf_rec.flag);
- return next_fcf_index;
- }
+
+ return LPFC_FCOE_FCF_NEXT_NONE;
}
if (next_fcf_index < LPFC_SLI4_FCF_TBL_INDX_MAX &&
@@ -19026,7 +19118,7 @@ lpfc_sli4_get_config_region23(struct lpfc_hba *phba, char *rgn23_data)
if (lpfc_sli4_dump_cfg_rg23(phba, mboxq))
goto out;
mqe = &mboxq->u.mqe;
- mp = (struct lpfc_dmabuf *) mboxq->context1;
+ mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
if (rc)
goto out;
@@ -19171,11 +19263,11 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list,
struct lpfc_mbx_wr_object *wr_object;
LPFC_MBOXQ_t *mbox;
int rc = 0, i = 0;
- uint32_t shdr_status, shdr_add_status;
+ uint32_t shdr_status, shdr_add_status, shdr_change_status;
uint32_t mbox_tmo;
- union lpfc_sli4_cfg_shdr *shdr;
struct lpfc_dmabuf *dmabuf;
uint32_t written = 0;
+ bool check_change_status = false;
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
if (!mbox)
@@ -19203,6 +19295,8 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list,
(size - written);
written += (size - written);
bf_set(lpfc_wr_object_eof, &wr_object->u.request, 1);
+ bf_set(lpfc_wr_object_eas, &wr_object->u.request, 1);
+ check_change_status = true;
} else {
wr_object->u.request.bde[i].tus.f.bdeSize =
SLI4_PAGE_SIZE;
@@ -19219,9 +19313,39 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list,
rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
}
/* The IOCTL status is embedded in the mailbox subheader. */
- shdr = (union lpfc_sli4_cfg_shdr *) &wr_object->header.cfg_shdr;
- shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
- shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+ shdr_status = bf_get(lpfc_mbox_hdr_status,
+ &wr_object->header.cfg_shdr.response);
+ shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
+ &wr_object->header.cfg_shdr.response);
+ if (check_change_status) {
+ shdr_change_status = bf_get(lpfc_wr_object_change_status,
+ &wr_object->u.response);
+ switch (shdr_change_status) {
+ case (LPFC_CHANGE_STATUS_PHYS_DEV_RESET):
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3198 Firmware write complete: System "
+ "reboot required to instantiate\n");
+ break;
+ case (LPFC_CHANGE_STATUS_FW_RESET):
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3199 Firmware write complete: Firmware"
+ " reset required to instantiate\n");
+ break;
+ case (LPFC_CHANGE_STATUS_PORT_MIGRATION):
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3200 Firmware write complete: Port "
+ "Migration or PCI Reset required to "
+ "instantiate\n");
+ break;
+ case (LPFC_CHANGE_STATUS_PCI_RESET):
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3201 Firmware write complete: PCI "
+ "Reset required to instantiate\n");
+ break;
+ default:
+ break;
+ }
+ }
if (rc != MBX_TIMEOUT)
mempool_free(mbox, phba->mbox_mem_pool);
if (shdr_status || shdr_add_status || rc) {
@@ -19277,7 +19401,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport)
(mb->u.mb.mbxCommand == MBX_REG_VPI))
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- act_mbx_ndlp = (struct lpfc_nodelist *)mb->context2;
+ act_mbx_ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
/* Put reference count for delayed processing */
act_mbx_ndlp = lpfc_nlp_get(act_mbx_ndlp);
/* Unregister the RPI when mailbox complete */
@@ -19302,7 +19426,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport)
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)mb->context2;
+ ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
/* Unregister the RPI when mailbox complete */
mb->mbox_flag |= LPFC_MBX_IMED_UNREG;
restart_loop = 1;
@@ -19322,13 +19446,14 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport)
while (!list_empty(&mbox_cmd_list)) {
list_remove_head(&mbox_cmd_list, mb, LPFC_MBOXQ_t, list);
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- mp = (struct lpfc_dmabuf *) (mb->context1);
+ mp = (struct lpfc_dmabuf *)(mb->ctx_buf);
if (mp) {
__lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
}
- ndlp = (struct lpfc_nodelist *) mb->context2;
- mb->context2 = NULL;
+ mb->ctx_buf = NULL;
+ ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ mb->ctx_ndlp = NULL;
if (ndlp) {
spin_lock(shost->host_lock);
ndlp->nlp_flag &= ~NLP_IGNR_REG_CMPL;
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 34b7ab69b9b4..7abb395bb64a 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -144,9 +144,9 @@ typedef struct lpfcMboxq {
MAILBOX_t mb; /* Mailbox cmd */
struct lpfc_mqe mqe;
} u;
- struct lpfc_vport *vport;/* virtual port pointer */
- void *context1; /* caller context information */
- void *context2; /* caller context information */
+ struct lpfc_vport *vport; /* virtual port pointer */
+ void *ctx_ndlp; /* caller ndlp information */
+ void *ctx_buf; /* caller buffer information */
void *context3;
void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *);
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index e76c380e1a84..6b2d2350e2c6 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -279,6 +279,7 @@ struct lpfc_fcf {
#define FCF_REDISC_EVT 0x100 /* FCF rediscovery event to worker thread */
#define FCF_REDISC_FOV 0x200 /* Post FCF rediscovery fast failover */
#define FCF_REDISC_PROG (FCF_REDISC_PEND | FCF_REDISC_EVT)
+ uint16_t fcf_redisc_attempted;
uint32_t addr_mode;
uint32_t eligible_fcf_cnt;
struct lpfc_fcf_rec current_rec;
@@ -717,6 +718,19 @@ struct lpfc_sli4_hba {
uint16_t num_online_cpu;
uint16_t num_present_cpu;
uint16_t curr_disp_cpu;
+ uint32_t conf_trunk;
+#define lpfc_conf_trunk_port0_WORD conf_trunk
+#define lpfc_conf_trunk_port0_SHIFT 0
+#define lpfc_conf_trunk_port0_MASK 0x1
+#define lpfc_conf_trunk_port1_WORD conf_trunk
+#define lpfc_conf_trunk_port1_SHIFT 1
+#define lpfc_conf_trunk_port1_MASK 0x1
+#define lpfc_conf_trunk_port2_WORD conf_trunk
+#define lpfc_conf_trunk_port2_SHIFT 2
+#define lpfc_conf_trunk_port2_MASK 0x1
+#define lpfc_conf_trunk_port3_WORD conf_trunk
+#define lpfc_conf_trunk_port3_SHIFT 3
+#define lpfc_conf_trunk_port3_MASK 0x1
};
enum lpfc_sge_type {
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 5a0d512ff497..3f4398ffb567 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "12.0.0.7"
+#define LPFC_DRIVER_VERSION "12.0.0.10"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index c340e0e47473..102a011ff6d4 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -138,8 +138,8 @@ lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport)
* Grab buffer pointer and clear context1 so we can use
* lpfc_sli_issue_box_wait
*/
- mp = (struct lpfc_dmabuf *) pmb->context1;
- pmb->context1 = NULL;
+ mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ pmb->ctx_buf = NULL;
pmb->vport = vport;
rc = lpfc_sli_issue_mbox_wait(phba, pmb, phba->fc_ratov * 2);
diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c
index 177701dfdfcb..c8e6ae98a4a6 100644
--- a/drivers/scsi/mac53c94.c
+++ b/drivers/scsi/mac53c94.c
@@ -403,7 +403,7 @@ static struct scsi_host_template mac53c94_template = {
.can_queue = 1,
.this_id = 7,
.sg_tablesize = SG_ALL,
- .use_clustering = DISABLE_CLUSTERING,
+ .max_segment_size = 65535,
};
static int mac53c94_probe(struct macio_dev *mdev, const struct of_device_id *match)
diff --git a/drivers/scsi/mac_esp.c b/drivers/scsi/mac_esp.c
index 764d320bb2ca..ee741207fd4e 100644
--- a/drivers/scsi/mac_esp.c
+++ b/drivers/scsi/mac_esp.c
@@ -307,7 +307,7 @@ static int esp_mac_probe(struct platform_device *dev)
goto fail;
host->max_id = 8;
- host->use_clustering = DISABLE_CLUSTERING;
+ host->dma_boundary = PAGE_SIZE - 1;
esp = shost_priv(host);
esp->host = host;
diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c
index dd6057359d7c..8b4b5b1a13d7 100644
--- a/drivers/scsi/mac_scsi.c
+++ b/drivers/scsi/mac_scsi.c
@@ -333,7 +333,7 @@ static struct scsi_host_template mac_scsi_template = {
.this_id = 7,
.sg_tablesize = 1,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.cmd_size = NCR5380_CMD_SIZE,
.max_sectors = 128,
};
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 8c7154143a4e..4862f65ec3e8 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -4148,7 +4148,6 @@ static struct scsi_host_template megaraid_template = {
.this_id = DEFAULT_INITIATOR_ID,
.sg_tablesize = MAX_SGLIST,
.cmd_per_lun = DEF_CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.eh_abort_handler = megaraid_abort,
.eh_device_reset_handler = megaraid_reset,
.eh_bus_reset_handler = megaraid_reset,
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index 3b7abe5ca7f5..e836392b75e8 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -336,7 +336,6 @@ static struct scsi_host_template megaraid_template_g = {
.eh_abort_handler = megaraid_abort_handler,
.eh_host_reset_handler = megaraid_reset_handler,
.change_queue_depth = scsi_change_queue_depth,
- .use_clustering = ENABLE_CLUSTERING,
.no_write_same = 1,
.sdev_attrs = megaraid_sdev_attrs,
.shost_attrs = megaraid_shost_attrs,
@@ -1243,8 +1242,7 @@ megaraid_mbox_teardown_dma_pools(adapter_t *adapter)
dma_pool_free(raid_dev->sg_pool_handle, sg_pci_blk[i].vaddr,
sg_pci_blk[i].dma_addr);
}
- if (raid_dev->sg_pool_handle)
- dma_pool_destroy(raid_dev->sg_pool_handle);
+ dma_pool_destroy(raid_dev->sg_pool_handle);
epthru_pci_blk = raid_dev->epthru_pool;
@@ -1252,8 +1250,7 @@ megaraid_mbox_teardown_dma_pools(adapter_t *adapter)
dma_pool_free(raid_dev->epthru_pool_handle,
epthru_pci_blk[i].vaddr, epthru_pci_blk[i].dma_addr);
}
- if (raid_dev->epthru_pool_handle)
- dma_pool_destroy(raid_dev->epthru_pool_handle);
+ dma_pool_destroy(raid_dev->epthru_pool_handle);
mbox_pci_blk = raid_dev->mbox_pool;
@@ -1261,8 +1258,7 @@ megaraid_mbox_teardown_dma_pools(adapter_t *adapter)
dma_pool_free(raid_dev->mbox_pool_handle,
mbox_pci_blk[i].vaddr, mbox_pci_blk[i].dma_addr);
}
- if (raid_dev->mbox_pool_handle)
- dma_pool_destroy(raid_dev->mbox_pool_handle);
+ dma_pool_destroy(raid_dev->mbox_pool_handle);
return;
}
diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c
index 8428247015db..3ce837e4b24c 100644
--- a/drivers/scsi/megaraid/megaraid_mm.c
+++ b/drivers/scsi/megaraid/megaraid_mm.c
@@ -1017,8 +1017,7 @@ memalloc_error:
kfree(adapter->kioc_list);
kfree(adapter->mbox_list);
- if (adapter->pthru_dma_pool)
- dma_pool_destroy(adapter->pthru_dma_pool);
+ dma_pool_destroy(adapter->pthru_dma_pool);
kfree(adapter);
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index 67d356d84717..16536c41f0c5 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -2,7 +2,8 @@
* Linux MegaRAID driver for SAS based RAID controllers
*
* Copyright (c) 2003-2013 LSI Corporation
- * Copyright (c) 2013-2014 Avago Technologies
+ * Copyright (c) 2013-2016 Avago Technologies
+ * Copyright (c) 2016-2018 Broadcom Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -19,14 +20,11 @@
*
* FILE: megaraid_sas.h
*
- * Authors: Avago Technologies
- * Kashyap Desai <kashyap.desai@avagotech.com>
- * Sumit Saxena <sumit.saxena@avagotech.com>
+ * Authors: Broadcom Inc.
+ * Kashyap Desai <kashyap.desai@broadcom.com>
+ * Sumit Saxena <sumit.saxena@broadcom.com>
*
- * Send feedback to: megaraidlinux.pdl@avagotech.com
- *
- * Mail to: Avago Technologies, 350 West Trimble Road, Building 90,
- * San Jose, California 95131
+ * Send feedback to: megaraidlinux.pdl@broadcom.com
*/
#ifndef LSI_MEGARAID_SAS_H
@@ -35,8 +33,8 @@
/*
* MegaRAID SAS Driver meta data
*/
-#define MEGASAS_VERSION "07.706.03.00-rc1"
-#define MEGASAS_RELDATE "May 21, 2018"
+#define MEGASAS_VERSION "07.707.50.00-rc1"
+#define MEGASAS_RELDATE "December 18, 2018"
/*
* Device IDs
@@ -62,6 +60,10 @@
#define PCI_DEVICE_ID_LSI_TOMCAT 0x0017
#define PCI_DEVICE_ID_LSI_VENTURA_4PORT 0x001B
#define PCI_DEVICE_ID_LSI_CRUSADER_4PORT 0x001C
+#define PCI_DEVICE_ID_LSI_AERO_10E1 0x10e1
+#define PCI_DEVICE_ID_LSI_AERO_10E2 0x10e2
+#define PCI_DEVICE_ID_LSI_AERO_10E5 0x10e5
+#define PCI_DEVICE_ID_LSI_AERO_10E6 0x10e6
/*
* Intel HBA SSDIDs
@@ -142,6 +144,7 @@
* CLR_HANDSHAKE: FW is waiting for HANDSHAKE from BIOS or Driver
* HOTPLUG : Resume from Hotplug
* MFI_STOP_ADP : Send signal to FW to stop processing
+ * MFI_ADP_TRIGGER_SNAP_DUMP: Inform firmware to initiate snap dump
*/
#define WRITE_SEQUENCE_OFFSET (0x0000000FC) /* I20 */
#define HOST_DIAGNOSTIC_OFFSET (0x000000F8) /* I20 */
@@ -158,6 +161,7 @@
#define MFI_RESET_FLAGS MFI_INIT_READY| \
MFI_INIT_MFIMODE| \
MFI_INIT_ABORT
+#define MFI_ADP_TRIGGER_SNAP_DUMP 0x00000100
#define MPI2_IOCINIT_MSGFLAG_RDPQ_ARRAY_MODE (0x01)
/*
@@ -860,8 +864,22 @@ struct megasas_ctrl_prop {
u32 reserved:18;
#endif
} OnOffProperties;
- u8 autoSnapVDSpace;
- u8 viewSpace;
+
+ union {
+ u8 autoSnapVDSpace;
+ u8 viewSpace;
+ struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u16 reserved2:11;
+ u16 enable_snap_dump:1;
+ u16 reserved1:4;
+#else
+ u16 reserved1:4;
+ u16 enable_snap_dump:1;
+ u16 reserved2:11;
+#endif
+ } on_off_properties2;
+ };
__le16 spinDownTime;
u8 reserved[24];
} __packed;
@@ -1485,7 +1503,6 @@ enum FW_BOOT_CONTEXT {
#define MEGASAS_IOCTL_CMD 0
#define MEGASAS_DEFAULT_CMD_TIMEOUT 90
#define MEGASAS_THROTTLE_QUEUE_DEPTH 16
-#define MEGASAS_BLOCKED_CMD_TIMEOUT 60
#define MEGASAS_DEFAULT_TM_TIMEOUT 50
/*
* FW reports the maximum of number of commands that it can accept (maximum
@@ -1544,11 +1561,16 @@ enum FW_BOOT_CONTEXT {
#define MR_CAN_HANDLE_64_BIT_DMA_OFFSET (1 << 25)
+#define MEGASAS_WATCHDOG_THREAD_INTERVAL 1000
+#define MEGASAS_WAIT_FOR_NEXT_DMA_MSECS 20
+#define MEGASAS_WATCHDOG_WAIT_COUNT 50
+
enum MR_ADAPTER_TYPE {
MFI_SERIES = 1,
THUNDERBOLT_SERIES = 2,
INVADER_SERIES = 3,
VENTURA_SERIES = 4,
+ AERO_SERIES = 5,
};
/*
@@ -1588,11 +1610,10 @@ struct megasas_register_set {
u32 reserved_3[3]; /*00A4h*/
- u32 outbound_scratch_pad ; /*00B0h*/
- u32 outbound_scratch_pad_2; /*00B4h*/
- u32 outbound_scratch_pad_3; /*00B8h*/
- u32 outbound_scratch_pad_4; /*00BCh*/
-
+ u32 outbound_scratch_pad_0; /*00B0h*/
+ u32 outbound_scratch_pad_1; /*00B4h*/
+ u32 outbound_scratch_pad_2; /*00B8h*/
+ u32 outbound_scratch_pad_3; /*00BCh*/
u32 inbound_low_queue_port ; /*00C0h*/
@@ -2181,6 +2202,9 @@ struct megasas_instance {
struct MR_LD_TARGETID_LIST *ld_targetid_list_buf;
dma_addr_t ld_targetid_list_buf_h;
+ struct MR_SNAPDUMP_PROPERTIES *snapdump_prop;
+ dma_addr_t snapdump_prop_h;
+
void *crash_buf[MAX_CRASH_DUMP_SIZE];
unsigned int fw_crash_buffer_size;
unsigned int fw_crash_state;
@@ -2250,7 +2274,9 @@ struct megasas_instance {
struct megasas_instance_template *instancet;
struct tasklet_struct isr_tasklet;
struct work_struct work_init;
- struct work_struct crash_init;
+ struct delayed_work fw_fault_work;
+ struct workqueue_struct *fw_fault_work_q;
+ char fault_handler_work_q_name[48];
u8 flag;
u8 unload;
@@ -2310,6 +2336,7 @@ struct megasas_instance {
bool support_nvme_passthru;
u8 task_abort_tmo;
u8 max_reset_tmo;
+ u8 snapdump_wait_time;
};
struct MR_LD_VF_MAP {
u32 size;
@@ -2386,9 +2413,9 @@ struct megasas_instance_template {
void (*enable_intr)(struct megasas_instance *);
void (*disable_intr)(struct megasas_instance *);
- int (*clear_intr)(struct megasas_register_set __iomem *);
+ int (*clear_intr)(struct megasas_instance *);
- u32 (*read_fw_status_reg)(struct megasas_register_set __iomem *);
+ u32 (*read_fw_status_reg)(struct megasas_instance *);
int (*adp_reset)(struct megasas_instance *, \
struct megasas_register_set __iomem *);
int (*check_reset)(struct megasas_instance *, \
@@ -2535,11 +2562,11 @@ void megasas_set_dynamic_target_properties(struct scsi_device *sdev,
bool is_target_prop);
int megasas_get_target_prop(struct megasas_instance *instance,
struct scsi_device *sdev);
+void megasas_get_snapdump_properties(struct megasas_instance *instance);
int megasas_set_crash_dump_params(struct megasas_instance *instance,
u8 crash_buf_state);
void megasas_free_host_crash_buffer(struct megasas_instance *instance);
-void megasas_fusion_crash_dump_wq(struct work_struct *work);
void megasas_return_cmd_fusion(struct megasas_instance *instance,
struct megasas_cmd_fusion *cmd);
@@ -2560,6 +2587,9 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd);
u32 mega_mod64(u64 dividend, u32 divisor);
int megasas_alloc_fusion_context(struct megasas_instance *instance);
void megasas_free_fusion_context(struct megasas_instance *instance);
+int megasas_fusion_start_watchdog(struct megasas_instance *instance);
+void megasas_fusion_stop_watchdog(struct megasas_instance *instance);
+
void megasas_set_dma_settings(struct megasas_instance *instance,
struct megasas_dcmd_frame *dcmd,
dma_addr_t dma_addr, u32 dma_len);
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 9b90c716f06d..f7bdd783360a 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2,7 +2,8 @@
* Linux MegaRAID driver for SAS based RAID controllers
*
* Copyright (c) 2003-2013 LSI Corporation
- * Copyright (c) 2013-2014 Avago Technologies
+ * Copyright (c) 2013-2016 Avago Technologies
+ * Copyright (c) 2016-2018 Broadcom Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -17,18 +18,15 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
- * Authors: Avago Technologies
+ * Authors: Broadcom Inc.
* Sreenivas Bagalkote
* Sumant Patro
* Bo Yang
* Adam Radford
- * Kashyap Desai <kashyap.desai@avagotech.com>
- * Sumit Saxena <sumit.saxena@avagotech.com>
+ * Kashyap Desai <kashyap.desai@broadcom.com>
+ * Sumit Saxena <sumit.saxena@broadcom.com>
*
- * Send feedback to: megaraidlinux.pdl@avagotech.com
- *
- * Mail to: Avago Technologies, 350 West Trimble Road, Building 90,
- * San Jose, California 95131
+ * Send feedback to: megaraidlinux.pdl@broadcom.com
*/
#include <linux/kernel.h>
@@ -87,8 +85,7 @@ MODULE_PARM_DESC(throttlequeuedepth,
unsigned int resetwaittime = MEGASAS_RESET_WAIT_TIME;
module_param(resetwaittime, int, S_IRUGO);
-MODULE_PARM_DESC(resetwaittime, "Wait time in seconds after I/O timeout "
- "before resetting adapter. Default: 180");
+MODULE_PARM_DESC(resetwaittime, "Wait time in (1-180s) after I/O timeout before resetting adapter. Default: 180s");
int smp_affinity_enable = 1;
module_param(smp_affinity_enable, int, S_IRUGO);
@@ -96,7 +93,7 @@ MODULE_PARM_DESC(smp_affinity_enable, "SMP affinity feature enable/disable Defau
int rdpq_enable = 1;
module_param(rdpq_enable, int, S_IRUGO);
-MODULE_PARM_DESC(rdpq_enable, " Allocate reply queue in chunks for large queue depth enable/disable Default: disable(0)");
+MODULE_PARM_DESC(rdpq_enable, "Allocate reply queue in chunks for large queue depth enable/disable Default: enable(1)");
unsigned int dual_qdepth_disable;
module_param(dual_qdepth_disable, int, S_IRUGO);
@@ -108,8 +105,8 @@ MODULE_PARM_DESC(scmd_timeout, "scsi command timeout (10-90s), default 90s. See
MODULE_LICENSE("GPL");
MODULE_VERSION(MEGASAS_VERSION);
-MODULE_AUTHOR("megaraidlinux.pdl@avagotech.com");
-MODULE_DESCRIPTION("Avago MegaRAID SAS Driver");
+MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com");
+MODULE_DESCRIPTION("Broadcom MegaRAID SAS Driver");
int megasas_transition_to_ready(struct megasas_instance *instance, int ocr);
static int megasas_get_pd_list(struct megasas_instance *instance);
@@ -165,6 +162,10 @@ static struct pci_device_id megasas_pci_table[] = {
{PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_TOMCAT)},
{PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_VENTURA_4PORT)},
{PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_CRUSADER_4PORT)},
+ {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_AERO_10E1)},
+ {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_AERO_10E2)},
+ {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_AERO_10E5)},
+ {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_AERO_10E6)},
{}
};
@@ -189,7 +190,7 @@ void
megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd,
u8 alt_status);
static u32
-megasas_read_fw_status_reg_gen2(struct megasas_register_set __iomem *regs);
+megasas_read_fw_status_reg_gen2(struct megasas_instance *instance);
static int
megasas_adp_reset_gen2(struct megasas_instance *instance,
struct megasas_register_set __iomem *reg_set);
@@ -219,6 +220,28 @@ megasas_free_ctrl_dma_buffers(struct megasas_instance *instance);
static inline void
megasas_init_ctrl_params(struct megasas_instance *instance);
+u32 megasas_readl(struct megasas_instance *instance,
+ const volatile void __iomem *addr)
+{
+ u32 i = 0, ret_val;
+ /*
+ * Due to a HW errata in Aero controllers, reads to certain
+ * Fusion registers could intermittently return all zeroes.
+ * This behavior is transient in nature and subsequent reads will
+ * return valid value. As a workaround in driver, retry readl for
+ * upto three times until a non-zero value is read.
+ */
+ if (instance->adapter_type == AERO_SERIES) {
+ do {
+ ret_val = readl(addr);
+ i++;
+ } while (ret_val == 0 && i < 3);
+ return ret_val;
+ } else {
+ return readl(addr);
+ }
+}
+
/**
* megasas_set_dma_settings - Populate DMA address, length and flags for DCMDs
* @instance: Adapter soft state
@@ -419,19 +442,21 @@ megasas_disable_intr_xscale(struct megasas_instance *instance)
* @regs: MFI register set
*/
static u32
-megasas_read_fw_status_reg_xscale(struct megasas_register_set __iomem * regs)
+megasas_read_fw_status_reg_xscale(struct megasas_instance *instance)
{
- return readl(&(regs)->outbound_msg_0);
+ return readl(&instance->reg_set->outbound_msg_0);
}
/**
* megasas_clear_interrupt_xscale - Check & clear interrupt
* @regs: MFI register set
*/
static int
-megasas_clear_intr_xscale(struct megasas_register_set __iomem * regs)
+megasas_clear_intr_xscale(struct megasas_instance *instance)
{
u32 status;
u32 mfiStatus = 0;
+ struct megasas_register_set __iomem *regs;
+ regs = instance->reg_set;
/*
* Check if it is our interrupt
@@ -596,9 +621,9 @@ megasas_disable_intr_ppc(struct megasas_instance *instance)
* @regs: MFI register set
*/
static u32
-megasas_read_fw_status_reg_ppc(struct megasas_register_set __iomem * regs)
+megasas_read_fw_status_reg_ppc(struct megasas_instance *instance)
{
- return readl(&(regs)->outbound_scratch_pad);
+ return readl(&instance->reg_set->outbound_scratch_pad_0);
}
/**
@@ -606,9 +631,11 @@ megasas_read_fw_status_reg_ppc(struct megasas_register_set __iomem * regs)
* @regs: MFI register set
*/
static int
-megasas_clear_intr_ppc(struct megasas_register_set __iomem * regs)
+megasas_clear_intr_ppc(struct megasas_instance *instance)
{
u32 status, mfiStatus = 0;
+ struct megasas_register_set __iomem *regs;
+ regs = instance->reg_set;
/*
* Check if it is our interrupt
@@ -721,9 +748,9 @@ megasas_disable_intr_skinny(struct megasas_instance *instance)
* @regs: MFI register set
*/
static u32
-megasas_read_fw_status_reg_skinny(struct megasas_register_set __iomem *regs)
+megasas_read_fw_status_reg_skinny(struct megasas_instance *instance)
{
- return readl(&(regs)->outbound_scratch_pad);
+ return readl(&instance->reg_set->outbound_scratch_pad_0);
}
/**
@@ -731,10 +758,12 @@ megasas_read_fw_status_reg_skinny(struct megasas_register_set __iomem *regs)
* @regs: MFI register set
*/
static int
-megasas_clear_intr_skinny(struct megasas_register_set __iomem *regs)
+megasas_clear_intr_skinny(struct megasas_instance *instance)
{
u32 status;
u32 mfiStatus = 0;
+ struct megasas_register_set __iomem *regs;
+ regs = instance->reg_set;
/*
* Check if it is our interrupt
@@ -748,7 +777,7 @@ megasas_clear_intr_skinny(struct megasas_register_set __iomem *regs)
/*
* Check if it is our interrupt
*/
- if ((megasas_read_fw_status_reg_skinny(regs) & MFI_STATE_MASK) ==
+ if ((megasas_read_fw_status_reg_skinny(instance) & MFI_STATE_MASK) ==
MFI_STATE_FAULT) {
mfiStatus = MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE;
} else
@@ -866,9 +895,9 @@ megasas_disable_intr_gen2(struct megasas_instance *instance)
* @regs: MFI register set
*/
static u32
-megasas_read_fw_status_reg_gen2(struct megasas_register_set __iomem *regs)
+megasas_read_fw_status_reg_gen2(struct megasas_instance *instance)
{
- return readl(&(regs)->outbound_scratch_pad);
+ return readl(&instance->reg_set->outbound_scratch_pad_0);
}
/**
@@ -876,10 +905,12 @@ megasas_read_fw_status_reg_gen2(struct megasas_register_set __iomem *regs)
* @regs: MFI register set
*/
static int
-megasas_clear_intr_gen2(struct megasas_register_set __iomem *regs)
+megasas_clear_intr_gen2(struct megasas_instance *instance)
{
u32 status;
u32 mfiStatus = 0;
+ struct megasas_register_set __iomem *regs;
+ regs = instance->reg_set;
/*
* Check if it is our interrupt
@@ -2079,9 +2110,11 @@ void megaraid_sas_kill_hba(struct megasas_instance *instance)
if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0073SKINNY) ||
(instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0071SKINNY) ||
(instance->adapter_type != MFI_SERIES)) {
- writel(MFI_STOP_ADP, &instance->reg_set->doorbell);
- /* Flush */
- readl(&instance->reg_set->doorbell);
+ if (!instance->requestorId) {
+ writel(MFI_STOP_ADP, &instance->reg_set->doorbell);
+ /* Flush */
+ readl(&instance->reg_set->doorbell);
+ }
if (instance->requestorId && instance->peerIsPresent)
memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS);
} else {
@@ -2682,7 +2715,7 @@ static int megasas_wait_for_outstanding(struct megasas_instance *instance)
i = 0;
outstanding = atomic_read(&instance->fw_outstanding);
- fw_state = instance->instancet->read_fw_status_reg(instance->reg_set) & MFI_STATE_MASK;
+ fw_state = instance->instancet->read_fw_status_reg(instance) & MFI_STATE_MASK;
if ((!outstanding && (fw_state == MFI_STATE_OPERATIONAL)))
goto no_outstanding;
@@ -2711,7 +2744,7 @@ static int megasas_wait_for_outstanding(struct megasas_instance *instance)
outstanding = atomic_read(&instance->fw_outstanding);
- fw_state = instance->instancet->read_fw_status_reg(instance->reg_set) & MFI_STATE_MASK;
+ fw_state = instance->instancet->read_fw_status_reg(instance) & MFI_STATE_MASK;
if ((!outstanding && (fw_state == MFI_STATE_OPERATIONAL)))
goto no_outstanding;
}
@@ -3186,7 +3219,6 @@ static struct scsi_host_template megasas_template = {
.eh_timed_out = megasas_reset_timer,
.shost_attrs = megaraid_host_attrs,
.bios_param = megasas_bios_param,
- .use_clustering = ENABLE_CLUSTERING,
.change_queue_depth = scsi_change_queue_depth,
.no_write_same = 1,
};
@@ -3278,6 +3310,7 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd,
megasas_complete_int_cmd(instance, cmd);
break;
}
+ /* fall through */
case MFI_CMD_LD_READ:
case MFI_CMD_LD_WRITE:
@@ -3665,9 +3698,8 @@ megasas_deplete_reply_queue(struct megasas_instance *instance,
return IRQ_HANDLED;
}
- if ((mfiStatus = instance->instancet->clear_intr(
- instance->reg_set)
- ) == 0) {
+ mfiStatus = instance->instancet->clear_intr(instance);
+ if (mfiStatus == 0) {
/* Hardware may not set outbound_intr_status in MSI-X mode */
if (!instance->msix_vectors)
return IRQ_NONE;
@@ -3677,7 +3709,7 @@ megasas_deplete_reply_queue(struct megasas_instance *instance,
if ((mfiStatus & MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE)) {
fw_state = instance->instancet->read_fw_status_reg(
- instance->reg_set) & MFI_STATE_MASK;
+ instance) & MFI_STATE_MASK;
if (fw_state != MFI_STATE_FAULT) {
dev_notice(&instance->pdev->dev, "fw state:%x\n",
@@ -3760,7 +3792,7 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr)
u32 cur_state;
u32 abs_state, curr_abs_state;
- abs_state = instance->instancet->read_fw_status_reg(instance->reg_set);
+ abs_state = instance->instancet->read_fw_status_reg(instance);
fw_state = abs_state & MFI_STATE_MASK;
if (fw_state != MFI_STATE_READY)
@@ -3832,7 +3864,8 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr)
if (instance->adapter_type != MFI_SERIES) {
for (i = 0; i < (10 * 1000); i += 20) {
- if (readl(
+ if (megasas_readl(
+ instance,
&instance->
reg_set->
doorbell) & 1)
@@ -3891,12 +3924,12 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr)
/*
* The cur_state should not last for more than max_wait secs
*/
- for (i = 0; i < (max_wait * 1000); i++) {
+ for (i = 0; i < max_wait; i++) {
curr_abs_state = instance->instancet->
- read_fw_status_reg(instance->reg_set);
+ read_fw_status_reg(instance);
if (abs_state == curr_abs_state) {
- msleep(1);
+ msleep(1000);
} else
break;
}
@@ -4634,9 +4667,9 @@ static void megasas_update_ext_vd_details(struct megasas_instance *instance)
}
dev_info(&instance->pdev->dev,
- "firmware type\t: %s\n",
- instance->supportmax256vd ? "Extended VD(240 VD)firmware" :
- "Legacy(64 VD) firmware");
+ "FW provided supportMaxExtLDs: %d\tmax_lds: %d\n",
+ instance->ctrl_info_buf->adapterOperations3.supportMaxExtLDs ? 1 : 0,
+ instance->ctrl_info_buf->max_lds);
if (instance->max_raid_mapsize) {
ventura_map_sz = instance->max_raid_mapsize *
@@ -4661,6 +4694,87 @@ static void megasas_update_ext_vd_details(struct megasas_instance *instance)
fusion->drv_map_sz = sizeof(struct MR_DRV_RAID_MAP_ALL);
}
+/*
+ * dcmd.opcode - MR_DCMD_CTRL_SNAPDUMP_GET_PROPERTIES
+ * dcmd.hdr.length - number of bytes to read
+ * dcmd.sge - Ptr to MR_SNAPDUMP_PROPERTIES
+ * Desc: Fill in snapdump properties
+ * Status: MFI_STAT_OK- Command successful
+ */
+void megasas_get_snapdump_properties(struct megasas_instance *instance)
+{
+ int ret = 0;
+ struct megasas_cmd *cmd;
+ struct megasas_dcmd_frame *dcmd;
+ struct MR_SNAPDUMP_PROPERTIES *ci;
+ dma_addr_t ci_h = 0;
+
+ ci = instance->snapdump_prop;
+ ci_h = instance->snapdump_prop_h;
+
+ if (!ci)
+ return;
+
+ cmd = megasas_get_cmd(instance);
+
+ if (!cmd) {
+ dev_dbg(&instance->pdev->dev, "Failed to get a free cmd\n");
+ return;
+ }
+
+ dcmd = &cmd->frame->dcmd;
+
+ memset(ci, 0, sizeof(*ci));
+ memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+ dcmd->cmd = MFI_CMD_DCMD;
+ dcmd->cmd_status = MFI_STAT_INVALID_STATUS;
+ dcmd->sge_count = 1;
+ dcmd->flags = MFI_FRAME_DIR_READ;
+ dcmd->timeout = 0;
+ dcmd->pad_0 = 0;
+ dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_SNAPDUMP_PROPERTIES));
+ dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_SNAPDUMP_GET_PROPERTIES);
+
+ megasas_set_dma_settings(instance, dcmd, ci_h,
+ sizeof(struct MR_SNAPDUMP_PROPERTIES));
+
+ if (!instance->mask_interrupts) {
+ ret = megasas_issue_blocked_cmd(instance, cmd,
+ MFI_IO_TIMEOUT_SECS);
+ } else {
+ ret = megasas_issue_polled(instance, cmd);
+ cmd->flags |= DRV_DCMD_SKIP_REFIRE;
+ }
+
+ switch (ret) {
+ case DCMD_SUCCESS:
+ instance->snapdump_wait_time =
+ min_t(u8, ci->trigger_min_num_sec_before_ocr,
+ MEGASAS_MAX_SNAP_DUMP_WAIT_TIME);
+ break;
+
+ case DCMD_TIMEOUT:
+ switch (dcmd_timeout_ocr_possible(instance)) {
+ case INITIATE_OCR:
+ cmd->flags |= DRV_DCMD_SKIP_REFIRE;
+ megasas_reset_fusion(instance->host,
+ MFI_IO_TIMEOUT_OCR);
+ break;
+ case KILL_ADAPTER:
+ megaraid_sas_kill_hba(instance);
+ break;
+ case IGNORE_TIMEOUT:
+ dev_info(&instance->pdev->dev, "Ignore DCMD timeout: %s %d\n",
+ __func__, __LINE__);
+ break;
+ }
+ }
+
+ if (ret != DCMD_TIMEOUT)
+ megasas_return_cmd(instance, cmd);
+}
+
/**
* megasas_get_controller_info - Returns FW's controller structure
* @instance: Adapter soft state
@@ -4720,6 +4834,7 @@ megasas_get_ctrl_info(struct megasas_instance *instance)
* CPU endianness format.
*/
le32_to_cpus((u32 *)&ci->properties.OnOffProperties);
+ le16_to_cpus((u16 *)&ci->properties.on_off_properties2);
le32_to_cpus((u32 *)&ci->adapterOperations2);
le32_to_cpus((u32 *)&ci->adapterOperations3);
le16_to_cpus((u16 *)&ci->adapter_operations4);
@@ -4741,6 +4856,11 @@ megasas_get_ctrl_info(struct megasas_instance *instance)
/*Check whether controller is iMR or MR */
instance->is_imr = (ci->memory_size ? 0 : 1);
+
+ instance->snapdump_wait_time =
+ (ci->properties.on_off_properties2.enable_snap_dump ?
+ MEGASAS_DEFAULT_SNAP_DUMP_WAIT_TIME : 0);
+
dev_info(&instance->pdev->dev,
"controller type\t: %s(%dMB)\n",
instance->is_imr ? "iMR" : "MR",
@@ -4942,16 +5062,13 @@ fail_fw_init:
static u32
megasas_init_adapter_mfi(struct megasas_instance *instance)
{
- struct megasas_register_set __iomem *reg_set;
u32 context_sz;
u32 reply_q_sz;
- reg_set = instance->reg_set;
-
/*
* Get various operational parameters from status register
*/
- instance->max_fw_cmds = instance->instancet->read_fw_status_reg(reg_set) & 0x00FFFF;
+ instance->max_fw_cmds = instance->instancet->read_fw_status_reg(instance) & 0x00FFFF;
/*
* Reduce the max supported cmds by 1. This is to ensure that the
* reply_q_sz (1 more than the max cmd that driver may send)
@@ -4959,7 +5076,7 @@ megasas_init_adapter_mfi(struct megasas_instance *instance)
*/
instance->max_fw_cmds = instance->max_fw_cmds-1;
instance->max_mfi_cmds = instance->max_fw_cmds;
- instance->max_num_sge = (instance->instancet->read_fw_status_reg(reg_set) & 0xFF0000) >>
+ instance->max_num_sge = (instance->instancet->read_fw_status_reg(instance) & 0xFF0000) >>
0x10;
/*
* For MFI skinny adapters, MEGASAS_SKINNY_INT_CMDS commands
@@ -5015,7 +5132,7 @@ megasas_init_adapter_mfi(struct megasas_instance *instance)
instance->fw_support_ieee = 0;
instance->fw_support_ieee =
- (instance->instancet->read_fw_status_reg(reg_set) &
+ (instance->instancet->read_fw_status_reg(instance) &
0x04000000);
dev_notice(&instance->pdev->dev, "megasas_init_mfi: fw_support_ieee=%d",
@@ -5213,14 +5330,14 @@ static int megasas_init_fw(struct megasas_instance *instance)
{
u32 max_sectors_1;
u32 max_sectors_2, tmp_sectors, msix_enable;
- u32 scratch_pad_2, scratch_pad_3, scratch_pad_4;
+ u32 scratch_pad_1, scratch_pad_2, scratch_pad_3, status_reg;
resource_size_t base_addr;
- struct megasas_register_set __iomem *reg_set;
struct megasas_ctrl_info *ctrl_info = NULL;
unsigned long bar_list;
int i, j, loop, fw_msix_count = 0;
struct IOV_111 *iovPtr;
struct fusion_context *fusion;
+ bool do_adp_reset = true;
fusion = instance->ctrl_context;
@@ -5241,8 +5358,6 @@ static int megasas_init_fw(struct megasas_instance *instance)
goto fail_ioremap;
}
- reg_set = instance->reg_set;
-
if (instance->adapter_type != MFI_SERIES)
instance->instancet = &megasas_instance_template_fusion;
else {
@@ -5269,19 +5384,29 @@ static int megasas_init_fw(struct megasas_instance *instance)
}
if (megasas_transition_to_ready(instance, 0)) {
- atomic_set(&instance->fw_reset_no_pci_access, 1);
- instance->instancet->adp_reset
- (instance, instance->reg_set);
- atomic_set(&instance->fw_reset_no_pci_access, 0);
- dev_info(&instance->pdev->dev,
- "FW restarted successfully from %s!\n",
- __func__);
+ if (instance->adapter_type >= INVADER_SERIES) {
+ status_reg = instance->instancet->read_fw_status_reg(
+ instance);
+ do_adp_reset = status_reg & MFI_RESET_ADAPTER;
+ }
- /*waitting for about 30 second before retry*/
- ssleep(30);
+ if (do_adp_reset) {
+ atomic_set(&instance->fw_reset_no_pci_access, 1);
+ instance->instancet->adp_reset
+ (instance, instance->reg_set);
+ atomic_set(&instance->fw_reset_no_pci_access, 0);
+ dev_info(&instance->pdev->dev,
+ "FW restarted successfully from %s!\n",
+ __func__);
+
+ /*waiting for about 30 second before retry*/
+ ssleep(30);
- if (megasas_transition_to_ready(instance, 0))
+ if (megasas_transition_to_ready(instance, 0))
+ goto fail_ready_state;
+ } else {
goto fail_ready_state;
+ }
}
megasas_init_ctrl_params(instance);
@@ -5297,38 +5422,57 @@ static int megasas_init_fw(struct megasas_instance *instance)
fusion = instance->ctrl_context;
- if (instance->adapter_type == VENTURA_SERIES) {
- scratch_pad_3 =
- readl(&instance->reg_set->outbound_scratch_pad_3);
- instance->max_raid_mapsize = ((scratch_pad_3 >>
+ if (instance->adapter_type >= VENTURA_SERIES) {
+ scratch_pad_2 =
+ megasas_readl(instance,
+ &instance->reg_set->outbound_scratch_pad_2);
+ instance->max_raid_mapsize = ((scratch_pad_2 >>
MR_MAX_RAID_MAP_SIZE_OFFSET_SHIFT) &
MR_MAX_RAID_MAP_SIZE_MASK);
}
/* Check if MSI-X is supported while in ready state */
- msix_enable = (instance->instancet->read_fw_status_reg(reg_set) &
+ msix_enable = (instance->instancet->read_fw_status_reg(instance) &
0x4000000) >> 0x1a;
if (msix_enable && !msix_disable) {
int irq_flags = PCI_IRQ_MSIX;
- scratch_pad_2 = readl
- (&instance->reg_set->outbound_scratch_pad_2);
+ scratch_pad_1 = megasas_readl
+ (instance, &instance->reg_set->outbound_scratch_pad_1);
/* Check max MSI-X vectors */
if (fusion) {
if (instance->adapter_type == THUNDERBOLT_SERIES) {
/* Thunderbolt Series*/
- instance->msix_vectors = (scratch_pad_2
+ instance->msix_vectors = (scratch_pad_1
& MR_MAX_REPLY_QUEUES_OFFSET) + 1;
fw_msix_count = instance->msix_vectors;
- } else { /* Invader series supports more than 8 MSI-x vectors*/
- instance->msix_vectors = ((scratch_pad_2
+ } else {
+ instance->msix_vectors = ((scratch_pad_1
& MR_MAX_REPLY_QUEUES_EXT_OFFSET)
>> MR_MAX_REPLY_QUEUES_EXT_OFFSET_SHIFT) + 1;
- if (instance->msix_vectors > 16)
- instance->msix_combined = true;
+
+ /*
+ * For Invader series, > 8 MSI-x vectors
+ * supported by FW/HW implies combined
+ * reply queue mode is enabled.
+ * For Ventura series, > 16 MSI-x vectors
+ * supported by FW/HW implies combined
+ * reply queue mode is enabled.
+ */
+ switch (instance->adapter_type) {
+ case INVADER_SERIES:
+ if (instance->msix_vectors > 8)
+ instance->msix_combined = true;
+ break;
+ case AERO_SERIES:
+ case VENTURA_SERIES:
+ if (instance->msix_vectors > 16)
+ instance->msix_combined = true;
+ break;
+ }
if (rdpq_enable)
- instance->is_rdpq = (scratch_pad_2 & MR_RDPQ_MODE_OFFSET) ?
+ instance->is_rdpq = (scratch_pad_1 & MR_RDPQ_MODE_OFFSET) ?
1 : 0;
fw_msix_count = instance->msix_vectors;
/* Save 1-15 reply post index address to local memory
@@ -5377,7 +5521,7 @@ static int megasas_init_fw(struct megasas_instance *instance)
if (!instance->msix_vectors) {
i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
if (i < 0)
- goto fail_setup_irqs;
+ goto fail_init_adapter;
}
megasas_setup_reply_map(instance);
@@ -5403,13 +5547,14 @@ static int megasas_init_fw(struct megasas_instance *instance)
if (instance->instancet->init_adapter(instance))
goto fail_init_adapter;
- if (instance->adapter_type == VENTURA_SERIES) {
- scratch_pad_4 =
- readl(&instance->reg_set->outbound_scratch_pad_4);
- if ((scratch_pad_4 & MR_NVME_PAGE_SIZE_MASK) >=
+ if (instance->adapter_type >= VENTURA_SERIES) {
+ scratch_pad_3 =
+ megasas_readl(instance,
+ &instance->reg_set->outbound_scratch_pad_3);
+ if ((scratch_pad_3 & MR_NVME_PAGE_SIZE_MASK) >=
MR_DEFAULT_NVME_PAGE_SHIFT)
instance->nvme_page_size =
- (1 << (scratch_pad_4 & MR_NVME_PAGE_SIZE_MASK));
+ (1 << (scratch_pad_3 & MR_NVME_PAGE_SIZE_MASK));
dev_info(&instance->pdev->dev,
"NVME page size\t: (%d)\n", instance->nvme_page_size);
@@ -5439,7 +5584,7 @@ static int megasas_init_fw(struct megasas_instance *instance)
memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS);
/* stream detection initialization */
- if (instance->adapter_type == VENTURA_SERIES) {
+ if (instance->adapter_type >= VENTURA_SERIES) {
fusion->stream_detect_by_ld =
kcalloc(MAX_LOGICAL_DRIVES_EXT,
sizeof(struct LD_STREAM_DETECT *),
@@ -5539,6 +5684,11 @@ static int megasas_init_fw(struct megasas_instance *instance)
instance->crash_dump_buf = NULL;
}
+ if (instance->snapdump_wait_time) {
+ megasas_get_snapdump_properties(instance);
+ dev_info(&instance->pdev->dev, "Snap dump wait time\t: %d\n",
+ instance->snapdump_wait_time);
+ }
dev_info(&instance->pdev->dev,
"pci id\t\t: (0x%04x)/(0x%04x)/(0x%04x)/(0x%04x)\n",
@@ -5553,7 +5703,6 @@ static int megasas_init_fw(struct megasas_instance *instance)
dev_info(&instance->pdev->dev, "jbod sync map : %s\n",
instance->use_seqnum_jbod_fp ? "yes" : "no");
-
instance->max_sectors_per_req = instance->max_num_sge *
SGE_BUFFER_SIZE / 512;
if (tmp_sectors && (instance->max_sectors_per_req > tmp_sectors))
@@ -5576,19 +5725,32 @@ static int megasas_init_fw(struct megasas_instance *instance)
/* Launch SR-IOV heartbeat timer */
if (instance->requestorId) {
- if (!megasas_sriov_start_heartbeat(instance, 1))
+ if (!megasas_sriov_start_heartbeat(instance, 1)) {
megasas_start_timer(instance);
- else
+ } else {
instance->skip_heartbeat_timer_del = 1;
+ goto fail_get_ld_pd_list;
+ }
}
+ /*
+ * Create and start watchdog thread which will monitor
+ * controller state every 1 sec and trigger OCR when
+ * it enters fault state
+ */
+ if (instance->adapter_type != MFI_SERIES)
+ if (megasas_fusion_start_watchdog(instance) != SUCCESS)
+ goto fail_start_watchdog;
+
return 0;
+fail_start_watchdog:
+ if (instance->requestorId && !instance->skip_heartbeat_timer_del)
+ del_timer_sync(&instance->sriov_heartbeat_timer);
fail_get_ld_pd_list:
instance->instancet->disable_intr(instance);
-fail_init_adapter:
megasas_destroy_irqs(instance);
-fail_setup_irqs:
+fail_init_adapter:
if (instance->msix_vectors)
pci_free_irq_vectors(instance->pdev);
instance->msix_vectors = 0;
@@ -6022,13 +6184,13 @@ static int megasas_io_attach(struct megasas_instance *instance)
* @instance: Adapter soft state
* Description:
*
- * For Ventura, driver/FW will operate in 64bit DMA addresses.
+ * For Ventura, driver/FW will operate in 63bit DMA addresses.
*
* For invader-
* By default, driver/FW will operate in 32bit DMA addresses
* for consistent DMA mapping but if 32 bit consistent
- * DMA mask fails, driver will try with 64 bit consistent
- * mask provided FW is true 64bit DMA capable
+ * DMA mask fails, driver will try with 63 bit consistent
+ * mask provided FW is true 63bit DMA capable
*
* For older controllers(Thunderbolt and MFI based adapters)-
* driver/FW will operate in 32 bit consistent DMA addresses.
@@ -6038,31 +6200,31 @@ megasas_set_dma_mask(struct megasas_instance *instance)
{
u64 consistent_mask;
struct pci_dev *pdev;
- u32 scratch_pad_2;
+ u32 scratch_pad_1;
pdev = instance->pdev;
- consistent_mask = (instance->adapter_type == VENTURA_SERIES) ?
- DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
+ consistent_mask = (instance->adapter_type >= VENTURA_SERIES) ?
+ DMA_BIT_MASK(63) : DMA_BIT_MASK(32);
if (IS_DMA64) {
- if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) &&
+ if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(63)) &&
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
goto fail_set_dma_mask;
- if ((*pdev->dev.dma_mask == DMA_BIT_MASK(64)) &&
+ if ((*pdev->dev.dma_mask == DMA_BIT_MASK(63)) &&
(dma_set_coherent_mask(&pdev->dev, consistent_mask) &&
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))) {
/*
* If 32 bit DMA mask fails, then try for 64 bit mask
* for FW capable of handling 64 bit DMA.
*/
- scratch_pad_2 = readl
- (&instance->reg_set->outbound_scratch_pad_2);
+ scratch_pad_1 = megasas_readl
+ (instance, &instance->reg_set->outbound_scratch_pad_1);
- if (!(scratch_pad_2 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET))
+ if (!(scratch_pad_1 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET))
goto fail_set_dma_mask;
else if (dma_set_mask_and_coherent(&pdev->dev,
- DMA_BIT_MASK(64)))
+ DMA_BIT_MASK(63)))
goto fail_set_dma_mask;
}
} else if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
@@ -6074,8 +6236,8 @@ megasas_set_dma_mask(struct megasas_instance *instance)
instance->consistent_mask_64bit = true;
dev_info(&pdev->dev, "%s bit DMA mask and %s bit consistent mask\n",
- ((*pdev->dev.dma_mask == DMA_BIT_MASK(64)) ? "64" : "32"),
- (instance->consistent_mask_64bit ? "64" : "32"));
+ ((*pdev->dev.dma_mask == DMA_BIT_MASK(64)) ? "63" : "32"),
+ (instance->consistent_mask_64bit ? "63" : "32"));
return 0;
@@ -6088,12 +6250,14 @@ fail_set_dma_mask:
/*
* megasas_set_adapter_type - Set adapter type.
* Supported controllers can be divided in
- * 4 categories- enum MR_ADAPTER_TYPE {
- * MFI_SERIES = 1,
- * THUNDERBOLT_SERIES = 2,
- * INVADER_SERIES = 3,
- * VENTURA_SERIES = 4,
- * };
+ * different categories-
+ * enum MR_ADAPTER_TYPE {
+ * MFI_SERIES = 1,
+ * THUNDERBOLT_SERIES = 2,
+ * INVADER_SERIES = 3,
+ * VENTURA_SERIES = 4,
+ * AERO_SERIES = 5,
+ * };
* @instance: Adapter soft state
* return: void
*/
@@ -6104,6 +6268,12 @@ static inline void megasas_set_adapter_type(struct megasas_instance *instance)
instance->adapter_type = MFI_SERIES;
} else {
switch (instance->pdev->device) {
+ case PCI_DEVICE_ID_LSI_AERO_10E1:
+ case PCI_DEVICE_ID_LSI_AERO_10E2:
+ case PCI_DEVICE_ID_LSI_AERO_10E5:
+ case PCI_DEVICE_ID_LSI_AERO_10E6:
+ instance->adapter_type = AERO_SERIES;
+ break;
case PCI_DEVICE_ID_LSI_VENTURA:
case PCI_DEVICE_ID_LSI_CRUSADER:
case PCI_DEVICE_ID_LSI_HARPOON:
@@ -6171,6 +6341,7 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance)
if (megasas_alloc_mfi_ctrl_mem(instance))
goto fail;
break;
+ case AERO_SERIES:
case VENTURA_SERIES:
case THUNDERBOLT_SERIES:
case INVADER_SERIES:
@@ -6245,6 +6416,14 @@ int megasas_alloc_ctrl_dma_buffers(struct megasas_instance *instance)
"Failed to allocate PD list buffer\n");
return -ENOMEM;
}
+
+ instance->snapdump_prop = dma_alloc_coherent(&pdev->dev,
+ sizeof(struct MR_SNAPDUMP_PROPERTIES),
+ &instance->snapdump_prop_h, GFP_KERNEL);
+
+ if (!instance->snapdump_prop)
+ dev_err(&pdev->dev,
+ "Failed to allocate snapdump properties buffer\n");
}
instance->pd_list_buf =
@@ -6388,6 +6567,12 @@ void megasas_free_ctrl_dma_buffers(struct megasas_instance *instance)
dma_free_coherent(&pdev->dev, CRASH_DMA_BUF_SIZE,
instance->crash_dump_buf,
instance->crash_dump_h);
+
+ if (instance->snapdump_prop)
+ dma_free_coherent(&pdev->dev,
+ sizeof(struct MR_SNAPDUMP_PROPERTIES),
+ instance->snapdump_prop,
+ instance->snapdump_prop_h);
}
/*
@@ -6434,12 +6619,10 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance)
instance->disableOnlineCtrlReset = 1;
instance->UnevenSpanSupport = 0;
- if (instance->adapter_type != MFI_SERIES) {
+ if (instance->adapter_type != MFI_SERIES)
INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq);
- INIT_WORK(&instance->crash_init, megasas_fusion_crash_dump_wq);
- } else {
+ else
INIT_WORK(&instance->work_init, process_fw_state_change_wq);
- }
}
/**
@@ -6455,6 +6638,13 @@ static int megasas_probe_one(struct pci_dev *pdev,
struct megasas_instance *instance;
u16 control = 0;
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_LSI_AERO_10E1:
+ case PCI_DEVICE_ID_LSI_AERO_10E5:
+ dev_info(&pdev->dev, "Adapter is in configurable secure mode\n");
+ break;
+ }
+
/* Reset MSI-X in the kdump kernel */
if (reset_devices) {
pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
@@ -6708,6 +6898,10 @@ megasas_suspend(struct pci_dev *pdev, pm_message_t state)
if (instance->requestorId && !instance->skip_heartbeat_timer_del)
del_timer_sync(&instance->sriov_heartbeat_timer);
+ /* Stop the FW fault detection watchdog */
+ if (instance->adapter_type != MFI_SERIES)
+ megasas_fusion_stop_watchdog(instance);
+
megasas_flush_cache(instance);
megasas_shutdown_controller(instance, MR_DCMD_HIBERNATE_SHUTDOWN);
@@ -6843,8 +7037,16 @@ megasas_resume(struct pci_dev *pdev)
if (megasas_start_aen(instance))
dev_err(&instance->pdev->dev, "Start AEN failed\n");
+ /* Re-launch FW fault watchdog */
+ if (instance->adapter_type != MFI_SERIES)
+ if (megasas_fusion_start_watchdog(instance) != SUCCESS)
+ goto fail_start_watchdog;
+
return 0;
+fail_start_watchdog:
+ if (instance->requestorId && !instance->skip_heartbeat_timer_del)
+ del_timer_sync(&instance->sriov_heartbeat_timer);
fail_init_mfi:
megasas_free_ctrl_dma_buffers(instance);
megasas_free_ctrl_mem(instance);
@@ -6912,6 +7114,10 @@ static void megasas_detach_one(struct pci_dev *pdev)
if (instance->requestorId && !instance->skip_heartbeat_timer_del)
del_timer_sync(&instance->sriov_heartbeat_timer);
+ /* Stop the FW fault detection watchdog */
+ if (instance->adapter_type != MFI_SERIES)
+ megasas_fusion_stop_watchdog(instance);
+
if (instance->fw_crash_state != UNAVAILABLE)
megasas_free_host_crash_buffer(instance);
scsi_remove_host(instance->host);
@@ -6956,7 +7162,7 @@ skip_firing_dcmds:
if (instance->msix_vectors)
pci_free_irq_vectors(instance->pdev);
- if (instance->adapter_type == VENTURA_SERIES) {
+ if (instance->adapter_type >= VENTURA_SERIES) {
for (i = 0; i < MAX_LOGICAL_DRIVES_EXT; ++i)
kfree(fusion->stream_detect_by_ld[i]);
kfree(fusion->stream_detect_by_ld);
@@ -7737,8 +7943,15 @@ megasas_aen_polling(struct work_struct *work)
break;
case MR_EVT_CTRL_PROP_CHANGED:
- dcmd_ret = megasas_get_ctrl_info(instance);
- break;
+ dcmd_ret = megasas_get_ctrl_info(instance);
+ if (dcmd_ret == DCMD_SUCCESS &&
+ instance->snapdump_wait_time) {
+ megasas_get_snapdump_properties(instance);
+ dev_info(&instance->pdev->dev,
+ "Snap dump wait time\t: %d\n",
+ instance->snapdump_wait_time);
+ }
+ break;
default:
doscan = 0;
break;
diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c
index 59ecbb3b53b5..87c2c0472c8f 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fp.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fp.c
@@ -2,7 +2,8 @@
* Linux MegaRAID driver for SAS based RAID controllers
*
* Copyright (c) 2009-2013 LSI Corporation
- * Copyright (c) 2013-2014 Avago Technologies
+ * Copyright (c) 2013-2016 Avago Technologies
+ * Copyright (c) 2016-2018 Broadcom Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -19,17 +20,14 @@
*
* FILE: megaraid_sas_fp.c
*
- * Authors: Avago Technologies
+ * Authors: Broadcom Inc.
* Sumant Patro
* Varad Talamacki
* Manoj Jose
- * Kashyap Desai <kashyap.desai@avagotech.com>
- * Sumit Saxena <sumit.saxena@avagotech.com>
+ * Kashyap Desai <kashyap.desai@broadcom.com>
+ * Sumit Saxena <sumit.saxena@broadcom.com>
*
- * Send feedback to: megaraidlinux.pdl@avagotech.com
- *
- * Mail to: Avago Technologies, 350 West Trimble Road, Building 90,
- * San Jose, California 95131
+ * Send feedback to: megaraidlinux.pdl@broadcom.com
*/
#include <linux/kernel.h>
@@ -745,7 +743,7 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld,
*pDevHandle = MR_PdDevHandleGet(pd, map);
*pPdInterface = MR_PdInterfaceTypeGet(pd, map);
/* get second pd also for raid 1/10 fast path writes*/
- if ((instance->adapter_type == VENTURA_SERIES) &&
+ if ((instance->adapter_type >= VENTURA_SERIES) &&
(raid->level == 1) &&
!io_info->isRead) {
r1_alt_pd = MR_ArPdGet(arRef, physArm + 1, map);
@@ -770,7 +768,7 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld,
}
*pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk);
- if (instance->adapter_type == VENTURA_SERIES) {
+ if (instance->adapter_type >= VENTURA_SERIES) {
((struct RAID_CONTEXT_G35 *)pRAID_Context)->span_arm =
(span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm;
io_info->span_arm =
@@ -861,7 +859,7 @@ u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow,
*pDevHandle = MR_PdDevHandleGet(pd, map);
*pPdInterface = MR_PdInterfaceTypeGet(pd, map);
/* get second pd also for raid 1/10 fast path writes*/
- if ((instance->adapter_type == VENTURA_SERIES) &&
+ if ((instance->adapter_type >= VENTURA_SERIES) &&
(raid->level == 1) &&
!io_info->isRead) {
r1_alt_pd = MR_ArPdGet(arRef, physArm + 1, map);
@@ -888,7 +886,7 @@ u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow,
}
*pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk);
- if (instance->adapter_type == VENTURA_SERIES) {
+ if (instance->adapter_type >= VENTURA_SERIES) {
((struct RAID_CONTEXT_G35 *)pRAID_Context)->span_arm =
(span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm;
io_info->span_arm =
@@ -1266,7 +1264,7 @@ void mr_update_load_balance_params(struct MR_DRV_RAID_MAP_ALL *drv_map,
for (ldCount = 0; ldCount < MAX_LOGICAL_DRIVES_EXT; ldCount++) {
ld = MR_TargetIdToLdGet(ldCount, drv_map);
- if (ld >= MAX_LOGICAL_DRIVES_EXT) {
+ if (ld >= MAX_LOGICAL_DRIVES_EXT - 1) {
lbInfo[ldCount].loadBalanceFlag = 0;
continue;
}
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index f74b5ea24f0f..211c17c33aa0 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2,7 +2,8 @@
* Linux MegaRAID driver for SAS based RAID controllers
*
* Copyright (c) 2009-2013 LSI Corporation
- * Copyright (c) 2013-2014 Avago Technologies
+ * Copyright (c) 2013-2016 Avago Technologies
+ * Copyright (c) 2016-2018 Broadcom Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -19,16 +20,13 @@
*
* FILE: megaraid_sas_fusion.c
*
- * Authors: Avago Technologies
+ * Authors: Broadcom Inc.
* Sumant Patro
* Adam Radford
- * Kashyap Desai <kashyap.desai@avagotech.com>
- * Sumit Saxena <sumit.saxena@avagotech.com>
+ * Kashyap Desai <kashyap.desai@broadcom.com>
+ * Sumit Saxena <sumit.saxena@broadcom.com>
*
- * Send feedback to: megaraidlinux.pdl@avagotech.com
- *
- * Mail to: Avago Technologies, 350 West Trimble Road, Building 90,
- * San Jose, California 95131
+ * Send feedback to: megaraidlinux.pdl@broadcom.com
*/
#include <linux/kernel.h>
@@ -48,6 +46,7 @@
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -74,7 +73,7 @@ void
megasas_return_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd);
int megasas_alloc_cmds(struct megasas_instance *instance);
int
-megasas_clear_intr_fusion(struct megasas_register_set __iomem *regs);
+megasas_clear_intr_fusion(struct megasas_instance *instance);
int
megasas_issue_polled(struct megasas_instance *instance,
struct megasas_cmd *cmd);
@@ -95,6 +94,9 @@ static void megasas_free_rdpq_fusion(struct megasas_instance *instance);
static void megasas_free_reply_fusion(struct megasas_instance *instance);
static inline
void megasas_configure_queue_sizes(struct megasas_instance *instance);
+static void megasas_fusion_crash_dump(struct megasas_instance *instance);
+extern u32 megasas_readl(struct megasas_instance *instance,
+ const volatile void __iomem *addr);
/**
* megasas_check_same_4gb_region - check if allocation
@@ -165,9 +167,11 @@ megasas_disable_intr_fusion(struct megasas_instance *instance)
}
int
-megasas_clear_intr_fusion(struct megasas_register_set __iomem *regs)
+megasas_clear_intr_fusion(struct megasas_instance *instance)
{
u32 status;
+ struct megasas_register_set __iomem *regs;
+ regs = instance->reg_set;
/*
* Check if it is our interrupt
*/
@@ -262,16 +266,17 @@ megasas_fusion_update_can_queue(struct megasas_instance *instance, int fw_boot_c
reg_set = instance->reg_set;
- /* ventura FW does not fill outbound_scratch_pad_3 with queue depth */
+ /* ventura FW does not fill outbound_scratch_pad_2 with queue depth */
if (instance->adapter_type < VENTURA_SERIES)
cur_max_fw_cmds =
- readl(&instance->reg_set->outbound_scratch_pad_3) & 0x00FFFF;
+ megasas_readl(instance,
+ &instance->reg_set->outbound_scratch_pad_2) & 0x00FFFF;
if (dual_qdepth_disable || !cur_max_fw_cmds)
- cur_max_fw_cmds = instance->instancet->read_fw_status_reg(reg_set) & 0x00FFFF;
+ cur_max_fw_cmds = instance->instancet->read_fw_status_reg(instance) & 0x00FFFF;
else
ldio_threshold =
- (instance->instancet->read_fw_status_reg(reg_set) & 0x00FFFF) - MEGASAS_FUSION_IOCTL_CMDS;
+ (instance->instancet->read_fw_status_reg(instance) & 0x00FFFF) - MEGASAS_FUSION_IOCTL_CMDS;
dev_info(&instance->pdev->dev,
"Current firmware supports maximum commands: %d\t LDIO threshold: %d\n",
@@ -807,10 +812,8 @@ megasas_free_rdpq_fusion(struct megasas_instance *instance) {
}
- if (fusion->reply_frames_desc_pool)
- dma_pool_destroy(fusion->reply_frames_desc_pool);
- if (fusion->reply_frames_desc_pool_align)
- dma_pool_destroy(fusion->reply_frames_desc_pool_align);
+ dma_pool_destroy(fusion->reply_frames_desc_pool);
+ dma_pool_destroy(fusion->reply_frames_desc_pool_align);
if (fusion->rdpq_virt)
dma_free_coherent(&instance->pdev->dev,
@@ -830,8 +833,7 @@ megasas_free_reply_fusion(struct megasas_instance *instance) {
fusion->reply_frames_desc[0],
fusion->reply_frames_desc_phys[0]);
- if (fusion->reply_frames_desc_pool)
- dma_pool_destroy(fusion->reply_frames_desc_pool);
+ dma_pool_destroy(fusion->reply_frames_desc_pool);
}
@@ -974,7 +976,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
struct megasas_header *frame_hdr;
const char *sys_info;
MFI_CAPABILITIES *drv_ops;
- u32 scratch_pad_2;
+ u32 scratch_pad_1;
ktime_t time;
bool cur_fw_64bit_dma_capable;
@@ -985,14 +987,14 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
cmd = fusion->ioc_init_cmd;
- scratch_pad_2 = readl
- (&instance->reg_set->outbound_scratch_pad_2);
+ scratch_pad_1 = megasas_readl
+ (instance, &instance->reg_set->outbound_scratch_pad_1);
- cur_rdpq_mode = (scratch_pad_2 & MR_RDPQ_MODE_OFFSET) ? 1 : 0;
+ cur_rdpq_mode = (scratch_pad_1 & MR_RDPQ_MODE_OFFSET) ? 1 : 0;
if (instance->adapter_type == INVADER_SERIES) {
cur_fw_64bit_dma_capable =
- (scratch_pad_2 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET) ? true : false;
+ (scratch_pad_1 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET) ? true : false;
if (instance->consistent_mask_64bit && !cur_fw_64bit_dma_capable) {
dev_err(&instance->pdev->dev, "Driver was operating on 64bit "
@@ -1010,7 +1012,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
goto fail_fw_init;
}
- instance->fw_sync_cache_support = (scratch_pad_2 &
+ instance->fw_sync_cache_support = (scratch_pad_1 &
MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0;
dev_info(&instance->pdev->dev, "FW supports sync cache\t: %s\n",
instance->fw_sync_cache_support ? "Yes" : "No");
@@ -1043,9 +1045,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
frame_hdr = &cmd->frame->hdr;
frame_hdr->cmd_status = 0xFF;
- frame_hdr->flags = cpu_to_le16(
- le16_to_cpu(frame_hdr->flags) |
- MFI_FRAME_DONT_POST_IN_REPLY_QUEUE);
+ frame_hdr->flags |= cpu_to_le16(MFI_FRAME_DONT_POST_IN_REPLY_QUEUE);
init_frame->cmd = MFI_CMD_INIT;
init_frame->cmd_status = 0xFF;
@@ -1107,7 +1107,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
instance->instancet->disable_intr(instance);
for (i = 0; i < (10 * 1000); i += 20) {
- if (readl(&instance->reg_set->doorbell) & 1)
+ if (megasas_readl(instance, &instance->reg_set->doorbell) & 1)
msleep(20);
else
break;
@@ -1115,7 +1115,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
megasas_fire_cmd_fusion(instance, &req_desc);
- wait_and_poll(instance, cmd, MFI_POLL_TIMEOUT_SECS);
+ wait_and_poll(instance, cmd, MFI_IO_TIMEOUT_SECS);
frame_hdr = &cmd->frame->hdr;
if (frame_hdr->cmd_status != 0) {
@@ -1559,14 +1559,12 @@ void megasas_configure_queue_sizes(struct megasas_instance *instance)
fusion = instance->ctrl_context;
max_cmd = instance->max_fw_cmds;
- if (instance->adapter_type == VENTURA_SERIES)
+ if (instance->adapter_type >= VENTURA_SERIES)
instance->max_mpt_cmds = instance->max_fw_cmds * RAID_1_PEER_CMDS;
else
instance->max_mpt_cmds = instance->max_fw_cmds;
- instance->max_scsi_cmds = instance->max_fw_cmds -
- (MEGASAS_FUSION_INTERNAL_CMDS +
- MEGASAS_FUSION_IOCTL_CMDS);
+ instance->max_scsi_cmds = instance->max_fw_cmds - instance->max_mfi_cmds;
instance->cur_can_queue = instance->max_scsi_cmds;
instance->host->can_queue = instance->cur_can_queue;
@@ -1627,8 +1625,7 @@ static inline void megasas_free_ioc_init_cmd(struct megasas_instance *instance)
fusion->ioc_init_cmd->frame,
fusion->ioc_init_cmd->frame_phys_addr);
- if (fusion->ioc_init_cmd)
- kfree(fusion->ioc_init_cmd);
+ kfree(fusion->ioc_init_cmd);
}
/**
@@ -1642,7 +1639,7 @@ megasas_init_adapter_fusion(struct megasas_instance *instance)
{
struct megasas_register_set __iomem *reg_set;
struct fusion_context *fusion;
- u32 scratch_pad_2;
+ u32 scratch_pad_1;
int i = 0, count;
fusion = instance->ctrl_context;
@@ -1659,20 +1656,21 @@ megasas_init_adapter_fusion(struct megasas_instance *instance)
megasas_configure_queue_sizes(instance);
- scratch_pad_2 = readl(&instance->reg_set->outbound_scratch_pad_2);
- /* If scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK is set,
+ scratch_pad_1 = megasas_readl(instance,
+ &instance->reg_set->outbound_scratch_pad_1);
+ /* If scratch_pad_1 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK is set,
* Firmware support extended IO chain frame which is 4 times more than
* legacy Firmware.
* Legacy Firmware - Frame size is (8 * 128) = 1K
* 1M IO Firmware - Frame size is (8 * 128 * 4) = 4K
*/
- if (scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK)
+ if (scratch_pad_1 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK)
instance->max_chain_frame_sz =
- ((scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_MASK) >>
+ ((scratch_pad_1 & MEGASAS_MAX_CHAIN_SIZE_MASK) >>
MEGASAS_MAX_CHAIN_SHIFT) * MEGASAS_1MB_IO;
else
instance->max_chain_frame_sz =
- ((scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_MASK) >>
+ ((scratch_pad_1 & MEGASAS_MAX_CHAIN_SIZE_MASK) >>
MEGASAS_MAX_CHAIN_SHIFT) * MEGASAS_256K_IO;
if (instance->max_chain_frame_sz < MEGASAS_CHAIN_FRAME_SZ_MIN) {
@@ -1760,6 +1758,90 @@ fail_alloc_mfi_cmds:
}
/**
+ * megasas_fault_detect_work - Worker function of
+ * FW fault handling workqueue.
+ */
+static void
+megasas_fault_detect_work(struct work_struct *work)
+{
+ struct megasas_instance *instance =
+ container_of(work, struct megasas_instance,
+ fw_fault_work.work);
+ u32 fw_state, dma_state, status;
+
+ /* Check the fw state */
+ fw_state = instance->instancet->read_fw_status_reg(instance) &
+ MFI_STATE_MASK;
+
+ if (fw_state == MFI_STATE_FAULT) {
+ dma_state = instance->instancet->read_fw_status_reg(instance) &
+ MFI_STATE_DMADONE;
+ /* Start collecting crash, if DMA bit is done */
+ if (instance->crash_dump_drv_support &&
+ instance->crash_dump_app_support && dma_state) {
+ megasas_fusion_crash_dump(instance);
+ } else {
+ if (instance->unload == 0) {
+ status = megasas_reset_fusion(instance->host, 0);
+ if (status != SUCCESS) {
+ dev_err(&instance->pdev->dev,
+ "Failed from %s %d, do not re-arm timer\n",
+ __func__, __LINE__);
+ return;
+ }
+ }
+ }
+ }
+
+ if (instance->fw_fault_work_q)
+ queue_delayed_work(instance->fw_fault_work_q,
+ &instance->fw_fault_work,
+ msecs_to_jiffies(MEGASAS_WATCHDOG_THREAD_INTERVAL));
+}
+
+int
+megasas_fusion_start_watchdog(struct megasas_instance *instance)
+{
+ /* Check if the Fault WQ is already started */
+ if (instance->fw_fault_work_q)
+ return SUCCESS;
+
+ INIT_DELAYED_WORK(&instance->fw_fault_work, megasas_fault_detect_work);
+
+ snprintf(instance->fault_handler_work_q_name,
+ sizeof(instance->fault_handler_work_q_name),
+ "poll_megasas%d_status", instance->host->host_no);
+
+ instance->fw_fault_work_q =
+ create_singlethread_workqueue(instance->fault_handler_work_q_name);
+ if (!instance->fw_fault_work_q) {
+ dev_err(&instance->pdev->dev, "Failed from %s %d\n",
+ __func__, __LINE__);
+ return FAILED;
+ }
+
+ queue_delayed_work(instance->fw_fault_work_q,
+ &instance->fw_fault_work,
+ msecs_to_jiffies(MEGASAS_WATCHDOG_THREAD_INTERVAL));
+
+ return SUCCESS;
+}
+
+void
+megasas_fusion_stop_watchdog(struct megasas_instance *instance)
+{
+ struct workqueue_struct *wq;
+
+ if (instance->fw_fault_work_q) {
+ wq = instance->fw_fault_work_q;
+ instance->fw_fault_work_q = NULL;
+ if (!cancel_delayed_work_sync(&instance->fw_fault_work))
+ flush_workqueue(wq);
+ destroy_workqueue(wq);
+ }
+}
+
+/**
* map_cmd_status - Maps FW cmd status to OS cmd status
* @cmd : Pointer to cmd
* @status : status of cmd returned by FW
@@ -2543,19 +2625,22 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
struct MR_DRV_RAID_MAP_ALL *local_map_ptr;
u8 *raidLUN;
unsigned long spinlock_flags;
- union RAID_CONTEXT_UNION *praid_context;
struct MR_LD_RAID *raid = NULL;
struct MR_PRIV_DEVICE *mrdev_priv;
+ struct RAID_CONTEXT *rctx;
+ struct RAID_CONTEXT_G35 *rctx_g35;
device_id = MEGASAS_DEV_INDEX(scp);
fusion = instance->ctrl_context;
io_request = cmd->io_request;
- io_request->RaidContext.raid_context.virtual_disk_tgt_id =
- cpu_to_le16(device_id);
- io_request->RaidContext.raid_context.status = 0;
- io_request->RaidContext.raid_context.ex_status = 0;
+ rctx = &io_request->RaidContext.raid_context;
+ rctx_g35 = &io_request->RaidContext.raid_context_g35;
+
+ rctx->virtual_disk_tgt_id = cpu_to_le16(device_id);
+ rctx->status = 0;
+ rctx->ex_status = 0;
req_desc = (union MEGASAS_REQUEST_DESCRIPTOR_UNION *)cmd->request_desc;
@@ -2631,11 +2716,10 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
raid = MR_LdRaidGet(ld, local_map_ptr);
if (!raid || (!fusion->fast_path_io)) {
- io_request->RaidContext.raid_context.reg_lock_flags = 0;
+ rctx->reg_lock_flags = 0;
fp_possible = false;
} else {
- if (MR_BuildRaidContext(instance, &io_info,
- &io_request->RaidContext.raid_context,
+ if (MR_BuildRaidContext(instance, &io_info, rctx,
local_map_ptr, &raidLUN))
fp_possible = (io_info.fpOkForIo > 0) ? true : false;
}
@@ -2643,9 +2727,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
cmd->request_desc->SCSIIO.MSIxIndex =
instance->reply_map[raw_smp_processor_id()];
- praid_context = &io_request->RaidContext;
-
- if (instance->adapter_type == VENTURA_SERIES) {
+ if (instance->adapter_type >= VENTURA_SERIES) {
/* FP for Optimal raid level 1.
* All large RAID-1 writes (> 32 KiB, both WT and WB modes)
* are built by the driver as LD I/Os.
@@ -2681,17 +2763,17 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
/* In ventura if stream detected for a read and it is
* read ahead capable make this IO as LDIO
*/
- if (is_stream_detected(&io_request->RaidContext.raid_context_g35))
+ if (is_stream_detected(rctx_g35))
fp_possible = false;
}
/* If raid is NULL, set CPU affinity to default CPU0 */
if (raid)
- megasas_set_raidflag_cpu_affinity(praid_context,
+ megasas_set_raidflag_cpu_affinity(&io_request->RaidContext,
raid, fp_possible, io_info.isRead,
scsi_buff_len);
else
- praid_context->raid_context_g35.routing_flags |=
+ rctx_g35->routing_flags |=
(MR_RAID_CTX_CPUSEL_0 << MR_RAID_CTX_ROUTINGFLAGS_CPUSEL_SHIFT);
}
@@ -2703,25 +2785,20 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
(MPI2_REQ_DESCRIPT_FLAGS_FP_IO
<< MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
if (instance->adapter_type == INVADER_SERIES) {
- if (io_request->RaidContext.raid_context.reg_lock_flags ==
- REGION_TYPE_UNUSED)
+ if (rctx->reg_lock_flags == REGION_TYPE_UNUSED)
cmd->request_desc->SCSIIO.RequestFlags =
(MEGASAS_REQ_DESCRIPT_FLAGS_NO_LOCK <<
MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
- io_request->RaidContext.raid_context.type
- = MPI2_TYPE_CUDA;
- io_request->RaidContext.raid_context.nseg = 0x1;
+ rctx->type = MPI2_TYPE_CUDA;
+ rctx->nseg = 0x1;
io_request->IoFlags |= cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH);
- io_request->RaidContext.raid_context.reg_lock_flags |=
+ rctx->reg_lock_flags |=
(MR_RL_FLAGS_GRANT_DESTINATION_CUDA |
MR_RL_FLAGS_SEQ_NUM_ENABLE);
- } else if (instance->adapter_type == VENTURA_SERIES) {
- io_request->RaidContext.raid_context_g35.nseg_type |=
- (1 << RAID_CONTEXT_NSEG_SHIFT);
- io_request->RaidContext.raid_context_g35.nseg_type |=
- (MPI2_TYPE_CUDA << RAID_CONTEXT_TYPE_SHIFT);
- io_request->RaidContext.raid_context_g35.routing_flags |=
- (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT);
+ } else if (instance->adapter_type >= VENTURA_SERIES) {
+ rctx_g35->nseg_type |= (1 << RAID_CONTEXT_NSEG_SHIFT);
+ rctx_g35->nseg_type |= (MPI2_TYPE_CUDA << RAID_CONTEXT_TYPE_SHIFT);
+ rctx_g35->routing_flags |= (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT);
io_request->IoFlags |=
cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH);
}
@@ -2734,17 +2811,15 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
&io_info, local_map_ptr);
scp->SCp.Status |= MEGASAS_LOAD_BALANCE_FLAG;
cmd->pd_r1_lb = io_info.pd_after_lb;
- if (instance->adapter_type == VENTURA_SERIES)
- io_request->RaidContext.raid_context_g35.span_arm
- = io_info.span_arm;
+ if (instance->adapter_type >= VENTURA_SERIES)
+ rctx_g35->span_arm = io_info.span_arm;
else
- io_request->RaidContext.raid_context.span_arm
- = io_info.span_arm;
+ rctx->span_arm = io_info.span_arm;
} else
scp->SCp.Status &= ~MEGASAS_LOAD_BALANCE_FLAG;
- if (instance->adapter_type == VENTURA_SERIES)
+ if (instance->adapter_type >= VENTURA_SERIES)
cmd->r1_alt_dev_handle = io_info.r1_alt_dev_handle;
else
cmd->r1_alt_dev_handle = MR_DEVHANDLE_INVALID;
@@ -2762,31 +2837,26 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
/* populate the LUN field */
memcpy(io_request->LUN, raidLUN, 8);
} else {
- io_request->RaidContext.raid_context.timeout_value =
+ rctx->timeout_value =
cpu_to_le16(local_map_ptr->raidMap.fpPdIoTimeoutSec);
cmd->request_desc->SCSIIO.RequestFlags =
(MEGASAS_REQ_DESCRIPT_FLAGS_LD_IO
<< MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
if (instance->adapter_type == INVADER_SERIES) {
if (io_info.do_fp_rlbypass ||
- (io_request->RaidContext.raid_context.reg_lock_flags
- == REGION_TYPE_UNUSED))
+ (rctx->reg_lock_flags == REGION_TYPE_UNUSED))
cmd->request_desc->SCSIIO.RequestFlags =
(MEGASAS_REQ_DESCRIPT_FLAGS_NO_LOCK <<
MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
- io_request->RaidContext.raid_context.type
- = MPI2_TYPE_CUDA;
- io_request->RaidContext.raid_context.reg_lock_flags |=
+ rctx->type = MPI2_TYPE_CUDA;
+ rctx->reg_lock_flags |=
(MR_RL_FLAGS_GRANT_DESTINATION_CPU0 |
- MR_RL_FLAGS_SEQ_NUM_ENABLE);
- io_request->RaidContext.raid_context.nseg = 0x1;
- } else if (instance->adapter_type == VENTURA_SERIES) {
- io_request->RaidContext.raid_context_g35.routing_flags |=
- (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT);
- io_request->RaidContext.raid_context_g35.nseg_type |=
- (1 << RAID_CONTEXT_NSEG_SHIFT);
- io_request->RaidContext.raid_context_g35.nseg_type |=
- (MPI2_TYPE_CUDA << RAID_CONTEXT_TYPE_SHIFT);
+ MR_RL_FLAGS_SEQ_NUM_ENABLE);
+ rctx->nseg = 0x1;
+ } else if (instance->adapter_type >= VENTURA_SERIES) {
+ rctx_g35->routing_flags |= (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT);
+ rctx_g35->nseg_type |= (1 << RAID_CONTEXT_NSEG_SHIFT);
+ rctx_g35->nseg_type |= (MPI2_TYPE_CUDA << RAID_CONTEXT_TYPE_SHIFT);
}
io_request->Function = MEGASAS_MPI2_FUNCTION_LD_IO_REQUEST;
io_request->DevHandle = cpu_to_le16(device_id);
@@ -2832,7 +2902,7 @@ static void megasas_build_ld_nonrw_fusion(struct megasas_instance *instance,
device_id < instance->fw_supported_vd_count)) {
ld = MR_TargetIdToLdGet(device_id, local_map_ptr);
- if (ld >= instance->fw_supported_vd_count)
+ if (ld >= instance->fw_supported_vd_count - 1)
fp_possible = 0;
else {
raid = MR_LdRaidGet(ld, local_map_ptr);
@@ -2855,7 +2925,7 @@ static void megasas_build_ld_nonrw_fusion(struct megasas_instance *instance,
/* set RAID context values */
pRAID_Context->config_seq_num = raid->seqNum;
- if (instance->adapter_type != VENTURA_SERIES)
+ if (instance->adapter_type < VENTURA_SERIES)
pRAID_Context->reg_lock_flags = REGION_TYPE_SHARED_READ;
pRAID_Context->timeout_value =
cpu_to_le16(raid->fpIoTimeoutForLd);
@@ -2940,7 +3010,7 @@ megasas_build_syspd_fusion(struct megasas_instance *instance,
cpu_to_le16(device_id + (MAX_PHYSICAL_DEVICES - 1));
pRAID_Context->config_seq_num = pd_sync->seq[pd_index].seqNum;
io_request->DevHandle = pd_sync->seq[pd_index].devHandle;
- if (instance->adapter_type == VENTURA_SERIES) {
+ if (instance->adapter_type >= VENTURA_SERIES) {
io_request->RaidContext.raid_context_g35.routing_flags |=
(1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT);
io_request->RaidContext.raid_context_g35.nseg_type |=
@@ -3073,7 +3143,7 @@ megasas_build_io_fusion(struct megasas_instance *instance,
return 1;
}
- if (instance->adapter_type == VENTURA_SERIES) {
+ if (instance->adapter_type >= VENTURA_SERIES) {
set_num_sge(&io_request->RaidContext.raid_context_g35, sge_count);
cpu_to_le16s(&io_request->RaidContext.raid_context_g35.routing_flags);
cpu_to_le16s(&io_request->RaidContext.raid_context_g35.nseg_type);
@@ -3385,7 +3455,7 @@ complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex)
atomic_dec(&lbinfo->scsi_pending_cmds[cmd_fusion->pd_r1_lb]);
cmd_fusion->scmd->SCp.Status &= ~MEGASAS_LOAD_BALANCE_FLAG;
}
- //Fall thru and complete IO
+ /* Fall through - and complete IO */
case MEGASAS_MPI2_FUNCTION_LD_IO_REQUEST: /* LD-IO Path */
atomic_dec(&instance->fw_outstanding);
if (cmd_fusion->r1_alt_dev_handle == MR_DEVHANDLE_INVALID) {
@@ -3501,18 +3571,13 @@ megasas_complete_cmd_dpc_fusion(unsigned long instance_addr)
{
struct megasas_instance *instance =
(struct megasas_instance *)instance_addr;
- unsigned long flags;
u32 count, MSIxIndex;
count = instance->msix_vectors > 0 ? instance->msix_vectors : 1;
/* If we have already declared adapter dead, donot complete cmds */
- spin_lock_irqsave(&instance->hba_lock, flags);
- if (atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR) {
- spin_unlock_irqrestore(&instance->hba_lock, flags);
+ if (atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR)
return;
- }
- spin_unlock_irqrestore(&instance->hba_lock, flags);
for (MSIxIndex = 0 ; MSIxIndex < count; MSIxIndex++)
complete_cmd_fusion(instance, MSIxIndex);
@@ -3525,48 +3590,24 @@ irqreturn_t megasas_isr_fusion(int irq, void *devp)
{
struct megasas_irq_context *irq_context = devp;
struct megasas_instance *instance = irq_context->instance;
- u32 mfiStatus, fw_state, dma_state;
+ u32 mfiStatus;
if (instance->mask_interrupts)
return IRQ_NONE;
if (!instance->msix_vectors) {
- mfiStatus = instance->instancet->clear_intr(instance->reg_set);
+ mfiStatus = instance->instancet->clear_intr(instance);
if (!mfiStatus)
return IRQ_NONE;
}
/* If we are resetting, bail */
if (test_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags)) {
- instance->instancet->clear_intr(instance->reg_set);
+ instance->instancet->clear_intr(instance);
return IRQ_HANDLED;
}
- if (!complete_cmd_fusion(instance, irq_context->MSIxIndex)) {
- instance->instancet->clear_intr(instance->reg_set);
- /* If we didn't complete any commands, check for FW fault */
- fw_state = instance->instancet->read_fw_status_reg(
- instance->reg_set) & MFI_STATE_MASK;
- dma_state = instance->instancet->read_fw_status_reg
- (instance->reg_set) & MFI_STATE_DMADONE;
- if (instance->crash_dump_drv_support &&
- instance->crash_dump_app_support) {
- /* Start collecting crash, if DMA bit is done */
- if ((fw_state == MFI_STATE_FAULT) && dma_state)
- schedule_work(&instance->crash_init);
- else if (fw_state == MFI_STATE_FAULT) {
- if (instance->unload == 0)
- schedule_work(&instance->work_init);
- }
- } else if (fw_state == MFI_STATE_FAULT) {
- dev_warn(&instance->pdev->dev, "Iop2SysDoorbellInt"
- "for scsi%d\n", instance->host->host_no);
- if (instance->unload == 0)
- schedule_work(&instance->work_init);
- }
- }
-
- return IRQ_HANDLED;
+ return complete_cmd_fusion(instance, irq_context->MSIxIndex);
}
/**
@@ -3692,9 +3733,9 @@ megasas_release_fusion(struct megasas_instance *instance)
* @regs: MFI register set
*/
static u32
-megasas_read_fw_status_reg_fusion(struct megasas_register_set __iomem *regs)
+megasas_read_fw_status_reg_fusion(struct megasas_instance *instance)
{
- return readl(&(regs)->outbound_scratch_pad);
+ return megasas_readl(instance, &instance->reg_set->outbound_scratch_pad_0);
}
/**
@@ -3756,11 +3797,12 @@ megasas_adp_reset_fusion(struct megasas_instance *instance,
writel(MPI2_WRSEQ_6TH_KEY_VALUE, &instance->reg_set->fusion_seq_offset);
/* Check that the diag write enable (DRWE) bit is on */
- host_diag = readl(&instance->reg_set->fusion_host_diag);
+ host_diag = megasas_readl(instance, &instance->reg_set->fusion_host_diag);
retry = 0;
while (!(host_diag & HOST_DIAG_WRITE_ENABLE)) {
msleep(100);
- host_diag = readl(&instance->reg_set->fusion_host_diag);
+ host_diag = megasas_readl(instance,
+ &instance->reg_set->fusion_host_diag);
if (retry++ == 100) {
dev_warn(&instance->pdev->dev,
"Host diag unlock failed from %s %d\n",
@@ -3777,11 +3819,12 @@ megasas_adp_reset_fusion(struct megasas_instance *instance,
msleep(3000);
/* Make sure reset adapter bit is cleared */
- host_diag = readl(&instance->reg_set->fusion_host_diag);
+ host_diag = megasas_readl(instance, &instance->reg_set->fusion_host_diag);
retry = 0;
while (host_diag & HOST_DIAG_RESET_ADAPTER) {
msleep(100);
- host_diag = readl(&instance->reg_set->fusion_host_diag);
+ host_diag = megasas_readl(instance,
+ &instance->reg_set->fusion_host_diag);
if (retry++ == 1000) {
dev_warn(&instance->pdev->dev,
"Diag reset adapter never cleared %s %d\n",
@@ -3792,14 +3835,14 @@ megasas_adp_reset_fusion(struct megasas_instance *instance,
if (host_diag & HOST_DIAG_RESET_ADAPTER)
return -1;
- abs_state = instance->instancet->read_fw_status_reg(instance->reg_set)
+ abs_state = instance->instancet->read_fw_status_reg(instance)
& MFI_STATE_MASK;
retry = 0;
while ((abs_state <= MFI_STATE_FW_INIT) && (retry++ < 1000)) {
msleep(100);
abs_state = instance->instancet->
- read_fw_status_reg(instance->reg_set) & MFI_STATE_MASK;
+ read_fw_status_reg(instance) & MFI_STATE_MASK;
}
if (abs_state <= MFI_STATE_FW_INIT) {
dev_warn(&instance->pdev->dev,
@@ -3822,17 +3865,60 @@ megasas_check_reset_fusion(struct megasas_instance *instance,
return 0;
}
+/**
+ * megasas_trigger_snap_dump - Trigger snap dump in FW
+ * @instance: Soft instance of adapter
+ */
+static inline void megasas_trigger_snap_dump(struct megasas_instance *instance)
+{
+ int j;
+ u32 fw_state;
+
+ if (!instance->disableOnlineCtrlReset) {
+ dev_info(&instance->pdev->dev, "Trigger snap dump\n");
+ writel(MFI_ADP_TRIGGER_SNAP_DUMP,
+ &instance->reg_set->doorbell);
+ readl(&instance->reg_set->doorbell);
+ }
+
+ for (j = 0; j < instance->snapdump_wait_time; j++) {
+ fw_state = instance->instancet->read_fw_status_reg(instance) &
+ MFI_STATE_MASK;
+ if (fw_state == MFI_STATE_FAULT) {
+ dev_err(&instance->pdev->dev,
+ "Found FW in FAULT state, after snap dump trigger\n");
+ return;
+ }
+ msleep(1000);
+ }
+}
+
/* This function waits for outstanding commands on fusion to complete */
int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance,
int reason, int *convert)
{
int i, outstanding, retval = 0, hb_seconds_missed = 0;
u32 fw_state;
+ u32 waittime_for_io_completion;
+
+ waittime_for_io_completion =
+ min_t(u32, resetwaittime,
+ (resetwaittime - instance->snapdump_wait_time));
- for (i = 0; i < resetwaittime; i++) {
+ if (reason == MFI_IO_TIMEOUT_OCR) {
+ dev_info(&instance->pdev->dev,
+ "MFI command is timed out\n");
+ megasas_complete_cmd_dpc_fusion((unsigned long)instance);
+ if (instance->snapdump_wait_time)
+ megasas_trigger_snap_dump(instance);
+ retval = 1;
+ goto out;
+ }
+
+ for (i = 0; i < waittime_for_io_completion; i++) {
/* Check if firmware is in fault state */
- fw_state = instance->instancet->read_fw_status_reg(
- instance->reg_set) & MFI_STATE_MASK;
+ fw_state = instance->instancet->read_fw_status_reg(instance) &
+ MFI_STATE_MASK;
if (fw_state == MFI_STATE_FAULT) {
dev_warn(&instance->pdev->dev, "Found FW in FAULT state,"
" will reset adapter scsi%d.\n",
@@ -3850,13 +3936,6 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance,
goto out;
}
- if (reason == MFI_IO_TIMEOUT_OCR) {
- dev_info(&instance->pdev->dev,
- "MFI IO is timed out, initiating OCR\n");
- megasas_complete_cmd_dpc_fusion((unsigned long)instance);
- retval = 1;
- goto out;
- }
/* If SR-IOV VF mode & heartbeat timeout, don't wait */
if (instance->requestorId && !reason) {
@@ -3901,6 +3980,12 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance,
msleep(1000);
}
+ if (instance->snapdump_wait_time) {
+ megasas_trigger_snap_dump(instance);
+ retval = 1;
+ goto out;
+ }
+
if (atomic_read(&instance->fw_outstanding)) {
dev_err(&instance->pdev->dev, "pending commands remain after waiting, "
"will reset adapter scsi%d.\n",
@@ -3908,6 +3993,7 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance,
*convert = 1;
retval = 1;
}
+
out:
return retval;
}
@@ -4518,7 +4604,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason)
mutex_unlock(&instance->reset_mutex);
return FAILED;
}
- status_reg = instance->instancet->read_fw_status_reg(instance->reg_set);
+ status_reg = instance->instancet->read_fw_status_reg(instance);
abs_state = status_reg & MFI_STATE_MASK;
/* IO timeout detected, forcibly put FW in FAULT state */
@@ -4527,7 +4613,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason)
dev_info(&instance->pdev->dev, "IO/DCMD timeout is detected, "
"forcibly FAULT Firmware\n");
atomic_set(&instance->adprecovery, MEGASAS_ADPRESET_SM_INFAULT);
- status_reg = readl(&instance->reg_set->doorbell);
+ status_reg = megasas_readl(instance, &instance->reg_set->doorbell);
writel(status_reg | MFI_STATE_FORCE_OCR,
&instance->reg_set->doorbell);
readl(&instance->reg_set->doorbell);
@@ -4578,7 +4664,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason)
for (i = 0 ; i < instance->max_scsi_cmds; i++) {
cmd_fusion = fusion->cmd_list[i];
/*check for extra commands issued by driver*/
- if (instance->adapter_type == VENTURA_SERIES) {
+ if (instance->adapter_type >= VENTURA_SERIES) {
r1_cmd = fusion->cmd_list[i + instance->max_fw_cmds];
megasas_return_cmd_fusion(instance, r1_cmd);
}
@@ -4605,8 +4691,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason)
atomic_set(&instance->fw_outstanding, 0);
- status_reg = instance->instancet->read_fw_status_reg(
- instance->reg_set);
+ status_reg = instance->instancet->read_fw_status_reg(instance);
abs_state = status_reg & MFI_STATE_MASK;
reset_adapter = status_reg & MFI_RESET_ADAPTER;
if (instance->disableOnlineCtrlReset ||
@@ -4677,7 +4762,7 @@ transition_to_ready:
megasas_setup_jbod_map(instance);
/* reset stream detection array */
- if (instance->adapter_type == VENTURA_SERIES) {
+ if (instance->adapter_type >= VENTURA_SERIES) {
for (j = 0; j < MAX_LOGICAL_DRIVES_EXT; ++j) {
memset(fusion->stream_detect_by_ld[j],
0, sizeof(struct LD_STREAM_DETECT));
@@ -4721,6 +4806,13 @@ transition_to_ready:
megasas_set_crash_dump_params(instance,
MR_CRASH_BUF_TURN_OFF);
+ if (instance->snapdump_wait_time) {
+ megasas_get_snapdump_properties(instance);
+ dev_info(&instance->pdev->dev,
+ "Snap dump wait time\t: %d\n",
+ instance->snapdump_wait_time);
+ }
+
retval = SUCCESS;
/* Adapter reset completed successfully */
@@ -4752,16 +4844,15 @@ out:
return retval;
}
-/* Fusion Crash dump collection work queue */
-void megasas_fusion_crash_dump_wq(struct work_struct *work)
+/* Fusion Crash dump collection */
+void megasas_fusion_crash_dump(struct megasas_instance *instance)
{
- struct megasas_instance *instance =
- container_of(work, struct megasas_instance, crash_init);
u32 status_reg;
u8 partial_copy = 0;
+ int wait = 0;
- status_reg = instance->instancet->read_fw_status_reg(instance->reg_set);
+ status_reg = instance->instancet->read_fw_status_reg(instance);
/*
* Allocate host crash buffers to copy data from 1 MB DMA crash buffer
@@ -4777,8 +4868,8 @@ void megasas_fusion_crash_dump_wq(struct work_struct *work)
"crash dump and initiating OCR\n");
status_reg |= MFI_STATE_CRASH_DUMP_DONE;
writel(status_reg,
- &instance->reg_set->outbound_scratch_pad);
- readl(&instance->reg_set->outbound_scratch_pad);
+ &instance->reg_set->outbound_scratch_pad_0);
+ readl(&instance->reg_set->outbound_scratch_pad_0);
return;
}
megasas_alloc_host_crash_buffer(instance);
@@ -4786,21 +4877,41 @@ void megasas_fusion_crash_dump_wq(struct work_struct *work)
"allocated: %d\n", instance->drv_buf_alloc);
}
- /*
- * Driver has allocated max buffers, which can be allocated
- * and FW has more crash dump data, then driver will
- * ignore the data.
- */
- if (instance->drv_buf_index >= (instance->drv_buf_alloc)) {
- dev_info(&instance->pdev->dev, "Driver is done copying "
- "the buffer: %d\n", instance->drv_buf_alloc);
- status_reg |= MFI_STATE_CRASH_DUMP_DONE;
- partial_copy = 1;
- } else {
- memcpy(instance->crash_buf[instance->drv_buf_index],
- instance->crash_dump_buf, CRASH_DMA_BUF_SIZE);
- instance->drv_buf_index++;
- status_reg &= ~MFI_STATE_DMADONE;
+ while (!(status_reg & MFI_STATE_CRASH_DUMP_DONE) &&
+ (wait < MEGASAS_WATCHDOG_WAIT_COUNT)) {
+ if (!(status_reg & MFI_STATE_DMADONE)) {
+ /*
+ * Next crash dump buffer is not yet DMA'd by FW
+ * Check after 10ms. Wait for 1 second for FW to
+ * post the next buffer. If not bail out.
+ */
+ wait++;
+ msleep(MEGASAS_WAIT_FOR_NEXT_DMA_MSECS);
+ status_reg = instance->instancet->read_fw_status_reg(
+ instance);
+ continue;
+ }
+
+ wait = 0;
+ if (instance->drv_buf_index >= instance->drv_buf_alloc) {
+ dev_info(&instance->pdev->dev,
+ "Driver is done copying the buffer: %d\n",
+ instance->drv_buf_alloc);
+ status_reg |= MFI_STATE_CRASH_DUMP_DONE;
+ partial_copy = 1;
+ break;
+ } else {
+ memcpy(instance->crash_buf[instance->drv_buf_index],
+ instance->crash_dump_buf, CRASH_DMA_BUF_SIZE);
+ instance->drv_buf_index++;
+ status_reg &= ~MFI_STATE_DMADONE;
+ }
+
+ writel(status_reg, &instance->reg_set->outbound_scratch_pad_0);
+ readl(&instance->reg_set->outbound_scratch_pad_0);
+
+ msleep(MEGASAS_WAIT_FOR_NEXT_DMA_MSECS);
+ status_reg = instance->instancet->read_fw_status_reg(instance);
}
if (status_reg & MFI_STATE_CRASH_DUMP_DONE) {
@@ -4809,13 +4920,10 @@ void megasas_fusion_crash_dump_wq(struct work_struct *work)
instance->fw_crash_buffer_size = instance->drv_buf_index;
instance->fw_crash_state = AVAILABLE;
instance->drv_buf_index = 0;
- writel(status_reg, &instance->reg_set->outbound_scratch_pad);
- readl(&instance->reg_set->outbound_scratch_pad);
+ writel(status_reg, &instance->reg_set->outbound_scratch_pad_0);
+ readl(&instance->reg_set->outbound_scratch_pad_0);
if (!partial_copy)
megasas_reset_fusion(instance->host, 0);
- } else {
- writel(status_reg, &instance->reg_set->outbound_scratch_pad);
- readl(&instance->reg_set->outbound_scratch_pad);
}
}
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h
index 8e5ebee6517f..ca73c50fe723 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.h
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h
@@ -2,7 +2,8 @@
* Linux MegaRAID driver for SAS based RAID controllers
*
* Copyright (c) 2009-2013 LSI Corporation
- * Copyright (c) 2013-2014 Avago Technologies
+ * Copyright (c) 2013-2016 Avago Technologies
+ * Copyright (c) 2016-2018 Broadcom Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -19,16 +20,13 @@
*
* FILE: megaraid_sas_fusion.h
*
- * Authors: Avago Technologies
+ * Authors: Broadcom Inc.
* Manoj Jose
* Sumant Patro
- * Kashyap Desai <kashyap.desai@avagotech.com>
- * Sumit Saxena <sumit.saxena@avagotech.com>
+ * Kashyap Desai <kashyap.desai@broadcom.com>
+ * Sumit Saxena <sumit.saxena@broadcom.com>
*
- * Send feedback to: megaraidlinux.pdl@avagotech.com
- *
- * Mail to: Avago Technologies, 350 West Trimble Road, Building 90,
- * San Jose, California 95131
+ * Send feedback to: megaraidlinux.pdl@broadcom.com
*/
#ifndef _MEGARAID_SAS_FUSION_H_
@@ -725,6 +723,7 @@ struct MPI2_IOC_INIT_REQUEST {
#define MR_DCMD_CTRL_SHARED_HOST_MEM_ALLOC 0x010e8485 /* SR-IOV HB alloc*/
#define MR_DCMD_LD_VF_MAP_GET_ALL_LDS_111 0x03200200
#define MR_DCMD_LD_VF_MAP_GET_ALL_LDS 0x03150200
+#define MR_DCMD_CTRL_SNAPDUMP_GET_PROPERTIES 0x01200100
struct MR_DEV_HANDLE_INFO {
__le16 curDevHdl;
@@ -1063,6 +1062,9 @@ struct MR_FW_RAID_MAP_DYNAMIC {
#define MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP (0x08)
#define MPI26_IEEE_SGE_FLAGS_NSF_NVME_SGL (0x10)
+#define MEGASAS_DEFAULT_SNAP_DUMP_WAIT_TIME 15
+#define MEGASAS_MAX_SNAP_DUMP_WAIT_TIME 60
+
struct megasas_register_set;
struct megasas_instance;
@@ -1350,6 +1352,14 @@ enum CMD_RET_VALUES {
RETURN_CMD = 3,
};
+struct MR_SNAPDUMP_PROPERTIES {
+ u8 offload_num;
+ u8 max_num_supported;
+ u8 cur_num_supported;
+ u8 trigger_min_num_sec_before_ocr;
+ u8 reserved[12];
+};
+
void megasas_free_cmds_fusion(struct megasas_instance *instance);
int megasas_ioc_init_fusion(struct megasas_instance *instance);
u8 megasas_get_map_info(struct megasas_instance *instance);
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index ec6940f2fcb3..f3e182eb0970 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -1838,7 +1838,7 @@ static struct scsi_host_template mesh_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING,
+ .max_segment_size = 65535,
};
static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match)
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2.h b/drivers/scsi/mpt3sas/mpi/mpi2.h
index 1e45268a78fc..7efd17a3c25b 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright 2000-2015 Avago Technologies. All rights reserved.
+ * Copyright 2000-2020 Broadcom Inc. All rights reserved.
*
*
* Name: mpi2.h
@@ -9,7 +9,7 @@
* scatter/gather formats.
* Creation Date: June 21, 2006
*
- * mpi2.h Version: 02.00.50
+ * mpi2.h Version: 02.00.53
*
* NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
* prefix are for use only on MPI v2.5 products, and must not be used
@@ -116,7 +116,12 @@
* 02-03-17 02.00.48 Bumped MPI2_HEADER_VERSION_UNIT.
* 06-13-17 02.00.49 Bumped MPI2_HEADER_VERSION_UNIT.
* 09-29-17 02.00.50 Bumped MPI2_HEADER_VERSION_UNIT.
- * --------------------------------------------------------------------------
+ * 07-22-18 02.00.51 Added SECURE_BOOT define.
+ * Bumped MPI2_HEADER_VERSION_UNIT
+ * 08-15-18 02.00.52 Bumped MPI2_HEADER_VERSION_UNIT.
+ * 08-28-18 02.00.53 Bumped MPI2_HEADER_VERSION_UNIT.
+ * Added MPI2_IOCSTATUS_FAILURE
+ * --------------------------------------------------------------------------
*/
#ifndef MPI2_H
@@ -156,7 +161,7 @@
/* Unit and Dev versioning for this MPI header set */
-#define MPI2_HEADER_VERSION_UNIT (0x32)
+#define MPI2_HEADER_VERSION_UNIT (0x35)
#define MPI2_HEADER_VERSION_DEV (0x00)
#define MPI2_HEADER_VERSION_UNIT_MASK (0xFF00)
#define MPI2_HEADER_VERSION_UNIT_SHIFT (8)
@@ -257,6 +262,8 @@ typedef volatile struct _MPI2_SYSTEM_INTERFACE_REGS {
*/
#define MPI2_HOST_DIAGNOSTIC_OFFSET (0x00000008)
+#define MPI26_DIAG_SECURE_BOOT (0x80000000)
+
#define MPI2_DIAG_SBR_RELOAD (0x00002000)
#define MPI2_DIAG_BOOT_DEVICE_SELECT_MASK (0x00001800)
@@ -687,7 +694,9 @@ typedef union _MPI2_REPLY_DESCRIPTORS_UNION {
#define MPI2_IOCSTATUS_INVALID_FIELD (0x0007)
#define MPI2_IOCSTATUS_INVALID_STATE (0x0008)
#define MPI2_IOCSTATUS_OP_STATE_NOT_SUPPORTED (0x0009)
+/*MPI v2.6 and later */
#define MPI2_IOCSTATUS_INSUFFICIENT_POWER (0x000A)
+#define MPI2_IOCSTATUS_FAILURE (0x000F)
/****************************************************************************
* Config IOCStatus values
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h
index 5122920a961a..398fa6fde960 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h
@@ -1,13 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright 2000-2015 Avago Technologies. All rights reserved.
+ * Copyright 2000-2020 Broadcom Inc. All rights reserved.
*
*
* Name: mpi2_cnfg.h
* Title: MPI Configuration messages and pages
* Creation Date: November 10, 2006
*
- * mpi2_cnfg.h Version: 02.00.42
+ * mpi2_cnfg.h Version: 02.00.46
*
* NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
* prefix are for use only on MPI v2.5 products, and must not be used
@@ -231,6 +231,18 @@
* Added NOIOB field to PCIe Device Page 2.
* Added MPI26_PCIEDEV2_CAP_DATA_BLK_ALIGN_AND_GRAN to
* the Capabilities field of PCIe Device Page 2.
+ * 07-22-18 02.00.43 Added defines for SAS3916 and SAS3816.
+ * Added WRiteCache defines to IO Unit Page 1.
+ * Added MaxEnclosureLevel to BIOS Page 1.
+ * Added OEMRD to SAS Enclosure Page 1.
+ * Added DMDReportPCIe to PCIe IO Unit Page 1.
+ * Added Flags field and flags for Retimers to
+ * PCIe Switch Page 1.
+ * 08-02-18 02.00.44 Added Slotx2, Slotx4 to ManPage 7.
+ * 08-15-18 02.00.45 Added ProductSpecific field at end of IOC Page 1
+ * 08-28-18 02.00.46 Added NVMs Write Cache flag to IOUnitPage1
+ * Added DMDReport Delay Time defines to
+ * PCIeIOUnitPage1
* --------------------------------------------------------------------------
*/
@@ -568,8 +580,17 @@ typedef struct _MPI2_CONFIG_REPLY {
#define MPI26_MFGPAGE_DEVID_SAS3616 (0x00D1)
#define MPI26_MFGPAGE_DEVID_SAS3708 (0x00D2)
-#define MPI26_MFGPAGE_DEVID_SAS3816 (0x00A1)
-#define MPI26_MFGPAGE_DEVID_SAS3916 (0x00A0)
+#define MPI26_MFGPAGE_DEVID_SEC_MASK_3916 (0x0003)
+#define MPI26_MFGPAGE_DEVID_INVALID0_3916 (0x00E0)
+#define MPI26_MFGPAGE_DEVID_CFG_SEC_3916 (0x00E1)
+#define MPI26_MFGPAGE_DEVID_HARD_SEC_3916 (0x00E2)
+#define MPI26_MFGPAGE_DEVID_INVALID1_3916 (0x00E3)
+
+#define MPI26_MFGPAGE_DEVID_SEC_MASK_3816 (0x0003)
+#define MPI26_MFGPAGE_DEVID_INVALID0_3816 (0x00E4)
+#define MPI26_MFGPAGE_DEVID_CFG_SEC_3816 (0x00E5)
+#define MPI26_MFGPAGE_DEVID_HARD_SEC_3816 (0x00E6)
+#define MPI26_MFGPAGE_DEVID_INVALID1_3816 (0x00E7)
/*Manufacturing Page 0 */
@@ -932,7 +953,11 @@ typedef struct _MPI2_CONFIG_PAGE_IO_UNIT_1 {
#define MPI2_IOUNITPAGE1_PAGEVERSION (0x04)
-/*IO Unit Page 1 Flags defines */
+/* IO Unit Page 1 Flags defines */
+#define MPI26_IOUNITPAGE1_NVME_WRCACHE_MASK (0x00030000)
+#define MPI26_IOUNITPAGE1_NVME_WRCACHE_ENABLE (0x00000000)
+#define MPI26_IOUNITPAGE1_NVME_WRCACHE_DISABLE (0x00010000)
+#define MPI26_IOUNITPAGE1_NVME_WRCACHE_NO_CHANGE (0x00020000)
#define MPI2_IOUNITPAGE1_ATA_SECURITY_FREEZE_LOCK (0x00004000)
#define MPI25_IOUNITPAGE1_NEW_DEVICE_FAST_PATH_DISABLE (0x00002000)
#define MPI25_IOUNITPAGE1_DISABLE_FAST_PATH (0x00001000)
@@ -1511,7 +1536,7 @@ typedef struct _MPI2_CONFIG_PAGE_BIOS_1 {
U32 BiosOptions; /*0x04 */
U32 IOCSettings; /*0x08 */
U8 SSUTimeout; /*0x0C */
- U8 Reserved1; /*0x0D */
+ U8 MaxEnclosureLevel; /*0x0D */
U16 Reserved2; /*0x0E */
U32 DeviceSettings; /*0x10 */
U16 NumberOfDevices; /*0x14 */
@@ -1531,7 +1556,6 @@ typedef struct _MPI2_CONFIG_PAGE_BIOS_1 {
#define MPI2_BIOSPAGE1_OPTIONS_ADVANCED_CONFIG (0x00004000)
#define MPI2_BIOSPAGE1_OPTIONS_PNS_MASK (0x00003800)
-#define MPI2_BIOSPAGE1_OPTIONS_PNS_MASK (0x00003800)
#define MPI2_BIOSPAGE1_OPTIONS_PNS_PBDHL (0x00000000)
#define MPI2_BIOSPAGE1_OPTIONS_PNS_ENCSLOSURE (0x00000800)
#define MPI2_BIOSPAGE1_OPTIONS_PNS_LWWID (0x00001000)
@@ -3271,10 +3295,12 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0 {
U16 NumSlots; /*0x18 */
U16 StartSlot; /*0x1A */
U8 ChassisSlot; /*0x1C */
- U8 EnclosureLeve; /*0x1D */
+ U8 EnclosureLevel; /*0x1D */
U16 SEPDevHandle; /*0x1E */
- U32 Reserved3; /*0x20 */
- U32 Reserved4; /*0x24 */
+ U8 OEMRD; /*0x20 */
+ U8 Reserved1a; /*0x21 */
+ U16 Reserved2; /*0x22 */
+ U32 Reserved3; /*0x24 */
} MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0,
*PTR_MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0,
Mpi2SasEnclosurePage0_t, *pMpi2SasEnclosurePage0_t,
@@ -3285,6 +3311,8 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0 {
#define MPI2_SASENCLOSURE0_PAGEVERSION (0x04)
/*values for SAS Enclosure Page 0 Flags field */
+#define MPI26_SAS_ENCLS0_FLAGS_OEMRD_VALID (0x0080)
+#define MPI26_SAS_ENCLS0_FLAGS_OEMRD_COLLECTING (0x0040)
#define MPI2_SAS_ENCLS0_FLAGS_CHASSIS_SLOT_VALID (0x0020)
#define MPI2_SAS_ENCLS0_FLAGS_ENCL_LEVEL_VALID (0x0010)
#define MPI2_SAS_ENCLS0_FLAGS_MNG_MASK (0x000F)
@@ -3298,6 +3326,8 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0 {
#define MPI26_ENCLOSURE0_PAGEVERSION (0x04)
/*Values for Enclosure Page 0 Flags field */
+#define MPI26_ENCLS0_FLAGS_OEMRD_VALID (0x0080)
+#define MPI26_ENCLS0_FLAGS_OEMRD_COLLECTING (0x0040)
#define MPI26_ENCLS0_FLAGS_CHASSIS_SLOT_VALID (0x0020)
#define MPI26_ENCLS0_FLAGS_ENCL_LEVEL_VALID (0x0010)
#define MPI26_ENCLS0_FLAGS_MNG_MASK (0x000F)
@@ -3696,8 +3726,9 @@ typedef struct _MPI26_PCIE_IO_UNIT1_PHY_DATA {
Mpi26PCIeIOUnit1PhyData_t, *pMpi26PCIeIOUnit1PhyData_t;
/*values for LinkFlags */
-#define MPI26_PCIEIOUNIT1_LINKFLAGS_DIS_SRIS (0x00)
-#define MPI26_PCIEIOUNIT1_LINKFLAGS_EN_SRIS (0x01)
+#define MPI26_PCIEIOUNIT1_LINKFLAGS_DIS_SEPARATE_REFCLK (0x00)
+#define MPI26_PCIEIOUNIT1_LINKFLAGS_SRIS_EN (0x01)
+#define MPI26_PCIEIOUNIT1_LINKFLAGS_SRNS_EN (0x02)
/*
*Host code (drivers, BIOS, utilities, etc.) should leave this define set to
@@ -3714,7 +3745,7 @@ typedef struct _MPI26_CONFIG_PAGE_PIOUNIT_1 {
U16 AdditionalControlFlags; /*0x0C */
U16 NVMeMaxQueueDepth; /*0x0E */
U8 NumPhys; /*0x10 */
- U8 Reserved1; /*0x11 */
+ U8 DMDReportPCIe; /*0x11 */
U16 Reserved2; /*0x12 */
MPI26_PCIE_IO_UNIT1_PHY_DATA
PhyData[MPI26_PCIE_IOUNIT1_PHY_MAX];/*0x14 */
@@ -3736,6 +3767,12 @@ typedef struct _MPI26_CONFIG_PAGE_PIOUNIT_1 {
#define MPI26_PCIEIOUNIT1_MAX_RATE_8_0 (0x40)
#define MPI26_PCIEIOUNIT1_MAX_RATE_16_0 (0x50)
+/*values for PCIe IO Unit Page 1 DMDReportPCIe */
+#define MPI26_PCIEIOUNIT1_DMDRPT_UNIT_MASK (0x80)
+#define MPI26_PCIEIOUNIT1_DMDRPT_UNIT_1_SEC (0x00)
+#define MPI26_PCIEIOUNIT1_DMDRPT_UNIT_16_SEC (0x80)
+#define MPI26_PCIEIOUNIT1_DMDRPT_DELAY_TIME_MASK (0x7F)
+
/*see mpi2_pci.h for values for PCIe IO Unit Page 0 ControllerPhyDeviceInfo
*values
*/
@@ -3788,6 +3825,9 @@ typedef struct _MPI26_CONFIG_PAGE_PSWITCH_1 {
/*use MPI26_PCIE_NEG_LINK_RATE_ defines for the NegotiatedLinkRate field */
+/* defines for the Flags field */
+#define MPI26_PCIESWITCH1_2_RETIMER_PRESENCE (0x0002)
+#define MPI26_PCIESWITCH1_RETIMER_PRESENCE (0x0001)
/****************************************************************************
* PCIe Device Config Pages (MPI v2.6 and later)
@@ -3849,19 +3889,21 @@ typedef struct _MPI26_CONFIG_PAGE_PCIEDEV_0 {
*field
*/
-/*values for PCIe Device Page 0 Flags field */
-#define MPI26_PCIEDEV0_FLAGS_UNAUTHORIZED_DEVICE (0x8000)
-#define MPI26_PCIEDEV0_FLAGS_ENABLED_FAST_PATH (0x4000)
-#define MPI26_PCIEDEV0_FLAGS_FAST_PATH_CAPABLE (0x2000)
-#define MPI26_PCIEDEV0_FLAGS_ASYNCHRONOUS_NOTIFICATION (0x0400)
-#define MPI26_PCIEDEV0_FLAGS_ATA_SW_PRESERVATION (0x0200)
-#define MPI26_PCIEDEV0_FLAGS_UNSUPPORTED_DEVICE (0x0100)
-#define MPI26_PCIEDEV0_FLAGS_ATA_48BIT_LBA_SUPPORTED (0x0080)
-#define MPI26_PCIEDEV0_FLAGS_ATA_SMART_SUPPORTED (0x0040)
-#define MPI26_PCIEDEV0_FLAGS_ATA_NCQ_SUPPORTED (0x0020)
-#define MPI26_PCIEDEV0_FLAGS_ATA_FUA_SUPPORTED (0x0010)
-#define MPI26_PCIEDEV0_FLAGS_ENCL_LEVEL_VALID (0x0002)
-#define MPI26_PCIEDEV0_FLAGS_DEVICE_PRESENT (0x0001)
+/*values for PCIe Device Page 0 Flags field*/
+#define MPI26_PCIEDEV0_FLAGS_2_RETIMER_PRESENCE (0x00020000)
+#define MPI26_PCIEDEV0_FLAGS_RETIMER_PRESENCE (0x00010000)
+#define MPI26_PCIEDEV0_FLAGS_UNAUTHORIZED_DEVICE (0x00008000)
+#define MPI26_PCIEDEV0_FLAGS_ENABLED_FAST_PATH (0x00004000)
+#define MPI26_PCIEDEV0_FLAGS_FAST_PATH_CAPABLE (0x00002000)
+#define MPI26_PCIEDEV0_FLAGS_ASYNCHRONOUS_NOTIFICATION (0x00000400)
+#define MPI26_PCIEDEV0_FLAGS_ATA_SW_PRESERVATION (0x00000200)
+#define MPI26_PCIEDEV0_FLAGS_UNSUPPORTED_DEVICE (0x00000100)
+#define MPI26_PCIEDEV0_FLAGS_ATA_48BIT_LBA_SUPPORTED (0x00000080)
+#define MPI26_PCIEDEV0_FLAGS_ATA_SMART_SUPPORTED (0x00000040)
+#define MPI26_PCIEDEV0_FLAGS_ATA_NCQ_SUPPORTED (0x00000020)
+#define MPI26_PCIEDEV0_FLAGS_ATA_FUA_SUPPORTED (0x00000010)
+#define MPI26_PCIEDEV0_FLAGS_ENCL_LEVEL_VALID (0x00000002)
+#define MPI26_PCIEDEV0_FLAGS_DEVICE_PRESENT (0x00000001)
/* values for PCIe Device Page 0 SupportedLinkRates field */
#define MPI26_PCIEDEV0_LINK_RATE_16_0_SUPPORTED (0x08)
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_image.h b/drivers/scsi/mpt3sas/mpi/mpi2_image.h
new file mode 100644
index 000000000000..4959585f029d
--- /dev/null
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_image.h
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2016-2020 Broadcom Limited. All rights reserved.
+ *
+ * Name: mpi2_image.h
+ * Description: Contains definitions for firmware and other component images
+ * Creation Date: 04/02/2018
+ * Version: 02.06.03
+ *
+ *
+ * Version History
+ * ---------------
+ *
+ * Date Version Description
+ * -------- -------- ------------------------------------------------------
+ * 08-01-18 02.06.00 Initial version for MPI 2.6.5.
+ * 08-14-18 02.06.01 Corrected define for MPI26_IMAGE_HEADER_SIGNATURE0_MPI26
+ * 08-28-18 02.06.02 Added MPI2_EXT_IMAGE_TYPE_RDE
+ * 09-07-18 02.06.03 Added MPI26_EVENT_PCIE_TOPO_PI_16_LANES
+ */
+#ifndef MPI2_IMAGE_H
+#define MPI2_IMAGE_H
+
+
+/*FW Image Header */
+typedef struct _MPI2_FW_IMAGE_HEADER {
+ U32 Signature; /*0x00 */
+ U32 Signature0; /*0x04 */
+ U32 Signature1; /*0x08 */
+ U32 Signature2; /*0x0C */
+ MPI2_VERSION_UNION MPIVersion; /*0x10 */
+ MPI2_VERSION_UNION FWVersion; /*0x14 */
+ MPI2_VERSION_UNION NVDATAVersion; /*0x18 */
+ MPI2_VERSION_UNION PackageVersion; /*0x1C */
+ U16 VendorID; /*0x20 */
+ U16 ProductID; /*0x22 */
+ U16 ProtocolFlags; /*0x24 */
+ U16 Reserved26; /*0x26 */
+ U32 IOCCapabilities; /*0x28 */
+ U32 ImageSize; /*0x2C */
+ U32 NextImageHeaderOffset; /*0x30 */
+ U32 Checksum; /*0x34 */
+ U32 Reserved38; /*0x38 */
+ U32 Reserved3C; /*0x3C */
+ U32 Reserved40; /*0x40 */
+ U32 Reserved44; /*0x44 */
+ U32 Reserved48; /*0x48 */
+ U32 Reserved4C; /*0x4C */
+ U32 Reserved50; /*0x50 */
+ U32 Reserved54; /*0x54 */
+ U32 Reserved58; /*0x58 */
+ U32 Reserved5C; /*0x5C */
+ U32 BootFlags; /*0x60 */
+ U32 FirmwareVersionNameWhat; /*0x64 */
+ U8 FirmwareVersionName[32]; /*0x68 */
+ U32 VendorNameWhat; /*0x88 */
+ U8 VendorName[32]; /*0x8C */
+ U32 PackageNameWhat; /*0x88 */
+ U8 PackageName[32]; /*0x8C */
+ U32 ReservedD0; /*0xD0 */
+ U32 ReservedD4; /*0xD4 */
+ U32 ReservedD8; /*0xD8 */
+ U32 ReservedDC; /*0xDC */
+ U32 ReservedE0; /*0xE0 */
+ U32 ReservedE4; /*0xE4 */
+ U32 ReservedE8; /*0xE8 */
+ U32 ReservedEC; /*0xEC */
+ U32 ReservedF0; /*0xF0 */
+ U32 ReservedF4; /*0xF4 */
+ U32 ReservedF8; /*0xF8 */
+ U32 ReservedFC; /*0xFC */
+} MPI2_FW_IMAGE_HEADER, *PTR_MPI2_FW_IMAGE_HEADER,
+ Mpi2FWImageHeader_t, *pMpi2FWImageHeader_t;
+
+/*Signature field */
+#define MPI2_FW_HEADER_SIGNATURE_OFFSET (0x00)
+#define MPI2_FW_HEADER_SIGNATURE_MASK (0xFF000000)
+#define MPI2_FW_HEADER_SIGNATURE (0xEA000000)
+#define MPI26_FW_HEADER_SIGNATURE (0xEB000000)
+
+/*Signature0 field */
+#define MPI2_FW_HEADER_SIGNATURE0_OFFSET (0x04)
+#define MPI2_FW_HEADER_SIGNATURE0 (0x5AFAA55A)
+/*Last byte is defined by architecture */
+#define MPI26_FW_HEADER_SIGNATURE0_BASE (0x5AEAA500)
+#define MPI26_FW_HEADER_SIGNATURE0_ARC_0 (0x5A)
+#define MPI26_FW_HEADER_SIGNATURE0_ARC_1 (0x00)
+#define MPI26_FW_HEADER_SIGNATURE0_ARC_2 (0x01)
+/*legacy (0x5AEAA55A) */
+#define MPI26_FW_HEADER_SIGNATURE0_ARC_3 (0x02)
+#define MPI26_FW_HEADER_SIGNATURE0 \
+ (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_0)
+#define MPI26_FW_HEADER_SIGNATURE0_3516 \
+ (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_1)
+#define MPI26_FW_HEADER_SIGNATURE0_4008 \
+ (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_3)
+
+/*Signature1 field */
+#define MPI2_FW_HEADER_SIGNATURE1_OFFSET (0x08)
+#define MPI2_FW_HEADER_SIGNATURE1 (0xA55AFAA5)
+#define MPI26_FW_HEADER_SIGNATURE1 (0xA55AEAA5)
+
+/*Signature2 field */
+#define MPI2_FW_HEADER_SIGNATURE2_OFFSET (0x0C)
+#define MPI2_FW_HEADER_SIGNATURE2 (0x5AA55AFA)
+#define MPI26_FW_HEADER_SIGNATURE2 (0x5AA55AEA)
+
+/*defines for using the ProductID field */
+#define MPI2_FW_HEADER_PID_TYPE_MASK (0xF000)
+#define MPI2_FW_HEADER_PID_TYPE_SAS (0x2000)
+
+#define MPI2_FW_HEADER_PID_PROD_MASK (0x0F00)
+#define MPI2_FW_HEADER_PID_PROD_A (0x0000)
+#define MPI2_FW_HEADER_PID_PROD_TARGET_INITIATOR_SCSI (0x0200)
+#define MPI2_FW_HEADER_PID_PROD_IR_SCSI (0x0700)
+
+#define MPI2_FW_HEADER_PID_FAMILY_MASK (0x00FF)
+/*SAS ProductID Family bits */
+#define MPI2_FW_HEADER_PID_FAMILY_2108_SAS (0x0013)
+#define MPI2_FW_HEADER_PID_FAMILY_2208_SAS (0x0014)
+#define MPI25_FW_HEADER_PID_FAMILY_3108_SAS (0x0021)
+#define MPI26_FW_HEADER_PID_FAMILY_3324_SAS (0x0028)
+#define MPI26_FW_HEADER_PID_FAMILY_3516_SAS (0x0031)
+
+/*use MPI2_IOCFACTS_PROTOCOL_ defines for ProtocolFlags field */
+
+/*use MPI2_IOCFACTS_CAPABILITY_ defines for IOCCapabilities field */
+
+#define MPI2_FW_HEADER_IMAGESIZE_OFFSET (0x2C)
+#define MPI2_FW_HEADER_NEXTIMAGE_OFFSET (0x30)
+
+#define MPI26_FW_HEADER_BOOTFLAGS_OFFSET (0x60)
+#define MPI2_FW_HEADER_BOOTFLAGS_ISSI32M_FLAG (0x00000001)
+#define MPI2_FW_HEADER_BOOTFLAGS_W25Q256JW_FLAG (0x00000002)
+/*This image has a auto-discovery version of SPI */
+#define MPI2_FW_HEADER_BOOTFLAGS_AUTO_SPI_FLAG (0x00000004)
+
+
+#define MPI2_FW_HEADER_VERNMHWAT_OFFSET (0x64)
+
+#define MPI2_FW_HEADER_WHAT_SIGNATURE (0x29232840)
+
+#define MPI2_FW_HEADER_SIZE (0x100)
+
+
+/****************************************************************************
+ * Component Image Format and related defines *
+ ****************************************************************************/
+
+/*Maximum number of Hash Exclusion entries in a Component Image Header */
+#define MPI26_COMP_IMG_HDR_NUM_HASH_EXCL (4)
+
+/*Hash Exclusion Format */
+typedef struct _MPI26_HASH_EXCLUSION_FORMAT {
+ U32 Offset; /*0x00 */
+ U32 Size; /*0x04 */
+} MPI26_HASH_EXCLUSION_FORMAT,
+ *PTR_MPI26_HASH_EXCLUSION_FORMAT,
+ Mpi26HashSxclusionFormat_t,
+ *pMpi26HashExclusionFormat_t;
+
+/*FW Image Header */
+typedef struct _MPI26_COMPONENT_IMAGE_HEADER {
+ U32 Signature0; /*0x00 */
+ U32 LoadAddress; /*0x04 */
+ U32 DataSize; /*0x08 */
+ U32 StartAddress; /*0x0C */
+ U32 Signature1; /*0x10 */
+ U32 FlashOffset; /*0x14 */
+ U32 FlashSize; /*0x18 */
+ U32 VersionStringOffset; /*0x1C */
+ U32 BuildDateStringOffset; /*0x20 */
+ U32 BuildTimeStringOffset; /*0x24 */
+ U32 EnvironmentVariableOffset; /*0x28 */
+ U32 ApplicationSpecific; /*0x2C */
+ U32 Signature2; /*0x30 */
+ U32 HeaderSize; /*0x34 */
+ U32 Crc; /*0x38 */
+ U8 NotFlashImage; /*0x3C */
+ U8 Compressed; /*0x3D */
+ U16 Reserved3E; /*0x3E */
+ U32 SecondaryFlashOffset; /*0x40 */
+ U32 Reserved44; /*0x44 */
+ U32 Reserved48; /*0x48 */
+ MPI2_VERSION_UNION RMCInterfaceVersion; /*0x4C */
+ MPI2_VERSION_UNION Reserved50; /*0x50 */
+ MPI2_VERSION_UNION FWVersion; /*0x54 */
+ MPI2_VERSION_UNION NvdataVersion; /*0x58 */
+ MPI26_HASH_EXCLUSION_FORMAT
+ HashExclusion[MPI26_COMP_IMG_HDR_NUM_HASH_EXCL];/*0x5C */
+ U32 NextImageHeaderOffset; /*0x7C */
+ U32 Reserved80[32]; /*0x80 -- 0xFC */
+} MPI26_COMPONENT_IMAGE_HEADER,
+ *PTR_MPI26_COMPONENT_IMAGE_HEADER,
+ Mpi26ComponentImageHeader_t,
+ *pMpi26ComponentImageHeader_t;
+
+
+/**** Definitions for Signature0 field ****/
+#define MPI26_IMAGE_HEADER_SIGNATURE0_MPI26 (0xEB000042)
+
+/**** Definitions for Signature1 field ****/
+#define MPI26_IMAGE_HEADER_SIGNATURE1_APPLICATION (0x20505041)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_CBB (0x20424243)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_MFG (0x2047464D)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_BIOS (0x534F4942)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_HIIM (0x4D494948)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_HIIA (0x41494948)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_CPLD (0x444C5043)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_SPD (0x20445053)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_NVDATA (0x5444564E)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_GAS_GAUGE (0x20534147)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_PBLP (0x50424C50)
+
+/**** Definitions for Signature2 field ****/
+#define MPI26_IMAGE_HEADER_SIGNATURE2_VALUE (0x50584546)
+
+/**** Offsets for Image Header Fields ****/
+#define MPI26_IMAGE_HEADER_SIGNATURE0_OFFSET (0x00)
+#define MPI26_IMAGE_HEADER_LOAD_ADDRESS_OFFSET (0x04)
+#define MPI26_IMAGE_HEADER_DATA_SIZE_OFFSET (0x08)
+#define MPI26_IMAGE_HEADER_START_ADDRESS_OFFSET (0x0C)
+#define MPI26_IMAGE_HEADER_SIGNATURE1_OFFSET (0x10)
+#define MPI26_IMAGE_HEADER_FLASH_OFFSET_OFFSET (0x14)
+#define MPI26_IMAGE_HEADER_FLASH_SIZE_OFFSET (0x18)
+#define MPI26_IMAGE_HEADER_VERSION_STRING_OFFSET_OFFSET (0x1C)
+#define MPI26_IMAGE_HEADER_BUILD_DATE_STRING_OFFSET_OFFSET (0x20)
+#define MPI26_IMAGE_HEADER_BUILD_TIME_OFFSET_OFFSET (0x24)
+#define MPI26_IMAGE_HEADER_ENVIROMENT_VAR_OFFSET_OFFSET (0x28)
+#define MPI26_IMAGE_HEADER_APPLICATION_SPECIFIC_OFFSET (0x2C)
+#define MPI26_IMAGE_HEADER_SIGNATURE2_OFFSET (0x30)
+#define MPI26_IMAGE_HEADER_HEADER_SIZE_OFFSET (0x34)
+#define MPI26_IMAGE_HEADER_CRC_OFFSET (0x38)
+#define MPI26_IMAGE_HEADER_NOT_FLASH_IMAGE_OFFSET (0x3C)
+#define MPI26_IMAGE_HEADER_COMPRESSED_OFFSET (0x3D)
+#define MPI26_IMAGE_HEADER_SECONDARY_FLASH_OFFSET_OFFSET (0x40)
+#define MPI26_IMAGE_HEADER_RMC_INTERFACE_VER_OFFSET (0x4C)
+#define MPI26_IMAGE_HEADER_COMPONENT_IMAGE_VER_OFFSET (0x54)
+#define MPI26_IMAGE_HEADER_HASH_EXCLUSION_OFFSET (0x5C)
+#define MPI26_IMAGE_HEADER_NEXT_IMAGE_HEADER_OFFSET_OFFSET (0x7C)
+
+
+#define MPI26_IMAGE_HEADER_SIZE (0x100)
+
+
+/*Extended Image Header */
+typedef struct _MPI2_EXT_IMAGE_HEADER {
+ U8 ImageType; /*0x00 */
+ U8 Reserved1; /*0x01 */
+ U16 Reserved2; /*0x02 */
+ U32 Checksum; /*0x04 */
+ U32 ImageSize; /*0x08 */
+ U32 NextImageHeaderOffset; /*0x0C */
+ U32 PackageVersion; /*0x10 */
+ U32 Reserved3; /*0x14 */
+ U32 Reserved4; /*0x18 */
+ U32 Reserved5; /*0x1C */
+ U8 IdentifyString[32]; /*0x20 */
+} MPI2_EXT_IMAGE_HEADER, *PTR_MPI2_EXT_IMAGE_HEADER,
+ Mpi2ExtImageHeader_t, *pMpi2ExtImageHeader_t;
+
+/*useful offsets */
+#define MPI2_EXT_IMAGE_IMAGETYPE_OFFSET (0x00)
+#define MPI2_EXT_IMAGE_IMAGESIZE_OFFSET (0x08)
+#define MPI2_EXT_IMAGE_NEXTIMAGE_OFFSET (0x0C)
+#define MPI2_EXT_IMAGE_PACKAGEVERSION_OFFSET (0x10)
+
+#define MPI2_EXT_IMAGE_HEADER_SIZE (0x40)
+
+/*defines for the ImageType field */
+#define MPI2_EXT_IMAGE_TYPE_UNSPECIFIED (0x00)
+#define MPI2_EXT_IMAGE_TYPE_FW (0x01)
+#define MPI2_EXT_IMAGE_TYPE_NVDATA (0x03)
+#define MPI2_EXT_IMAGE_TYPE_BOOTLOADER (0x04)
+#define MPI2_EXT_IMAGE_TYPE_INITIALIZATION (0x05)
+#define MPI2_EXT_IMAGE_TYPE_FLASH_LAYOUT (0x06)
+#define MPI2_EXT_IMAGE_TYPE_SUPPORTED_DEVICES (0x07)
+#define MPI2_EXT_IMAGE_TYPE_MEGARAID (0x08)
+#define MPI2_EXT_IMAGE_TYPE_ENCRYPTED_HASH (0x09)
+#define MPI2_EXT_IMAGE_TYPE_RDE (0x0A)
+#define MPI2_EXT_IMAGE_TYPE_MIN_PRODUCT_SPECIFIC (0x80)
+#define MPI2_EXT_IMAGE_TYPE_MAX_PRODUCT_SPECIFIC (0xFF)
+
+#define MPI2_EXT_IMAGE_TYPE_MAX (MPI2_EXT_IMAGE_TYPE_MAX_PRODUCT_SPECIFIC)
+
+/*FLASH Layout Extended Image Data */
+
+/*
+ *Host code (drivers, BIOS, utilities, etc.) should leave this define set to
+ *one and check RegionsPerLayout at runtime.
+ */
+#ifndef MPI2_FLASH_NUMBER_OF_REGIONS
+#define MPI2_FLASH_NUMBER_OF_REGIONS (1)
+#endif
+
+/*
+ *Host code (drivers, BIOS, utilities, etc.) should leave this define set to
+ *one and check NumberOfLayouts at runtime.
+ */
+#ifndef MPI2_FLASH_NUMBER_OF_LAYOUTS
+#define MPI2_FLASH_NUMBER_OF_LAYOUTS (1)
+#endif
+
+typedef struct _MPI2_FLASH_REGION {
+ U8 RegionType; /*0x00 */
+ U8 Reserved1; /*0x01 */
+ U16 Reserved2; /*0x02 */
+ U32 RegionOffset; /*0x04 */
+ U32 RegionSize; /*0x08 */
+ U32 Reserved3; /*0x0C */
+} MPI2_FLASH_REGION, *PTR_MPI2_FLASH_REGION,
+ Mpi2FlashRegion_t, *pMpi2FlashRegion_t;
+
+typedef struct _MPI2_FLASH_LAYOUT {
+ U32 FlashSize; /*0x00 */
+ U32 Reserved1; /*0x04 */
+ U32 Reserved2; /*0x08 */
+ U32 Reserved3; /*0x0C */
+ MPI2_FLASH_REGION Region[MPI2_FLASH_NUMBER_OF_REGIONS]; /*0x10 */
+} MPI2_FLASH_LAYOUT, *PTR_MPI2_FLASH_LAYOUT,
+ Mpi2FlashLayout_t, *pMpi2FlashLayout_t;
+
+typedef struct _MPI2_FLASH_LAYOUT_DATA {
+ U8 ImageRevision; /*0x00 */
+ U8 Reserved1; /*0x01 */
+ U8 SizeOfRegion; /*0x02 */
+ U8 Reserved2; /*0x03 */
+ U16 NumberOfLayouts; /*0x04 */
+ U16 RegionsPerLayout; /*0x06 */
+ U16 MinimumSectorAlignment; /*0x08 */
+ U16 Reserved3; /*0x0A */
+ U32 Reserved4; /*0x0C */
+ MPI2_FLASH_LAYOUT Layout[MPI2_FLASH_NUMBER_OF_LAYOUTS]; /*0x10 */
+} MPI2_FLASH_LAYOUT_DATA, *PTR_MPI2_FLASH_LAYOUT_DATA,
+ Mpi2FlashLayoutData_t, *pMpi2FlashLayoutData_t;
+
+/*defines for the RegionType field */
+#define MPI2_FLASH_REGION_UNUSED (0x00)
+#define MPI2_FLASH_REGION_FIRMWARE (0x01)
+#define MPI2_FLASH_REGION_BIOS (0x02)
+#define MPI2_FLASH_REGION_NVDATA (0x03)
+#define MPI2_FLASH_REGION_FIRMWARE_BACKUP (0x05)
+#define MPI2_FLASH_REGION_MFG_INFORMATION (0x06)
+#define MPI2_FLASH_REGION_CONFIG_1 (0x07)
+#define MPI2_FLASH_REGION_CONFIG_2 (0x08)
+#define MPI2_FLASH_REGION_MEGARAID (0x09)
+#define MPI2_FLASH_REGION_COMMON_BOOT_BLOCK (0x0A)
+#define MPI2_FLASH_REGION_INIT (MPI2_FLASH_REGION_COMMON_BOOT_BLOCK)
+#define MPI2_FLASH_REGION_CBB_BACKUP (0x0D)
+#define MPI2_FLASH_REGION_SBR (0x0E)
+#define MPI2_FLASH_REGION_SBR_BACKUP (0x0F)
+#define MPI2_FLASH_REGION_HIIM (0x10)
+#define MPI2_FLASH_REGION_HIIA (0x11)
+#define MPI2_FLASH_REGION_CTLR (0x12)
+#define MPI2_FLASH_REGION_IMR_FIRMWARE (0x13)
+#define MPI2_FLASH_REGION_MR_NVDATA (0x14)
+#define MPI2_FLASH_REGION_CPLD (0x15)
+#define MPI2_FLASH_REGION_PSOC (0x16)
+
+/*ImageRevision */
+#define MPI2_FLASH_LAYOUT_IMAGE_REVISION (0x00)
+
+/*Supported Devices Extended Image Data */
+
+/*
+ *Host code (drivers, BIOS, utilities, etc.) should leave this define set to
+ *one and check NumberOfDevices at runtime.
+ */
+#ifndef MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES
+#define MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES (1)
+#endif
+
+typedef struct _MPI2_SUPPORTED_DEVICE {
+ U16 DeviceID; /*0x00 */
+ U16 VendorID; /*0x02 */
+ U16 DeviceIDMask; /*0x04 */
+ U16 Reserved1; /*0x06 */
+ U8 LowPCIRev; /*0x08 */
+ U8 HighPCIRev; /*0x09 */
+ U16 Reserved2; /*0x0A */
+ U32 Reserved3; /*0x0C */
+} MPI2_SUPPORTED_DEVICE, *PTR_MPI2_SUPPORTED_DEVICE,
+ Mpi2SupportedDevice_t, *pMpi2SupportedDevice_t;
+
+typedef struct _MPI2_SUPPORTED_DEVICES_DATA {
+ U8 ImageRevision; /*0x00 */
+ U8 Reserved1; /*0x01 */
+ U8 NumberOfDevices; /*0x02 */
+ U8 Reserved2; /*0x03 */
+ U32 Reserved3; /*0x04 */
+ MPI2_SUPPORTED_DEVICE
+ SupportedDevice[MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES];/*0x08 */
+} MPI2_SUPPORTED_DEVICES_DATA, *PTR_MPI2_SUPPORTED_DEVICES_DATA,
+ Mpi2SupportedDevicesData_t, *pMpi2SupportedDevicesData_t;
+
+/*ImageRevision */
+#define MPI2_SUPPORTED_DEVICES_IMAGE_REVISION (0x00)
+
+/*Init Extended Image Data */
+
+typedef struct _MPI2_INIT_IMAGE_FOOTER {
+ U32 BootFlags; /*0x00 */
+ U32 ImageSize; /*0x04 */
+ U32 Signature0; /*0x08 */
+ U32 Signature1; /*0x0C */
+ U32 Signature2; /*0x10 */
+ U32 ResetVector; /*0x14 */
+} MPI2_INIT_IMAGE_FOOTER, *PTR_MPI2_INIT_IMAGE_FOOTER,
+ Mpi2InitImageFooter_t, *pMpi2InitImageFooter_t;
+
+/*defines for the BootFlags field */
+#define MPI2_INIT_IMAGE_BOOTFLAGS_OFFSET (0x00)
+
+/*defines for the ImageSize field */
+#define MPI2_INIT_IMAGE_IMAGESIZE_OFFSET (0x04)
+
+/*defines for the Signature0 field */
+#define MPI2_INIT_IMAGE_SIGNATURE0_OFFSET (0x08)
+#define MPI2_INIT_IMAGE_SIGNATURE0 (0x5AA55AEA)
+
+/*defines for the Signature1 field */
+#define MPI2_INIT_IMAGE_SIGNATURE1_OFFSET (0x0C)
+#define MPI2_INIT_IMAGE_SIGNATURE1 (0xA55AEAA5)
+
+/*defines for the Signature2 field */
+#define MPI2_INIT_IMAGE_SIGNATURE2_OFFSET (0x10)
+#define MPI2_INIT_IMAGE_SIGNATURE2 (0x5AEAA55A)
+
+/*Signature fields as individual bytes */
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_0 (0xEA)
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_1 (0x5A)
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_2 (0xA5)
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_3 (0x5A)
+
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_4 (0xA5)
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_5 (0xEA)
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_6 (0x5A)
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_7 (0xA5)
+
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_8 (0x5A)
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_9 (0xA5)
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_A (0xEA)
+#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_B (0x5A)
+
+/*defines for the ResetVector field */
+#define MPI2_INIT_IMAGE_RESETVECTOR_OFFSET (0x14)
+
+
+/* Encrypted Hash Extended Image Data */
+
+typedef struct _MPI25_ENCRYPTED_HASH_ENTRY {
+ U8 HashImageType; /*0x00 */
+ U8 HashAlgorithm; /*0x01 */
+ U8 EncryptionAlgorithm; /*0x02 */
+ U8 Reserved1; /*0x03 */
+ U32 Reserved2; /*0x04 */
+ U32 EncryptedHash[1]; /*0x08 */ /* variable length */
+} MPI25_ENCRYPTED_HASH_ENTRY, *PTR_MPI25_ENCRYPTED_HASH_ENTRY,
+Mpi25EncryptedHashEntry_t, *pMpi25EncryptedHashEntry_t;
+
+/* values for HashImageType */
+#define MPI25_HASH_IMAGE_TYPE_UNUSED (0x00)
+#define MPI25_HASH_IMAGE_TYPE_FIRMWARE (0x01)
+#define MPI25_HASH_IMAGE_TYPE_BIOS (0x02)
+
+#define MPI26_HASH_IMAGE_TYPE_UNUSED (0x00)
+#define MPI26_HASH_IMAGE_TYPE_FIRMWARE (0x01)
+#define MPI26_HASH_IMAGE_TYPE_BIOS (0x02)
+#define MPI26_HASH_IMAGE_TYPE_KEY_HASH (0x03)
+
+/* values for HashAlgorithm */
+#define MPI25_HASH_ALGORITHM_UNUSED (0x00)
+#define MPI25_HASH_ALGORITHM_SHA256 (0x01)
+
+#define MPI26_HASH_ALGORITHM_VERSION_MASK (0xE0)
+#define MPI26_HASH_ALGORITHM_VERSION_NONE (0x00)
+#define MPI26_HASH_ALGORITHM_VERSION_SHA1 (0x20)
+#define MPI26_HASH_ALGORITHM_VERSION_SHA2 (0x40)
+#define MPI26_HASH_ALGORITHM_VERSION_SHA3 (0x60)
+#define MPI26_HASH_ALGORITHM_SIZE_MASK (0x1F)
+#define MPI26_HASH_ALGORITHM_SIZE_256 (0x01)
+#define MPI26_HASH_ALGORITHM_SIZE_512 (0x02)
+
+
+/* values for EncryptionAlgorithm */
+#define MPI25_ENCRYPTION_ALG_UNUSED (0x00)
+#define MPI25_ENCRYPTION_ALG_RSA256 (0x01)
+
+#define MPI26_ENCRYPTION_ALG_UNUSED (0x00)
+#define MPI26_ENCRYPTION_ALG_RSA256 (0x01)
+#define MPI26_ENCRYPTION_ALG_RSA512 (0x02)
+#define MPI26_ENCRYPTION_ALG_RSA1024 (0x03)
+#define MPI26_ENCRYPTION_ALG_RSA2048 (0x04)
+#define MPI26_ENCRYPTION_ALG_RSA4096 (0x05)
+
+typedef struct _MPI25_ENCRYPTED_HASH_DATA {
+ U8 ImageVersion; /*0x00 */
+ U8 NumHash; /*0x01 */
+ U16 Reserved1; /*0x02 */
+ U32 Reserved2; /*0x04 */
+ MPI25_ENCRYPTED_HASH_ENTRY EncryptedHashEntry[1]; /*0x08 */
+} MPI25_ENCRYPTED_HASH_DATA, *PTR_MPI25_ENCRYPTED_HASH_DATA,
+Mpi25EncryptedHashData_t, *pMpi25EncryptedHashData_t;
+
+
+#endif /* MPI2_IMAGE_H */
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_init.h b/drivers/scsi/mpt3sas/mpi/mpi2_init.h
index 6213ce6791ac..8f1b903fe0a9 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_init.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_init.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright 2000-2015 Avago Technologies. All rights reserved.
+ * Copyright 2000-2020 Broadcom Inc. All rights reserved.
*
*
* Name: mpi2_init.h
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h
index 1faec3a93e69..68ea408cd5c5 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h
@@ -1,13 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright 2000-2015 Avago Technologies. All rights reserved.
+ * Copyright 2000-2020 Broadcom Inc. All rights reserved.
*
*
* Name: mpi2_ioc.h
* Title: MPI IOC, Port, Event, FW Download, and FW Upload messages
* Creation Date: October 11, 2006
*
- * mpi2_ioc.h Version: 02.00.34
+ * mpi2_ioc.h Version: 02.00.37
*
* NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
* prefix are for use only on MPI v2.5 products, and must not be used
@@ -171,6 +171,10 @@
* 09-29-17 02.00.34 Added MPI26_EVENT_PCIDEV_STAT_RC_PCIE_HOT_RESET_FAILED
* to the ReasonCode field in PCIe Device Status Change
* Event Data.
+ * 07-22-18 02.00.35 Added FW_DOWNLOAD_ITYPE_CPLD and _PSOC.
+ * Moved FW image definitions ionto new mpi2_image,h
+ * 08-14-18 02.00.36 Fixed definition of MPI2_FW_DOWNLOAD_ITYPE_PSOC (0x16)
+ * 09-07-18 02.00.37 Added MPI26_EVENT_PCIE_TOPO_PI_16_LANES
* --------------------------------------------------------------------------
*/
@@ -1255,6 +1259,7 @@ typedef struct _MPI26_EVENT_PCIE_TOPO_PORT_ENTRY {
#define MPI26_EVENT_PCIE_TOPO_PI_2_LANES (0x20)
#define MPI26_EVENT_PCIE_TOPO_PI_4_LANES (0x30)
#define MPI26_EVENT_PCIE_TOPO_PI_8_LANES (0x40)
+#define MPI26_EVENT_PCIE_TOPO_PI_16_LANES (0x50)
#define MPI26_EVENT_PCIE_TOPO_PI_RATE_MASK (0x0F)
#define MPI26_EVENT_PCIE_TOPO_PI_RATE_UNKNOWN (0x00)
@@ -1450,7 +1455,11 @@ typedef struct _MPI2_FW_DOWNLOAD_REQUEST {
#define MPI2_FW_DOWNLOAD_ITYPE_CTLR (0x12)
#define MPI2_FW_DOWNLOAD_ITYPE_IMR_FIRMWARE (0x13)
#define MPI2_FW_DOWNLOAD_ITYPE_MR_NVDATA (0x14)
+/*MPI v2.6 and newer */
+#define MPI2_FW_DOWNLOAD_ITYPE_CPLD (0x15)
+#define MPI2_FW_DOWNLOAD_ITYPE_PSOC (0x16)
#define MPI2_FW_DOWNLOAD_ITYPE_MIN_PRODUCT_SPECIFIC (0xF0)
+#define MPI2_FW_DOWNLOAD_ITYPE_TERMINATE (0xFF)
/*MPI v2.0 FWDownload TransactionContext Element */
typedef struct _MPI2_FW_DOWNLOAD_TCSGE {
@@ -1597,352 +1606,6 @@ typedef struct _MPI2_FW_UPLOAD_REPLY {
} MPI2_FW_UPLOAD_REPLY, *PTR_MPI2_FW_UPLOAD_REPLY,
Mpi2FWUploadReply_t, *pMPi2FWUploadReply_t;
-/*FW Image Header */
-typedef struct _MPI2_FW_IMAGE_HEADER {
- U32 Signature; /*0x00 */
- U32 Signature0; /*0x04 */
- U32 Signature1; /*0x08 */
- U32 Signature2; /*0x0C */
- MPI2_VERSION_UNION MPIVersion; /*0x10 */
- MPI2_VERSION_UNION FWVersion; /*0x14 */
- MPI2_VERSION_UNION NVDATAVersion; /*0x18 */
- MPI2_VERSION_UNION PackageVersion; /*0x1C */
- U16 VendorID; /*0x20 */
- U16 ProductID; /*0x22 */
- U16 ProtocolFlags; /*0x24 */
- U16 Reserved26; /*0x26 */
- U32 IOCCapabilities; /*0x28 */
- U32 ImageSize; /*0x2C */
- U32 NextImageHeaderOffset; /*0x30 */
- U32 Checksum; /*0x34 */
- U32 Reserved38; /*0x38 */
- U32 Reserved3C; /*0x3C */
- U32 Reserved40; /*0x40 */
- U32 Reserved44; /*0x44 */
- U32 Reserved48; /*0x48 */
- U32 Reserved4C; /*0x4C */
- U32 Reserved50; /*0x50 */
- U32 Reserved54; /*0x54 */
- U32 Reserved58; /*0x58 */
- U32 Reserved5C; /*0x5C */
- U32 BootFlags; /*0x60 */
- U32 FirmwareVersionNameWhat; /*0x64 */
- U8 FirmwareVersionName[32]; /*0x68 */
- U32 VendorNameWhat; /*0x88 */
- U8 VendorName[32]; /*0x8C */
- U32 PackageNameWhat; /*0x88 */
- U8 PackageName[32]; /*0x8C */
- U32 ReservedD0; /*0xD0 */
- U32 ReservedD4; /*0xD4 */
- U32 ReservedD8; /*0xD8 */
- U32 ReservedDC; /*0xDC */
- U32 ReservedE0; /*0xE0 */
- U32 ReservedE4; /*0xE4 */
- U32 ReservedE8; /*0xE8 */
- U32 ReservedEC; /*0xEC */
- U32 ReservedF0; /*0xF0 */
- U32 ReservedF4; /*0xF4 */
- U32 ReservedF8; /*0xF8 */
- U32 ReservedFC; /*0xFC */
-} MPI2_FW_IMAGE_HEADER, *PTR_MPI2_FW_IMAGE_HEADER,
- Mpi2FWImageHeader_t, *pMpi2FWImageHeader_t;
-
-/*Signature field */
-#define MPI2_FW_HEADER_SIGNATURE_OFFSET (0x00)
-#define MPI2_FW_HEADER_SIGNATURE_MASK (0xFF000000)
-#define MPI2_FW_HEADER_SIGNATURE (0xEA000000)
-#define MPI26_FW_HEADER_SIGNATURE (0xEB000000)
-
-/*Signature0 field */
-#define MPI2_FW_HEADER_SIGNATURE0_OFFSET (0x04)
-#define MPI2_FW_HEADER_SIGNATURE0 (0x5AFAA55A)
-/* Last byte is defined by architecture */
-#define MPI26_FW_HEADER_SIGNATURE0_BASE (0x5AEAA500)
-#define MPI26_FW_HEADER_SIGNATURE0_ARC_0 (0x5A)
-#define MPI26_FW_HEADER_SIGNATURE0_ARC_1 (0x00)
-#define MPI26_FW_HEADER_SIGNATURE0_ARC_2 (0x01)
-/* legacy (0x5AEAA55A) */
-#define MPI26_FW_HEADER_SIGNATURE0_ARC_3 (0x02)
-#define MPI26_FW_HEADER_SIGNATURE0 \
- (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_0)
-#define MPI26_FW_HEADER_SIGNATURE0_3516 \
- (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_1)
-#define MPI26_FW_HEADER_SIGNATURE0_4008 \
- (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_3)
-
-/*Signature1 field */
-#define MPI2_FW_HEADER_SIGNATURE1_OFFSET (0x08)
-#define MPI2_FW_HEADER_SIGNATURE1 (0xA55AFAA5)
-#define MPI26_FW_HEADER_SIGNATURE1 (0xA55AEAA5)
-
-/*Signature2 field */
-#define MPI2_FW_HEADER_SIGNATURE2_OFFSET (0x0C)
-#define MPI2_FW_HEADER_SIGNATURE2 (0x5AA55AFA)
-#define MPI26_FW_HEADER_SIGNATURE2 (0x5AA55AEA)
-
-/*defines for using the ProductID field */
-#define MPI2_FW_HEADER_PID_TYPE_MASK (0xF000)
-#define MPI2_FW_HEADER_PID_TYPE_SAS (0x2000)
-
-#define MPI2_FW_HEADER_PID_PROD_MASK (0x0F00)
-#define MPI2_FW_HEADER_PID_PROD_A (0x0000)
-#define MPI2_FW_HEADER_PID_PROD_TARGET_INITIATOR_SCSI (0x0200)
-#define MPI2_FW_HEADER_PID_PROD_IR_SCSI (0x0700)
-
-#define MPI2_FW_HEADER_PID_FAMILY_MASK (0x00FF)
-/*SAS ProductID Family bits */
-#define MPI2_FW_HEADER_PID_FAMILY_2108_SAS (0x0013)
-#define MPI2_FW_HEADER_PID_FAMILY_2208_SAS (0x0014)
-#define MPI25_FW_HEADER_PID_FAMILY_3108_SAS (0x0021)
-#define MPI26_FW_HEADER_PID_FAMILY_3324_SAS (0x0028)
-#define MPI26_FW_HEADER_PID_FAMILY_3516_SAS (0x0031)
-
-/*use MPI2_IOCFACTS_PROTOCOL_ defines for ProtocolFlags field */
-
-/*use MPI2_IOCFACTS_CAPABILITY_ defines for IOCCapabilities field */
-
-#define MPI2_FW_HEADER_IMAGESIZE_OFFSET (0x2C)
-#define MPI2_FW_HEADER_NEXTIMAGE_OFFSET (0x30)
-#define MPI26_FW_HEADER_BOOTFLAGS_OFFSET (0x60)
-#define MPI2_FW_HEADER_VERNMHWAT_OFFSET (0x64)
-
-#define MPI2_FW_HEADER_WHAT_SIGNATURE (0x29232840)
-
-#define MPI2_FW_HEADER_SIZE (0x100)
-
-/*Extended Image Header */
-typedef struct _MPI2_EXT_IMAGE_HEADER {
- U8 ImageType; /*0x00 */
- U8 Reserved1; /*0x01 */
- U16 Reserved2; /*0x02 */
- U32 Checksum; /*0x04 */
- U32 ImageSize; /*0x08 */
- U32 NextImageHeaderOffset; /*0x0C */
- U32 PackageVersion; /*0x10 */
- U32 Reserved3; /*0x14 */
- U32 Reserved4; /*0x18 */
- U32 Reserved5; /*0x1C */
- U8 IdentifyString[32]; /*0x20 */
-} MPI2_EXT_IMAGE_HEADER, *PTR_MPI2_EXT_IMAGE_HEADER,
- Mpi2ExtImageHeader_t, *pMpi2ExtImageHeader_t;
-
-/*useful offsets */
-#define MPI2_EXT_IMAGE_IMAGETYPE_OFFSET (0x00)
-#define MPI2_EXT_IMAGE_IMAGESIZE_OFFSET (0x08)
-#define MPI2_EXT_IMAGE_NEXTIMAGE_OFFSET (0x0C)
-
-#define MPI2_EXT_IMAGE_HEADER_SIZE (0x40)
-
-/*defines for the ImageType field */
-#define MPI2_EXT_IMAGE_TYPE_UNSPECIFIED (0x00)
-#define MPI2_EXT_IMAGE_TYPE_FW (0x01)
-#define MPI2_EXT_IMAGE_TYPE_NVDATA (0x03)
-#define MPI2_EXT_IMAGE_TYPE_BOOTLOADER (0x04)
-#define MPI2_EXT_IMAGE_TYPE_INITIALIZATION (0x05)
-#define MPI2_EXT_IMAGE_TYPE_FLASH_LAYOUT (0x06)
-#define MPI2_EXT_IMAGE_TYPE_SUPPORTED_DEVICES (0x07)
-#define MPI2_EXT_IMAGE_TYPE_MEGARAID (0x08)
-#define MPI2_EXT_IMAGE_TYPE_ENCRYPTED_HASH (0x09)
-#define MPI2_EXT_IMAGE_TYPE_MIN_PRODUCT_SPECIFIC (0x80)
-#define MPI2_EXT_IMAGE_TYPE_MAX_PRODUCT_SPECIFIC (0xFF)
-
-#define MPI2_EXT_IMAGE_TYPE_MAX (MPI2_EXT_IMAGE_TYPE_MAX_PRODUCT_SPECIFIC)
-
-/*FLASH Layout Extended Image Data */
-
-/*
- *Host code (drivers, BIOS, utilities, etc.) should leave this define set to
- *one and check RegionsPerLayout at runtime.
- */
-#ifndef MPI2_FLASH_NUMBER_OF_REGIONS
-#define MPI2_FLASH_NUMBER_OF_REGIONS (1)
-#endif
-
-/*
- *Host code (drivers, BIOS, utilities, etc.) should leave this define set to
- *one and check NumberOfLayouts at runtime.
- */
-#ifndef MPI2_FLASH_NUMBER_OF_LAYOUTS
-#define MPI2_FLASH_NUMBER_OF_LAYOUTS (1)
-#endif
-
-typedef struct _MPI2_FLASH_REGION {
- U8 RegionType; /*0x00 */
- U8 Reserved1; /*0x01 */
- U16 Reserved2; /*0x02 */
- U32 RegionOffset; /*0x04 */
- U32 RegionSize; /*0x08 */
- U32 Reserved3; /*0x0C */
-} MPI2_FLASH_REGION, *PTR_MPI2_FLASH_REGION,
- Mpi2FlashRegion_t, *pMpi2FlashRegion_t;
-
-typedef struct _MPI2_FLASH_LAYOUT {
- U32 FlashSize; /*0x00 */
- U32 Reserved1; /*0x04 */
- U32 Reserved2; /*0x08 */
- U32 Reserved3; /*0x0C */
- MPI2_FLASH_REGION Region[MPI2_FLASH_NUMBER_OF_REGIONS]; /*0x10 */
-} MPI2_FLASH_LAYOUT, *PTR_MPI2_FLASH_LAYOUT,
- Mpi2FlashLayout_t, *pMpi2FlashLayout_t;
-
-typedef struct _MPI2_FLASH_LAYOUT_DATA {
- U8 ImageRevision; /*0x00 */
- U8 Reserved1; /*0x01 */
- U8 SizeOfRegion; /*0x02 */
- U8 Reserved2; /*0x03 */
- U16 NumberOfLayouts; /*0x04 */
- U16 RegionsPerLayout; /*0x06 */
- U16 MinimumSectorAlignment; /*0x08 */
- U16 Reserved3; /*0x0A */
- U32 Reserved4; /*0x0C */
- MPI2_FLASH_LAYOUT Layout[MPI2_FLASH_NUMBER_OF_LAYOUTS]; /*0x10 */
-} MPI2_FLASH_LAYOUT_DATA, *PTR_MPI2_FLASH_LAYOUT_DATA,
- Mpi2FlashLayoutData_t, *pMpi2FlashLayoutData_t;
-
-/*defines for the RegionType field */
-#define MPI2_FLASH_REGION_UNUSED (0x00)
-#define MPI2_FLASH_REGION_FIRMWARE (0x01)
-#define MPI2_FLASH_REGION_BIOS (0x02)
-#define MPI2_FLASH_REGION_NVDATA (0x03)
-#define MPI2_FLASH_REGION_FIRMWARE_BACKUP (0x05)
-#define MPI2_FLASH_REGION_MFG_INFORMATION (0x06)
-#define MPI2_FLASH_REGION_CONFIG_1 (0x07)
-#define MPI2_FLASH_REGION_CONFIG_2 (0x08)
-#define MPI2_FLASH_REGION_MEGARAID (0x09)
-#define MPI2_FLASH_REGION_COMMON_BOOT_BLOCK (0x0A)
-#define MPI2_FLASH_REGION_INIT (MPI2_FLASH_REGION_COMMON_BOOT_BLOCK)
-#define MPI2_FLASH_REGION_CBB_BACKUP (0x0D)
-#define MPI2_FLASH_REGION_SBR (0x0E)
-#define MPI2_FLASH_REGION_SBR_BACKUP (0x0F)
-#define MPI2_FLASH_REGION_HIIM (0x10)
-#define MPI2_FLASH_REGION_HIIA (0x11)
-#define MPI2_FLASH_REGION_CTLR (0x12)
-#define MPI2_FLASH_REGION_IMR_FIRMWARE (0x13)
-#define MPI2_FLASH_REGION_MR_NVDATA (0x14)
-
-/*ImageRevision */
-#define MPI2_FLASH_LAYOUT_IMAGE_REVISION (0x00)
-
-/*Supported Devices Extended Image Data */
-
-/*
- *Host code (drivers, BIOS, utilities, etc.) should leave this define set to
- *one and check NumberOfDevices at runtime.
- */
-#ifndef MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES
-#define MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES (1)
-#endif
-
-typedef struct _MPI2_SUPPORTED_DEVICE {
- U16 DeviceID; /*0x00 */
- U16 VendorID; /*0x02 */
- U16 DeviceIDMask; /*0x04 */
- U16 Reserved1; /*0x06 */
- U8 LowPCIRev; /*0x08 */
- U8 HighPCIRev; /*0x09 */
- U16 Reserved2; /*0x0A */
- U32 Reserved3; /*0x0C */
-} MPI2_SUPPORTED_DEVICE, *PTR_MPI2_SUPPORTED_DEVICE,
- Mpi2SupportedDevice_t, *pMpi2SupportedDevice_t;
-
-typedef struct _MPI2_SUPPORTED_DEVICES_DATA {
- U8 ImageRevision; /*0x00 */
- U8 Reserved1; /*0x01 */
- U8 NumberOfDevices; /*0x02 */
- U8 Reserved2; /*0x03 */
- U32 Reserved3; /*0x04 */
- MPI2_SUPPORTED_DEVICE
- SupportedDevice[MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES];/*0x08 */
-} MPI2_SUPPORTED_DEVICES_DATA, *PTR_MPI2_SUPPORTED_DEVICES_DATA,
- Mpi2SupportedDevicesData_t, *pMpi2SupportedDevicesData_t;
-
-/*ImageRevision */
-#define MPI2_SUPPORTED_DEVICES_IMAGE_REVISION (0x00)
-
-/*Init Extended Image Data */
-
-typedef struct _MPI2_INIT_IMAGE_FOOTER {
- U32 BootFlags; /*0x00 */
- U32 ImageSize; /*0x04 */
- U32 Signature0; /*0x08 */
- U32 Signature1; /*0x0C */
- U32 Signature2; /*0x10 */
- U32 ResetVector; /*0x14 */
-} MPI2_INIT_IMAGE_FOOTER, *PTR_MPI2_INIT_IMAGE_FOOTER,
- Mpi2InitImageFooter_t, *pMpi2InitImageFooter_t;
-
-/*defines for the BootFlags field */
-#define MPI2_INIT_IMAGE_BOOTFLAGS_OFFSET (0x00)
-
-/*defines for the ImageSize field */
-#define MPI2_INIT_IMAGE_IMAGESIZE_OFFSET (0x04)
-
-/*defines for the Signature0 field */
-#define MPI2_INIT_IMAGE_SIGNATURE0_OFFSET (0x08)
-#define MPI2_INIT_IMAGE_SIGNATURE0 (0x5AA55AEA)
-
-/*defines for the Signature1 field */
-#define MPI2_INIT_IMAGE_SIGNATURE1_OFFSET (0x0C)
-#define MPI2_INIT_IMAGE_SIGNATURE1 (0xA55AEAA5)
-
-/*defines for the Signature2 field */
-#define MPI2_INIT_IMAGE_SIGNATURE2_OFFSET (0x10)
-#define MPI2_INIT_IMAGE_SIGNATURE2 (0x5AEAA55A)
-
-/*Signature fields as individual bytes */
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_0 (0xEA)
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_1 (0x5A)
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_2 (0xA5)
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_3 (0x5A)
-
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_4 (0xA5)
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_5 (0xEA)
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_6 (0x5A)
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_7 (0xA5)
-
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_8 (0x5A)
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_9 (0xA5)
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_A (0xEA)
-#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_B (0x5A)
-
-/*defines for the ResetVector field */
-#define MPI2_INIT_IMAGE_RESETVECTOR_OFFSET (0x14)
-
-
-/* Encrypted Hash Extended Image Data */
-
-typedef struct _MPI25_ENCRYPTED_HASH_ENTRY {
- U8 HashImageType; /* 0x00 */
- U8 HashAlgorithm; /* 0x01 */
- U8 EncryptionAlgorithm; /* 0x02 */
- U8 Reserved1; /* 0x03 */
- U32 Reserved2; /* 0x04 */
- U32 EncryptedHash[1]; /* 0x08 */ /* variable length */
-} MPI25_ENCRYPTED_HASH_ENTRY, *PTR_MPI25_ENCRYPTED_HASH_ENTRY,
-Mpi25EncryptedHashEntry_t, *pMpi25EncryptedHashEntry_t;
-
-/* values for HashImageType */
-#define MPI25_HASH_IMAGE_TYPE_UNUSED (0x00)
-#define MPI25_HASH_IMAGE_TYPE_FIRMWARE (0x01)
-#define MPI25_HASH_IMAGE_TYPE_BIOS (0x02)
-
-/* values for HashAlgorithm */
-#define MPI25_HASH_ALGORITHM_UNUSED (0x00)
-#define MPI25_HASH_ALGORITHM_SHA256 (0x01)
-
-/* values for EncryptionAlgorithm */
-#define MPI25_ENCRYPTION_ALG_UNUSED (0x00)
-#define MPI25_ENCRYPTION_ALG_RSA256 (0x01)
-
-typedef struct _MPI25_ENCRYPTED_HASH_DATA {
- U8 ImageVersion; /* 0x00 */
- U8 NumHash; /* 0x01 */
- U16 Reserved1; /* 0x02 */
- U32 Reserved2; /* 0x04 */
- MPI25_ENCRYPTED_HASH_ENTRY EncryptedHashEntry[1]; /* 0x08 */
-} MPI25_ENCRYPTED_HASH_DATA, *PTR_MPI25_ENCRYPTED_HASH_DATA,
-Mpi25EncryptedHashData_t, *pMpi25EncryptedHashData_t;
-
/****************************************************************************
* PowerManagementControl message
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_pci.h b/drivers/scsi/mpt3sas/mpi/mpi2_pci.h
index f0281f943ec9..63a09509d7d1 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_pci.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_pci.h
@@ -1,12 +1,12 @@
/*
- * Copyright 2012-2015 Avago Technologies. All rights reserved.
+ * Copyright 2000-2020 Broadcom Inc. All rights reserved.
*
*
* Name: mpi2_pci.h
* Title: MPI PCIe Attached Devices structures and definitions.
* Creation Date: October 9, 2012
*
- * mpi2_pci.h Version: 02.00.02
+ * mpi2_pci.h Version: 02.00.03
*
* NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
* prefix are for use only on MPI v2.5 products, and must not be used
@@ -23,6 +23,7 @@
* Removed SOP support.
* 07-01-16 02.00.02 Added MPI26_NVME_FLAGS_FORCE_ADMIN_ERR_RESP to
* NVME Encapsulated Request.
+ * 07-22-18 02.00.03 Updted flags field for NVME Encapsulated req
* --------------------------------------------------------------------------
*/
@@ -75,10 +76,10 @@ typedef struct _MPI26_NVME_ENCAPSULATED_REQUEST {
#define MPI26_NVME_FLAGS_SUBMISSIONQ_ADMIN (0x0010)
/*Error Response Address Space */
#define MPI26_NVME_FLAGS_MASK_ERROR_RSP_ADDR (0x000C)
+#define MPI26_NVME_FLAGS_MASK_ERROR_RSP_ADDR_MASK (0x000C)
#define MPI26_NVME_FLAGS_SYSTEM_RSP_ADDR (0x0000)
-#define MPI26_NVME_FLAGS_IOCPLB_RSP_ADDR (0x0008)
-#define MPI26_NVME_FLAGS_IOCPLBNTA_RSP_ADDR (0x000C)
-/*Data Direction*/
+#define MPI26_NVME_FLAGS_IOCCTL_RSP_ADDR (0x0008)
+/* Data Direction*/
#define MPI26_NVME_FLAGS_DATADIRECTION_MASK (0x0003)
#define MPI26_NVME_FLAGS_NODATATRANSFER (0x0000)
#define MPI26_NVME_FLAGS_WRITE (0x0001)
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_raid.h b/drivers/scsi/mpt3sas/mpi/mpi2_raid.h
index b9bb1c178f12..b770eb516c14 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_raid.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_raid.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright 2000-2014 Avago Technologies. All rights reserved.
+ * Copyright 2000-2020 Broadcom Inc. All rights reserved.
*
*
* Name: mpi2_raid.h
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_sas.h b/drivers/scsi/mpt3sas/mpi/mpi2_sas.h
index afa17ff246b4..16c922a8a02b 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_sas.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_sas.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright 2000-2015 Avago Technologies. All rights reserved.
+ * Copyright 2000-2020 Broadcom Inc. All rights reserved.
*
*
* Name: mpi2_sas.h
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_tool.h b/drivers/scsi/mpt3sas/mpi/mpi2_tool.h
index 629296ee9236..3f966b6796b3 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_tool.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_tool.h
@@ -1,13 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright 2000-2014 Avago Technologies. All rights reserved.
+ * Copyright 2000-2020 Broadcom Inc. All rights reserved.
*
*
* Name: mpi2_tool.h
* Title: MPI diagnostic tool structures and definitions
* Creation Date: March 26, 2007
*
- * mpi2_tool.h Version: 02.00.14
+ * mpi2_tool.h Version: 02.00.15
*
* Version History
* ---------------
@@ -38,6 +38,8 @@
* 11-18-14 02.00.13 Updated copyright information.
* 08-25-16 02.00.14 Added new values for the Flags field of Toolbox Clean
* Tool Request Message.
+ * 07-22-18 02.00.15 Added defines for new TOOLBOX_PCIE_LANE_MARGINING tool.
+ * Added option for DeviceInfo field in ISTWI tool.
* --------------------------------------------------------------------------
*/
@@ -58,6 +60,7 @@
#define MPI2_TOOLBOX_BEACON_TOOL (0x05)
#define MPI2_TOOLBOX_DIAGNOSTIC_CLI_TOOL (0x06)
#define MPI2_TOOLBOX_TEXT_DISPLAY_TOOL (0x07)
+#define MPI26_TOOLBOX_BACKEND_PCIE_LANE_MARGIN (0x08)
/****************************************************************************
* Toolbox reply
@@ -226,6 +229,13 @@ typedef struct _MPI2_TOOLBOX_ISTWI_READ_WRITE_REQUEST {
#define MPI2_TOOL_ISTWI_FLAG_AUTO_RESERVE_RELEASE (0x80)
#define MPI2_TOOL_ISTWI_FLAG_PAGE_ADDR_MASK (0x07)
+/*MPI26 TOOLBOX Request MsgFlags defines */
+#define MPI26_TOOLBOX_REQ_MSGFLAGS_ADDRESSING_MASK (0x01)
+/*Request uses Man Page 43 device index addressing */
+#define MPI26_TOOLBOX_REQ_MSGFLAGS_ADDRESSING_DEVINDEX (0x00)
+/*Request uses Man Page 43 device info struct addressing */
+#define MPI26_TOOLBOX_REQ_MSGFLAGS_ADDRESSING_DEVINFO (0x01)
+
/*Toolbox ISTWI Read Write Tool reply message */
typedef struct _MPI2_TOOLBOX_ISTWI_REPLY {
U8 Tool; /*0x00 */
@@ -387,6 +397,64 @@ Mpi2ToolboxTextDisplayRequest_t,
#define MPI2_TOOLBOX_CONSOLE_FLAG_TIMESTAMP (0x01)
+/***************************************************************************
+ * Toolbox Backend Lane Margining Tool
+ ***************************************************************************
+ */
+
+/*Toolbox Backend Lane Margining Tool request message */
+typedef struct _MPI26_TOOLBOX_LANE_MARGINING_REQUEST {
+ U8 Tool; /*0x00 */
+ U8 Reserved1; /*0x01 */
+ U8 ChainOffset; /*0x02 */
+ U8 Function; /*0x03 */
+ U16 Reserved2; /*0x04 */
+ U8 Reserved3; /*0x06 */
+ U8 MsgFlags; /*0x07 */
+ U8 VP_ID; /*0x08 */
+ U8 VF_ID; /*0x09 */
+ U16 Reserved4; /*0x0A */
+ U8 Command; /*0x0C */
+ U8 SwitchPort; /*0x0D */
+ U16 DevHandle; /*0x0E */
+ U8 RegisterOffset; /*0x10 */
+ U8 Reserved5; /*0x11 */
+ U16 DataLength; /*0x12 */
+ MPI25_SGE_IO_UNION SGL; /*0x14 */
+} MPI26_TOOLBOX_LANE_MARGINING_REQUEST,
+ *PTR_MPI2_TOOLBOX_LANE_MARGINING_REQUEST,
+ Mpi26ToolboxLaneMarginingRequest_t,
+ *pMpi2ToolboxLaneMarginingRequest_t;
+
+/* defines for the Command field */
+#define MPI26_TOOL_MARGIN_COMMAND_ENTER_MARGIN_MODE (0x01)
+#define MPI26_TOOL_MARGIN_COMMAND_READ_REGISTER_DATA (0x02)
+#define MPI26_TOOL_MARGIN_COMMAND_WRITE_REGISTER_DATA (0x03)
+#define MPI26_TOOL_MARGIN_COMMAND_EXIT_MARGIN_MODE (0x04)
+
+
+/*Toolbox Backend Lane Margining Tool reply message */
+typedef struct _MPI26_TOOLBOX_LANE_MARGINING_REPLY {
+ U8 Tool; /*0x00 */
+ U8 Reserved1; /*0x01 */
+ U8 MsgLength; /*0x02 */
+ U8 Function; /*0x03 */
+ U16 Reserved2; /*0x04 */
+ U8 Reserved3; /*0x06 */
+ U8 MsgFlags; /*0x07 */
+ U8 VP_ID; /*0x08 */
+ U8 VF_ID; /*0x09 */
+ U16 Reserved4; /*0x0A */
+ U16 Reserved5; /*0x0C */
+ U16 IOCStatus; /*0x0E */
+ U32 IOCLogInfo; /*0x10 */
+ U16 ReturnedDataLength; /*0x14 */
+ U16 Reserved6; /*0x16 */
+} MPI26_TOOLBOX_LANE_MARGINING_REPLY,
+ *PTR_MPI26_TOOLBOX_LANE_MARGINING_REPLY,
+ Mpi26ToolboxLaneMarginingReply_t,
+ *pMpi26ToolboxLaneMarginingReply_t;
+
/*****************************************************************************
*
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 2500377d0723..0a6cb8f0680c 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -157,6 +157,32 @@ module_param_call(mpt3sas_fwfault_debug, _scsih_set_fwfault_debug,
param_get_int, &mpt3sas_fwfault_debug, 0644);
/**
+ * _base_readl_aero - retry readl for max three times.
+ * @addr - MPT Fusion system interface register address
+ *
+ * Retry the readl() for max three times if it gets zero value
+ * while reading the system interface register.
+ */
+static inline u32
+_base_readl_aero(const volatile void __iomem *addr)
+{
+ u32 i = 0, ret_val;
+
+ do {
+ ret_val = readl(addr);
+ i++;
+ } while (ret_val == 0 && i < 3);
+
+ return ret_val;
+}
+
+static inline u32
+_base_readl(const volatile void __iomem *addr)
+{
+ return readl(addr);
+}
+
+/**
* _base_clone_reply_to_sys_mem - copies reply to reply free iomem
* in BAR0 space.
*
@@ -716,7 +742,7 @@ mpt3sas_halt_firmware(struct MPT3SAS_ADAPTER *ioc)
dump_stack();
- doorbell = readl(&ioc->chip->Doorbell);
+ doorbell = ioc->base_readl(&ioc->chip->Doorbell);
if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT)
mpt3sas_base_fault_info(ioc , doorbell);
else {
@@ -1325,10 +1351,10 @@ _base_mask_interrupts(struct MPT3SAS_ADAPTER *ioc)
u32 him_register;
ioc->mask_interrupts = 1;
- him_register = readl(&ioc->chip->HostInterruptMask);
+ him_register = ioc->base_readl(&ioc->chip->HostInterruptMask);
him_register |= MPI2_HIM_DIM + MPI2_HIM_RIM + MPI2_HIM_RESET_IRQ_MASK;
writel(him_register, &ioc->chip->HostInterruptMask);
- readl(&ioc->chip->HostInterruptMask);
+ ioc->base_readl(&ioc->chip->HostInterruptMask);
}
/**
@@ -1342,7 +1368,7 @@ _base_unmask_interrupts(struct MPT3SAS_ADAPTER *ioc)
{
u32 him_register;
- him_register = readl(&ioc->chip->HostInterruptMask);
+ him_register = ioc->base_readl(&ioc->chip->HostInterruptMask);
him_register &= ~MPI2_HIM_RIM;
writel(him_register, &ioc->chip->HostInterruptMask);
ioc->mask_interrupts = 0;
@@ -3319,8 +3345,9 @@ _base_mpi_ep_writeq(__u64 b, volatile void __iomem *addr,
static inline void
_base_writeq(__u64 b, volatile void __iomem *addr, spinlock_t *writeq_lock)
{
+ wmb();
__raw_writeq(b, addr);
- mmiowb();
+ barrier();
}
#else
static inline void
@@ -4060,7 +4087,7 @@ _base_static_config_pages(struct MPT3SAS_ADAPTER *ioc)
* flag unset in NVDATA.
*/
mpt3sas_config_get_manufacturing_pg11(ioc, &mpi_reply, &ioc->manu_pg11);
- if (ioc->manu_pg11.EEDPTagMode == 0) {
+ if (!ioc->is_gen35_ioc && ioc->manu_pg11.EEDPTagMode == 0) {
pr_err("%s: overriding NVDATA EEDPTagMode setting\n",
ioc->name);
ioc->manu_pg11.EEDPTagMode &= ~0x3;
@@ -4854,7 +4881,7 @@ mpt3sas_base_get_iocstate(struct MPT3SAS_ADAPTER *ioc, int cooked)
{
u32 s, sc;
- s = readl(&ioc->chip->Doorbell);
+ s = ioc->base_readl(&ioc->chip->Doorbell);
sc = s & MPI2_IOC_STATE_MASK;
return cooked ? sc : s;
}
@@ -4910,7 +4937,7 @@ _base_wait_for_doorbell_int(struct MPT3SAS_ADAPTER *ioc, int timeout)
count = 0;
cntdn = 1000 * timeout;
do {
- int_status = readl(&ioc->chip->HostInterruptStatus);
+ int_status = ioc->base_readl(&ioc->chip->HostInterruptStatus);
if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
dhsprintk(ioc,
ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n",
@@ -4936,7 +4963,7 @@ _base_spin_on_doorbell_int(struct MPT3SAS_ADAPTER *ioc, int timeout)
count = 0;
cntdn = 2000 * timeout;
do {
- int_status = readl(&ioc->chip->HostInterruptStatus);
+ int_status = ioc->base_readl(&ioc->chip->HostInterruptStatus);
if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
dhsprintk(ioc,
ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n",
@@ -4974,14 +5001,14 @@ _base_wait_for_doorbell_ack(struct MPT3SAS_ADAPTER *ioc, int timeout)
count = 0;
cntdn = 1000 * timeout;
do {
- int_status = readl(&ioc->chip->HostInterruptStatus);
+ int_status = ioc->base_readl(&ioc->chip->HostInterruptStatus);
if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) {
dhsprintk(ioc,
ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n",
__func__, count, timeout));
return 0;
} else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
- doorbell = readl(&ioc->chip->Doorbell);
+ doorbell = ioc->base_readl(&ioc->chip->Doorbell);
if ((doorbell & MPI2_IOC_STATE_MASK) ==
MPI2_IOC_STATE_FAULT) {
mpt3sas_base_fault_info(ioc , doorbell);
@@ -5016,7 +5043,7 @@ _base_wait_for_doorbell_not_used(struct MPT3SAS_ADAPTER *ioc, int timeout)
count = 0;
cntdn = 1000 * timeout;
do {
- doorbell_reg = readl(&ioc->chip->Doorbell);
+ doorbell_reg = ioc->base_readl(&ioc->chip->Doorbell);
if (!(doorbell_reg & MPI2_DOORBELL_USED)) {
dhsprintk(ioc,
ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n",
@@ -5078,6 +5105,39 @@ _base_send_ioc_reset(struct MPT3SAS_ADAPTER *ioc, u8 reset_type, int timeout)
}
/**
+ * mpt3sas_wait_for_ioc - IOC's operational state is checked here.
+ * @ioc: per adapter object
+ * @wait_count: timeout in seconds
+ *
+ * Return: Waits up to timeout seconds for the IOC to
+ * become operational. Returns 0 if IOC is present
+ * and operational; otherwise returns -EFAULT.
+ */
+
+int
+mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int timeout)
+{
+ int wait_state_count = 0;
+ u32 ioc_state;
+
+ do {
+ ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
+ if (ioc_state == MPI2_IOC_STATE_OPERATIONAL)
+ break;
+ ssleep(1);
+ ioc_info(ioc, "%s: waiting for operational state(count=%d)\n",
+ __func__, ++wait_state_count);
+ } while (--timeout);
+ if (!timeout) {
+ ioc_err(ioc, "%s: failed due to ioc not operational\n", __func__);
+ return -EFAULT;
+ }
+ if (wait_state_count)
+ ioc_info(ioc, "ioc is operational\n");
+ return 0;
+}
+
+/**
* _base_handshake_req_reply_wait - send request thru doorbell interface
* @ioc: per adapter object
* @request_bytes: request length
@@ -5098,13 +5158,13 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
__le32 *mfp;
/* make sure doorbell is not in use */
- if ((readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) {
+ if ((ioc->base_readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) {
ioc_err(ioc, "doorbell is in use (line=%d)\n", __LINE__);
return -EFAULT;
}
/* clear pending doorbell interrupts from previous state changes */
- if (readl(&ioc->chip->HostInterruptStatus) &
+ if (ioc->base_readl(&ioc->chip->HostInterruptStatus) &
MPI2_HIS_IOC2SYS_DB_STATUS)
writel(0, &ioc->chip->HostInterruptStatus);
@@ -5147,7 +5207,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
}
/* read the first two 16-bits, it gives the total length of the reply */
- reply[0] = le16_to_cpu(readl(&ioc->chip->Doorbell)
+ reply[0] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell)
& MPI2_DOORBELL_DATA_MASK);
writel(0, &ioc->chip->HostInterruptStatus);
if ((_base_wait_for_doorbell_int(ioc, 5))) {
@@ -5155,7 +5215,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
__LINE__);
return -EFAULT;
}
- reply[1] = le16_to_cpu(readl(&ioc->chip->Doorbell)
+ reply[1] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell)
& MPI2_DOORBELL_DATA_MASK);
writel(0, &ioc->chip->HostInterruptStatus);
@@ -5166,9 +5226,10 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
return -EFAULT;
}
if (i >= reply_bytes/2) /* overflow case */
- readl(&ioc->chip->Doorbell);
+ ioc->base_readl(&ioc->chip->Doorbell);
else
- reply[i] = le16_to_cpu(readl(&ioc->chip->Doorbell)
+ reply[i] = le16_to_cpu(
+ ioc->base_readl(&ioc->chip->Doorbell)
& MPI2_DOORBELL_DATA_MASK);
writel(0, &ioc->chip->HostInterruptStatus);
}
@@ -5211,11 +5272,9 @@ mpt3sas_base_sas_iounit_control(struct MPT3SAS_ADAPTER *ioc,
Mpi2SasIoUnitControlRequest_t *mpi_request)
{
u16 smid;
- u32 ioc_state;
u8 issue_reset = 0;
int rc;
void *request;
- u16 wait_state_count;
dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
@@ -5227,20 +5286,9 @@ mpt3sas_base_sas_iounit_control(struct MPT3SAS_ADAPTER *ioc,
goto out;
}
- wait_state_count = 0;
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) {
- if (wait_state_count++ == 10) {
- ioc_err(ioc, "%s: failed due to ioc not operational\n",
- __func__);
- rc = -EFAULT;
- goto out;
- }
- ssleep(1);
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- ioc_info(ioc, "%s: waiting for operational state(count=%d)\n",
- __func__, wait_state_count);
- }
+ rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT);
+ if (rc)
+ goto out;
smid = mpt3sas_base_get_smid(ioc, ioc->base_cb_idx);
if (!smid) {
@@ -5306,11 +5354,9 @@ mpt3sas_base_scsi_enclosure_processor(struct MPT3SAS_ADAPTER *ioc,
Mpi2SepReply_t *mpi_reply, Mpi2SepRequest_t *mpi_request)
{
u16 smid;
- u32 ioc_state;
u8 issue_reset = 0;
int rc;
void *request;
- u16 wait_state_count;
dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
@@ -5322,20 +5368,9 @@ mpt3sas_base_scsi_enclosure_processor(struct MPT3SAS_ADAPTER *ioc,
goto out;
}
- wait_state_count = 0;
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) {
- if (wait_state_count++ == 10) {
- ioc_err(ioc, "%s: failed due to ioc not operational\n",
- __func__);
- rc = -EFAULT;
- goto out;
- }
- ssleep(1);
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- ioc_info(ioc, "%s: waiting for operational state(count=%d)\n",
- __func__, wait_state_count);
- }
+ rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT);
+ if (rc)
+ goto out;
smid = mpt3sas_base_get_smid(ioc, ioc->base_cb_idx);
if (!smid) {
@@ -6020,14 +6055,14 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
if (count++ > 20)
goto out;
- host_diagnostic = readl(&ioc->chip->HostDiagnostic);
+ host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
drsprintk(ioc,
ioc_info(ioc, "wrote magic sequence: count(%d), host_diagnostic(0x%08x)\n",
count, host_diagnostic));
} while ((host_diagnostic & MPI2_DIAG_DIAG_WRITE_ENABLE) == 0);
- hcb_size = readl(&ioc->chip->HCBSize);
+ hcb_size = ioc->base_readl(&ioc->chip->HCBSize);
drsprintk(ioc, ioc_info(ioc, "diag reset: issued\n"));
writel(host_diagnostic | MPI2_DIAG_RESET_ADAPTER,
@@ -6040,7 +6075,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
for (count = 0; count < (300000000 /
MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC); count++) {
- host_diagnostic = readl(&ioc->chip->HostDiagnostic);
+ host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
if (host_diagnostic == 0xFFFFFFFF)
goto out;
@@ -6391,6 +6426,10 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
ioc->rdpq_array_enable_assigned = 0;
ioc->dma_mask = 0;
+ if (ioc->is_aero_ioc)
+ ioc->base_readl = &_base_readl_aero;
+ else
+ ioc->base_readl = &_base_readl;
r = mpt3sas_base_map_resources(ioc);
if (r)
goto out_free_resources;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 8f1d6b071b39..800351932cc3 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -55,6 +55,7 @@
#include "mpi/mpi2_tool.h"
#include "mpi/mpi2_sas.h"
#include "mpi/mpi2_pci.h"
+#include "mpi/mpi2_image.h"
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -74,9 +75,9 @@
#define MPT3SAS_DRIVER_NAME "mpt3sas"
#define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
#define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION "26.100.00.00"
-#define MPT3SAS_MAJOR_VERSION 26
-#define MPT3SAS_MINOR_VERSION 100
+#define MPT3SAS_DRIVER_VERSION "27.101.00.00"
+#define MPT3SAS_MAJOR_VERSION 27
+#define MPT3SAS_MINOR_VERSION 101
#define MPT3SAS_BUILD_VERSION 0
#define MPT3SAS_RELEASE_VERSION 00
@@ -139,6 +140,9 @@
#define DEFAULT_NUM_FWCHAIN_ELEMTS 8
#define FW_IMG_HDR_READ_TIMEOUT 15
+
+#define IOC_OPERATIONAL_WAIT_COUNT 10
+
/*
* NVMe defines
*/
@@ -908,6 +912,7 @@ typedef void (*NVME_BUILD_PRP)(struct MPT3SAS_ADAPTER *ioc, u16 smid,
typedef void (*PUT_SMID_IO_FP_HIP) (struct MPT3SAS_ADAPTER *ioc, u16 smid,
u16 funcdep);
typedef void (*PUT_SMID_DEFAULT) (struct MPT3SAS_ADAPTER *ioc, u16 smid);
+typedef u32 (*BASE_READ_REG) (const volatile void __iomem *addr);
/* IOC Facts and Port Facts converted from little endian to cpu */
union mpi3_version_union {
@@ -1388,6 +1393,7 @@ struct MPT3SAS_ADAPTER {
u8 hide_drives;
spinlock_t diag_trigger_lock;
u8 diag_trigger_active;
+ BASE_READ_REG base_readl;
struct SL_WH_MASTER_TRIGGER_T diag_trigger_master;
struct SL_WH_EVENT_TRIGGERS_T diag_trigger_event;
struct SL_WH_SCSI_TRIGGERS_T diag_trigger_scsi;
@@ -1395,6 +1401,7 @@ struct MPT3SAS_ADAPTER {
void *device_remove_in_progress;
u16 device_remove_in_progress_sz;
u8 is_gen35_ioc;
+ u8 is_aero_ioc;
PUT_SMID_IO_FP_HIP put_smid_scsi_io;
};
@@ -1487,6 +1494,7 @@ mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc);
u8 mpt3sas_base_check_cmd_timeout(struct MPT3SAS_ADAPTER *ioc,
u8 status, void *mpi_request, int sz);
+int mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int wait_count);
/* scsih shared API */
struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc,
diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c
index 02209447f4ef..fb0a17252f86 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_config.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_config.c
@@ -119,7 +119,7 @@ _config_display_some_debug(struct MPT3SAS_ADAPTER *ioc, u16 smid,
desc = "raid_volume";
break;
case MPI2_CONFIG_PAGETYPE_MANUFACTURING:
- desc = "manufaucturing";
+ desc = "manufacturing";
break;
case MPI2_CONFIG_PAGETYPE_RAID_PHYSDISK:
desc = "physdisk";
@@ -300,11 +300,9 @@ _config_request(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigRequest_t
void *config_page, u16 config_page_sz)
{
u16 smid;
- u32 ioc_state;
Mpi2ConfigRequest_t *config_request;
int r;
u8 retry_count, issue_host_reset = 0;
- u16 wait_state_count;
struct config_request mem;
u32 ioc_status = UINT_MAX;
@@ -361,23 +359,10 @@ _config_request(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigRequest_t
ioc_info(ioc, "%s: attempting retry (%d)\n",
__func__, retry_count);
}
- wait_state_count = 0;
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) {
- if (wait_state_count++ == MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT) {
- ioc_err(ioc, "%s: failed due to ioc not operational\n",
- __func__);
- ioc->config_cmds.status = MPT3_CMD_NOT_USED;
- r = -EFAULT;
- goto free_mem;
- }
- ssleep(1);
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- ioc_info(ioc, "%s: waiting for operational state(count=%d)\n",
- __func__, wait_state_count);
- }
- if (wait_state_count)
- ioc_info(ioc, "%s: ioc is operational\n", __func__);
+
+ r = mpt3sas_wait_for_ioc(ioc, MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT);
+ if (r)
+ goto free_mem;
smid = mpt3sas_base_get_smid(ioc, ioc->config_cb_idx);
if (!smid) {
@@ -673,10 +658,6 @@ mpt3sas_config_set_manufacturing_pg11(struct MPT3SAS_ADAPTER *ioc,
r = _config_request(ioc, &mpi_request, mpi_reply,
MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page,
sizeof(*config_page));
- mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_WRITE_NVRAM;
- r = _config_request(ioc, &mpi_request, mpi_reply,
- MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page,
- sizeof(*config_page));
out:
return r;
}
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index 4afa597cbfba..b2bb47c14d35 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -641,7 +641,6 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
MPI2DefaultReply_t *mpi_reply;
Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request = NULL;
struct _pcie_device *pcie_device = NULL;
- u32 ioc_state;
u16 smid;
u8 timeout;
u8 issue_reset;
@@ -654,7 +653,6 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
dma_addr_t data_in_dma = 0;
size_t data_in_sz = 0;
long ret;
- u16 wait_state_count;
u16 device_handle = MPT3SAS_INVALID_DEVICE_HANDLE;
u8 tr_method = MPI26_SCSITASKMGMT_MSGFLAGS_PROTOCOL_LVL_RST_PCIE;
@@ -666,22 +664,9 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
goto out;
}
- wait_state_count = 0;
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) {
- if (wait_state_count++ == 10) {
- ioc_err(ioc, "%s: failed due to ioc not operational\n",
- __func__);
- ret = -EFAULT;
- goto out;
- }
- ssleep(1);
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- ioc_info(ioc, "%s: waiting for operational state(count=%d)\n",
- __func__, wait_state_count);
- }
- if (wait_state_count)
- ioc_info(ioc, "%s: ioc is operational\n", __func__);
+ ret = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT);
+ if (ret)
+ goto out;
mpi_request = kzalloc(ioc->request_sz, GFP_KERNEL);
if (!mpi_request) {
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 03c52847ed07..6be39dc27103 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3748,6 +3748,40 @@ _scsih_tm_tr_complete(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
return _scsih_check_for_pending_tm(ioc, smid);
}
+/** _scsih_allow_scmd_to_device - check whether scmd needs to
+ * issue to IOC or not.
+ * @ioc: per adapter object
+ * @scmd: pointer to scsi command object
+ *
+ * Returns true if scmd can be issued to IOC otherwise returns false.
+ */
+inline bool _scsih_allow_scmd_to_device(struct MPT3SAS_ADAPTER *ioc,
+ struct scsi_cmnd *scmd)
+{
+
+ if (ioc->pci_error_recovery)
+ return false;
+
+ if (ioc->hba_mpi_version_belonged == MPI2_VERSION) {
+ if (ioc->remove_host)
+ return false;
+
+ return true;
+ }
+
+ if (ioc->remove_host) {
+
+ switch (scmd->cmnd[0]) {
+ case SYNCHRONIZE_CACHE:
+ case START_STOP:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return true;
+}
/**
* _scsih_sas_control_complete - completion routine
@@ -4571,7 +4605,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
return 0;
}
- if (ioc->pci_error_recovery || ioc->remove_host) {
+ if (!(_scsih_allow_scmd_to_device(ioc, scmd))) {
scmd->result = DID_NO_CONNECT << 16;
scmd->scsi_done(scmd);
return 0;
@@ -9641,6 +9675,7 @@ static void scsih_remove(struct pci_dev *pdev)
/* release all the volumes */
_scsih_ir_shutdown(ioc);
+ sas_remove_host(shost);
list_for_each_entry_safe(raid_device, next, &ioc->raid_device_list,
list) {
if (raid_device->starget) {
@@ -9682,7 +9717,6 @@ static void scsih_remove(struct pci_dev *pdev)
ioc->sas_hba.num_phys = 0;
}
- sas_remove_host(shost);
mpt3sas_base_detach(ioc);
spin_lock(&gioc_lock);
list_del(&ioc->list);
@@ -10139,7 +10173,6 @@ static struct scsi_host_template mpt2sas_driver_template = {
.sg_tablesize = MPT2SAS_SG_DEPTH,
.max_sectors = 32767,
.cmd_per_lun = 7,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = mpt3sas_host_attrs,
.sdev_attrs = mpt3sas_dev_attrs,
.track_queue_depth = 1,
@@ -10178,7 +10211,6 @@ static struct scsi_host_template mpt3sas_driver_template = {
.sg_tablesize = MPT3SAS_SG_DEPTH,
.max_sectors = 32767,
.cmd_per_lun = 7,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = mpt3sas_host_attrs,
.sdev_attrs = mpt3sas_dev_attrs,
.track_queue_depth = 1,
@@ -10250,6 +10282,10 @@ _scsih_determine_hba_mpi_version(struct pci_dev *pdev)
case MPI26_MFGPAGE_DEVID_SAS3516_1:
case MPI26_MFGPAGE_DEVID_SAS3416:
case MPI26_MFGPAGE_DEVID_SAS3616:
+ case MPI26_MFGPAGE_DEVID_CFG_SEC_3916:
+ case MPI26_MFGPAGE_DEVID_HARD_SEC_3916:
+ case MPI26_MFGPAGE_DEVID_CFG_SEC_3816:
+ case MPI26_MFGPAGE_DEVID_HARD_SEC_3816:
return MPI26_VERSION;
}
return 0;
@@ -10337,8 +10373,17 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
case MPI26_MFGPAGE_DEVID_SAS3616:
ioc->is_gen35_ioc = 1;
break;
+ case MPI26_MFGPAGE_DEVID_CFG_SEC_3816:
+ case MPI26_MFGPAGE_DEVID_CFG_SEC_3916:
+ dev_info(&pdev->dev,
+ "HBA is in Configurable Secure mode\n");
+ /* fall through */
+ case MPI26_MFGPAGE_DEVID_HARD_SEC_3816:
+ case MPI26_MFGPAGE_DEVID_HARD_SEC_3916:
+ ioc->is_aero_ioc = ioc->is_gen35_ioc = 1;
+ break;
default:
- ioc->is_gen35_ioc = 0;
+ ioc->is_gen35_ioc = ioc->is_aero_ioc = 0;
}
if ((ioc->hba_mpi_version_belonged == MPI25_VERSION &&
pdev->revision >= SAS3_PCI_DEVICE_C0_REVISION) ||
@@ -10795,6 +10840,23 @@ static const struct pci_device_id mpt3sas_pci_table[] = {
/* Mercator ~ 3616*/
{ MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_SAS3616,
PCI_ANY_ID, PCI_ANY_ID },
+
+ /* Aero SI 0x00E1 Configurable Secure
+ * 0x00E2 Hard Secure
+ */
+ { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_CFG_SEC_3916,
+ PCI_ANY_ID, PCI_ANY_ID },
+ { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_HARD_SEC_3916,
+ PCI_ANY_ID, PCI_ANY_ID },
+
+ /* Sea SI 0x00E5 Configurable Secure
+ * 0x00E6 Hard Secure
+ */
+ { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_CFG_SEC_3816,
+ PCI_ANY_ID, PCI_ANY_ID },
+ { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_HARD_SEC_3816,
+ PCI_ANY_ID, PCI_ANY_ID },
+
{0} /* Terminating entry */
};
MODULE_DEVICE_TABLE(pci, mpt3sas_pci_table);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index 6a8a3c09b4b1..60ae2d0feb2b 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -296,7 +296,6 @@ _transport_expander_report_manufacture(struct MPT3SAS_ADAPTER *ioc,
struct rep_manu_request *manufacture_request;
int rc;
u16 smid;
- u32 ioc_state;
void *psge;
u8 issue_reset = 0;
void *data_out = NULL;
@@ -304,7 +303,6 @@ _transport_expander_report_manufacture(struct MPT3SAS_ADAPTER *ioc,
dma_addr_t data_in_dma;
size_t data_in_sz;
size_t data_out_sz;
- u16 wait_state_count;
if (ioc->shost_recovery || ioc->pci_error_recovery) {
ioc_info(ioc, "%s: host reset in progress!\n", __func__);
@@ -320,22 +318,9 @@ _transport_expander_report_manufacture(struct MPT3SAS_ADAPTER *ioc,
}
ioc->transport_cmds.status = MPT3_CMD_PENDING;
- wait_state_count = 0;
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) {
- if (wait_state_count++ == 10) {
- ioc_err(ioc, "%s: failed due to ioc not operational\n",
- __func__);
- rc = -EFAULT;
- goto out;
- }
- ssleep(1);
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- ioc_info(ioc, "%s: waiting for operational state(count=%d)\n",
- __func__, wait_state_count);
- }
- if (wait_state_count)
- ioc_info(ioc, "%s: ioc is operational\n", __func__);
+ rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT);
+ if (rc)
+ goto out;
smid = mpt3sas_base_get_smid(ioc, ioc->transport_cb_idx);
if (!smid) {
@@ -821,10 +806,13 @@ mpt3sas_transport_port_remove(struct MPT3SAS_ADAPTER *ioc, u64 sas_address,
mpt3sas_port->remote_identify.sas_address,
mpt3sas_phy->phy_id);
mpt3sas_phy->phy_belongs_to_port = 0;
- sas_port_delete_phy(mpt3sas_port->port, mpt3sas_phy->phy);
+ if (!ioc->remove_host)
+ sas_port_delete_phy(mpt3sas_port->port,
+ mpt3sas_phy->phy);
list_del(&mpt3sas_phy->port_siblings);
}
- sas_port_delete(mpt3sas_port->port);
+ if (!ioc->remove_host)
+ sas_port_delete(mpt3sas_port->port);
kfree(mpt3sas_port);
}
@@ -1076,13 +1064,11 @@ _transport_get_expander_phy_error_log(struct MPT3SAS_ADAPTER *ioc,
struct phy_error_log_reply *phy_error_log_reply;
int rc;
u16 smid;
- u32 ioc_state;
void *psge;
u8 issue_reset = 0;
void *data_out = NULL;
dma_addr_t data_out_dma;
u32 sz;
- u16 wait_state_count;
if (ioc->shost_recovery || ioc->pci_error_recovery) {
ioc_info(ioc, "%s: host reset in progress!\n", __func__);
@@ -1098,22 +1084,9 @@ _transport_get_expander_phy_error_log(struct MPT3SAS_ADAPTER *ioc,
}
ioc->transport_cmds.status = MPT3_CMD_PENDING;
- wait_state_count = 0;
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) {
- if (wait_state_count++ == 10) {
- ioc_err(ioc, "%s: failed due to ioc not operational\n",
- __func__);
- rc = -EFAULT;
- goto out;
- }
- ssleep(1);
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- ioc_info(ioc, "%s: waiting for operational state(count=%d)\n",
- __func__, wait_state_count);
- }
- if (wait_state_count)
- ioc_info(ioc, "%s: ioc is operational\n", __func__);
+ rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT);
+ if (rc)
+ goto out;
smid = mpt3sas_base_get_smid(ioc, ioc->transport_cb_idx);
if (!smid) {
@@ -1381,13 +1354,11 @@ _transport_expander_phy_control(struct MPT3SAS_ADAPTER *ioc,
struct phy_control_reply *phy_control_reply;
int rc;
u16 smid;
- u32 ioc_state;
void *psge;
u8 issue_reset = 0;
void *data_out = NULL;
dma_addr_t data_out_dma;
u32 sz;
- u16 wait_state_count;
if (ioc->shost_recovery || ioc->pci_error_recovery) {
ioc_info(ioc, "%s: host reset in progress!\n", __func__);
@@ -1403,22 +1374,9 @@ _transport_expander_phy_control(struct MPT3SAS_ADAPTER *ioc,
}
ioc->transport_cmds.status = MPT3_CMD_PENDING;
- wait_state_count = 0;
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) {
- if (wait_state_count++ == 10) {
- ioc_err(ioc, "%s: failed due to ioc not operational\n",
- __func__);
- rc = -EFAULT;
- goto out;
- }
- ssleep(1);
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- ioc_info(ioc, "%s: waiting for operational state(count=%d)\n",
- __func__, wait_state_count);
- }
- if (wait_state_count)
- ioc_info(ioc, "%s: ioc is operational\n", __func__);
+ rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT);
+ if (rc)
+ goto out;
smid = mpt3sas_base_get_smid(ioc, ioc->transport_cb_idx);
if (!smid) {
@@ -1880,7 +1838,6 @@ _transport_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
Mpi2SmpPassthroughReply_t *mpi_reply;
int rc;
u16 smid;
- u32 ioc_state;
void *psge;
dma_addr_t dma_addr_in;
dma_addr_t dma_addr_out;
@@ -1888,7 +1845,6 @@ _transport_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
void *addr_out = NULL;
size_t dma_len_in;
size_t dma_len_out;
- u16 wait_state_count;
unsigned int reslen = 0;
if (ioc->shost_recovery || ioc->pci_error_recovery) {
@@ -1924,22 +1880,9 @@ _transport_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
if (rc)
goto unmap_out;
- wait_state_count = 0;
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) {
- if (wait_state_count++ == 10) {
- ioc_err(ioc, "%s: failed due to ioc not operational\n",
- __func__);
- rc = -EFAULT;
- goto unmap_in;
- }
- ssleep(1);
- ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
- ioc_info(ioc, "%s: waiting for operational state(count=%d)\n",
- __func__, wait_state_count);
- }
- if (wait_state_count)
- ioc_info(ioc, "%s: ioc is operational\n", __func__);
+ rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT);
+ if (rc)
+ goto unmap_in;
smid = mpt3sas_base_get_smid(ioc, ioc->transport_cb_idx);
if (!smid) {
diff --git a/drivers/scsi/mvme147.c b/drivers/scsi/mvme147.c
index 7d1ab414b78f..ca96d6d9c350 100644
--- a/drivers/scsi/mvme147.c
+++ b/drivers/scsi/mvme147.c
@@ -78,7 +78,6 @@ static struct scsi_host_template mvme147_host_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING
};
static struct Scsi_Host *mvme147_shost;
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index 3ac34373746c..030d911ee374 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -59,7 +59,6 @@ static struct scsi_host_template mvs_sht = {
.this_id = -1,
.sg_tablesize = SG_ALL,
.max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.eh_device_reset_handler = sas_eh_device_reset_handler,
.eh_target_reset_handler = sas_eh_target_reset_handler,
.target_destroy = sas_target_destroy,
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index 2458974d1af6..dbe753fba486 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -2197,6 +2197,7 @@ static struct scsi_host_template mvumi_template = {
.eh_timed_out = mvumi_timed_out,
.eh_host_reset_handler = mvumi_host_reset,
.bios_param = mvumi_bios_param,
+ .dma_boundary = PAGE_SIZE - 1,
.this_id = -1,
};
@@ -2620,7 +2621,7 @@ static int __maybe_unused mvumi_resume(struct pci_dev *pdev)
}
ret = mvumi_pci_set_master(pdev);
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (ret)
goto fail;
ret = pci_request_regions(mhba->pdev, MV_DRIVER_NAME);
diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c
index 0642f2d0a3bb..539ac8ce4fcd 100644
--- a/drivers/scsi/myrb.c
+++ b/drivers/scsi/myrb.c
@@ -1528,6 +1528,7 @@ static int myrb_ldev_queuecommand(struct Scsi_Host *shost,
scmd->scsi_done(scmd);
return 0;
}
+ /* fall through */
case WRITE_6:
lba = (((scmd->cmnd[1] & 0x1F) << 16) |
(scmd->cmnd[2] << 8) |
@@ -1544,6 +1545,7 @@ static int myrb_ldev_queuecommand(struct Scsi_Host *shost,
scmd->scsi_done(scmd);
return 0;
}
+ /* fall through */
case WRITE_10:
case VERIFY: /* 0x2F */
case WRITE_VERIFY: /* 0x2E */
@@ -1560,6 +1562,7 @@ static int myrb_ldev_queuecommand(struct Scsi_Host *shost,
scmd->scsi_done(scmd);
return 0;
}
+ /* fall through */
case WRITE_12:
case VERIFY_12: /* 0xAF */
case WRITE_VERIFY_12: /* 0xAE */
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index 6cd3e289ef99..1a236a3dfd51 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -8313,7 +8313,6 @@ struct Scsi_Host * __init ncr_attach(struct scsi_host_template *tpnt,
tpnt->this_id = 7;
tpnt->sg_tablesize = SCSI_NCR_SG_TABLESIZE;
tpnt->cmd_per_lun = SCSI_NCR_CMD_PER_LUN;
- tpnt->use_clustering = ENABLE_CLUSTERING;
if (device->differential)
driver_setup.diff_support = device->differential;
diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c
index 5aac3e801903..00e3cbee55b8 100644
--- a/drivers/scsi/nsp32.c
+++ b/drivers/scsi/nsp32.c
@@ -274,7 +274,7 @@ static struct scsi_host_template nsp32_template = {
.sg_tablesize = NSP32_SG_SIZE,
.max_sectors = 128,
.this_id = NSP32_HOST_SCSIID,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.eh_abort_handler = nsp32_eh_abort,
.eh_host_reset_handler = nsp32_eh_host_reset,
/* .highmem_io = 1, */
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index e19fa883376f..60cf7c5eb880 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -506,11 +506,11 @@ static void osd_request_async_done(struct request *req, blk_status_t error)
_set_error_resid(or, req, error);
if (req->next_rq) {
- __blk_put_request(req->q, req->next_rq);
+ blk_put_request(req->next_rq);
req->next_rq = NULL;
}
- __blk_put_request(req->q, req);
+ blk_put_request(req);
or->request = NULL;
or->in.req = NULL;
or->out.req = NULL;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 7a1a1edde35d..664c1238a87f 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -341,7 +341,7 @@ static void osst_end_async(struct request *req, blk_status_t status)
blk_rq_unmap_user(SRpnt->bio);
}
- __blk_put_request(req->q, req);
+ blk_put_request(req);
}
/* osst_request memory management */
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index f3230494a8c9..1bd6825a4f14 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -86,7 +86,7 @@ static struct scsi_host_template nsp_driver_template = {
.can_queue = 1,
.this_id = NSP_INITIATOR_ID,
.sg_tablesize = SG_ALL,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
};
static nsp_hw_data nsp_data_base; /* attach <-> detect glue */
diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c
index 173351a8554b..828d53faf09a 100644
--- a/drivers/scsi/pcmcia/qlogic_stub.c
+++ b/drivers/scsi/pcmcia/qlogic_stub.c
@@ -72,7 +72,7 @@ static struct scsi_host_template qlogicfas_driver_template = {
.can_queue = 1,
.this_id = -1,
.sg_tablesize = SG_ALL,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
};
/*====================================================================*/
diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c
index a3b63bea0e50..d1e98a6ea28f 100644
--- a/drivers/scsi/pcmcia/sym53c500_cs.c
+++ b/drivers/scsi/pcmcia/sym53c500_cs.c
@@ -680,7 +680,6 @@ static struct scsi_host_template sym53c500_driver_template = {
.can_queue = 1,
.this_id = 7,
.sg_tablesize = 32,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = SYM53C500_shost_attrs
};
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index d71e7e4ec29c..a36060c23b37 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -84,7 +84,6 @@ static struct scsi_host_template pm8001_sht = {
.this_id = -1,
.sg_tablesize = SG_ALL,
.max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.eh_device_reset_handler = sas_eh_device_reset_handler,
.eh_target_reset_handler = sas_eh_target_reset_handler,
.target_destroy = sas_target_destroy,
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index 4e86994e10e8..7c4673308f5b 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -846,16 +846,9 @@ static void pmcraid_erp_done(struct pmcraid_cmd *cmd)
cmd->ioa_cb->ioarcb.cdb[0], ioasc);
}
- /* if we had allocated sense buffers for request sense, copy the sense
- * release the buffers
- */
- if (cmd->sense_buffer != NULL) {
- memcpy(scsi_cmd->sense_buffer,
- cmd->sense_buffer,
- SCSI_SENSE_BUFFERSIZE);
- pci_free_consistent(pinstance->pdev,
- SCSI_SENSE_BUFFERSIZE,
- cmd->sense_buffer, cmd->sense_buffer_dma);
+ if (cmd->sense_buffer) {
+ dma_unmap_single(&pinstance->pdev->dev, cmd->sense_buffer_dma,
+ SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
cmd->sense_buffer = NULL;
cmd->sense_buffer_dma = 0;
}
@@ -2444,13 +2437,12 @@ static void pmcraid_request_sense(struct pmcraid_cmd *cmd)
{
struct pmcraid_ioarcb *ioarcb = &cmd->ioa_cb->ioarcb;
struct pmcraid_ioadl_desc *ioadl = ioarcb->add_data.u.ioadl;
+ struct device *dev = &cmd->drv_inst->pdev->dev;
- /* allocate DMAable memory for sense buffers */
- cmd->sense_buffer = pci_alloc_consistent(cmd->drv_inst->pdev,
- SCSI_SENSE_BUFFERSIZE,
- &cmd->sense_buffer_dma);
-
- if (cmd->sense_buffer == NULL) {
+ cmd->sense_buffer = cmd->scsi_cmd->sense_buffer;
+ cmd->sense_buffer_dma = dma_map_single(dev, cmd->sense_buffer,
+ SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, cmd->sense_buffer_dma)) {
pmcraid_err
("couldn't allocate sense buffer for request sense\n");
pmcraid_erp_done(cmd);
@@ -2491,17 +2483,15 @@ static void pmcraid_request_sense(struct pmcraid_cmd *cmd)
/**
* pmcraid_cancel_all - cancel all outstanding IOARCBs as part of error recovery
* @cmd: command that failed
- * @sense: true if request_sense is required after cancel all
+ * @need_sense: true if request_sense is required after cancel all
*
* This function sends a cancel all to a device to clear the queue.
*/
-static void pmcraid_cancel_all(struct pmcraid_cmd *cmd, u32 sense)
+static void pmcraid_cancel_all(struct pmcraid_cmd *cmd, bool need_sense)
{
struct scsi_cmnd *scsi_cmd = cmd->scsi_cmd;
struct pmcraid_ioarcb *ioarcb = &cmd->ioa_cb->ioarcb;
struct pmcraid_resource_entry *res = scsi_cmd->device->hostdata;
- void (*cmd_done) (struct pmcraid_cmd *) = sense ? pmcraid_erp_done
- : pmcraid_request_sense;
memset(ioarcb->cdb, 0, PMCRAID_MAX_CDB_LEN);
ioarcb->request_flags0 = SYNC_OVERRIDE;
@@ -2519,7 +2509,8 @@ static void pmcraid_cancel_all(struct pmcraid_cmd *cmd, u32 sense)
/* writing to IOARRIN must be protected by host_lock, as mid-layer
* schedule queuecommand while we are doing this
*/
- pmcraid_send_cmd(cmd, cmd_done,
+ pmcraid_send_cmd(cmd, need_sense ?
+ pmcraid_erp_done : pmcraid_request_sense,
PMCRAID_REQUEST_SENSE_TIMEOUT,
pmcraid_timeout_handler);
}
@@ -2612,7 +2603,7 @@ static int pmcraid_error_handler(struct pmcraid_cmd *cmd)
struct pmcraid_ioasa *ioasa = &cmd->ioa_cb->ioasa;
u32 ioasc = le32_to_cpu(ioasa->ioasc);
u32 masked_ioasc = ioasc & PMCRAID_IOASC_SENSE_MASK;
- u32 sense_copied = 0;
+ bool sense_copied = false;
if (!res) {
pmcraid_info("resource pointer is NULL\n");
@@ -2684,7 +2675,7 @@ static int pmcraid_error_handler(struct pmcraid_cmd *cmd)
memcpy(scsi_cmd->sense_buffer,
ioasa->sense_data,
data_size);
- sense_copied = 1;
+ sense_copied = true;
}
if (RES_IS_GSCSI(res->cfg_entry))
@@ -3523,7 +3514,7 @@ static int pmcraid_build_passthrough_ioadls(
return -ENOMEM;
}
- sglist->num_dma_sg = pci_map_sg(cmd->drv_inst->pdev,
+ sglist->num_dma_sg = dma_map_sg(&cmd->drv_inst->pdev->dev,
sglist->scatterlist,
sglist->num_sg, direction);
@@ -3572,7 +3563,7 @@ static void pmcraid_release_passthrough_ioadls(
struct pmcraid_sglist *sglist = cmd->sglist;
if (buflen > 0) {
- pci_unmap_sg(cmd->drv_inst->pdev,
+ dma_unmap_sg(&cmd->drv_inst->pdev->dev,
sglist->scatterlist,
sglist->num_sg,
direction);
@@ -4158,7 +4149,6 @@ static struct scsi_host_template pmcraid_host_template = {
.max_sectors = PMCRAID_IOA_MAX_SECTORS,
.no_write_same = 1,
.cmd_per_lun = PMCRAID_MAX_CMD_PER_LUN,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = pmcraid_host_attrs,
.proc_name = PMCRAID_DRIVER_NAME,
};
@@ -4708,9 +4698,9 @@ static void
pmcraid_release_host_rrqs(struct pmcraid_instance *pinstance, int maxindex)
{
int i;
- for (i = 0; i < maxindex; i++) {
- pci_free_consistent(pinstance->pdev,
+ for (i = 0; i < maxindex; i++) {
+ dma_free_coherent(&pinstance->pdev->dev,
HRRQ_ENTRY_SIZE * PMCRAID_MAX_CMD,
pinstance->hrrq_start[i],
pinstance->hrrq_start_bus_addr[i]);
@@ -4737,11 +4727,9 @@ static int pmcraid_allocate_host_rrqs(struct pmcraid_instance *pinstance)
for (i = 0; i < pinstance->num_hrrq; i++) {
pinstance->hrrq_start[i] =
- pci_alloc_consistent(
- pinstance->pdev,
- buffer_size,
- &(pinstance->hrrq_start_bus_addr[i]));
-
+ dma_alloc_coherent(&pinstance->pdev->dev, buffer_size,
+ &pinstance->hrrq_start_bus_addr[i],
+ GFP_KERNEL);
if (!pinstance->hrrq_start[i]) {
pmcraid_err("pci_alloc failed for hrrq vector : %d\n",
i);
@@ -4770,7 +4758,7 @@ static int pmcraid_allocate_host_rrqs(struct pmcraid_instance *pinstance)
static void pmcraid_release_hcams(struct pmcraid_instance *pinstance)
{
if (pinstance->ccn.msg != NULL) {
- pci_free_consistent(pinstance->pdev,
+ dma_free_coherent(&pinstance->pdev->dev,
PMCRAID_AEN_HDR_SIZE +
sizeof(struct pmcraid_hcam_ccn_ext),
pinstance->ccn.msg,
@@ -4782,7 +4770,7 @@ static void pmcraid_release_hcams(struct pmcraid_instance *pinstance)
}
if (pinstance->ldn.msg != NULL) {
- pci_free_consistent(pinstance->pdev,
+ dma_free_coherent(&pinstance->pdev->dev,
PMCRAID_AEN_HDR_SIZE +
sizeof(struct pmcraid_hcam_ldn),
pinstance->ldn.msg,
@@ -4803,17 +4791,15 @@ static void pmcraid_release_hcams(struct pmcraid_instance *pinstance)
*/
static int pmcraid_allocate_hcams(struct pmcraid_instance *pinstance)
{
- pinstance->ccn.msg = pci_alloc_consistent(
- pinstance->pdev,
+ pinstance->ccn.msg = dma_alloc_coherent(&pinstance->pdev->dev,
PMCRAID_AEN_HDR_SIZE +
sizeof(struct pmcraid_hcam_ccn_ext),
- &(pinstance->ccn.baddr));
+ &pinstance->ccn.baddr, GFP_KERNEL);
- pinstance->ldn.msg = pci_alloc_consistent(
- pinstance->pdev,
+ pinstance->ldn.msg = dma_alloc_coherent(&pinstance->pdev->dev,
PMCRAID_AEN_HDR_SIZE +
sizeof(struct pmcraid_hcam_ldn),
- &(pinstance->ldn.baddr));
+ &pinstance->ldn.baddr, GFP_KERNEL);
if (pinstance->ldn.msg == NULL || pinstance->ccn.msg == NULL) {
pmcraid_release_hcams(pinstance);
@@ -4841,7 +4827,7 @@ static void pmcraid_release_config_buffers(struct pmcraid_instance *pinstance)
{
if (pinstance->cfg_table != NULL &&
pinstance->cfg_table_bus_addr != 0) {
- pci_free_consistent(pinstance->pdev,
+ dma_free_coherent(&pinstance->pdev->dev,
sizeof(struct pmcraid_config_table),
pinstance->cfg_table,
pinstance->cfg_table_bus_addr);
@@ -4886,10 +4872,10 @@ static int pmcraid_allocate_config_buffers(struct pmcraid_instance *pinstance)
list_add_tail(&pinstance->res_entries[i].queue,
&pinstance->free_res_q);
- pinstance->cfg_table =
- pci_alloc_consistent(pinstance->pdev,
+ pinstance->cfg_table = dma_alloc_coherent(&pinstance->pdev->dev,
sizeof(struct pmcraid_config_table),
- &pinstance->cfg_table_bus_addr);
+ &pinstance->cfg_table_bus_addr,
+ GFP_KERNEL);
if (NULL == pinstance->cfg_table) {
pmcraid_err("couldn't alloc DMA memory for config table\n");
@@ -4954,7 +4940,7 @@ static void pmcraid_release_buffers(struct pmcraid_instance *pinstance)
pmcraid_release_host_rrqs(pinstance, pinstance->num_hrrq);
if (pinstance->inq_data != NULL) {
- pci_free_consistent(pinstance->pdev,
+ dma_free_coherent(&pinstance->pdev->dev,
sizeof(struct pmcraid_inquiry_data),
pinstance->inq_data,
pinstance->inq_data_baddr);
@@ -4964,7 +4950,7 @@ static void pmcraid_release_buffers(struct pmcraid_instance *pinstance)
}
if (pinstance->timestamp_data != NULL) {
- pci_free_consistent(pinstance->pdev,
+ dma_free_coherent(&pinstance->pdev->dev,
sizeof(struct pmcraid_timestamp_data),
pinstance->timestamp_data,
pinstance->timestamp_data_baddr);
@@ -4981,8 +4967,8 @@ static void pmcraid_release_buffers(struct pmcraid_instance *pinstance)
* This routine pre-allocates memory based on the type of block as below:
* cmdblocks(PMCRAID_MAX_CMD): kernel memory using kernel's slab_allocator,
* IOARCBs(PMCRAID_MAX_CMD) : DMAable memory, using pci pool allocator
- * config-table entries : DMAable memory using pci_alloc_consistent
- * HostRRQs : DMAable memory, using pci_alloc_consistent
+ * config-table entries : DMAable memory using dma_alloc_coherent
+ * HostRRQs : DMAable memory, using dma_alloc_coherent
*
* Return Value
* 0 in case all of the blocks are allocated, -ENOMEM otherwise.
@@ -5019,11 +5005,9 @@ static int pmcraid_init_buffers(struct pmcraid_instance *pinstance)
}
/* allocate DMAable memory for page D0 INQUIRY buffer */
- pinstance->inq_data = pci_alloc_consistent(
- pinstance->pdev,
+ pinstance->inq_data = dma_alloc_coherent(&pinstance->pdev->dev,
sizeof(struct pmcraid_inquiry_data),
- &pinstance->inq_data_baddr);
-
+ &pinstance->inq_data_baddr, GFP_KERNEL);
if (pinstance->inq_data == NULL) {
pmcraid_err("couldn't allocate DMA memory for INQUIRY\n");
pmcraid_release_buffers(pinstance);
@@ -5031,11 +5015,10 @@ static int pmcraid_init_buffers(struct pmcraid_instance *pinstance)
}
/* allocate DMAable memory for set timestamp data buffer */
- pinstance->timestamp_data = pci_alloc_consistent(
- pinstance->pdev,
+ pinstance->timestamp_data = dma_alloc_coherent(&pinstance->pdev->dev,
sizeof(struct pmcraid_timestamp_data),
- &pinstance->timestamp_data_baddr);
-
+ &pinstance->timestamp_data_baddr,
+ GFP_KERNEL);
if (pinstance->timestamp_data == NULL) {
pmcraid_err("couldn't allocate DMA memory for \
set time_stamp \n");
@@ -5324,12 +5307,12 @@ static int pmcraid_resume(struct pci_dev *pdev)
pci_set_master(pdev);
- if ((sizeof(dma_addr_t) == 4) ||
- pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (sizeof(dma_addr_t) == 4 ||
+ dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
+ rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (rc == 0)
- rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
if (rc != 0) {
dev_err(&pdev->dev, "resume: Failed to set PCI DMA mask\n");
@@ -5733,19 +5716,19 @@ static int pmcraid_probe(struct pci_dev *pdev,
/* Firmware requires the system bus address of IOARCB to be within
* 32-bit addressable range though it has 64-bit IOARRIN register.
* However, firmware supports 64-bit streaming DMA buffers, whereas
- * coherent buffers are to be 32-bit. Since pci_alloc_consistent always
+ * coherent buffers are to be 32-bit. Since dma_alloc_coherent always
* returns memory within 4GB (if not, change this logic), coherent
* buffers are within firmware acceptable address ranges.
*/
- if ((sizeof(dma_addr_t) == 4) ||
- pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (sizeof(dma_addr_t) == 4 ||
+ dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
+ rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
/* firmware expects 32-bit DMA addresses for IOARRIN register; set 32
- * bit mask for pci_alloc_consistent to return addresses within 4GB
+ * bit mask for dma_alloc_coherent to return addresses within 4GB
*/
if (rc == 0)
- rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
if (rc != 0) {
dev_err(&pdev->dev, "Failed to set PCI DMA mask\n");
diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c
index ee86a0c62dbf..c182b5458f98 100644
--- a/drivers/scsi/ppa.c
+++ b/drivers/scsi/ppa.c
@@ -978,7 +978,6 @@ static struct scsi_host_template ppa_template = {
.bios_param = ppa_biosparam,
.this_id = -1,
.sg_tablesize = SG_ALL,
- .use_clustering = ENABLE_CLUSTERING,
.can_queue = 1,
.slave_alloc = ppa_adjust_queue,
};
diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c
index 4924424d20fe..8d769138c01c 100644
--- a/drivers/scsi/ps3rom.c
+++ b/drivers/scsi/ps3rom.c
@@ -349,7 +349,6 @@ static struct scsi_host_template ps3rom_host_template = {
.sg_tablesize = SG_ALL,
.emulated = 1, /* only sg driver uses this */
.max_sectors = PS3ROM_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
.module = THIS_MODULE,
};
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index d5a4f17fce51..edcaf4b0cb0b 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -785,7 +785,6 @@ static struct scsi_host_template qedf_host_template = {
.name = QEDF_MODULE_NAME,
.this_id = -1,
.cmd_per_lun = 32,
- .use_clustering = ENABLE_CLUSTERING,
.max_sectors = 0xffff,
.queuecommand = qedf_queuecommand,
.shost_attrs = qedf_host_attrs,
@@ -2935,8 +2934,7 @@ static void qedf_free_fcoe_pf_param(struct qedf_ctx *qedf)
qedf_free_global_queues(qedf);
- if (qedf->global_queues)
- kfree(qedf->global_queues);
+ kfree(qedf->global_queues);
}
/*
diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h
index a6f96b35e971..a26bb5066b90 100644
--- a/drivers/scsi/qedi/qedi.h
+++ b/drivers/scsi/qedi/qedi.h
@@ -45,7 +45,7 @@ struct qedi_endpoint;
#define QEDI_MAX_TASK_NUM 0x0FFF
#define QEDI_MAX_ISCSI_CONNS_PER_HBA 1024
#define QEDI_ISCSI_MAX_BDS_PER_CMD 255 /* Firmware max BDs is 255 */
-#define MAX_OUSTANDING_TASKS_PER_CON 1024
+#define MAX_OUTSTANDING_TASKS_PER_CON 1024
#define QEDI_MAX_BD_LEN 0xffff
#define QEDI_BD_SPLIT_SZ 0x1000
@@ -63,12 +63,9 @@ struct qedi_endpoint;
#define QEDI_LOCAL_PORT_INVALID 0xffff
#define TX_RX_RING 16
#define RX_RING (TX_RX_RING - 1)
-#define LL2_SINGLE_BUF_SIZE 0x400
-#define QEDI_PAGE_SIZE 4096
#define QEDI_PAGE_ALIGN(addr) ALIGN(addr, QEDI_PAGE_SIZE)
#define QEDI_PAGE_MASK (~((QEDI_PAGE_SIZE) - 1))
-#define QEDI_PAGE_SIZE 4096
#define QEDI_HW_DMA_BOUNDARY 0xfff
#define QEDI_PATH_HANDLE 0xFE0000000UL
@@ -146,7 +143,7 @@ struct skb_work_list {
};
/* Queue sizes in number of elements */
-#define QEDI_SQ_SIZE MAX_OUSTANDING_TASKS_PER_CON
+#define QEDI_SQ_SIZE MAX_OUTSTANDING_TASKS_PER_CON
#define QEDI_CQ_SIZE 2048
#define QEDI_CMDQ_SIZE QEDI_MAX_ISCSI_TASK
#define QEDI_PROTO_CQ_PROD_IDX 0
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index 2f0a4f2c5ff8..4da660c1c431 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -61,7 +61,6 @@ struct scsi_host_template qedi_host_template = {
.max_sectors = 0xffff,
.dma_boundary = QEDI_HW_DMA_BOUNDARY,
.cmd_per_lun = 128,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = qedi_shost_attrs,
};
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 105b0e4d7818..5c53409a8cea 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -44,6 +44,11 @@ module_param(qedi_io_tracing, uint, 0644);
MODULE_PARM_DESC(qedi_io_tracing,
" Enable logging of SCSI requests/completions into trace buffer. (default off).");
+uint qedi_ll2_buf_size = 0x400;
+module_param(qedi_ll2_buf_size, uint, 0644);
+MODULE_PARM_DESC(qedi_ll2_buf_size,
+ "parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400.");
+
const struct qed_iscsi_ops *qedi_ops;
static struct scsi_transport_template *qedi_scsi_transport;
static struct pci_driver qedi_pci_driver;
@@ -228,7 +233,7 @@ static int __qedi_alloc_uio_rings(struct qedi_uio_dev *udev)
}
/* Allocating memory for Tx/Rx pkt buffer */
- udev->ll2_buf_size = TX_RX_RING * LL2_SINGLE_BUF_SIZE;
+ udev->ll2_buf_size = TX_RX_RING * qedi_ll2_buf_size;
udev->ll2_buf_size = QEDI_PAGE_ALIGN(udev->ll2_buf_size);
udev->ll2_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_COMP |
__GFP_ZERO, 2);
@@ -283,7 +288,7 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi)
qedi->udev = udev;
udev->tx_pkt = udev->ll2_buf;
- udev->rx_pkt = udev->ll2_buf + LL2_SINGLE_BUF_SIZE;
+ udev->rx_pkt = udev->ll2_buf + qedi_ll2_buf_size;
return 0;
err_uctrl:
@@ -644,8 +649,7 @@ static struct qedi_ctx *qedi_host_alloc(struct pci_dev *pdev)
qedi->max_active_conns = ISCSI_MAX_SESS_PER_HBA;
qedi->max_sqes = QEDI_SQ_SIZE;
- if (shost_use_blk_mq(shost))
- shost->nr_hw_queues = MIN_NUM_CPUS_MSIX(qedi);
+ shost->nr_hw_queues = MIN_NUM_CPUS_MSIX(qedi);
pci_set_drvdata(pdev, qedi);
@@ -659,7 +663,7 @@ static int qedi_ll2_rx(void *cookie, struct sk_buff *skb, u32 arg1, u32 arg2)
struct qedi_uio_dev *udev;
struct qedi_uio_ctrl *uctrl;
struct skb_work_list *work;
- u32 prod;
+ struct ethhdr *eh;
if (!qedi) {
QEDI_ERR(NULL, "qedi is NULL\n");
@@ -673,6 +677,29 @@ static int qedi_ll2_rx(void *cookie, struct sk_buff *skb, u32 arg1, u32 arg2)
return 0;
}
+ eh = (struct ethhdr *)skb->data;
+ /* Undo VLAN encapsulation */
+ if (eh->h_proto == htons(ETH_P_8021Q)) {
+ memmove((u8 *)eh + VLAN_HLEN, eh, ETH_ALEN * 2);
+ eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
+ skb_reset_mac_header(skb);
+ }
+
+ /* Filter out non FIP/FCoE frames here to free them faster */
+ if (eh->h_proto != htons(ETH_P_ARP) &&
+ eh->h_proto != htons(ETH_P_IP) &&
+ eh->h_proto != htons(ETH_P_IPV6)) {
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_LL2,
+ "Dropping frame ethertype [0x%x] len [0x%x].\n",
+ eh->h_proto, skb->len);
+ kfree_skb(skb);
+ return 0;
+ }
+
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_LL2,
+ "Allowed frame ethertype [0x%x] len [0x%x].\n",
+ eh->h_proto, skb->len);
+
udev = qedi->udev;
uctrl = udev->uctrl;
@@ -695,17 +722,10 @@ static int qedi_ll2_rx(void *cookie, struct sk_buff *skb, u32 arg1, u32 arg2)
spin_lock_bh(&qedi->ll2_lock);
list_add_tail(&work->list, &qedi->ll2_skb_list);
+ spin_unlock_bh(&qedi->ll2_lock);
- ++uctrl->hw_rx_prod_cnt;
- prod = (uctrl->hw_rx_prod + 1) % RX_RING;
- if (prod != uctrl->host_rx_cons) {
- uctrl->hw_rx_prod = prod;
- spin_unlock_bh(&qedi->ll2_lock);
- wake_up_process(qedi->ll2_recv_thread);
- return 0;
- }
+ wake_up_process(qedi->ll2_recv_thread);
- spin_unlock_bh(&qedi->ll2_lock);
return 0;
}
@@ -720,6 +740,7 @@ static int qedi_ll2_process_skb(struct qedi_ctx *qedi, struct sk_buff *skb,
u32 rx_bd_prod;
void *pkt;
int len = 0;
+ u32 prod;
if (!qedi) {
QEDI_ERR(NULL, "qedi is NULL\n");
@@ -728,12 +749,16 @@ static int qedi_ll2_process_skb(struct qedi_ctx *qedi, struct sk_buff *skb,
udev = qedi->udev;
uctrl = udev->uctrl;
- pkt = udev->rx_pkt + (uctrl->hw_rx_prod * LL2_SINGLE_BUF_SIZE);
- len = min_t(u32, skb->len, (u32)LL2_SINGLE_BUF_SIZE);
+
+ ++uctrl->hw_rx_prod_cnt;
+ prod = (uctrl->hw_rx_prod + 1) % RX_RING;
+
+ pkt = udev->rx_pkt + (prod * qedi_ll2_buf_size);
+ len = min_t(u32, skb->len, (u32)qedi_ll2_buf_size);
memcpy(pkt, skb->data, len);
memset(&rxbd, 0, sizeof(rxbd));
- rxbd.rx_pkt_index = uctrl->hw_rx_prod;
+ rxbd.rx_pkt_index = prod;
rxbd.rx_pkt_len = len;
rxbd.vlan_id = vlan_id;
@@ -744,6 +769,16 @@ static int qedi_ll2_process_skb(struct qedi_ctx *qedi, struct sk_buff *skb,
memcpy(p_rxbd, &rxbd, sizeof(rxbd));
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_LL2,
+ "hw_rx_prod [%d] prod [%d] hw_rx_bd_prod [%d] rx_pkt_idx [%d] rx_len [%d].\n",
+ uctrl->hw_rx_prod, prod, uctrl->hw_rx_bd_prod,
+ rxbd.rx_pkt_index, rxbd.rx_pkt_len);
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_LL2,
+ "host_rx_cons [%d] hw_rx_bd_cons [%d].\n",
+ uctrl->host_rx_cons, uctrl->host_rx_bd_cons);
+
+ uctrl->hw_rx_prod = prod;
+
/* notify the iscsiuio about new packet */
uio_event_notify(&udev->qedi_uinfo);
@@ -796,7 +831,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi)
int rval = 0;
- num_sq_pages = (MAX_OUSTANDING_TASKS_PER_CON * 8) / PAGE_SIZE;
+ num_sq_pages = (MAX_OUTSTANDING_TASKS_PER_CON * 8) / QEDI_PAGE_SIZE;
qedi->num_queues = MIN_NUM_CPUS_MSIX(qedi);
@@ -834,7 +869,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi)
qedi->pf_params.iscsi_pf_params.max_fin_rt = 2;
for (log_page_size = 0 ; log_page_size < 32 ; log_page_size++) {
- if ((1 << log_page_size) == PAGE_SIZE)
+ if ((1 << log_page_size) == QEDI_PAGE_SIZE)
break;
}
qedi->pf_params.iscsi_pf_params.log_page_size = log_page_size;
@@ -952,6 +987,9 @@ static int qedi_find_boot_info(struct qedi_ctx *qedi,
cls_sess = iscsi_conn_to_session(cls_conn);
sess = cls_sess->dd_data;
+ if (!iscsi_is_session_online(cls_sess))
+ continue;
+
if (pri_ctrl_flags) {
if (!strcmp(pri_tgt->iscsi_name, sess->targetname) &&
!strcmp(pri_tgt->ip_addr, ep_ip_addr)) {
@@ -1298,7 +1336,7 @@ static int qedi_request_msix_irq(struct qedi_ctx *qedi)
int i, rc, cpu;
cpu = cpumask_first(cpu_online_mask);
- for (i = 0; i < MIN_NUM_CPUS_MSIX(qedi); i++) {
+ for (i = 0; i < qedi->int_info.msix_cnt; i++) {
rc = request_irq(qedi->int_info.msix[i].vector,
qedi_msix_handler, 0, "qedi",
&qedi->fp_array[i]);
@@ -1376,7 +1414,7 @@ static void qedi_free_bdq(struct qedi_ctx *qedi)
int i;
if (qedi->bdq_pbl_list)
- dma_free_coherent(&qedi->pdev->dev, PAGE_SIZE,
+ dma_free_coherent(&qedi->pdev->dev, QEDI_PAGE_SIZE,
qedi->bdq_pbl_list, qedi->bdq_pbl_list_dma);
if (qedi->bdq_pbl)
@@ -1437,7 +1475,7 @@ static int qedi_alloc_bdq(struct qedi_ctx *qedi)
/* Alloc dma memory for BDQ page buffer list */
qedi->bdq_pbl_mem_size = QEDI_BDQ_NUM * sizeof(struct scsi_bd);
- qedi->bdq_pbl_mem_size = ALIGN(qedi->bdq_pbl_mem_size, PAGE_SIZE);
+ qedi->bdq_pbl_mem_size = ALIGN(qedi->bdq_pbl_mem_size, QEDI_PAGE_SIZE);
qedi->rq_num_entries = qedi->bdq_pbl_mem_size / sizeof(struct scsi_bd);
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_CONN, "rq_num_entries = %d.\n",
@@ -1472,7 +1510,8 @@ static int qedi_alloc_bdq(struct qedi_ctx *qedi)
}
/* Allocate list of PBL pages */
- qedi->bdq_pbl_list = dma_zalloc_coherent(&qedi->pdev->dev, PAGE_SIZE,
+ qedi->bdq_pbl_list = dma_zalloc_coherent(&qedi->pdev->dev,
+ QEDI_PAGE_SIZE,
&qedi->bdq_pbl_list_dma,
GFP_KERNEL);
if (!qedi->bdq_pbl_list) {
@@ -1485,13 +1524,14 @@ static int qedi_alloc_bdq(struct qedi_ctx *qedi)
* Now populate PBL list with pages that contain pointers to the
* individual buffers.
*/
- qedi->bdq_pbl_list_num_entries = qedi->bdq_pbl_mem_size / PAGE_SIZE;
+ qedi->bdq_pbl_list_num_entries = qedi->bdq_pbl_mem_size /
+ QEDI_PAGE_SIZE;
list = (u64 *)qedi->bdq_pbl_list;
page = qedi->bdq_pbl_list_dma;
for (i = 0; i < qedi->bdq_pbl_list_num_entries; i++) {
*list = qedi->bdq_pbl_dma;
list++;
- page += PAGE_SIZE;
+ page += QEDI_PAGE_SIZE;
}
return 0;
diff --git a/drivers/scsi/qedi/qedi_version.h b/drivers/scsi/qedi/qedi_version.h
index 8a0e523fc089..41bcbbafebd4 100644
--- a/drivers/scsi/qedi/qedi_version.h
+++ b/drivers/scsi/qedi/qedi_version.h
@@ -7,8 +7,8 @@
* this source tree.
*/
-#define QEDI_MODULE_VERSION "8.33.0.20"
+#define QEDI_MODULE_VERSION "8.33.0.21"
#define QEDI_DRIVER_MAJOR_VER 8
#define QEDI_DRIVER_MINOR_VER 33
#define QEDI_DRIVER_REV_VER 0
-#define QEDI_DRIVER_ENG_VER 20
+#define QEDI_DRIVER_ENG_VER 21
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 15a50cc7e4b3..a414f51302b7 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -383,20 +383,10 @@
#include "qla1280.h"
-#ifndef BITS_PER_LONG
-#error "BITS_PER_LONG not defined!"
-#endif
-#if (BITS_PER_LONG == 64) || defined CONFIG_HIGHMEM
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
#define QLA_64BIT_PTR 1
#endif
-#ifdef QLA_64BIT_PTR
-#define pci_dma_hi32(a) ((a >> 16) >> 16)
-#else
-#define pci_dma_hi32(a) 0
-#endif
-#define pci_dma_lo32(a) (a & 0xffffffff)
-
#define NVRAM_DELAY() udelay(500) /* 2 microseconds */
#if defined(__ia64__) && !defined(ia64_platform_is)
@@ -1790,8 +1780,8 @@ qla1280_load_firmware_dma(struct scsi_qla_host *ha)
mb[4] = cnt;
mb[3] = ha->request_dma & 0xffff;
mb[2] = (ha->request_dma >> 16) & 0xffff;
- mb[7] = pci_dma_hi32(ha->request_dma) & 0xffff;
- mb[6] = pci_dma_hi32(ha->request_dma) >> 16;
+ mb[7] = upper_32_bits(ha->request_dma) & 0xffff;
+ mb[6] = upper_32_bits(ha->request_dma) >> 16;
dprintk(2, "%s: op=%d 0x%p = 0x%4x,0x%4x,0x%4x,0x%4x\n",
__func__, mb[0],
(void *)(long)ha->request_dma,
@@ -1810,8 +1800,8 @@ qla1280_load_firmware_dma(struct scsi_qla_host *ha)
mb[4] = cnt;
mb[3] = p_tbuf & 0xffff;
mb[2] = (p_tbuf >> 16) & 0xffff;
- mb[7] = pci_dma_hi32(p_tbuf) & 0xffff;
- mb[6] = pci_dma_hi32(p_tbuf) >> 16;
+ mb[7] = upper_32_bits(p_tbuf) & 0xffff;
+ mb[6] = upper_32_bits(p_tbuf) >> 16;
err = qla1280_mailbox_command(ha, BIT_4 | BIT_3 | BIT_2 |
BIT_1 | BIT_0, mb);
@@ -1933,8 +1923,8 @@ qla1280_init_rings(struct scsi_qla_host *ha)
mb[3] = ha->request_dma & 0xffff;
mb[2] = (ha->request_dma >> 16) & 0xffff;
mb[4] = 0;
- mb[7] = pci_dma_hi32(ha->request_dma) & 0xffff;
- mb[6] = pci_dma_hi32(ha->request_dma) >> 16;
+ mb[7] = upper_32_bits(ha->request_dma) & 0xffff;
+ mb[6] = upper_32_bits(ha->request_dma) >> 16;
if (!(status = qla1280_mailbox_command(ha, BIT_7 | BIT_6 | BIT_4 |
BIT_3 | BIT_2 | BIT_1 | BIT_0,
&mb[0]))) {
@@ -1947,8 +1937,8 @@ qla1280_init_rings(struct scsi_qla_host *ha)
mb[3] = ha->response_dma & 0xffff;
mb[2] = (ha->response_dma >> 16) & 0xffff;
mb[5] = 0;
- mb[7] = pci_dma_hi32(ha->response_dma) & 0xffff;
- mb[6] = pci_dma_hi32(ha->response_dma) >> 16;
+ mb[7] = upper_32_bits(ha->response_dma) & 0xffff;
+ mb[6] = upper_32_bits(ha->response_dma) >> 16;
status = qla1280_mailbox_command(ha, BIT_7 | BIT_6 | BIT_5 |
BIT_3 | BIT_2 | BIT_1 | BIT_0,
&mb[0]);
@@ -2914,13 +2904,13 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
SCSI_BUS_32(cmd));
#endif
*dword_ptr++ =
- cpu_to_le32(pci_dma_lo32(dma_handle));
+ cpu_to_le32(lower_32_bits(dma_handle));
*dword_ptr++ =
- cpu_to_le32(pci_dma_hi32(dma_handle));
+ cpu_to_le32(upper_32_bits(dma_handle));
*dword_ptr++ = cpu_to_le32(sg_dma_len(s));
dprintk(3, "S/G Segment phys_addr=%x %x, len=0x%x\n",
- cpu_to_le32(pci_dma_hi32(dma_handle)),
- cpu_to_le32(pci_dma_lo32(dma_handle)),
+ cpu_to_le32(upper_32_bits(dma_handle)),
+ cpu_to_le32(lower_32_bits(dma_handle)),
cpu_to_le32(sg_dma_len(sg_next(s))));
remseg--;
}
@@ -2976,14 +2966,14 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
SCSI_BUS_32(cmd));
#endif
*dword_ptr++ =
- cpu_to_le32(pci_dma_lo32(dma_handle));
+ cpu_to_le32(lower_32_bits(dma_handle));
*dword_ptr++ =
- cpu_to_le32(pci_dma_hi32(dma_handle));
+ cpu_to_le32(upper_32_bits(dma_handle));
*dword_ptr++ =
cpu_to_le32(sg_dma_len(s));
dprintk(3, "S/G Segment Cont. phys_addr=%x %x, len=0x%x\n",
- cpu_to_le32(pci_dma_hi32(dma_handle)),
- cpu_to_le32(pci_dma_lo32(dma_handle)),
+ cpu_to_le32(upper_32_bits(dma_handle)),
+ cpu_to_le32(lower_32_bits(dma_handle)),
cpu_to_le32(sg_dma_len(s)));
}
remseg -= cnt;
@@ -3178,10 +3168,10 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
if (cnt == 4)
break;
*dword_ptr++ =
- cpu_to_le32(pci_dma_lo32(sg_dma_address(s)));
+ cpu_to_le32(lower_32_bits(sg_dma_address(s)));
*dword_ptr++ = cpu_to_le32(sg_dma_len(s));
dprintk(3, "S/G Segment phys_addr=0x%lx, len=0x%x\n",
- (pci_dma_lo32(sg_dma_address(s))),
+ (lower_32_bits(sg_dma_address(s))),
(sg_dma_len(s)));
remseg--;
}
@@ -3224,13 +3214,13 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
if (cnt == 7)
break;
*dword_ptr++ =
- cpu_to_le32(pci_dma_lo32(sg_dma_address(s)));
+ cpu_to_le32(lower_32_bits(sg_dma_address(s)));
*dword_ptr++ =
cpu_to_le32(sg_dma_len(s));
dprintk(1,
"S/G Segment Cont. phys_addr=0x%x, "
"len=0x%x\n",
- cpu_to_le32(pci_dma_lo32(sg_dma_address(s))),
+ cpu_to_le32(lower_32_bits(sg_dma_address(s))),
cpu_to_le32(sg_dma_len(s)));
}
remseg -= cnt;
@@ -4213,7 +4203,6 @@ static struct scsi_host_template qla1280_driver_template = {
.can_queue = MAX_OUTSTANDING_COMMANDS,
.this_id = -1,
.sg_tablesize = SG_ALL,
- .use_clustering = ENABLE_CLUSTERING,
};
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 0bb9ac6ece92..00444dc79756 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2712,6 +2712,8 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags))
msleep(1000);
+ qla_nvme_delete(vha);
+
qla24xx_disable_vp(vha);
qla2x00_wait_for_sess_deletion(vha);
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index eb59c796a795..364bb52ed2a6 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -237,15 +237,13 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport,
qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
sp->done = qla2x00_async_login_sp_done;
- if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport)) {
+ if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport))
lio->u.logio.flags |= SRB_LOGIN_PRLI_ONLY;
- } else {
+ else
lio->u.logio.flags |= SRB_LOGIN_COND_PLOGI;
- if (fcport->fc4f_nvme)
- lio->u.logio.flags |= SRB_LOGIN_SKIP_PRLI;
-
- }
+ if (fcport->fc4f_nvme)
+ lio->u.logio.flags |= SRB_LOGIN_SKIP_PRLI;
ql_dbg(ql_dbg_disc, vha, 0x2072,
"Async-login - %8phC hdl=%x, loopid=%x portid=%02x%02x%02x "
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index d620f4bebcd0..099d8e9851cb 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -507,6 +507,7 @@ qla24xx_create_vhost(struct fc_vport *fc_vport)
qla2x00_start_timer(vha, WATCH_INTERVAL);
vha->req = base_vha->req;
+ vha->flags.nvme_enabled = base_vha->flags.nvme_enabled;
host->can_queue = base_vha->req->length + 128;
host->cmd_per_lun = 3;
if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif)
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 7e78e7eff783..39d892bbd219 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -272,17 +272,6 @@ static void qla_nvme_fcp_abort(struct nvme_fc_local_port *lport,
schedule_work(&priv->abort_work);
}
-static void qla_nvme_poll(struct nvme_fc_local_port *lport, void *hw_queue_handle)
-{
- struct qla_qpair *qpair = hw_queue_handle;
- unsigned long flags;
- struct scsi_qla_host *vha = lport->private;
-
- spin_lock_irqsave(&qpair->qp_lock, flags);
- qla24xx_process_response_queue(vha, qpair->rsp);
- spin_unlock_irqrestore(&qpair->qp_lock, flags);
-}
-
static inline int qla2x00_start_nvme_mq(srb_t *sp)
{
unsigned long flags;
@@ -474,21 +463,10 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport,
int rval = -ENODEV;
srb_t *sp;
struct qla_qpair *qpair = hw_queue_handle;
- struct nvme_private *priv;
+ struct nvme_private *priv = fd->private;
struct qla_nvme_rport *qla_rport = rport->private;
- if (!fd || !qpair) {
- ql_log(ql_log_warn, NULL, 0x2134,
- "NO NVMe request or Queue Handle\n");
- return rval;
- }
-
- priv = fd->private;
fcport = qla_rport->fcport;
- if (!fcport) {
- ql_log(ql_log_warn, NULL, 0x210e, "No fcport ptr\n");
- return rval;
- }
vha = fcport->vha;
@@ -517,6 +495,7 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport,
sp->name = "nvme_cmd";
sp->done = qla_nvme_sp_done;
sp->qpair = qpair;
+ sp->vha = vha;
nvme = &sp->u.iocb_cmd;
nvme->u.nvme.desc = fd;
@@ -564,7 +543,7 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport)
schedule_work(&fcport->free_work);
}
- fcport->nvme_flag &= ~(NVME_FLAG_REGISTERED | NVME_FLAG_DELETING);
+ fcport->nvme_flag &= ~NVME_FLAG_DELETING;
ql_log(ql_log_info, fcport->vha, 0x2110,
"remoteport_delete of %p completed.\n", fcport);
}
@@ -578,7 +557,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = {
.ls_abort = qla_nvme_ls_abort,
.fcp_io = qla_nvme_post_cmd,
.fcp_abort = qla_nvme_fcp_abort,
- .poll_queue = qla_nvme_poll,
.max_hw_queues = 8,
.max_sgl_segments = 128,
.max_dif_sgl_segments = 64,
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index d0ecc729a90a..ea69dafc9774 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -328,7 +328,6 @@ struct scsi_host_template qla2xxx_driver_template = {
.map_queues = qla2xxx_map_queues,
.this_id = -1,
.cmd_per_lun = 3,
- .use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = SG_ALL,
.max_sectors = 0xFFFF,
@@ -857,13 +856,9 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
}
if (ha->mqenable) {
- if (shost_use_blk_mq(vha->host)) {
- tag = blk_mq_unique_tag(cmd->request);
- hwq = blk_mq_unique_tag_to_hwq(tag);
- qpair = ha->queue_pair_map[hwq];
- } else if (vha->vp_idx && vha->qpair) {
- qpair = vha->qpair;
- }
+ tag = blk_mq_unique_tag(cmd->request);
+ hwq = blk_mq_unique_tag_to_hwq(tag);
+ qpair = ha->queue_pair_map[hwq];
if (qpair)
return qla2xxx_mqueuecommand(host, cmd, qpair);
@@ -1464,7 +1459,7 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
goto eh_reset_failed;
}
err = 2;
- if (do_reset(fcport, cmd->device->lun, cmd->request->cpu + 1)
+ if (do_reset(fcport, cmd->device->lun, blk_mq_rq_cpu(cmd->request) + 1)
!= QLA_SUCCESS) {
ql_log(ql_log_warn, vha, 0x800c,
"do_reset failed for cmd=%p.\n", cmd);
@@ -1746,10 +1741,53 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
return QLA_SUCCESS;
}
+static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
+ unsigned long *flags)
+ __releases(qp->qp_lock_ptr)
+ __acquires(qp->qp_lock_ptr)
+{
+ scsi_qla_host_t *vha = qp->vha;
+ struct qla_hw_data *ha = vha->hw;
+
+ if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS) {
+ if (!sp_get(sp)) {
+ /* got sp */
+ spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
+ qla_nvme_abort(ha, sp, res);
+ spin_lock_irqsave(qp->qp_lock_ptr, *flags);
+ }
+ } else if (GET_CMD_SP(sp) && !ha->flags.eeh_busy &&
+ !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
+ !qla2x00_isp_reg_stat(ha) && sp->type == SRB_SCSI_CMD) {
+ /*
+ * Don't abort commands in adapter during EEH recovery as it's
+ * not accessible/responding.
+ *
+ * Get a reference to the sp and drop the lock. The reference
+ * ensures this sp->done() call and not the call in
+ * qla2xxx_eh_abort() ends the SCSI cmd (with result 'res').
+ */
+ if (!sp_get(sp)) {
+ int status;
+
+ spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
+ status = qla2xxx_eh_abort(GET_CMD_SP(sp));
+ spin_lock_irqsave(qp->qp_lock_ptr, *flags);
+ /*
+ * Get rid of extra reference caused
+ * by early exit from qla2xxx_eh_abort
+ */
+ if (status == FAST_IO_FAIL)
+ atomic_dec(&sp->ref_count);
+ }
+ }
+ sp->done(sp, res);
+}
+
static void
__qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
{
- int cnt, status;
+ int cnt;
unsigned long flags;
srb_t *sp;
scsi_qla_host_t *vha = qp->vha;
@@ -1768,50 +1806,7 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
req->outstanding_cmds[cnt] = NULL;
switch (sp->cmd_type) {
case TYPE_SRB:
- if (sp->type == SRB_NVME_CMD ||
- sp->type == SRB_NVME_LS) {
- if (!sp_get(sp)) {
- /* got sp */
- spin_unlock_irqrestore
- (qp->qp_lock_ptr,
- flags);
- qla_nvme_abort(ha, sp, res);
- spin_lock_irqsave
- (qp->qp_lock_ptr, flags);
- }
- } else if (GET_CMD_SP(sp) &&
- !ha->flags.eeh_busy &&
- (!test_bit(ABORT_ISP_ACTIVE,
- &vha->dpc_flags)) &&
- !qla2x00_isp_reg_stat(ha) &&
- (sp->type == SRB_SCSI_CMD)) {
- /*
- * Don't abort commands in adapter
- * during EEH recovery as it's not
- * accessible/responding.
- *
- * Get a reference to the sp and drop
- * the lock. The reference ensures this
- * sp->done() call and not the call in
- * qla2xxx_eh_abort() ends the SCSI cmd
- * (with result 'res').
- */
- if (!sp_get(sp)) {
- spin_unlock_irqrestore
- (qp->qp_lock_ptr, flags);
- status = qla2xxx_eh_abort(
- GET_CMD_SP(sp));
- spin_lock_irqsave
- (qp->qp_lock_ptr, flags);
- /*
- * Get rid of extra reference caused
- * by early exit from qla2xxx_eh_abort
- */
- if (status == FAST_IO_FAIL)
- atomic_dec(&sp->ref_count);
- }
- }
- sp->done(sp, res);
+ qla2x00_abort_srb(qp, sp, res, &flags);
break;
case TYPE_TGT_CMD:
if (!vha->hw->tgt.tgt_ops || !tgt ||
@@ -3159,7 +3154,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto probe_failed;
}
- if (ha->mqenable && shost_use_blk_mq(host)) {
+ if (ha->mqenable) {
/* number of hardware queues supported by blk/scsi-mq*/
host->nr_hw_queues = ha->max_qpairs;
@@ -3271,25 +3266,17 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
base_vha->mgmt_svr_loop_id, host->sg_tablesize);
if (ha->mqenable) {
- bool mq = false;
bool startit = false;
- if (QLA_TGT_MODE_ENABLED()) {
- mq = true;
+ if (QLA_TGT_MODE_ENABLED())
startit = false;
- }
- if ((ql2x_ini_mode == QLA2XXX_INI_MODE_ENABLED) &&
- shost_use_blk_mq(host)) {
- mq = true;
+ if (ql2x_ini_mode == QLA2XXX_INI_MODE_ENABLED)
startit = true;
- }
- if (mq) {
- /* Create start of day qpairs for Block MQ */
- for (i = 0; i < ha->max_qpairs; i++)
- qla2xxx_create_qpair(base_vha, 5, 0, startit);
- }
+ /* Create start of day qpairs for Block MQ */
+ for (i = 0; i < ha->max_qpairs; i++)
+ qla2xxx_create_qpair(base_vha, 5, 0, startit);
}
if (ha->flags.running_gold_fw)
@@ -3570,6 +3557,8 @@ qla2x00_delete_all_vps(struct qla_hw_data *ha, scsi_qla_host_t *base_vha)
spin_unlock_irqrestore(&ha->vport_slock, flags);
mutex_unlock(&ha->vport_lock);
+ qla_nvme_delete(vha);
+
fc_vport_terminate(vha->fc_vport);
scsi_host_put(vha->host);
@@ -4191,12 +4180,10 @@ fail_free_nvram:
kfree(ha->nvram);
ha->nvram = NULL;
fail_free_ctx_mempool:
- if (ha->ctx_mempool)
- mempool_destroy(ha->ctx_mempool);
+ mempool_destroy(ha->ctx_mempool);
ha->ctx_mempool = NULL;
fail_free_srb_mempool:
- if (ha->srb_mempool)
- mempool_destroy(ha->srb_mempool);
+ mempool_destroy(ha->srb_mempool);
ha->srb_mempool = NULL;
fail_free_gid_list:
dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha),
@@ -4498,8 +4485,7 @@ qla2x00_mem_free(struct qla_hw_data *ha)
dma_free_coherent(&ha->pdev->dev, MCTP_DUMP_SIZE, ha->mctp_dump,
ha->mctp_dump_dma);
- if (ha->srb_mempool)
- mempool_destroy(ha->srb_mempool);
+ mempool_destroy(ha->srb_mempool);
if (ha->dcbx_tlv)
dma_free_coherent(&ha->pdev->dev, DCBX_TLV_DATA_SIZE,
@@ -4531,8 +4517,7 @@ qla2x00_mem_free(struct qla_hw_data *ha)
if (ha->async_pd)
dma_pool_free(ha->s_dma_pool, ha->async_pd, ha->async_pd_dma);
- if (ha->s_dma_pool)
- dma_pool_destroy(ha->s_dma_pool);
+ dma_pool_destroy(ha->s_dma_pool);
if (ha->gid_list)
dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha),
@@ -4553,14 +4538,11 @@ qla2x00_mem_free(struct qla_hw_data *ha)
}
}
- if (ha->dl_dma_pool)
- dma_pool_destroy(ha->dl_dma_pool);
+ dma_pool_destroy(ha->dl_dma_pool);
- if (ha->fcp_cmnd_dma_pool)
- dma_pool_destroy(ha->fcp_cmnd_dma_pool);
+ dma_pool_destroy(ha->fcp_cmnd_dma_pool);
- if (ha->ctx_mempool)
- mempool_destroy(ha->ctx_mempool);
+ mempool_destroy(ha->ctx_mempool);
qlt_mem_free(ha);
@@ -6952,11 +6934,12 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost)
{
int rc;
scsi_qla_host_t *vha = (scsi_qla_host_t *)shost->hostdata;
+ struct blk_mq_queue_map *qmap = &shost->tag_set.map[0];
if (USER_CTRL_IRQ(vha->hw))
- rc = blk_mq_map_queues(&shost->tag_set);
+ rc = blk_mq_map_queues(qmap);
else
- rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0);
+ rc = blk_mq_pci_map_queues(qmap, vha->hw->pdev, 0);
return rc;
}
@@ -7106,8 +7089,7 @@ qla2x00_module_exit(void)
qla2x00_release_firmware();
kmem_cache_destroy(srb_cachep);
qlt_exit();
- if (ctx_cachep)
- kmem_cache_destroy(ctx_cachep);
+ kmem_cache_destroy(ctx_cachep);
fc_release_transport(qla2xxx_transport_template);
fc_release_transport(qla2xxx_transport_vport_template);
}
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index c4504740f0e2..510337eac106 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -2379,20 +2379,20 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd)
}
if (mcmd->flags == QLA24XX_MGMT_SEND_NACK) {
- if (mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode ==
- ELS_LOGO ||
- mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode ==
- ELS_PRLO ||
- mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode ==
- ELS_TPRLO) {
+ switch (mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode) {
+ case ELS_LOGO:
+ case ELS_PRLO:
+ case ELS_TPRLO:
ql_dbg(ql_dbg_disc, vha, 0x2106,
"TM response logo %phC status %#x state %#x",
mcmd->sess->port_name, mcmd->fc_tm_rsp,
mcmd->flags);
qlt_schedule_sess_for_deletion(mcmd->sess);
- } else {
+ break;
+ default:
qlt_send_notify_ack(vha->hw->base_qpair,
&mcmd->orig_iocb.imm_ntfy, 0, 0, 0, 0, 0, 0);
+ break;
}
} else {
if (mcmd->orig_iocb.atio.u.raw.entry_type == ABTS_RECV_24XX) {
@@ -2660,9 +2660,9 @@ static void qlt_load_cont_data_segments(struct qla_tgt_prm *prm)
cnt < QLA_TGT_DATASEGS_PER_CONT_24XX && prm->seg_cnt;
cnt++, prm->seg_cnt--) {
*dword_ptr++ =
- cpu_to_le32(pci_dma_lo32
+ cpu_to_le32(lower_32_bits
(sg_dma_address(prm->sg)));
- *dword_ptr++ = cpu_to_le32(pci_dma_hi32
+ *dword_ptr++ = cpu_to_le32(upper_32_bits
(sg_dma_address(prm->sg)));
*dword_ptr++ = cpu_to_le32(sg_dma_len(prm->sg));
@@ -2704,9 +2704,9 @@ static void qlt_load_data_segments(struct qla_tgt_prm *prm)
(cnt < QLA_TGT_DATASEGS_PER_CMD_24XX) && prm->seg_cnt;
cnt++, prm->seg_cnt--) {
*dword_ptr++ =
- cpu_to_le32(pci_dma_lo32(sg_dma_address(prm->sg)));
+ cpu_to_le32(lower_32_bits(sg_dma_address(prm->sg)));
- *dword_ptr++ = cpu_to_le32(pci_dma_hi32(
+ *dword_ptr++ = cpu_to_le32(upper_32_bits(
sg_dma_address(prm->sg)));
*dword_ptr++ = cpu_to_le32(sg_dma_len(prm->sg));
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 721da593b1bc..577e1786a3f1 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -771,14 +771,6 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
#define FC_TM_REJECT 4
#define FC_TM_FAILED 5
-#if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G)
-#define pci_dma_lo32(a) (a & 0xffffffff)
-#define pci_dma_hi32(a) ((((a) >> 16)>>16) & 0xffffffff)
-#else
-#define pci_dma_lo32(a) (a & 0xffffffff)
-#define pci_dma_hi32(a) 0
-#endif
-
#define QLA_TGT_SENSE_VALID(sense) ((sense != NULL) && \
(((const uint8_t *)(sense))[0] & 0x70) == 0x70)
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 12bafff71a1a..ca7945cb959b 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
/*
* Driver version
*/
-#define QLA2XXX_VERSION "10.00.00.11-k"
+#define QLA2XXX_VERSION "10.00.00.12-k"
#define QLA_DRIVER_MAJOR_VER 10
#define QLA_DRIVER_MINOR_VER 0
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 65053c066680..283e6b80abb5 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -108,11 +108,6 @@ static ssize_t tcm_qla2xxx_format_wwn(char *buf, size_t len, u64 wwn)
b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]);
}
-static char *tcm_qla2xxx_get_fabric_name(void)
-{
- return "qla2xxx";
-}
-
/*
* From drivers/scsi/scsi_transport_fc.c:fc_parse_wwn
*/
@@ -178,11 +173,6 @@ static int tcm_qla2xxx_npiv_parse_wwn(
return 0;
}
-static char *tcm_qla2xxx_npiv_get_fabric_name(void)
-{
- return "qla2xxx_npiv";
-}
-
static char *tcm_qla2xxx_get_fabric_wwn(struct se_portal_group *se_tpg)
{
struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
@@ -964,38 +954,14 @@ static ssize_t tcm_qla2xxx_tpg_enable_show(struct config_item *item,
atomic_read(&tpg->lport_tpg_enabled));
}
-static void tcm_qla2xxx_depend_tpg(struct work_struct *work)
-{
- struct tcm_qla2xxx_tpg *base_tpg = container_of(work,
- struct tcm_qla2xxx_tpg, tpg_base_work);
- struct se_portal_group *se_tpg = &base_tpg->se_tpg;
- struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha;
-
- if (!target_depend_item(&se_tpg->tpg_group.cg_item)) {
- atomic_set(&base_tpg->lport_tpg_enabled, 1);
- qlt_enable_vha(base_vha);
- }
- complete(&base_tpg->tpg_base_comp);
-}
-
-static void tcm_qla2xxx_undepend_tpg(struct work_struct *work)
-{
- struct tcm_qla2xxx_tpg *base_tpg = container_of(work,
- struct tcm_qla2xxx_tpg, tpg_base_work);
- struct se_portal_group *se_tpg = &base_tpg->se_tpg;
- struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha;
-
- if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) {
- atomic_set(&base_tpg->lport_tpg_enabled, 0);
- target_undepend_item(&se_tpg->tpg_group.cg_item);
- }
- complete(&base_tpg->tpg_base_comp);
-}
-
static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = to_tpg(item);
+ struct se_wwn *se_wwn = se_tpg->se_tpg_wwn;
+ struct tcm_qla2xxx_lport *lport = container_of(se_wwn,
+ struct tcm_qla2xxx_lport, lport_wwn);
+ struct scsi_qla_host *vha = lport->qla_vha;
struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
struct tcm_qla2xxx_tpg, se_tpg);
unsigned long op;
@@ -1014,24 +980,16 @@ static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item,
if (atomic_read(&tpg->lport_tpg_enabled))
return -EEXIST;
- INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_depend_tpg);
+ atomic_set(&tpg->lport_tpg_enabled, 1);
+ qlt_enable_vha(vha);
} else {
if (!atomic_read(&tpg->lport_tpg_enabled))
return count;
- INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_undepend_tpg);
+ atomic_set(&tpg->lport_tpg_enabled, 0);
+ qlt_stop_phase1(vha->vha_tgt.qla_tgt);
}
- init_completion(&tpg->tpg_base_comp);
- schedule_work(&tpg->tpg_base_work);
- wait_for_completion(&tpg->tpg_base_comp);
- if (op) {
- if (!atomic_read(&tpg->lport_tpg_enabled))
- return -ENODEV;
- } else {
- if (atomic_read(&tpg->lport_tpg_enabled))
- return -EPERM;
- }
return count;
}
@@ -1920,14 +1878,13 @@ static struct configfs_attribute *tcm_qla2xxx_wwn_attrs[] = {
static const struct target_core_fabric_ops tcm_qla2xxx_ops = {
.module = THIS_MODULE,
- .name = "qla2xxx",
+ .fabric_name = "qla2xxx",
.node_acl_size = sizeof(struct tcm_qla2xxx_nacl),
/*
* XXX: Limit assumes single page per scatter-gather-list entry.
* Current maximum is ~4.9 MB per se_cmd->t_data_sg with PAGE_SIZE=4096
*/
.max_data_sg_nents = 1200,
- .get_fabric_name = tcm_qla2xxx_get_fabric_name,
.tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn,
.tpg_get_tag = tcm_qla2xxx_get_tag,
.tpg_check_demo_mode = tcm_qla2xxx_check_demo_mode,
@@ -1969,9 +1926,8 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = {
static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
.module = THIS_MODULE,
- .name = "qla2xxx_npiv",
+ .fabric_name = "qla2xxx_npiv",
.node_acl_size = sizeof(struct tcm_qla2xxx_nacl),
- .get_fabric_name = tcm_qla2xxx_npiv_get_fabric_name,
.tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn,
.tpg_get_tag = tcm_qla2xxx_get_tag,
.tpg_check_demo_mode = tcm_qla2xxx_check_demo_mode,
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
index 7550ba2831c3..147cf6c90366 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
@@ -48,9 +48,6 @@ struct tcm_qla2xxx_tpg {
struct tcm_qla2xxx_tpg_attrib tpg_attrib;
/* Returned by tcm_qla2xxx_make_tpg() */
struct se_portal_group se_tpg;
- /* Items for dealing with configfs_depend_item */
- struct completion tpg_base_comp;
- struct work_struct tpg_base_work;
};
struct tcm_qla2xxx_fc_loopid {
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 051164f755a4..949e186cc5d7 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -205,7 +205,6 @@ static struct scsi_host_template qla4xxx_driver_template = {
.this_id = -1,
.cmd_per_lun = 3,
- .use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = SG_ALL,
.max_sectors = 0xFFFF,
@@ -4160,20 +4159,16 @@ static void qla4xxx_mem_free(struct scsi_qla_host *ha)
ha->fw_dump_size = 0;
/* Free srb pool. */
- if (ha->srb_mempool)
- mempool_destroy(ha->srb_mempool);
-
+ mempool_destroy(ha->srb_mempool);
ha->srb_mempool = NULL;
- if (ha->chap_dma_pool)
- dma_pool_destroy(ha->chap_dma_pool);
+ dma_pool_destroy(ha->chap_dma_pool);
if (ha->chap_list)
vfree(ha->chap_list);
ha->chap_list = NULL;
- if (ha->fw_ddb_dma_pool)
- dma_pool_destroy(ha->fw_ddb_dma_pool);
+ dma_pool_destroy(ha->fw_ddb_dma_pool);
/* release io space registers */
if (is_qla8022(ha)) {
diff --git a/drivers/scsi/qlogicfas.c b/drivers/scsi/qlogicfas.c
index 95431d605c24..8f709002f746 100644
--- a/drivers/scsi/qlogicfas.c
+++ b/drivers/scsi/qlogicfas.c
@@ -193,7 +193,7 @@ static struct scsi_host_template qlogicfas_driver_template = {
.can_queue = 1,
.this_id = -1,
.sg_tablesize = SG_ALL,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
};
static __init int qlogicfas_init(void)
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index 9d09228eee28..e35ce762d454 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -1287,7 +1287,6 @@ static struct scsi_host_template qpti_template = {
.can_queue = QLOGICPTI_REQ_QUEUE_LEN,
.this_id = 7,
.sg_tablesize = QLOGICPTI_MAX_SG(QLOGICPTI_REQ_QUEUE_LEN),
- .use_clustering = ENABLE_CLUSTERING,
};
static const struct of_device_id qpti_match[];
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index fc1356d101b0..7675ff0ca2ea 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -780,11 +780,8 @@ MODULE_LICENSE("GPL");
module_param(scsi_logging_level, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");
-#ifdef CONFIG_SCSI_MQ_DEFAULT
+/* This should go away in the future, it doesn't do anything anymore */
bool scsi_use_blk_mq = true;
-#else
-bool scsi_use_blk_mq = false;
-#endif
module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO);
static int __init init_scsi(void)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 60bcc6df97a9..661512bec3ac 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -3973,7 +3973,6 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp)
return 1; /* no resources, will be marked offline */
}
sdp->hostdata = devip;
- blk_queue_max_segment_size(sdp->request_queue, -1U);
if (sdebug_no_uld)
sdp->no_uld_attach = 1;
config_cdb_len(sdp);
@@ -5851,7 +5850,7 @@ static struct scsi_host_template sdebug_driver_template = {
.sg_tablesize = SG_MAX_SEGMENTS,
.cmd_per_lun = DEF_CMD_PER_LUN,
.max_sectors = -1U,
- .use_clustering = DISABLE_CLUSTERING,
+ .max_segment_size = -1U,
.module = THIS_MODULE,
.track_queue_depth = 1,
};
@@ -5866,8 +5865,9 @@ static int sdebug_driver_probe(struct device *dev)
sdbg_host = to_sdebug_host(dev);
sdebug_driver_template.can_queue = sdebug_max_queue;
- if (sdebug_clustering)
- sdebug_driver_template.use_clustering = ENABLE_CLUSTERING;
+ if (!sdebug_clustering)
+ sdebug_driver_template.dma_boundary = PAGE_SIZE - 1;
+
hpnt = scsi_host_alloc(&sdebug_driver_template, sizeof(sdbg_host));
if (NULL == hpnt) {
pr_err("scsi_host_alloc failed\n");
@@ -5881,8 +5881,7 @@ static int sdebug_driver_probe(struct device *dev)
}
/* Decide whether to tell scsi subsystem that we want mq */
/* Following should give the same answer for each host */
- if (shost_use_blk_mq(hpnt))
- hpnt->nr_hw_queues = submit_queues;
+ hpnt->nr_hw_queues = submit_queues;
sdbg_host->shost = hpnt;
*((struct sdebug_host_info **)hpnt->hostdata) = sdbg_host;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index c736d61b1648..16eef068e9e9 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -297,19 +297,19 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
if (rtn == BLK_EH_DONE) {
/*
- * For blk-mq, we must set the request state to complete now
- * before sending the request to the scsi error handler. This
- * will prevent a use-after-free in the event the LLD manages
- * to complete the request before the error handler finishes
- * processing this timed out request.
+ * Set the command to complete first in order to prevent a real
+ * completion from releasing the command while error handling
+ * is using it. If the command was already completed, then the
+ * lower level driver beat the timeout handler, and it is safe
+ * to return without escalating error recovery.
*
- * If the request was already completed, then the LLD beat the
- * time out handler from transferring the request to the scsi
- * error handler. In that case we can return immediately as no
- * further action is required.
+ * If timeout handling lost the race to a real completion, the
+ * block layer may ignore that due to a fake timeout injection,
+ * so return RESET_TIMER to allow error handling another shot
+ * at this command.
*/
- if (req->q->mq_ops && !blk_mq_mark_complete(req))
- return rtn;
+ if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state))
+ return BLK_EH_RESET_TIMER;
if (scsi_abort_command(scmd) != SUCCESS) {
set_host_byte(scmd, DID_TIME_OUT);
scsi_eh_scmd_add(scmd);
@@ -1932,7 +1932,7 @@ maybe_retry:
static void eh_lock_door_done(struct request *req, blk_status_t status)
{
- __blk_put_request(req->q, req);
+ blk_put_request(req);
}
/**
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index fa6e0c3b3aa6..b13cc9288ba0 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -168,8 +168,6 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
{
struct scsi_device *device = cmd->device;
- struct request_queue *q = device->request_queue;
- unsigned long flags;
SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd,
"Inserting command %p into mlqueue\n", cmd));
@@ -190,26 +188,20 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
* before blk_cleanup_queue() finishes.
*/
cmd->result = 0;
- if (q->mq_ops) {
- /*
- * Before a SCSI command is dispatched,
- * get_device(&sdev->sdev_gendev) is called and the host,
- * target and device busy counters are increased. Since
- * requeuing a request causes these actions to be repeated and
- * since scsi_device_unbusy() has already been called,
- * put_device(&device->sdev_gendev) must still be called. Call
- * put_device() after blk_mq_requeue_request() to avoid that
- * removal of the SCSI device can start before requeueing has
- * happened.
- */
- blk_mq_requeue_request(cmd->request, true);
- put_device(&device->sdev_gendev);
- return;
- }
- spin_lock_irqsave(q->queue_lock, flags);
- blk_requeue_request(q, cmd->request);
- kblockd_schedule_work(&device->requeue_work);
- spin_unlock_irqrestore(q->queue_lock, flags);
+
+ /*
+ * Before a SCSI command is dispatched,
+ * get_device(&sdev->sdev_gendev) is called and the host,
+ * target and device busy counters are increased. Since
+ * requeuing a request causes these actions to be repeated and
+ * since scsi_device_unbusy() has already been called,
+ * put_device(&device->sdev_gendev) must still be called. Call
+ * put_device() after blk_mq_requeue_request() to avoid that
+ * removal of the SCSI device can start before requeueing has
+ * happened.
+ */
+ blk_mq_requeue_request(cmd->request, true);
+ put_device(&device->sdev_gendev);
}
/*
@@ -370,10 +362,7 @@ void scsi_device_unbusy(struct scsi_device *sdev)
static void scsi_kick_queue(struct request_queue *q)
{
- if (q->mq_ops)
- blk_mq_run_hw_queues(q, false);
- else
- blk_run_queue(q);
+ blk_mq_run_hw_queues(q, false);
}
/*
@@ -534,10 +523,7 @@ static void scsi_run_queue(struct request_queue *q)
if (!list_empty(&sdev->host->starved_list))
scsi_starved_list_run(sdev->host);
- if (q->mq_ops)
- blk_mq_run_hw_queues(q, false);
- else
- blk_run_queue(q);
+ blk_mq_run_hw_queues(q, false);
}
void scsi_requeue_run_queue(struct work_struct *work)
@@ -550,42 +536,6 @@ void scsi_requeue_run_queue(struct work_struct *work)
scsi_run_queue(q);
}
-/*
- * Function: scsi_requeue_command()
- *
- * Purpose: Handle post-processing of completed commands.
- *
- * Arguments: q - queue to operate on
- * cmd - command that may need to be requeued.
- *
- * Returns: Nothing
- *
- * Notes: After command completion, there may be blocks left
- * over which weren't finished by the previous command
- * this can be for a number of reasons - the main one is
- * I/O errors in the middle of the request, in which case
- * we need to request the blocks that come after the bad
- * sector.
- * Notes: Upon return, cmd is a stale pointer.
- */
-static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
-{
- struct scsi_device *sdev = cmd->device;
- struct request *req = cmd->request;
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- blk_unprep_request(req);
- req->special = NULL;
- scsi_put_command(cmd);
- blk_requeue_request(q, req);
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- scsi_run_queue(q);
-
- put_device(&sdev->sdev_gendev);
-}
-
void scsi_run_host_queues(struct Scsi_Host *shost)
{
struct scsi_device *sdev;
@@ -626,42 +576,6 @@ static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
scsi_del_cmd_from_list(cmd);
}
-/*
- * Function: scsi_release_buffers()
- *
- * Purpose: Free resources allocate for a scsi_command.
- *
- * Arguments: cmd - command that we are bailing.
- *
- * Lock status: Assumed that no lock is held upon entry.
- *
- * Returns: Nothing
- *
- * Notes: In the event that an upper level driver rejects a
- * command, we must release resources allocated during
- * the __init_io() function. Primarily this would involve
- * the scatter-gather table.
- */
-static void scsi_release_buffers(struct scsi_cmnd *cmd)
-{
- if (cmd->sdb.table.nents)
- sg_free_table_chained(&cmd->sdb.table, false);
-
- memset(&cmd->sdb, 0, sizeof(cmd->sdb));
-
- if (scsi_prot_sg_count(cmd))
- sg_free_table_chained(&cmd->prot_sdb->table, false);
-}
-
-static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
-{
- struct scsi_data_buffer *bidi_sdb = cmd->request->next_rq->special;
-
- sg_free_table_chained(&bidi_sdb->table, false);
- kmem_cache_free(scsi_sdb_cache, bidi_sdb);
- cmd->request->next_rq->special = NULL;
-}
-
/* Returns false when no more bytes to process, true if there are more */
static bool scsi_end_request(struct request *req, blk_status_t error,
unsigned int bytes, unsigned int bidi_bytes)
@@ -687,46 +601,30 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
destroy_rcu_head(&cmd->rcu);
}
- if (req->mq_ctx) {
- /*
- * In the MQ case the command gets freed by __blk_mq_end_request,
- * so we have to do all cleanup that depends on it earlier.
- *
- * We also can't kick the queues from irq context, so we
- * will have to defer it to a workqueue.
- */
- scsi_mq_uninit_cmd(cmd);
-
- /*
- * queue is still alive, so grab the ref for preventing it
- * from being cleaned up during running queue.
- */
- percpu_ref_get(&q->q_usage_counter);
-
- __blk_mq_end_request(req, error);
-
- if (scsi_target(sdev)->single_lun ||
- !list_empty(&sdev->host->starved_list))
- kblockd_schedule_work(&sdev->requeue_work);
- else
- blk_mq_run_hw_queues(q, true);
-
- percpu_ref_put(&q->q_usage_counter);
- } else {
- unsigned long flags;
+ /*
+ * In the MQ case the command gets freed by __blk_mq_end_request,
+ * so we have to do all cleanup that depends on it earlier.
+ *
+ * We also can't kick the queues from irq context, so we
+ * will have to defer it to a workqueue.
+ */
+ scsi_mq_uninit_cmd(cmd);
- if (bidi_bytes)
- scsi_release_bidi_buffers(cmd);
- scsi_release_buffers(cmd);
- scsi_put_command(cmd);
+ /*
+ * queue is still alive, so grab the ref for preventing it
+ * from being cleaned up during running queue.
+ */
+ percpu_ref_get(&q->q_usage_counter);
- spin_lock_irqsave(q->queue_lock, flags);
- blk_finish_request(req, error);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ __blk_mq_end_request(req, error);
- scsi_run_queue(q);
- }
+ if (scsi_target(sdev)->single_lun ||
+ !list_empty(&sdev->host->starved_list))
+ kblockd_schedule_work(&sdev->requeue_work);
+ else
+ blk_mq_run_hw_queues(q, true);
+ percpu_ref_put(&q->q_usage_counter);
put_device(&sdev->sdev_gendev);
return false;
}
@@ -774,13 +672,7 @@ static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
struct request_queue *q)
{
/* A new command will be prepared and issued. */
- if (q->mq_ops) {
- scsi_mq_requeue_cmd(cmd);
- } else {
- /* Unprep request and put it back at head of the queue. */
- scsi_release_buffers(cmd);
- scsi_requeue_command(q, cmd);
- }
+ scsi_mq_requeue_cmd(cmd);
}
/* Helper for scsi_io_completion() when special action required. */
@@ -1120,7 +1012,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
scsi_io_completion_action(cmd, result);
}
-static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
+static blk_status_t scsi_init_sgtable(struct request *req,
+ struct scsi_data_buffer *sdb)
{
int count;
@@ -1129,7 +1022,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
*/
if (unlikely(sg_alloc_table_chained(&sdb->table,
blk_rq_nr_phys_segments(req), sdb->table.sgl)))
- return BLKPREP_DEFER;
+ return BLK_STS_RESOURCE;
/*
* Next, walk the list, and fill in the addresses and sizes of
@@ -1139,7 +1032,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
BUG_ON(count > sdb->table.nents);
sdb->table.nents = count;
sdb->length = blk_rq_payload_bytes(req);
- return BLKPREP_OK;
+ return BLK_STS_OK;
}
/*
@@ -1149,62 +1042,48 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
*
* Arguments: cmd - Command descriptor we wish to initialize
*
- * Returns: 0 on success
- * BLKPREP_DEFER if the failure is retryable
- * BLKPREP_KILL if the failure is fatal
+ * Returns: BLK_STS_OK on success
+ * BLK_STS_RESOURCE if the failure is retryable
+ * BLK_STS_IOERR if the failure is fatal
*/
-int scsi_init_io(struct scsi_cmnd *cmd)
+blk_status_t scsi_init_io(struct scsi_cmnd *cmd)
{
- struct scsi_device *sdev = cmd->device;
struct request *rq = cmd->request;
- bool is_mq = (rq->mq_ctx != NULL);
- int error = BLKPREP_KILL;
+ blk_status_t ret;
if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq)))
- goto err_exit;
+ return BLK_STS_IOERR;
- error = scsi_init_sgtable(rq, &cmd->sdb);
- if (error)
- goto err_exit;
+ ret = scsi_init_sgtable(rq, &cmd->sdb);
+ if (ret)
+ return ret;
if (blk_bidi_rq(rq)) {
- if (!rq->q->mq_ops) {
- struct scsi_data_buffer *bidi_sdb =
- kmem_cache_zalloc(scsi_sdb_cache, GFP_ATOMIC);
- if (!bidi_sdb) {
- error = BLKPREP_DEFER;
- goto err_exit;
- }
-
- rq->next_rq->special = bidi_sdb;
- }
-
- error = scsi_init_sgtable(rq->next_rq, rq->next_rq->special);
- if (error)
- goto err_exit;
+ ret = scsi_init_sgtable(rq->next_rq, rq->next_rq->special);
+ if (ret)
+ goto out_free_sgtables;
}
if (blk_integrity_rq(rq)) {
struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
int ivecs, count;
- if (prot_sdb == NULL) {
+ if (WARN_ON_ONCE(!prot_sdb)) {
/*
* This can happen if someone (e.g. multipath)
* queues a command to a device on an adapter
* that does not support DIX.
*/
- WARN_ON_ONCE(1);
- error = BLKPREP_KILL;
- goto err_exit;
+ ret = BLK_STS_IOERR;
+ goto out_free_sgtables;
}
ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio);
if (sg_alloc_table_chained(&prot_sdb->table, ivecs,
prot_sdb->table.sgl)) {
- error = BLKPREP_DEFER;
- goto err_exit;
+ ret = BLK_STS_RESOURCE;
+ goto out_free_sgtables;
}
count = blk_rq_map_integrity_sg(rq->q, rq->bio,
@@ -1216,17 +1095,10 @@ int scsi_init_io(struct scsi_cmnd *cmd)
cmd->prot_sdb->table.nents = count;
}
- return BLKPREP_OK;
-err_exit:
- if (is_mq) {
- scsi_mq_free_sgtables(cmd);
- } else {
- scsi_release_buffers(cmd);
- cmd->request->special = NULL;
- scsi_put_command(cmd);
- put_device(&sdev->sdev_gendev);
- }
- return error;
+ return BLK_STS_OK;
+out_free_sgtables:
+ scsi_mq_free_sgtables(cmd);
+ return ret;
}
EXPORT_SYMBOL(scsi_init_io);
@@ -1312,7 +1184,8 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
scsi_add_cmd_to_list(cmd);
}
-static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req)
+static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev,
+ struct request *req)
{
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
@@ -1323,8 +1196,8 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req)
* submit a request without an attached bio.
*/
if (req->bio) {
- int ret = scsi_init_io(cmd);
- if (unlikely(ret))
+ blk_status_t ret = scsi_init_io(cmd);
+ if (unlikely(ret != BLK_STS_OK))
return ret;
} else {
BUG_ON(blk_rq_bytes(req));
@@ -1336,20 +1209,21 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req)
cmd->cmnd = scsi_req(req)->cmd;
cmd->transfersize = blk_rq_bytes(req);
cmd->allowed = scsi_req(req)->retries;
- return BLKPREP_OK;
+ return BLK_STS_OK;
}
/*
* Setup a normal block command. These are simple request from filesystems
* that still need to be translated to SCSI CDBs from the ULD.
*/
-static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
+static blk_status_t scsi_setup_fs_cmnd(struct scsi_device *sdev,
+ struct request *req)
{
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
if (unlikely(sdev->handler && sdev->handler->prep_fn)) {
- int ret = sdev->handler->prep_fn(sdev, req);
- if (ret != BLKPREP_OK)
+ blk_status_t ret = sdev->handler->prep_fn(sdev, req);
+ if (ret != BLK_STS_OK)
return ret;
}
@@ -1358,7 +1232,8 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
return scsi_cmd_to_driver(cmd)->init_command(cmd);
}
-static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req)
+static blk_status_t scsi_setup_cmnd(struct scsi_device *sdev,
+ struct request *req)
{
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
@@ -1375,129 +1250,48 @@ static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req)
return scsi_setup_fs_cmnd(sdev, req);
}
-static int
+static blk_status_t
scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
{
- int ret = BLKPREP_OK;
-
- /*
- * If the device is not in running state we will reject some
- * or all commands.
- */
- if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
- switch (sdev->sdev_state) {
- case SDEV_OFFLINE:
- case SDEV_TRANSPORT_OFFLINE:
- /*
- * If the device is offline we refuse to process any
- * commands. The device must be brought online
- * before trying any recovery commands.
- */
- sdev_printk(KERN_ERR, sdev,
- "rejecting I/O to offline device\n");
- ret = BLKPREP_KILL;
- break;
- case SDEV_DEL:
- /*
- * If the device is fully deleted, we refuse to
- * process any commands as well.
- */
- sdev_printk(KERN_ERR, sdev,
- "rejecting I/O to dead device\n");
- ret = BLKPREP_KILL;
- break;
- case SDEV_BLOCK:
- case SDEV_CREATED_BLOCK:
- ret = BLKPREP_DEFER;
- break;
- case SDEV_QUIESCE:
- /*
- * If the devices is blocked we defer normal commands.
- */
- if (req && !(req->rq_flags & RQF_PREEMPT))
- ret = BLKPREP_DEFER;
- break;
- default:
- /*
- * For any other not fully online state we only allow
- * special commands. In particular any user initiated
- * command is not allowed.
- */
- if (req && !(req->rq_flags & RQF_PREEMPT))
- ret = BLKPREP_KILL;
- break;
- }
- }
- return ret;
-}
-
-static int
-scsi_prep_return(struct request_queue *q, struct request *req, int ret)
-{
- struct scsi_device *sdev = q->queuedata;
-
- switch (ret) {
- case BLKPREP_KILL:
- case BLKPREP_INVALID:
- scsi_req(req)->result = DID_NO_CONNECT << 16;
- /* release the command and kill it */
- if (req->special) {
- struct scsi_cmnd *cmd = req->special;
- scsi_release_buffers(cmd);
- scsi_put_command(cmd);
- put_device(&sdev->sdev_gendev);
- req->special = NULL;
- }
- break;
- case BLKPREP_DEFER:
+ switch (sdev->sdev_state) {
+ case SDEV_OFFLINE:
+ case SDEV_TRANSPORT_OFFLINE:
/*
- * If we defer, the blk_peek_request() returns NULL, but the
- * queue must be restarted, so we schedule a callback to happen
- * shortly.
+ * If the device is offline we refuse to process any
+ * commands. The device must be brought online
+ * before trying any recovery commands.
*/
- if (atomic_read(&sdev->device_busy) == 0)
- blk_delay_queue(q, SCSI_QUEUE_DELAY);
- break;
+ sdev_printk(KERN_ERR, sdev,
+ "rejecting I/O to offline device\n");
+ return BLK_STS_IOERR;
+ case SDEV_DEL:
+ /*
+ * If the device is fully deleted, we refuse to
+ * process any commands as well.
+ */
+ sdev_printk(KERN_ERR, sdev,
+ "rejecting I/O to dead device\n");
+ return BLK_STS_IOERR;
+ case SDEV_BLOCK:
+ case SDEV_CREATED_BLOCK:
+ return BLK_STS_RESOURCE;
+ case SDEV_QUIESCE:
+ /*
+ * If the devices is blocked we defer normal commands.
+ */
+ if (req && !(req->rq_flags & RQF_PREEMPT))
+ return BLK_STS_RESOURCE;
+ return BLK_STS_OK;
default:
- req->rq_flags |= RQF_DONTPREP;
- }
-
- return ret;
-}
-
-static int scsi_prep_fn(struct request_queue *q, struct request *req)
-{
- struct scsi_device *sdev = q->queuedata;
- struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
- int ret;
-
- ret = scsi_prep_state_check(sdev, req);
- if (ret != BLKPREP_OK)
- goto out;
-
- if (!req->special) {
- /* Bail if we can't get a reference to the device */
- if (unlikely(!get_device(&sdev->sdev_gendev))) {
- ret = BLKPREP_DEFER;
- goto out;
- }
-
- scsi_init_command(sdev, cmd);
- req->special = cmd;
+ /*
+ * For any other not fully online state we only allow
+ * special commands. In particular any user initiated
+ * command is not allowed.
+ */
+ if (req && !(req->rq_flags & RQF_PREEMPT))
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
}
-
- cmd->tag = req->tag;
- cmd->request = req;
- cmd->prot_op = SCSI_PROT_NORMAL;
-
- ret = scsi_setup_cmnd(sdev, req);
-out:
- return scsi_prep_return(q, req, ret);
-}
-
-static void scsi_unprep_fn(struct request_queue *q, struct request *req)
-{
- scsi_uninit_cmd(blk_mq_rq_to_pdu(req));
}
/*
@@ -1519,14 +1313,8 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
/*
* unblock after device_blocked iterates to zero
*/
- if (atomic_dec_return(&sdev->device_blocked) > 0) {
- /*
- * For the MQ case we take care of this in the caller.
- */
- if (!q->mq_ops)
- blk_delay_queue(q, SCSI_QUEUE_DELAY);
+ if (atomic_dec_return(&sdev->device_blocked) > 0)
goto out_dec;
- }
SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev,
"unblocking device at zero depth\n"));
}
@@ -1661,13 +1449,13 @@ out_dec:
* needs to return 'not busy'. Otherwise, request stacking drivers
* may hold requests forever.
*/
-static int scsi_lld_busy(struct request_queue *q)
+static bool scsi_mq_lld_busy(struct request_queue *q)
{
struct scsi_device *sdev = q->queuedata;
struct Scsi_Host *shost;
if (blk_queue_dying(q))
- return 0;
+ return false;
shost = sdev->host;
@@ -1678,43 +1466,9 @@ static int scsi_lld_busy(struct request_queue *q)
* in SCSI layer.
*/
if (scsi_host_in_recovery(shost) || scsi_device_is_busy(sdev))
- return 1;
-
- return 0;
-}
-
-/*
- * Kill a request for a dead device
- */
-static void scsi_kill_request(struct request *req, struct request_queue *q)
-{
- struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
- struct scsi_device *sdev;
- struct scsi_target *starget;
- struct Scsi_Host *shost;
-
- blk_start_request(req);
-
- scmd_printk(KERN_INFO, cmd, "killing request\n");
-
- sdev = cmd->device;
- starget = scsi_target(sdev);
- shost = sdev->host;
- scsi_init_cmd_errh(cmd);
- cmd->result = DID_NO_CONNECT << 16;
- atomic_inc(&cmd->device->iorequest_cnt);
-
- /*
- * SCSI request completion path will do scsi_device_unbusy(),
- * bump busy counts. To bump the counters, we need to dance
- * with the locks as normal issue path does.
- */
- atomic_inc(&sdev->device_busy);
- atomic_inc(&shost->host_busy);
- if (starget->can_queue > 0)
- atomic_inc(&starget->target_busy);
+ return true;
- blk_complete_request(req);
+ return false;
}
static void scsi_softirq_done(struct request *rq)
@@ -1837,170 +1591,6 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
return 0;
}
-/**
- * scsi_done - Invoke completion on finished SCSI command.
- * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
- * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
- *
- * Description: This function is the mid-level's (SCSI Core) interrupt routine,
- * which regains ownership of the SCSI command (de facto) from a LLDD, and
- * calls blk_complete_request() for further processing.
- *
- * This function is interrupt context safe.
- */
-static void scsi_done(struct scsi_cmnd *cmd)
-{
- trace_scsi_dispatch_cmd_done(cmd);
- blk_complete_request(cmd->request);
-}
-
-/*
- * Function: scsi_request_fn()
- *
- * Purpose: Main strategy routine for SCSI.
- *
- * Arguments: q - Pointer to actual queue.
- *
- * Returns: Nothing
- *
- * Lock status: request queue lock assumed to be held when called.
- *
- * Note: See sd_zbc.c sd_zbc_write_lock_zone() for write order
- * protection for ZBC disks.
- */
-static void scsi_request_fn(struct request_queue *q)
- __releases(q->queue_lock)
- __acquires(q->queue_lock)
-{
- struct scsi_device *sdev = q->queuedata;
- struct Scsi_Host *shost;
- struct scsi_cmnd *cmd;
- struct request *req;
-
- /*
- * To start with, we keep looping until the queue is empty, or until
- * the host is no longer able to accept any more requests.
- */
- shost = sdev->host;
- for (;;) {
- int rtn;
- /*
- * get next queueable request. We do this early to make sure
- * that the request is fully prepared even if we cannot
- * accept it.
- */
- req = blk_peek_request(q);
- if (!req)
- break;
-
- if (unlikely(!scsi_device_online(sdev))) {
- sdev_printk(KERN_ERR, sdev,
- "rejecting I/O to offline device\n");
- scsi_kill_request(req, q);
- continue;
- }
-
- if (!scsi_dev_queue_ready(q, sdev))
- break;
-
- /*
- * Remove the request from the request list.
- */
- if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
- blk_start_request(req);
-
- spin_unlock_irq(q->queue_lock);
- cmd = blk_mq_rq_to_pdu(req);
- if (cmd != req->special) {
- printk(KERN_CRIT "impossible request in %s.\n"
- "please mail a stack trace to "
- "linux-scsi@vger.kernel.org\n",
- __func__);
- blk_dump_rq_flags(req, "foo");
- BUG();
- }
-
- /*
- * We hit this when the driver is using a host wide
- * tag map. For device level tag maps the queue_depth check
- * in the device ready fn would prevent us from trying
- * to allocate a tag. Since the map is a shared host resource
- * we add the dev to the starved list so it eventually gets
- * a run when a tag is freed.
- */
- if (blk_queue_tagged(q) && !(req->rq_flags & RQF_QUEUED)) {
- spin_lock_irq(shost->host_lock);
- if (list_empty(&sdev->starved_entry))
- list_add_tail(&sdev->starved_entry,
- &shost->starved_list);
- spin_unlock_irq(shost->host_lock);
- goto not_ready;
- }
-
- if (!scsi_target_queue_ready(shost, sdev))
- goto not_ready;
-
- if (!scsi_host_queue_ready(q, shost, sdev))
- goto host_not_ready;
-
- if (sdev->simple_tags)
- cmd->flags |= SCMD_TAGGED;
- else
- cmd->flags &= ~SCMD_TAGGED;
-
- /*
- * Finally, initialize any error handling parameters, and set up
- * the timers for timeouts.
- */
- scsi_init_cmd_errh(cmd);
-
- /*
- * Dispatch the command to the low-level driver.
- */
- cmd->scsi_done = scsi_done;
- rtn = scsi_dispatch_cmd(cmd);
- if (rtn) {
- scsi_queue_insert(cmd, rtn);
- spin_lock_irq(q->queue_lock);
- goto out_delay;
- }
- spin_lock_irq(q->queue_lock);
- }
-
- return;
-
- host_not_ready:
- if (scsi_target(sdev)->can_queue > 0)
- atomic_dec(&scsi_target(sdev)->target_busy);
- not_ready:
- /*
- * lock q, handle tag, requeue req, and decrement device_busy. We
- * must return with queue_lock held.
- *
- * Decrementing device_busy without checking it is OK, as all such
- * cases (host limits or settings) should run the queue at some
- * later time.
- */
- spin_lock_irq(q->queue_lock);
- blk_requeue_request(q, req);
- atomic_dec(&sdev->device_busy);
-out_delay:
- if (!atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev))
- blk_delay_queue(q, SCSI_QUEUE_DELAY);
-}
-
-static inline blk_status_t prep_to_mq(int ret)
-{
- switch (ret) {
- case BLKPREP_OK:
- return BLK_STS_OK;
- case BLKPREP_DEFER:
- return BLK_STS_RESOURCE;
- default:
- return BLK_STS_IOERR;
- }
-}
-
/* Size in bytes of the sg-list stored in the scsi-mq command-private data. */
static unsigned int scsi_mq_sgl_size(struct Scsi_Host *shost)
{
@@ -2008,7 +1598,7 @@ static unsigned int scsi_mq_sgl_size(struct Scsi_Host *shost)
sizeof(struct scatterlist);
}
-static int scsi_mq_prep_fn(struct request *req)
+static blk_status_t scsi_mq_prep_fn(struct request *req)
{
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
struct scsi_device *sdev = req->q->queuedata;
@@ -2052,8 +1642,18 @@ static int scsi_mq_prep_fn(struct request *req)
static void scsi_mq_done(struct scsi_cmnd *cmd)
{
+ if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state)))
+ return;
trace_scsi_dispatch_cmd_done(cmd);
- blk_mq_complete_request(cmd->request);
+
+ /*
+ * If the block layer didn't complete the request due to a timeout
+ * injection, scsi must clear its internal completed state so that the
+ * timeout handler will see it needs to escalate its own error
+ * recovery.
+ */
+ if (unlikely(!blk_mq_complete_request(cmd->request)))
+ clear_bit(SCMD_STATE_COMPLETE, &cmd->state);
}
static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
@@ -2096,9 +1696,15 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_status_t ret;
int reason;
- ret = prep_to_mq(scsi_prep_state_check(sdev, req));
- if (ret != BLK_STS_OK)
- goto out_put_budget;
+ /*
+ * If the device is not in running state we will reject some or all
+ * commands.
+ */
+ if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
+ ret = scsi_prep_state_check(sdev, req);
+ if (ret != BLK_STS_OK)
+ goto out_put_budget;
+ }
ret = BLK_STS_RESOURCE;
if (!scsi_target_queue_ready(shost, sdev))
@@ -2106,8 +1712,9 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!scsi_host_queue_ready(q, shost, sdev))
goto out_dec_target_busy;
+ clear_bit(SCMD_STATE_COMPLETE, &cmd->state);
if (!(req->rq_flags & RQF_DONTPREP)) {
- ret = prep_to_mq(scsi_mq_prep_fn(req));
+ ret = scsi_mq_prep_fn(req);
if (ret != BLK_STS_OK)
goto out_dec_host_busy;
req->rq_flags |= RQF_DONTPREP;
@@ -2208,7 +1815,7 @@ static int scsi_map_queues(struct blk_mq_tag_set *set)
if (shost->hostt->map_queues)
return shost->hostt->map_queues(shost);
- return blk_mq_map_queues(set);
+ return blk_mq_map_queues(&set->map[0]);
}
void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
@@ -2235,10 +1842,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
blk_queue_segment_boundary(q, shost->dma_boundary);
dma_set_seg_boundary(dev, shost->dma_boundary);
- blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
-
- if (!shost->use_clustering)
- q->limits.cluster = 0;
+ blk_queue_max_segment_size(q,
+ min(shost->max_segment_size, dma_get_max_seg_size(dev)));
/*
* Set a reasonable default alignment: The larger of 32-byte (dword),
@@ -2251,77 +1856,6 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
}
EXPORT_SYMBOL_GPL(__scsi_init_queue);
-static int scsi_old_init_rq(struct request_queue *q, struct request *rq,
- gfp_t gfp)
-{
- struct Scsi_Host *shost = q->rq_alloc_data;
- const bool unchecked_isa_dma = shost->unchecked_isa_dma;
- struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
-
- memset(cmd, 0, sizeof(*cmd));
-
- if (unchecked_isa_dma)
- cmd->flags |= SCMD_UNCHECKED_ISA_DMA;
- cmd->sense_buffer = scsi_alloc_sense_buffer(unchecked_isa_dma, gfp,
- NUMA_NO_NODE);
- if (!cmd->sense_buffer)
- goto fail;
- cmd->req.sense = cmd->sense_buffer;
-
- if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) {
- cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp);
- if (!cmd->prot_sdb)
- goto fail_free_sense;
- }
-
- return 0;
-
-fail_free_sense:
- scsi_free_sense_buffer(unchecked_isa_dma, cmd->sense_buffer);
-fail:
- return -ENOMEM;
-}
-
-static void scsi_old_exit_rq(struct request_queue *q, struct request *rq)
-{
- struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
-
- if (cmd->prot_sdb)
- kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb);
- scsi_free_sense_buffer(cmd->flags & SCMD_UNCHECKED_ISA_DMA,
- cmd->sense_buffer);
-}
-
-struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev)
-{
- struct Scsi_Host *shost = sdev->host;
- struct request_queue *q;
-
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
- if (!q)
- return NULL;
- q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
- q->rq_alloc_data = shost;
- q->request_fn = scsi_request_fn;
- q->init_rq_fn = scsi_old_init_rq;
- q->exit_rq_fn = scsi_old_exit_rq;
- q->initialize_rq_fn = scsi_initialize_rq;
-
- if (blk_init_allocated_queue(q) < 0) {
- blk_cleanup_queue(q);
- return NULL;
- }
-
- __scsi_init_queue(shost, q);
- blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
- blk_queue_prep_rq(q, scsi_prep_fn);
- blk_queue_unprep_rq(q, scsi_unprep_fn);
- blk_queue_softirq_done(q, scsi_softirq_done);
- blk_queue_rq_timed_out(q, scsi_times_out);
- blk_queue_lld_busy(q, scsi_lld_busy);
- return q;
-}
-
static const struct blk_mq_ops scsi_mq_ops = {
.get_budget = scsi_mq_get_budget,
.put_budget = scsi_mq_put_budget,
@@ -2334,6 +1868,7 @@ static const struct blk_mq_ops scsi_mq_ops = {
.init_request = scsi_mq_init_request,
.exit_request = scsi_mq_exit_request,
.initialize_rq_fn = scsi_initialize_rq,
+ .busy = scsi_mq_lld_busy,
.map_queues = scsi_map_queues,
};
@@ -2388,10 +1923,7 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q)
{
struct scsi_device *sdev = NULL;
- if (q->mq_ops) {
- if (q->mq_ops == &scsi_mq_ops)
- sdev = q->queuedata;
- } else if (q->request_fn == scsi_request_fn)
+ if (q->mq_ops == &scsi_mq_ops)
sdev = q->queuedata;
if (!sdev || !get_device(&sdev->sdev_gendev))
sdev = NULL;
@@ -2995,39 +2527,6 @@ void sdev_evt_send_simple(struct scsi_device *sdev,
EXPORT_SYMBOL_GPL(sdev_evt_send_simple);
/**
- * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn()
- * @sdev: SCSI device to count the number of scsi_request_fn() callers for.
- */
-static int scsi_request_fn_active(struct scsi_device *sdev)
-{
- struct request_queue *q = sdev->request_queue;
- int request_fn_active;
-
- WARN_ON_ONCE(sdev->host->use_blk_mq);
-
- spin_lock_irq(q->queue_lock);
- request_fn_active = q->request_fn_active;
- spin_unlock_irq(q->queue_lock);
-
- return request_fn_active;
-}
-
-/**
- * scsi_wait_for_queuecommand() - wait for ongoing queuecommand() calls
- * @sdev: SCSI device pointer.
- *
- * Wait until the ongoing shost->hostt->queuecommand() calls that are
- * invoked from scsi_request_fn() have finished.
- */
-static void scsi_wait_for_queuecommand(struct scsi_device *sdev)
-{
- WARN_ON_ONCE(sdev->host->use_blk_mq);
-
- while (scsi_request_fn_active(sdev))
- msleep(20);
-}
-
-/**
* scsi_device_quiesce - Block user issued commands.
* @sdev: scsi device to quiesce.
*
@@ -3150,7 +2649,6 @@ EXPORT_SYMBOL(scsi_target_resume);
int scsi_internal_device_block_nowait(struct scsi_device *sdev)
{
struct request_queue *q = sdev->request_queue;
- unsigned long flags;
int err = 0;
err = scsi_device_set_state(sdev, SDEV_BLOCK);
@@ -3166,14 +2664,7 @@ int scsi_internal_device_block_nowait(struct scsi_device *sdev)
* block layer from calling the midlayer with this device's
* request queue.
*/
- if (q->mq_ops) {
- blk_mq_quiesce_queue_nowait(q);
- } else {
- spin_lock_irqsave(q->queue_lock, flags);
- blk_stop_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
-
+ blk_mq_quiesce_queue_nowait(q);
return 0;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait);
@@ -3204,12 +2695,8 @@ static int scsi_internal_device_block(struct scsi_device *sdev)
mutex_lock(&sdev->state_mutex);
err = scsi_internal_device_block_nowait(sdev);
- if (err == 0) {
- if (q->mq_ops)
- blk_mq_quiesce_queue(q);
- else
- scsi_wait_for_queuecommand(sdev);
- }
+ if (err == 0)
+ blk_mq_quiesce_queue(q);
mutex_unlock(&sdev->state_mutex);
return err;
@@ -3218,15 +2705,8 @@ static int scsi_internal_device_block(struct scsi_device *sdev)
void scsi_start_queue(struct scsi_device *sdev)
{
struct request_queue *q = sdev->request_queue;
- unsigned long flags;
- if (q->mq_ops) {
- blk_mq_unquiesce_queue(q);
- } else {
- spin_lock_irqsave(q->queue_lock, flags);
- blk_start_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
+ blk_mq_unquiesce_queue(q);
}
/**
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 99f1db5e467e..5f21547b2ad2 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -92,7 +92,6 @@ extern void scsi_queue_insert(struct scsi_cmnd *cmd, int reason);
extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
extern void scsi_run_host_queues(struct Scsi_Host *shost);
extern void scsi_requeue_run_queue(struct work_struct *work);
-extern struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev);
extern struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev);
extern void scsi_start_queue(struct scsi_device *sdev);
extern int scsi_mq_setup_tags(struct Scsi_Host *shost);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 78ca63dfba4a..dd0d516f65e2 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -266,10 +266,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
*/
sdev->borken = 1;
- if (shost_use_blk_mq(shost))
- sdev->request_queue = scsi_mq_alloc_queue(sdev);
- else
- sdev->request_queue = scsi_old_alloc_queue(sdev);
+ sdev->request_queue = scsi_mq_alloc_queue(sdev);
if (!sdev->request_queue) {
/* release fn is set up in scsi_sysfs_device_initialise, so
* have to free and put manually here */
@@ -280,11 +277,6 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
WARN_ON_ONCE(!blk_get_queue(sdev->request_queue));
sdev->request_queue->queuedata = sdev;
- if (!shost_use_blk_mq(sdev->host)) {
- blk_queue_init_tags(sdev->request_queue,
- sdev->host->cmd_per_lun, shost->bqt,
- shost->hostt->tag_alloc_policy);
- }
scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun ?
sdev->host->cmd_per_lun : 1);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 3aee9464a7bf..6a9040faed00 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -367,7 +367,6 @@ store_shost_eh_deadline(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline);
-shost_rd_attr(use_blk_mq, "%d\n");
shost_rd_attr(unique_id, "%u\n");
shost_rd_attr(cmd_per_lun, "%hd\n");
shost_rd_attr(can_queue, "%hd\n");
@@ -386,6 +385,13 @@ show_host_busy(struct device *dev, struct device_attribute *attr, char *buf)
}
static DEVICE_ATTR(host_busy, S_IRUGO, show_host_busy, NULL);
+static ssize_t
+show_use_blk_mq(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "1\n");
+}
+static DEVICE_ATTR(use_blk_mq, S_IRUGO, show_use_blk_mq, NULL);
+
static struct attribute *scsi_sysfs_shost_attrs[] = {
&dev_attr_use_blk_mq.attr,
&dev_attr_unique_id.attr,
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 381668fa135d..d7035270d274 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3592,7 +3592,7 @@ fc_bsg_job_timeout(struct request *req)
/* the blk_end_sync_io() doesn't check the error */
if (inflight)
- __blk_complete_request(req);
+ blk_mq_end_request(req, BLK_STS_IOERR);
return BLK_EH_DONE;
}
@@ -3684,14 +3684,9 @@ static void
fc_bsg_goose_queue(struct fc_rport *rport)
{
struct request_queue *q = rport->rqst_q;
- unsigned long flags;
-
- if (!q)
- return;
- spin_lock_irqsave(q->queue_lock, flags);
- blk_run_queue_async(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ if (q)
+ blk_mq_run_hw_queues(q, true);
}
/**
@@ -3759,6 +3754,37 @@ static int fc_bsg_dispatch(struct bsg_job *job)
return fc_bsg_host_dispatch(shost, job);
}
+static blk_status_t fc_bsg_rport_prep(struct fc_rport *rport)
+{
+ if (rport->port_state == FC_PORTSTATE_BLOCKED &&
+ !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT))
+ return BLK_STS_RESOURCE;
+
+ if (rport->port_state != FC_PORTSTATE_ONLINE)
+ return BLK_STS_IOERR;
+
+ return BLK_STS_OK;
+}
+
+
+static int fc_bsg_dispatch_prep(struct bsg_job *job)
+{
+ struct fc_rport *rport = fc_bsg_to_rport(job);
+ blk_status_t ret;
+
+ ret = fc_bsg_rport_prep(rport);
+ switch (ret) {
+ case BLK_STS_OK:
+ break;
+ case BLK_STS_RESOURCE:
+ return -EAGAIN;
+ default:
+ return -EIO;
+ }
+
+ return fc_bsg_dispatch(job);
+}
+
/**
* fc_bsg_hostadd - Create and add the bsg hooks so we can receive requests
* @shost: shost for fc_host
@@ -3780,7 +3806,8 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
snprintf(bsg_name, sizeof(bsg_name),
"fc_host%d", shost->host_no);
- q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size);
+ q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, fc_bsg_job_timeout,
+ i->f->dd_bsg_size);
if (IS_ERR(q)) {
dev_err(dev,
"fc_host%d: bsg interface failed to initialize - setup queue\n",
@@ -3788,26 +3815,11 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
return PTR_ERR(q);
}
__scsi_init_queue(shost, q);
- blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);
fc_host->rqst_q = q;
return 0;
}
-static int fc_bsg_rport_prep(struct request_queue *q, struct request *req)
-{
- struct fc_rport *rport = dev_to_rport(q->queuedata);
-
- if (rport->port_state == FC_PORTSTATE_BLOCKED &&
- !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT))
- return BLKPREP_DEFER;
-
- if (rport->port_state != FC_PORTSTATE_ONLINE)
- return BLKPREP_KILL;
-
- return BLKPREP_OK;
-}
-
/**
* fc_bsg_rportadd - Create and add the bsg hooks so we can receive requests
* @shost: shost that rport is attached to
@@ -3825,15 +3837,13 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
if (!i->f->bsg_request)
return -ENOTSUPP;
- q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch,
- i->f->dd_bsg_size);
+ q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch_prep,
+ fc_bsg_job_timeout, i->f->dd_bsg_size);
if (IS_ERR(q)) {
dev_err(dev, "failed to setup bsg queue\n");
return PTR_ERR(q);
}
__scsi_init_queue(shost, q);
- blk_queue_prep_rq(q, fc_bsg_rport_prep);
- blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
rport->rqst_q = q;
return 0;
@@ -3852,10 +3862,7 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
static void
fc_bsg_remove(struct request_queue *q)
{
- if (q) {
- bsg_unregister_queue(q);
- blk_cleanup_queue(q);
- }
+ bsg_remove_queue(q);
}
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 6fd2fe210fc3..0508831d6fb9 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -37,6 +37,18 @@
#define ISCSI_TRANSPORT_VERSION "2.0-870"
+#define CREATE_TRACE_POINTS
+#include <trace/events/iscsi.h>
+
+/*
+ * Export tracepoint symbols to be used by other modules.
+ */
+EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_conn);
+EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_eh);
+EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_session);
+EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_tcp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_sw_tcp);
+
static int dbg_session;
module_param_named(debug_session, dbg_session, int,
S_IRUGO | S_IWUSR);
@@ -59,6 +71,9 @@ MODULE_PARM_DESC(debug_conn,
iscsi_cls_session_printk(KERN_INFO, _session, \
"%s: " dbg_fmt, \
__func__, ##arg); \
+ iscsi_dbg_trace(trace_iscsi_dbg_trans_session, \
+ &(_session)->dev, \
+ "%s " dbg_fmt, __func__, ##arg); \
} while (0);
#define ISCSI_DBG_TRANS_CONN(_conn, dbg_fmt, arg...) \
@@ -66,7 +81,10 @@ MODULE_PARM_DESC(debug_conn,
if (dbg_conn) \
iscsi_cls_conn_printk(KERN_INFO, _conn, \
"%s: " dbg_fmt, \
- __func__, ##arg); \
+ __func__, ##arg); \
+ iscsi_dbg_trace(trace_iscsi_dbg_trans_conn, \
+ &(_conn)->dev, \
+ "%s " dbg_fmt, __func__, ##arg); \
} while (0);
struct iscsi_internal {
@@ -1542,7 +1560,7 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost)
return -ENOTSUPP;
snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no);
- q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0);
+ q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, NULL, 0);
if (IS_ERR(q)) {
shost_printk(KERN_ERR, shost, "bsg interface failed to "
"initialize - no request queue\n");
@@ -1576,10 +1594,7 @@ static int iscsi_remove_host(struct transport_container *tc,
struct Scsi_Host *shost = dev_to_shost(dev);
struct iscsi_cls_host *ihost = shost->shost_data;
- if (ihost->bsg_q) {
- bsg_unregister_queue(ihost->bsg_q);
- blk_cleanup_queue(ihost->bsg_q);
- }
+ bsg_remove_queue(ihost->bsg_q);
return 0;
}
@@ -4497,6 +4512,20 @@ int iscsi_unregister_transport(struct iscsi_transport *tt)
}
EXPORT_SYMBOL_GPL(iscsi_unregister_transport);
+void iscsi_dbg_trace(void (*trace)(struct device *dev, struct va_format *),
+ struct device *dev, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ trace(dev, &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(iscsi_dbg_trace);
+
static __init int iscsi_transport_init(void)
{
int err;
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 0a165b2b3e81..692b46937e52 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -198,7 +198,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
if (rphy) {
q = bsg_setup_queue(&rphy->dev, dev_name(&rphy->dev),
- sas_smp_dispatch, 0);
+ sas_smp_dispatch, NULL, 0);
if (IS_ERR(q))
return PTR_ERR(q);
rphy->q = q;
@@ -207,7 +207,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
snprintf(name, sizeof(name), "sas_host%d", shost->host_no);
q = bsg_setup_queue(&shost->shost_gendev, name,
- sas_smp_dispatch, 0);
+ sas_smp_dispatch, NULL, 0);
if (IS_ERR(q))
return PTR_ERR(q);
to_sas_host_attrs(shost)->q = q;
@@ -246,11 +246,7 @@ static int sas_host_remove(struct transport_container *tc, struct device *dev,
struct Scsi_Host *shost = dev_to_shost(dev);
struct request_queue *q = to_sas_host_attrs(shost)->q;
- if (q) {
- bsg_unregister_queue(q);
- blk_cleanup_queue(q);
- }
-
+ bsg_remove_queue(q);
return 0;
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index bd0a5c694a97..a1a44f52e0e8 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -114,7 +114,7 @@ static int sd_suspend_system(struct device *);
static int sd_suspend_runtime(struct device *);
static int sd_resume(struct device *);
static void sd_rescan(struct device *);
-static int sd_init_command(struct scsi_cmnd *SCpnt);
+static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt);
static void sd_uninit_command(struct scsi_cmnd *SCpnt);
static int sd_done(struct scsi_cmnd *);
static void sd_eh_reset(struct scsi_cmnd *);
@@ -751,7 +751,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}
-static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
+static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
{
struct scsi_device *sdp = cmd->device;
struct request *rq = cmd->request;
@@ -762,7 +762,7 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!rq->special_vec.bv_page)
- return BLKPREP_DEFER;
+ return BLK_STS_RESOURCE;
clear_highpage(rq->special_vec.bv_page);
rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = data_len;
@@ -786,7 +786,8 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
return scsi_init_io(cmd);
}
-static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
+static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
+ bool unmap)
{
struct scsi_device *sdp = cmd->device;
struct request *rq = cmd->request;
@@ -796,7 +797,7 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!rq->special_vec.bv_page)
- return BLKPREP_DEFER;
+ return BLK_STS_RESOURCE;
clear_highpage(rq->special_vec.bv_page);
rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = data_len;
@@ -817,7 +818,8 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
return scsi_init_io(cmd);
}
-static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
+static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
+ bool unmap)
{
struct scsi_device *sdp = cmd->device;
struct request *rq = cmd->request;
@@ -827,7 +829,7 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!rq->special_vec.bv_page)
- return BLKPREP_DEFER;
+ return BLK_STS_RESOURCE;
clear_highpage(rq->special_vec.bv_page);
rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = data_len;
@@ -848,7 +850,7 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
return scsi_init_io(cmd);
}
-static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
+static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
{
struct request *rq = cmd->request;
struct scsi_device *sdp = cmd->device;
@@ -866,7 +868,7 @@ static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
}
if (sdp->no_write_same)
- return BLKPREP_INVALID;
+ return BLK_STS_TARGET;
if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
return sd_setup_write_same16_cmnd(cmd, false);
@@ -943,7 +945,7 @@ out:
* Will set up either WRITE SAME(10) or WRITE SAME(16) depending on
* the preference indicated by the target device.
**/
-static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
+static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
{
struct request *rq = cmd->request;
struct scsi_device *sdp = cmd->device;
@@ -952,10 +954,10 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
sector_t sector = blk_rq_pos(rq);
unsigned int nr_sectors = blk_rq_sectors(rq);
unsigned int nr_bytes = blk_rq_bytes(rq);
- int ret;
+ blk_status_t ret;
if (sdkp->device->no_write_same)
- return BLKPREP_INVALID;
+ return BLK_STS_TARGET;
BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size);
@@ -996,7 +998,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
return ret;
}
-static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
+static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
{
struct request *rq = cmd->request;
@@ -1009,10 +1011,10 @@ static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
cmd->allowed = SD_MAX_RETRIES;
rq->timeout = rq->q->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER;
- return BLKPREP_OK;
+ return BLK_STS_OK;
}
-static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
+static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
{
struct request *rq = SCpnt->request;
struct scsi_device *sdp = SCpnt->device;
@@ -1022,18 +1024,14 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
sector_t threshold;
unsigned int this_count = blk_rq_sectors(rq);
unsigned int dif, dix;
- int ret;
unsigned char protect;
+ blk_status_t ret;
ret = scsi_init_io(SCpnt);
- if (ret != BLKPREP_OK)
+ if (ret != BLK_STS_OK)
return ret;
WARN_ON_ONCE(SCpnt != rq->special);
- /* from here on until we're complete, any goto out
- * is used for a killable error condition */
- ret = BLKPREP_KILL;
-
SCSI_LOG_HLQUEUE(1,
scmd_printk(KERN_INFO, SCpnt,
"%s: block=%llu, count=%d\n",
@@ -1046,7 +1044,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
blk_rq_sectors(rq)));
SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
"Retry with 0x%p\n", SCpnt));
- goto out;
+ return BLK_STS_IOERR;
}
if (sdp->changed) {
@@ -1055,7 +1053,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
* the changed bit has been reset
*/
/* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */
- goto out;
+ return BLK_STS_IOERR;
}
/*
@@ -1093,31 +1091,28 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
if ((block & 1) || (blk_rq_sectors(rq) & 1)) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
- goto out;
- } else {
- block = block >> 1;
- this_count = this_count >> 1;
+ return BLK_STS_IOERR;
}
+ block = block >> 1;
+ this_count = this_count >> 1;
}
if (sdp->sector_size == 2048) {
if ((block & 3) || (blk_rq_sectors(rq) & 3)) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
- goto out;
- } else {
- block = block >> 2;
- this_count = this_count >> 2;
+ return BLK_STS_IOERR;
}
+ block = block >> 2;
+ this_count = this_count >> 2;
}
if (sdp->sector_size == 4096) {
if ((block & 7) || (blk_rq_sectors(rq) & 7)) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
- goto out;
- } else {
- block = block >> 3;
- this_count = this_count >> 3;
+ return BLK_STS_IOERR;
}
+ block = block >> 3;
+ this_count = this_count >> 3;
}
if (rq_data_dir(rq) == WRITE) {
SCpnt->cmnd[0] = WRITE_6;
@@ -1129,7 +1124,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
SCpnt->cmnd[0] = READ_6;
} else {
scmd_printk(KERN_ERR, SCpnt, "Unknown command %d\n", req_op(rq));
- goto out;
+ return BLK_STS_IOERR;
}
SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
@@ -1149,10 +1144,8 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC);
- if (unlikely(SCpnt->cmnd == NULL)) {
- ret = BLKPREP_DEFER;
- goto out;
- }
+ if (unlikely(!SCpnt->cmnd))
+ return BLK_STS_RESOURCE;
SCpnt->cmd_len = SD_EXT_CDB_SIZE;
memset(SCpnt->cmnd, 0, SCpnt->cmd_len);
@@ -1220,7 +1213,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
*/
scmd_printk(KERN_ERR, SCpnt,
"FUA write on READ/WRITE(6) drive\n");
- goto out;
+ return BLK_STS_IOERR;
}
SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f);
@@ -1244,12 +1237,10 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
* This indicates that the command is ready from our end to be
* queued.
*/
- ret = BLKPREP_OK;
- out:
- return ret;
+ return BLK_STS_OK;
}
-static int sd_init_command(struct scsi_cmnd *cmd)
+static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
{
struct request *rq = cmd->request;
@@ -1265,7 +1256,7 @@ static int sd_init_command(struct scsi_cmnd *cmd)
case SD_LBP_ZERO:
return sd_setup_write_same10_cmnd(cmd, false);
default:
- return BLKPREP_INVALID;
+ return BLK_STS_TARGET;
}
case REQ_OP_WRITE_ZEROES:
return sd_setup_write_zeroes_cmnd(cmd);
@@ -1280,7 +1271,7 @@ static int sd_init_command(struct scsi_cmnd *cmd)
return sd_zbc_setup_reset_cmnd(cmd);
default:
WARN_ON_ONCE(1);
- return BLKPREP_KILL;
+ return BLK_STS_NOTSUPP;
}
}
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 1d63f3a23ffb..7f43e6839bce 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -271,7 +271,7 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
-extern int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
+extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
struct scsi_sense_hdr *sshdr);
extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
@@ -288,9 +288,9 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {}
-static inline int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
+static inline blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
{
- return BLKPREP_INVALID;
+ return BLK_STS_TARGET;
}
static inline void sd_zbc_complete(struct scsi_cmnd *cmd,
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index e06c48c866e4..83365b29a4d8 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -185,7 +185,7 @@ static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
*
* Called from sd_init_command() for a REQ_OP_ZONE_RESET request.
*/
-int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
+blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
{
struct request *rq = cmd->request;
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
@@ -194,14 +194,14 @@ int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
if (!sd_is_zoned(sdkp))
/* Not a zoned device */
- return BLKPREP_KILL;
+ return BLK_STS_IOERR;
if (sdkp->device->changed)
- return BLKPREP_KILL;
+ return BLK_STS_IOERR;
if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
/* Unaligned request */
- return BLKPREP_KILL;
+ return BLK_STS_IOERR;
cmd->cmd_len = 16;
memset(cmd->cmnd, 0, cmd->cmd_len);
@@ -214,7 +214,7 @@ int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
cmd->transfersize = 0;
cmd->allowed = 0;
- return BLKPREP_OK;
+ return BLK_STS_OK;
}
/**
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index c6ad00703c5b..4e27460ec926 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1390,7 +1390,7 @@ sg_rq_end_io(struct request *rq, blk_status_t status)
*/
srp->rq = NULL;
scsi_req_free_cmd(scsi_req(rq));
- __blk_put_request(rq->q, rq);
+ blk_put_request(rq);
write_lock_irqsave(&sfp->rq_list_lock, iflags);
if (unlikely(srp->orphan)) {
diff --git a/drivers/scsi/sgiwd93.c b/drivers/scsi/sgiwd93.c
index 5ed696dc9bbd..713bce998b0e 100644
--- a/drivers/scsi/sgiwd93.c
+++ b/drivers/scsi/sgiwd93.c
@@ -208,7 +208,7 @@ static struct scsi_host_template sgiwd93_template = {
.this_id = 7,
.sg_tablesize = SG_ALL,
.cmd_per_lun = 8,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
};
static int sgiwd93_probe(struct platform_device *pdev)
diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index e97bf2670315..af962368818b 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -21,6 +21,9 @@
#if !defined(_SMARTPQI_H)
#define _SMARTPQI_H
+#include <scsi/scsi_host.h>
+#include <linux/bsg-lib.h>
+
#pragma pack(1)
#define PQI_DEVICE_SIGNATURE "PQI DREG"
@@ -97,6 +100,12 @@ struct pqi_ctrl_registers {
struct pqi_device_registers pqi_registers; /* 4000h */
};
+#if ((HZ) < 1000)
+#define PQI_HZ 1000
+#else
+#define PQI_HZ (HZ)
+#endif
+
#define PQI_DEVICE_REGISTERS_OFFSET 0x4000
enum pqi_io_path {
@@ -347,6 +356,10 @@ struct pqi_event_config {
#define PQI_MAX_EVENT_DESCRIPTORS 255
+#define PQI_EVENT_OFA_MEMORY_ALLOCATION 0x0
+#define PQI_EVENT_OFA_QUIESCE 0x1
+#define PQI_EVENT_OFA_CANCELLED 0x2
+
struct pqi_event_response {
struct pqi_iu_header header;
u8 event_type;
@@ -354,7 +367,17 @@ struct pqi_event_response {
u8 request_acknowlege : 1;
__le16 event_id;
__le32 additional_event_id;
- u8 data[16];
+ union {
+ struct {
+ __le32 bytes_requested;
+ u8 reserved[12];
+ } ofa_memory_allocation;
+
+ struct {
+ __le16 reason; /* reason for cancellation */
+ u8 reserved[14];
+ } ofa_cancelled;
+ } data;
};
struct pqi_event_acknowledge_request {
@@ -389,6 +412,54 @@ struct pqi_task_management_response {
u8 response_code;
};
+struct pqi_vendor_general_request {
+ struct pqi_iu_header header;
+ __le16 request_id;
+ __le16 function_code;
+ union {
+ struct {
+ __le16 first_section;
+ __le16 last_section;
+ u8 reserved[48];
+ } config_table_update;
+
+ struct {
+ __le64 buffer_address;
+ __le32 buffer_length;
+ u8 reserved[40];
+ } ofa_memory_allocation;
+ } data;
+};
+
+struct pqi_vendor_general_response {
+ struct pqi_iu_header header;
+ __le16 request_id;
+ __le16 function_code;
+ __le16 status;
+ u8 reserved[2];
+};
+
+#define PQI_VENDOR_GENERAL_CONFIG_TABLE_UPDATE 0
+#define PQI_VENDOR_GENERAL_HOST_MEMORY_UPDATE 1
+
+#define PQI_OFA_VERSION 1
+#define PQI_OFA_SIGNATURE "OFA_QRM"
+#define PQI_OFA_MAX_SG_DESCRIPTORS 64
+
+#define PQI_OFA_MEMORY_DESCRIPTOR_LENGTH \
+ (offsetof(struct pqi_ofa_memory, sg_descriptor) + \
+ (PQI_OFA_MAX_SG_DESCRIPTORS * sizeof(struct pqi_sg_descriptor)))
+
+struct pqi_ofa_memory {
+ __le64 signature; /* "OFA_QRM" */
+ __le16 version; /* version of this struct(1 = 1st version) */
+ u8 reserved[62];
+ __le32 bytes_allocated; /* total allocated memory in bytes */
+ __le16 num_memory_descriptors;
+ u8 reserved1[2];
+ struct pqi_sg_descriptor sg_descriptor[1];
+};
+
struct pqi_aio_error_info {
u8 status;
u8 service_response;
@@ -419,6 +490,7 @@ struct pqi_raid_error_info {
#define PQI_REQUEST_IU_GENERAL_ADMIN 0x60
#define PQI_REQUEST_IU_REPORT_VENDOR_EVENT_CONFIG 0x72
#define PQI_REQUEST_IU_SET_VENDOR_EVENT_CONFIG 0x73
+#define PQI_REQUEST_IU_VENDOR_GENERAL 0x75
#define PQI_REQUEST_IU_ACKNOWLEDGE_VENDOR_EVENT 0xf6
#define PQI_RESPONSE_IU_GENERAL_MANAGEMENT 0x81
@@ -430,6 +502,7 @@ struct pqi_raid_error_info {
#define PQI_RESPONSE_IU_AIO_PATH_IO_ERROR 0xf3
#define PQI_RESPONSE_IU_AIO_PATH_DISABLED 0xf4
#define PQI_RESPONSE_IU_VENDOR_EVENT 0xf5
+#define PQI_RESPONSE_IU_VENDOR_GENERAL 0xf7
#define PQI_GENERAL_ADMIN_FUNCTION_REPORT_DEVICE_CAPABILITY 0x0
#define PQI_GENERAL_ADMIN_FUNCTION_CREATE_IQ 0x10
@@ -492,6 +565,7 @@ struct pqi_raid_error_info {
#define PQI_EVENT_TYPE_HARDWARE 0x2
#define PQI_EVENT_TYPE_PHYSICAL_DEVICE 0x4
#define PQI_EVENT_TYPE_LOGICAL_DEVICE 0x5
+#define PQI_EVENT_TYPE_OFA 0xfb
#define PQI_EVENT_TYPE_AIO_STATE_CHANGE 0xfd
#define PQI_EVENT_TYPE_AIO_CONFIG_CHANGE 0xfe
@@ -556,6 +630,7 @@ typedef u32 pqi_index_t;
#define SOP_TASK_ATTRIBUTE_ACA 4
#define SOP_TMF_COMPLETE 0x0
+#define SOP_TMF_REJECTED 0x4
#define SOP_TMF_FUNCTION_SUCCEEDED 0x8
/* additional CDB bytes usage field codes */
@@ -644,11 +719,13 @@ struct pqi_encryption_info {
#define PQI_CONFIG_TABLE_MAX_LENGTH ((u16)~0)
/* configuration table section IDs */
+#define PQI_CONFIG_TABLE_ALL_SECTIONS (-1)
#define PQI_CONFIG_TABLE_SECTION_GENERAL_INFO 0
#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES 1
#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_ERRATA 2
#define PQI_CONFIG_TABLE_SECTION_DEBUG 3
#define PQI_CONFIG_TABLE_SECTION_HEARTBEAT 4
+#define PQI_CONFIG_TABLE_SECTION_SOFT_RESET 5
struct pqi_config_table {
u8 signature[8]; /* "CFGTABLE" */
@@ -680,6 +757,18 @@ struct pqi_config_table_general_info {
/* command */
};
+struct pqi_config_table_firmware_features {
+ struct pqi_config_table_section_header header;
+ __le16 num_elements;
+ u8 features_supported[];
+/* u8 features_requested_by_host[]; */
+/* u8 features_enabled[]; */
+};
+
+#define PQI_FIRMWARE_FEATURE_OFA 0
+#define PQI_FIRMWARE_FEATURE_SMP 1
+#define PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE 11
+
struct pqi_config_table_debug {
struct pqi_config_table_section_header header;
__le32 scratchpad;
@@ -690,6 +779,22 @@ struct pqi_config_table_heartbeat {
__le32 heartbeat_counter;
};
+struct pqi_config_table_soft_reset {
+ struct pqi_config_table_section_header header;
+ u8 soft_reset_status;
+};
+
+#define PQI_SOFT_RESET_INITIATE 0x1
+#define PQI_SOFT_RESET_ABORT 0x2
+
+enum pqi_soft_reset_status {
+ RESET_INITIATE_FIRMWARE,
+ RESET_INITIATE_DRIVER,
+ RESET_ABORT,
+ RESET_NORESPONSE,
+ RESET_TIMEDOUT
+};
+
union pqi_reset_register {
struct {
u32 reset_type : 3;
@@ -808,8 +913,10 @@ struct pqi_scsi_dev {
u8 scsi3addr[8];
__be64 wwid;
u8 volume_id[16];
+ u8 unique_id[16];
u8 is_physical_device : 1;
u8 is_external_raid_device : 1;
+ u8 is_expander_smp_device : 1;
u8 target_lun_valid : 1;
u8 device_gone : 1;
u8 new_device : 1;
@@ -817,6 +924,7 @@ struct pqi_scsi_dev {
u8 volume_offline : 1;
bool aio_enabled; /* only valid for physical disks */
bool in_reset;
+ bool in_remove;
bool device_offline;
u8 vendor[8]; /* bytes 8-15 of inquiry data */
u8 model[16]; /* bytes 16-31 of inquiry data */
@@ -854,6 +962,8 @@ struct pqi_scsi_dev {
#define CISS_VPD_LV_DEVICE_GEOMETRY 0xc1 /* vendor-specific page */
#define CISS_VPD_LV_BYPASS_STATUS 0xc2 /* vendor-specific page */
#define CISS_VPD_LV_STATUS 0xc3 /* vendor-specific page */
+#define SCSI_VPD_HEADER_SZ 4
+#define SCSI_VPD_DEVICE_ID_IDX 8 /* Index of page id in page */
#define VPD_PAGE (1 << 8)
@@ -916,6 +1026,7 @@ struct pqi_sas_node {
struct pqi_sas_port {
struct list_head port_list_entry;
u64 sas_address;
+ struct pqi_scsi_dev *device;
struct sas_port *port;
int next_phy_index;
struct list_head phy_list_head;
@@ -947,13 +1058,15 @@ struct pqi_io_request {
struct list_head request_list_entry;
};
-#define PQI_NUM_SUPPORTED_EVENTS 6
+#define PQI_NUM_SUPPORTED_EVENTS 7
struct pqi_event {
bool pending;
u8 event_type;
__le16 event_id;
__le32 additional_event_id;
+ __le32 ofa_bytes_requested;
+ __le16 ofa_cancel_reason;
};
#define PQI_RESERVED_IO_SLOTS_LUN_RESET 1
@@ -1014,12 +1127,16 @@ struct pqi_ctrl_info {
struct mutex scan_mutex;
struct mutex lun_reset_mutex;
+ struct mutex ofa_mutex; /* serialize ofa */
bool controller_online;
bool block_requests;
+ bool in_shutdown;
+ bool in_ofa;
u8 inbound_spanning_supported : 1;
u8 outbound_spanning_supported : 1;
u8 pqi_mode_enabled : 1;
u8 pqi_reset_quiesce_supported : 1;
+ u8 soft_reset_handshake_supported : 1;
struct list_head scsi_device_list;
spinlock_t scsi_device_list_lock;
@@ -1040,6 +1157,7 @@ struct pqi_ctrl_info {
int previous_num_interrupts;
u32 previous_heartbeat_count;
__le32 __iomem *heartbeat_counter;
+ u8 __iomem *soft_reset_status;
struct timer_list heartbeat_timer;
struct work_struct ctrl_offline_work;
@@ -1051,6 +1169,10 @@ struct pqi_ctrl_info {
struct list_head raid_bypass_retry_list;
spinlock_t raid_bypass_retry_list_lock;
struct work_struct raid_bypass_retry_work;
+
+ struct pqi_ofa_memory *pqi_ofa_mem_virt_addr;
+ dma_addr_t pqi_ofa_mem_dma_handle;
+ void **pqi_ofa_chunk_virt_addr;
};
enum pqi_ctrl_mode {
@@ -1080,8 +1202,13 @@ enum pqi_ctrl_mode {
#define BMIC_WRITE 0x27
#define BMIC_SENSE_CONTROLLER_PARAMETERS 0x64
#define BMIC_SENSE_SUBSYSTEM_INFORMATION 0x66
+#define BMIC_CSMI_PASSTHRU 0x68
#define BMIC_WRITE_HOST_WELLNESS 0xa5
#define BMIC_FLUSH_CACHE 0xc2
+#define BMIC_SET_DIAG_OPTIONS 0xf4
+#define BMIC_SENSE_DIAG_OPTIONS 0xf5
+
+#define CSMI_CC_SAS_SMP_PASSTHRU 0X17
#define SA_FLUSH_CACHE 0x1
@@ -1109,6 +1236,10 @@ struct bmic_identify_controller {
u8 reserved3[32];
};
+#define SA_EXPANDER_SMP_DEVICE 0x05
+/*SCSI Invalid Device Type for SAS devices*/
+#define PQI_SAS_SCSI_INVALID_DEVTYPE 0xff
+
struct bmic_identify_physical_device {
u8 scsi_bus; /* SCSI Bus number on controller */
u8 scsi_id; /* SCSI ID on this bus */
@@ -1189,6 +1320,50 @@ struct bmic_identify_physical_device {
u8 padding_to_multiple_of_512[9];
};
+struct bmic_smp_request {
+ u8 frame_type;
+ u8 function;
+ u8 allocated_response_length;
+ u8 request_length;
+ u8 additional_request_bytes[1016];
+};
+
+struct bmic_smp_response {
+ u8 frame_type;
+ u8 function;
+ u8 function_result;
+ u8 response_length;
+ u8 additional_response_bytes[1016];
+};
+
+struct bmic_csmi_ioctl_header {
+ __le32 header_length;
+ u8 signature[8];
+ __le32 timeout;
+ __le32 control_code;
+ __le32 return_code;
+ __le32 length;
+};
+
+struct bmic_csmi_smp_passthru {
+ u8 phy_identifier;
+ u8 port_identifier;
+ u8 connection_rate;
+ u8 reserved;
+ __be64 destination_sas_address;
+ __le32 request_length;
+ struct bmic_smp_request request;
+ u8 connection_status;
+ u8 reserved1[3];
+ __le32 response_length;
+ struct bmic_smp_response response;
+};
+
+struct bmic_csmi_smp_passthru_buffer {
+ struct bmic_csmi_ioctl_header ioctl_header;
+ struct bmic_csmi_smp_passthru parameters;
+};
+
struct bmic_flush_cache {
u8 disable_flag;
u8 system_power_action;
@@ -1206,8 +1381,42 @@ enum bmic_flush_cache_shutdown_event {
RESTART = 4
};
+struct bmic_diag_options {
+ __le32 options;
+};
+
#pragma pack()
+static inline struct pqi_ctrl_info *shost_to_hba(struct Scsi_Host *shost)
+{
+ void *hostdata = shost_priv(shost);
+
+ return *((struct pqi_ctrl_info **)hostdata);
+}
+
+static inline bool pqi_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
+{
+ return !ctrl_info->controller_online;
+}
+
+static inline void pqi_ctrl_busy(struct pqi_ctrl_info *ctrl_info)
+{
+ atomic_inc(&ctrl_info->num_busy_threads);
+}
+
+static inline void pqi_ctrl_unbusy(struct pqi_ctrl_info *ctrl_info)
+{
+ atomic_dec(&ctrl_info->num_busy_threads);
+}
+
+static inline bool pqi_ctrl_blocked(struct pqi_ctrl_info *ctrl_info)
+{
+ return ctrl_info->block_requests;
+}
+
+void pqi_sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
+ struct sas_rphy *rphy);
+
int pqi_add_sas_host(struct Scsi_Host *shost, struct pqi_ctrl_info *ctrl_info);
void pqi_delete_sas_host(struct pqi_ctrl_info *ctrl_info);
int pqi_add_sas_device(struct pqi_sas_node *pqi_sas_node,
@@ -1216,6 +1425,9 @@ void pqi_remove_sas_device(struct pqi_scsi_dev *device);
struct pqi_scsi_dev *pqi_find_device_by_sas_rphy(
struct pqi_ctrl_info *ctrl_info, struct sas_rphy *rphy);
void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd);
+int pqi_csmi_smp_passthru(struct pqi_ctrl_info *ctrl_info,
+ struct bmic_csmi_smp_passthru_buffer *buffer, size_t buffer_length,
+ struct pqi_raid_error_info *error_info);
extern struct sas_function_template pqi_sas_transport_functions;
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index a25a07a0b7f0..e2fa3f476227 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -40,11 +40,11 @@
#define BUILD_TIMESTAMP
#endif
-#define DRIVER_VERSION "1.1.4-130"
+#define DRIVER_VERSION "1.2.4-070"
#define DRIVER_MAJOR 1
-#define DRIVER_MINOR 1
+#define DRIVER_MINOR 2
#define DRIVER_RELEASE 4
-#define DRIVER_REVISION 130
+#define DRIVER_REVISION 70
#define DRIVER_NAME "Microsemi PQI Driver (v" \
DRIVER_VERSION BUILD_TIMESTAMP ")"
@@ -74,6 +74,15 @@ static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info,
struct scsi_cmnd *scmd, u32 aio_handle, u8 *cdb,
unsigned int cdb_length, struct pqi_queue_group *queue_group,
struct pqi_encryption_info *encryption_info, bool raid_bypass);
+static void pqi_ofa_ctrl_quiesce(struct pqi_ctrl_info *ctrl_info);
+static void pqi_ofa_ctrl_unquiesce(struct pqi_ctrl_info *ctrl_info);
+static int pqi_ofa_ctrl_restart(struct pqi_ctrl_info *ctrl_info);
+static void pqi_ofa_setup_host_buffer(struct pqi_ctrl_info *ctrl_info,
+ u32 bytes_requested);
+static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info);
+static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info);
+static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
+ struct pqi_scsi_dev *device, unsigned long timeout_secs);
/* for flags argument to pqi_submit_raid_request_synchronous() */
#define PQI_SYNC_FLAGS_INTERRUPTABLE 0x1
@@ -113,6 +122,7 @@ static unsigned int pqi_supported_event_types[] = {
PQI_EVENT_TYPE_HARDWARE,
PQI_EVENT_TYPE_PHYSICAL_DEVICE,
PQI_EVENT_TYPE_LOGICAL_DEVICE,
+ PQI_EVENT_TYPE_OFA,
PQI_EVENT_TYPE_AIO_STATE_CHANGE,
PQI_EVENT_TYPE_AIO_CONFIG_CHANGE,
};
@@ -176,16 +186,14 @@ static inline void pqi_scsi_done(struct scsi_cmnd *scmd)
scmd->scsi_done(scmd);
}
-static inline bool pqi_scsi3addr_equal(u8 *scsi3addr1, u8 *scsi3addr2)
+static inline void pqi_disable_write_same(struct scsi_device *sdev)
{
- return memcmp(scsi3addr1, scsi3addr2, 8) == 0;
+ sdev->no_write_same = 1;
}
-static inline struct pqi_ctrl_info *shost_to_hba(struct Scsi_Host *shost)
+static inline bool pqi_scsi3addr_equal(u8 *scsi3addr1, u8 *scsi3addr2)
{
- void *hostdata = shost_priv(shost);
-
- return *((struct pqi_ctrl_info **)hostdata);
+ return memcmp(scsi3addr1, scsi3addr2, 8) == 0;
}
static inline bool pqi_is_logical_device(struct pqi_scsi_dev *device)
@@ -198,11 +206,6 @@ static inline bool pqi_is_external_raid_addr(u8 *scsi3addr)
return scsi3addr[2] != 0;
}
-static inline bool pqi_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
-{
- return !ctrl_info->controller_online;
-}
-
static inline void pqi_check_ctrl_health(struct pqi_ctrl_info *ctrl_info)
{
if (ctrl_info->controller_online)
@@ -241,11 +244,6 @@ static inline void pqi_ctrl_unblock_requests(struct pqi_ctrl_info *ctrl_info)
scsi_unblock_requests(ctrl_info->scsi_host);
}
-static inline bool pqi_ctrl_blocked(struct pqi_ctrl_info *ctrl_info)
-{
- return ctrl_info->block_requests;
-}
-
static unsigned long pqi_wait_if_ctrl_blocked(struct pqi_ctrl_info *ctrl_info,
unsigned long timeout_msecs)
{
@@ -275,16 +273,6 @@ static unsigned long pqi_wait_if_ctrl_blocked(struct pqi_ctrl_info *ctrl_info,
return remaining_msecs;
}
-static inline void pqi_ctrl_busy(struct pqi_ctrl_info *ctrl_info)
-{
- atomic_inc(&ctrl_info->num_busy_threads);
-}
-
-static inline void pqi_ctrl_unbusy(struct pqi_ctrl_info *ctrl_info)
-{
- atomic_dec(&ctrl_info->num_busy_threads);
-}
-
static inline void pqi_ctrl_wait_until_quiesced(struct pqi_ctrl_info *ctrl_info)
{
while (atomic_read(&ctrl_info->num_busy_threads) >
@@ -312,11 +300,39 @@ static inline bool pqi_device_in_reset(struct pqi_scsi_dev *device)
return device->in_reset;
}
+static inline void pqi_ctrl_ofa_start(struct pqi_ctrl_info *ctrl_info)
+{
+ ctrl_info->in_ofa = true;
+}
+
+static inline void pqi_ctrl_ofa_done(struct pqi_ctrl_info *ctrl_info)
+{
+ ctrl_info->in_ofa = false;
+}
+
+static inline bool pqi_ctrl_in_ofa(struct pqi_ctrl_info *ctrl_info)
+{
+ return ctrl_info->in_ofa;
+}
+
+static inline void pqi_device_remove_start(struct pqi_scsi_dev *device)
+{
+ device->in_remove = true;
+}
+
+static inline bool pqi_device_in_remove(struct pqi_ctrl_info *ctrl_info,
+ struct pqi_scsi_dev *device)
+{
+ return device->in_remove & !ctrl_info->in_shutdown;
+}
+
static inline void pqi_schedule_rescan_worker_with_delay(
struct pqi_ctrl_info *ctrl_info, unsigned long delay)
{
if (pqi_ctrl_offline(ctrl_info))
return;
+ if (pqi_ctrl_in_ofa(ctrl_info))
+ return;
schedule_delayed_work(&ctrl_info->rescan_work, delay);
}
@@ -326,7 +342,7 @@ static inline void pqi_schedule_rescan_worker(struct pqi_ctrl_info *ctrl_info)
pqi_schedule_rescan_worker_with_delay(ctrl_info, 0);
}
-#define PQI_RESCAN_WORK_DELAY (10 * HZ)
+#define PQI_RESCAN_WORK_DELAY (10 * PQI_HZ)
static inline void pqi_schedule_rescan_worker_delayed(
struct pqi_ctrl_info *ctrl_info)
@@ -347,6 +363,27 @@ static inline u32 pqi_read_heartbeat_counter(struct pqi_ctrl_info *ctrl_info)
return readl(ctrl_info->heartbeat_counter);
}
+static inline u8 pqi_read_soft_reset_status(struct pqi_ctrl_info *ctrl_info)
+{
+ if (!ctrl_info->soft_reset_status)
+ return 0;
+
+ return readb(ctrl_info->soft_reset_status);
+}
+
+static inline void pqi_clear_soft_reset_status(struct pqi_ctrl_info *ctrl_info,
+ u8 clear)
+{
+ u8 status;
+
+ if (!ctrl_info->soft_reset_status)
+ return;
+
+ status = pqi_read_soft_reset_status(ctrl_info);
+ status &= ~clear;
+ writeb(status, ctrl_info->soft_reset_status);
+}
+
static int pqi_map_single(struct pci_dev *pci_dev,
struct pqi_sg_descriptor *sg_descriptor, void *buffer,
size_t buffer_length, enum dma_data_direction data_direction)
@@ -390,6 +427,7 @@ static int pqi_build_raid_path_request(struct pqi_ctrl_info *ctrl_info,
u16 vpd_page, enum dma_data_direction *dir)
{
u8 *cdb;
+ size_t cdb_length = buffer_length;
memset(request, 0, sizeof(*request));
@@ -412,7 +450,7 @@ static int pqi_build_raid_path_request(struct pqi_ctrl_info *ctrl_info,
cdb[1] = 0x1;
cdb[2] = (u8)vpd_page;
}
- cdb[4] = (u8)buffer_length;
+ cdb[4] = (u8)cdb_length;
break;
case CISS_REPORT_LOG:
case CISS_REPORT_PHYS:
@@ -422,32 +460,45 @@ static int pqi_build_raid_path_request(struct pqi_ctrl_info *ctrl_info,
cdb[1] = CISS_REPORT_PHYS_EXTENDED;
else
cdb[1] = CISS_REPORT_LOG_EXTENDED;
- put_unaligned_be32(buffer_length, &cdb[6]);
+ put_unaligned_be32(cdb_length, &cdb[6]);
break;
case CISS_GET_RAID_MAP:
request->data_direction = SOP_READ_FLAG;
cdb[0] = CISS_READ;
cdb[1] = CISS_GET_RAID_MAP;
- put_unaligned_be32(buffer_length, &cdb[6]);
+ put_unaligned_be32(cdb_length, &cdb[6]);
break;
case SA_FLUSH_CACHE:
request->data_direction = SOP_WRITE_FLAG;
cdb[0] = BMIC_WRITE;
cdb[6] = BMIC_FLUSH_CACHE;
- put_unaligned_be16(buffer_length, &cdb[7]);
+ put_unaligned_be16(cdb_length, &cdb[7]);
break;
+ case BMIC_SENSE_DIAG_OPTIONS:
+ cdb_length = 0;
+ /* fall through */
case BMIC_IDENTIFY_CONTROLLER:
case BMIC_IDENTIFY_PHYSICAL_DEVICE:
request->data_direction = SOP_READ_FLAG;
cdb[0] = BMIC_READ;
cdb[6] = cmd;
- put_unaligned_be16(buffer_length, &cdb[7]);
+ put_unaligned_be16(cdb_length, &cdb[7]);
break;
+ case BMIC_SET_DIAG_OPTIONS:
+ cdb_length = 0;
+ /* fall through */
case BMIC_WRITE_HOST_WELLNESS:
request->data_direction = SOP_WRITE_FLAG;
cdb[0] = BMIC_WRITE;
cdb[6] = cmd;
- put_unaligned_be16(buffer_length, &cdb[7]);
+ put_unaligned_be16(cdb_length, &cdb[7]);
+ break;
+ case BMIC_CSMI_PASSTHRU:
+ request->data_direction = SOP_BIDIRECTIONAL;
+ cdb[0] = BMIC_WRITE;
+ cdb[5] = CSMI_CC_SAS_SMP_PASSTHRU;
+ cdb[6] = cmd;
+ put_unaligned_be16(cdb_length, &cdb[7]);
break;
default:
dev_err(&ctrl_info->pci_dev->dev, "unknown command 0x%c\n",
@@ -509,43 +560,130 @@ static void pqi_free_io_request(struct pqi_io_request *io_request)
atomic_dec(&io_request->refcount);
}
-static int pqi_identify_controller(struct pqi_ctrl_info *ctrl_info,
- struct bmic_identify_controller *buffer)
+static int pqi_send_scsi_raid_request(struct pqi_ctrl_info *ctrl_info, u8 cmd,
+ u8 *scsi3addr, void *buffer, size_t buffer_length, u16 vpd_page,
+ struct pqi_raid_error_info *error_info,
+ unsigned long timeout_msecs)
{
int rc;
enum dma_data_direction dir;
struct pqi_raid_path_request request;
rc = pqi_build_raid_path_request(ctrl_info, &request,
- BMIC_IDENTIFY_CONTROLLER, RAID_CTLR_LUNID, buffer,
- sizeof(*buffer), 0, &dir);
+ cmd, scsi3addr, buffer,
+ buffer_length, vpd_page, &dir);
if (rc)
return rc;
- rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0,
- NULL, NO_TIMEOUT);
+ rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header,
+ 0, error_info, timeout_msecs);
pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir);
return rc;
}
-static int pqi_scsi_inquiry(struct pqi_ctrl_info *ctrl_info,
+/* Helper functions for pqi_send_scsi_raid_request */
+
+static inline int pqi_send_ctrl_raid_request(struct pqi_ctrl_info *ctrl_info,
+ u8 cmd, void *buffer, size_t buffer_length)
+{
+ return pqi_send_scsi_raid_request(ctrl_info, cmd, RAID_CTLR_LUNID,
+ buffer, buffer_length, 0, NULL, NO_TIMEOUT);
+}
+
+static inline int pqi_send_ctrl_raid_with_error(struct pqi_ctrl_info *ctrl_info,
+ u8 cmd, void *buffer, size_t buffer_length,
+ struct pqi_raid_error_info *error_info)
+{
+ return pqi_send_scsi_raid_request(ctrl_info, cmd, RAID_CTLR_LUNID,
+ buffer, buffer_length, 0, error_info, NO_TIMEOUT);
+}
+
+
+static inline int pqi_identify_controller(struct pqi_ctrl_info *ctrl_info,
+ struct bmic_identify_controller *buffer)
+{
+ return pqi_send_ctrl_raid_request(ctrl_info, BMIC_IDENTIFY_CONTROLLER,
+ buffer, sizeof(*buffer));
+}
+
+static inline int pqi_scsi_inquiry(struct pqi_ctrl_info *ctrl_info,
u8 *scsi3addr, u16 vpd_page, void *buffer, size_t buffer_length)
{
+ return pqi_send_scsi_raid_request(ctrl_info, INQUIRY, scsi3addr,
+ buffer, buffer_length, vpd_page, NULL, NO_TIMEOUT);
+}
+
+static bool pqi_vpd_page_supported(struct pqi_ctrl_info *ctrl_info,
+ u8 *scsi3addr, u16 vpd_page)
+{
int rc;
- enum dma_data_direction dir;
- struct pqi_raid_path_request request;
+ int i;
+ int pages;
+ unsigned char *buf, bufsize;
- rc = pqi_build_raid_path_request(ctrl_info, &request,
- INQUIRY, scsi3addr, buffer, buffer_length, vpd_page,
- &dir);
- if (rc)
- return rc;
+ buf = kzalloc(256, GFP_KERNEL);
+ if (!buf)
+ return false;
- rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0,
- NULL, NO_TIMEOUT);
+ /* Get the size of the page list first */
+ rc = pqi_scsi_inquiry(ctrl_info, scsi3addr,
+ VPD_PAGE | SCSI_VPD_SUPPORTED_PAGES,
+ buf, SCSI_VPD_HEADER_SZ);
+ if (rc != 0)
+ goto exit_unsupported;
+
+ pages = buf[3];
+ if ((pages + SCSI_VPD_HEADER_SZ) <= 255)
+ bufsize = pages + SCSI_VPD_HEADER_SZ;
+ else
+ bufsize = 255;
+
+ /* Get the whole VPD page list */
+ rc = pqi_scsi_inquiry(ctrl_info, scsi3addr,
+ VPD_PAGE | SCSI_VPD_SUPPORTED_PAGES,
+ buf, bufsize);
+ if (rc != 0)
+ goto exit_unsupported;
+
+ pages = buf[3];
+ for (i = 1; i <= pages; i++)
+ if (buf[3 + i] == vpd_page)
+ goto exit_supported;
+
+exit_unsupported:
+ kfree(buf);
+ return false;
+
+exit_supported:
+ kfree(buf);
+ return true;
+}
+
+static int pqi_get_device_id(struct pqi_ctrl_info *ctrl_info,
+ u8 *scsi3addr, u8 *device_id, int buflen)
+{
+ int rc;
+ unsigned char *buf;
+
+ if (!pqi_vpd_page_supported(ctrl_info, scsi3addr, SCSI_VPD_DEVICE_ID))
+ return 1; /* function not supported */
+
+ buf = kzalloc(64, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ rc = pqi_scsi_inquiry(ctrl_info, scsi3addr,
+ VPD_PAGE | SCSI_VPD_DEVICE_ID,
+ buf, 64);
+ if (rc == 0) {
+ if (buflen > 16)
+ buflen = 16;
+ memcpy(device_id, &buf[SCSI_VPD_DEVICE_ID_IDX], buflen);
+ }
+
+ kfree(buf);
- pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir);
return rc;
}
@@ -580,9 +718,7 @@ static int pqi_flush_cache(struct pqi_ctrl_info *ctrl_info,
enum bmic_flush_cache_shutdown_event shutdown_event)
{
int rc;
- struct pqi_raid_path_request request;
struct bmic_flush_cache *flush_cache;
- enum dma_data_direction dir;
/*
* Don't bother trying to flush the cache if the controller is
@@ -597,42 +733,55 @@ static int pqi_flush_cache(struct pqi_ctrl_info *ctrl_info,
flush_cache->shutdown_event = shutdown_event;
- rc = pqi_build_raid_path_request(ctrl_info, &request,
- SA_FLUSH_CACHE, RAID_CTLR_LUNID, flush_cache,
- sizeof(*flush_cache), 0, &dir);
- if (rc)
- goto out;
-
- rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header,
- 0, NULL, NO_TIMEOUT);
+ rc = pqi_send_ctrl_raid_request(ctrl_info, SA_FLUSH_CACHE, flush_cache,
+ sizeof(*flush_cache));
- pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir);
-out:
kfree(flush_cache);
return rc;
}
-static int pqi_write_host_wellness(struct pqi_ctrl_info *ctrl_info,
- void *buffer, size_t buffer_length)
+int pqi_csmi_smp_passthru(struct pqi_ctrl_info *ctrl_info,
+ struct bmic_csmi_smp_passthru_buffer *buffer, size_t buffer_length,
+ struct pqi_raid_error_info *error_info)
+{
+ return pqi_send_ctrl_raid_with_error(ctrl_info, BMIC_CSMI_PASSTHRU,
+ buffer, buffer_length, error_info);
+}
+
+#define PQI_FETCH_PTRAID_DATA (1UL<<31)
+
+static int pqi_set_diag_rescan(struct pqi_ctrl_info *ctrl_info)
{
int rc;
- struct pqi_raid_path_request request;
- enum dma_data_direction dir;
+ struct bmic_diag_options *diag;
- rc = pqi_build_raid_path_request(ctrl_info, &request,
- BMIC_WRITE_HOST_WELLNESS, RAID_CTLR_LUNID, buffer,
- buffer_length, 0, &dir);
+ diag = kzalloc(sizeof(*diag), GFP_KERNEL);
+ if (!diag)
+ return -ENOMEM;
+
+ rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SENSE_DIAG_OPTIONS,
+ diag, sizeof(*diag));
if (rc)
- return rc;
+ goto out;
- rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header,
- 0, NULL, NO_TIMEOUT);
+ diag->options |= cpu_to_le32(PQI_FETCH_PTRAID_DATA);
+
+ rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SET_DIAG_OPTIONS,
+ diag, sizeof(*diag));
+out:
+ kfree(diag);
- pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir);
return rc;
}
+static inline int pqi_write_host_wellness(struct pqi_ctrl_info *ctrl_info,
+ void *buffer, size_t buffer_length)
+{
+ return pqi_send_ctrl_raid_request(ctrl_info, BMIC_WRITE_HOST_WELLNESS,
+ buffer, buffer_length);
+}
+
#pragma pack(1)
struct bmic_host_wellness_driver_version {
@@ -640,6 +789,7 @@ struct bmic_host_wellness_driver_version {
u8 driver_version_tag[2];
__le16 driver_version_length;
char driver_version[32];
+ u8 dont_write_tag[2];
u8 end_tag[2];
};
@@ -669,6 +819,8 @@ static int pqi_write_driver_version_to_host_wellness(
strncpy(buffer->driver_version, "Linux " DRIVER_VERSION,
sizeof(buffer->driver_version) - 1);
buffer->driver_version[sizeof(buffer->driver_version) - 1] = '\0';
+ buffer->dont_write_tag[0] = 'D';
+ buffer->dont_write_tag[1] = 'W';
buffer->end_tag[0] = 'Z';
buffer->end_tag[1] = 'Z';
@@ -742,7 +894,7 @@ static int pqi_write_current_time_to_host_wellness(
return rc;
}
-#define PQI_UPDATE_TIME_WORK_INTERVAL (24UL * 60 * 60 * HZ)
+#define PQI_UPDATE_TIME_WORK_INTERVAL (24UL * 60 * 60 * PQI_HZ)
static void pqi_update_time_worker(struct work_struct *work)
{
@@ -776,23 +928,11 @@ static inline void pqi_cancel_update_time_worker(
cancel_delayed_work_sync(&ctrl_info->update_time_work);
}
-static int pqi_report_luns(struct pqi_ctrl_info *ctrl_info, u8 cmd,
+static inline int pqi_report_luns(struct pqi_ctrl_info *ctrl_info, u8 cmd,
void *buffer, size_t buffer_length)
{
- int rc;
- enum dma_data_direction dir;
- struct pqi_raid_path_request request;
-
- rc = pqi_build_raid_path_request(ctrl_info, &request,
- cmd, RAID_CTLR_LUNID, buffer, buffer_length, 0, &dir);
- if (rc)
- return rc;
-
- rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0,
- NULL, NO_TIMEOUT);
-
- pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir);
- return rc;
+ return pqi_send_ctrl_raid_request(ctrl_info, cmd, buffer,
+ buffer_length);
}
static int pqi_report_phys_logical_luns(struct pqi_ctrl_info *ctrl_info, u8 cmd,
@@ -1010,8 +1150,6 @@ static int pqi_validate_raid_map(struct pqi_ctrl_info *ctrl_info,
char *err_msg;
u32 raid_map_size;
u32 r5or6_blocks_per_row;
- unsigned int num_phys_disks;
- unsigned int num_raid_map_entries;
raid_map_size = get_unaligned_le32(&raid_map->structure_size);
@@ -1020,22 +1158,6 @@ static int pqi_validate_raid_map(struct pqi_ctrl_info *ctrl_info,
goto bad_raid_map;
}
- if (raid_map_size > sizeof(*raid_map)) {
- err_msg = "RAID map too large";
- goto bad_raid_map;
- }
-
- num_phys_disks = get_unaligned_le16(&raid_map->layout_map_count) *
- (get_unaligned_le16(&raid_map->data_disks_per_row) +
- get_unaligned_le16(&raid_map->metadata_disks_per_row));
- num_raid_map_entries = num_phys_disks *
- get_unaligned_le16(&raid_map->row_cnt);
-
- if (num_raid_map_entries > RAID_MAP_MAX_ENTRIES) {
- err_msg = "invalid number of map entries in RAID map";
- goto bad_raid_map;
- }
-
if (device->raid_level == SA_RAID_1) {
if (get_unaligned_le16(&raid_map->layout_map_count) != 2) {
err_msg = "invalid RAID-1 map";
@@ -1074,27 +1196,45 @@ static int pqi_get_raid_map(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device)
{
int rc;
- enum dma_data_direction dir;
- struct pqi_raid_path_request request;
+ u32 raid_map_size;
struct raid_map *raid_map;
raid_map = kmalloc(sizeof(*raid_map), GFP_KERNEL);
if (!raid_map)
return -ENOMEM;
- rc = pqi_build_raid_path_request(ctrl_info, &request,
- CISS_GET_RAID_MAP, device->scsi3addr, raid_map,
- sizeof(*raid_map), 0, &dir);
+ rc = pqi_send_scsi_raid_request(ctrl_info, CISS_GET_RAID_MAP,
+ device->scsi3addr, raid_map, sizeof(*raid_map),
+ 0, NULL, NO_TIMEOUT);
+
if (rc)
goto error;
- rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0,
- NULL, NO_TIMEOUT);
+ raid_map_size = get_unaligned_le32(&raid_map->structure_size);
- pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir);
+ if (raid_map_size > sizeof(*raid_map)) {
- if (rc)
- goto error;
+ kfree(raid_map);
+
+ raid_map = kmalloc(raid_map_size, GFP_KERNEL);
+ if (!raid_map)
+ return -ENOMEM;
+
+ rc = pqi_send_scsi_raid_request(ctrl_info, CISS_GET_RAID_MAP,
+ device->scsi3addr, raid_map, raid_map_size,
+ 0, NULL, NO_TIMEOUT);
+ if (rc)
+ goto error;
+
+ if (get_unaligned_le32(&raid_map->structure_size)
+ != raid_map_size) {
+ dev_warn(&ctrl_info->pci_dev->dev,
+ "Requested %d bytes, received %d bytes",
+ raid_map_size,
+ get_unaligned_le32(&raid_map->structure_size));
+ goto error;
+ }
+ }
rc = pqi_validate_raid_map(ctrl_info, device, raid_map);
if (rc)
@@ -1165,6 +1305,9 @@ static void pqi_get_volume_status(struct pqi_ctrl_info *ctrl_info,
if (rc)
goto out;
+ if (vpd->page_code != CISS_VPD_LV_STATUS)
+ goto out;
+
page_length = offsetof(struct ciss_vpd_logical_volume_status,
volume_status) + vpd->page_length;
if (page_length < sizeof(*vpd))
@@ -1190,6 +1333,9 @@ static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info,
u8 *buffer;
unsigned int retries;
+ if (device->is_expander_smp_device)
+ return 0;
+
buffer = kmalloc(64, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
@@ -1225,6 +1371,14 @@ static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info,
}
}
+ if (pqi_get_device_id(ctrl_info, device->scsi3addr,
+ device->unique_id, sizeof(device->unique_id)) < 0)
+ dev_warn(&ctrl_info->pci_dev->dev,
+ "Can't get device id for scsi %d:%d:%d:%d\n",
+ ctrl_info->scsi_host->host_no,
+ device->bus, device->target,
+ device->lun);
+
out:
kfree(buffer);
@@ -1387,9 +1541,24 @@ static int pqi_add_device(struct pqi_ctrl_info *ctrl_info,
return rc;
}
+#define PQI_PENDING_IO_TIMEOUT_SECS 20
+
static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device)
{
+ int rc;
+
+ pqi_device_remove_start(device);
+
+ rc = pqi_device_wait_for_pending_io(ctrl_info, device,
+ PQI_PENDING_IO_TIMEOUT_SECS);
+ if (rc)
+ dev_err(&ctrl_info->pci_dev->dev,
+ "scsi %d:%d:%d:%d removing device with %d outstanding commands\n",
+ ctrl_info->scsi_host->host_no, device->bus,
+ device->target, device->lun,
+ atomic_read(&device->scsi_cmds_outstanding));
+
if (pqi_is_logical_device(device))
scsi_remove_device(device->sdev);
else
@@ -1454,6 +1623,14 @@ static enum pqi_find_result pqi_scsi_find_entry(struct pqi_ctrl_info *ctrl_info,
return DEVICE_NOT_FOUND;
}
+static inline const char *pqi_device_type(struct pqi_scsi_dev *device)
+{
+ if (device->is_expander_smp_device)
+ return "Enclosure SMP ";
+
+ return scsi_device_type(device->devtype);
+}
+
#define PQI_DEV_INFO_BUFFER_LENGTH 128
static void pqi_dev_info(struct pqi_ctrl_info *ctrl_info,
@@ -1489,7 +1666,7 @@ static void pqi_dev_info(struct pqi_ctrl_info *ctrl_info,
count += snprintf(buffer + count, PQI_DEV_INFO_BUFFER_LENGTH - count,
" %s %.8s %.16s ",
- scsi_device_type(device->devtype),
+ pqi_device_type(device),
device->vendor,
device->model);
@@ -1534,6 +1711,8 @@ static void pqi_scsi_update_device(struct pqi_scsi_dev *existing_device,
existing_device->is_physical_device = new_device->is_physical_device;
existing_device->is_external_raid_device =
new_device->is_external_raid_device;
+ existing_device->is_expander_smp_device =
+ new_device->is_expander_smp_device;
existing_device->aio_enabled = new_device->aio_enabled;
memcpy(existing_device->vendor, new_device->vendor,
sizeof(existing_device->vendor));
@@ -1558,6 +1737,7 @@ static void pqi_scsi_update_device(struct pqi_scsi_dev *existing_device,
new_device->raid_bypass_configured;
existing_device->raid_bypass_enabled =
new_device->raid_bypass_enabled;
+ existing_device->device_offline = false;
/* To prevent this from being freed later. */
new_device->raid_map = NULL;
@@ -1589,6 +1769,14 @@ static inline void pqi_fixup_botched_add(struct pqi_ctrl_info *ctrl_info,
device->keep_device = false;
}
+static inline bool pqi_is_device_added(struct pqi_scsi_dev *device)
+{
+ if (device->is_expander_smp_device)
+ return device->sas_port != NULL;
+
+ return device->sdev != NULL;
+}
+
static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices)
{
@@ -1674,6 +1862,9 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+ if (pqi_ctrl_in_ofa(ctrl_info))
+ pqi_ctrl_ofa_done(ctrl_info);
+
/* Remove all devices that have gone away. */
list_for_each_entry_safe(device, next, &delete_list,
delete_list_entry) {
@@ -1683,7 +1874,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
} else {
pqi_dev_info(ctrl_info, "removed", device);
}
- if (device->sdev)
+ if (pqi_is_device_added(device))
pqi_remove_device(ctrl_info, device);
list_del(&device->delete_list_entry);
pqi_free_device(device);
@@ -1705,7 +1896,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
/* Expose any new devices. */
list_for_each_entry_safe(device, next, &add_list, add_list_entry) {
- if (!device->sdev) {
+ if (!pqi_is_device_added(device)) {
pqi_dev_info(ctrl_info, "added", device);
rc = pqi_add_device(ctrl_info, device);
if (rc) {
@@ -1722,7 +1913,12 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
static bool pqi_is_supported_device(struct pqi_scsi_dev *device)
{
- bool is_supported = false;
+ bool is_supported;
+
+ if (device->is_expander_smp_device)
+ return true;
+
+ is_supported = false;
switch (device->devtype) {
case TYPE_DISK:
@@ -1756,6 +1952,30 @@ static inline bool pqi_skip_device(u8 *scsi3addr)
return false;
}
+static inline bool pqi_is_device_with_sas_address(struct pqi_scsi_dev *device)
+{
+ if (!device->is_physical_device)
+ return false;
+
+ if (device->is_expander_smp_device)
+ return true;
+
+ switch (device->devtype) {
+ case TYPE_DISK:
+ case TYPE_ZBC:
+ case TYPE_ENCLOSURE:
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool pqi_expose_device(struct pqi_scsi_dev *device)
+{
+ return !device->is_physical_device ||
+ !pqi_skip_device(device->scsi3addr);
+}
+
static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info)
{
int i;
@@ -1864,9 +2084,14 @@ static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info)
memcpy(device->scsi3addr, scsi3addr, sizeof(device->scsi3addr));
device->is_physical_device = is_physical_device;
- if (!is_physical_device)
+ if (is_physical_device) {
+ if (phys_lun_ext_entry->device_type ==
+ SA_EXPANDER_SMP_DEVICE)
+ device->is_expander_smp_device = true;
+ } else {
device->is_external_raid_device =
pqi_is_external_raid_addr(scsi3addr);
+ }
/* Gather information about the device. */
rc = pqi_get_device_info(ctrl_info, device);
@@ -1899,30 +2124,23 @@ static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info)
device->wwid = phys_lun_ext_entry->wwid;
if ((phys_lun_ext_entry->device_flags &
REPORT_PHYS_LUN_DEV_FLAG_AIO_ENABLED) &&
- phys_lun_ext_entry->aio_handle)
+ phys_lun_ext_entry->aio_handle) {
device->aio_enabled = true;
+ device->aio_handle =
+ phys_lun_ext_entry->aio_handle;
+ }
+ if (device->devtype == TYPE_DISK ||
+ device->devtype == TYPE_ZBC) {
+ pqi_get_physical_disk_info(ctrl_info,
+ device, id_phys);
+ }
} else {
memcpy(device->volume_id, log_lun_ext_entry->volume_id,
sizeof(device->volume_id));
}
- switch (device->devtype) {
- case TYPE_DISK:
- case TYPE_ZBC:
- case TYPE_ENCLOSURE:
- if (device->is_physical_device) {
- device->sas_address =
- get_unaligned_be64(&device->wwid);
- if (device->devtype == TYPE_DISK ||
- device->devtype == TYPE_ZBC) {
- device->aio_handle =
- phys_lun_ext_entry->aio_handle;
- pqi_get_physical_disk_info(ctrl_info,
- device, id_phys);
- }
- }
- break;
- }
+ if (pqi_is_device_with_sas_address(device))
+ device->sas_address = get_unaligned_be64(&device->wwid);
new_device_list[num_valid_devices++] = device;
}
@@ -1965,7 +2183,7 @@ static void pqi_remove_all_scsi_devices(struct pqi_ctrl_info *ctrl_info)
if (!device)
break;
- if (device->sdev)
+ if (pqi_is_device_added(device))
pqi_remove_device(ctrl_info, device);
pqi_free_device(device);
}
@@ -1991,7 +2209,13 @@ static int pqi_scan_scsi_devices(struct pqi_ctrl_info *ctrl_info)
static void pqi_scan_start(struct Scsi_Host *shost)
{
- pqi_scan_scsi_devices(shost_to_hba(shost));
+ struct pqi_ctrl_info *ctrl_info;
+
+ ctrl_info = shost_to_hba(shost);
+ if (pqi_ctrl_in_ofa(ctrl_info))
+ return;
+
+ pqi_scan_scsi_devices(ctrl_info);
}
/* Returns TRUE if scan is finished. */
@@ -2018,6 +2242,12 @@ static void pqi_wait_until_lun_reset_finished(struct pqi_ctrl_info *ctrl_info)
mutex_unlock(&ctrl_info->lun_reset_mutex);
}
+static void pqi_wait_until_ofa_finished(struct pqi_ctrl_info *ctrl_info)
+{
+ mutex_lock(&ctrl_info->ofa_mutex);
+ mutex_unlock(&ctrl_info->ofa_mutex);
+}
+
static inline void pqi_set_encryption_info(
struct pqi_encryption_info *encryption_info, struct raid_map *raid_map,
u64 first_block)
@@ -2325,9 +2555,6 @@ static int pqi_raid_bypass_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info,
(map_row * total_disks_per_row) + first_column;
}
- if (unlikely(map_index >= RAID_MAP_MAX_ENTRIES))
- return PQI_RAID_BYPASS_INELIGIBLE;
-
aio_handle = raid_map->disk_data[map_index].aio_handle;
disk_block = get_unaligned_le64(&raid_map->disk_starting_blk) +
first_row * strip_size +
@@ -2397,7 +2624,7 @@ static int pqi_wait_for_pqi_mode_ready(struct pqi_ctrl_info *ctrl_info)
u8 status;
pqi_registers = ctrl_info->pqi_registers;
- timeout = (PQI_MODE_READY_TIMEOUT_SECS * HZ) + jiffies;
+ timeout = (PQI_MODE_READY_TIMEOUT_SECS * PQI_HZ) + jiffies;
while (1) {
signature = readq(&pqi_registers->signature);
@@ -2458,10 +2685,9 @@ static inline void pqi_take_device_offline(struct scsi_device *sdev, char *path)
return;
device->device_offline = true;
- scsi_device_set_state(sdev, SDEV_OFFLINE);
ctrl_info = shost_to_hba(sdev->host);
pqi_schedule_rescan_worker(ctrl_info);
- dev_err(&ctrl_info->pci_dev->dev, "offlined %s scsi %d:%d:%d:%d\n",
+ dev_err(&ctrl_info->pci_dev->dev, "re-scanning %s scsi %d:%d:%d:%d\n",
path, ctrl_info->scsi_host->host_no, device->bus,
device->target, device->lun);
}
@@ -2665,6 +2891,9 @@ static int pqi_interpret_task_management_response(
case SOP_TMF_FUNCTION_SUCCEEDED:
rc = 0;
break;
+ case SOP_TMF_REJECTED:
+ rc = -EAGAIN;
+ break;
default:
rc = -EIO;
break;
@@ -2704,8 +2933,17 @@ static unsigned int pqi_process_io_intr(struct pqi_ctrl_info *ctrl_info,
switch (response->header.iu_type) {
case PQI_RESPONSE_IU_RAID_PATH_IO_SUCCESS:
case PQI_RESPONSE_IU_AIO_PATH_IO_SUCCESS:
+ if (io_request->scmd)
+ io_request->scmd->result = 0;
+ /* fall through */
case PQI_RESPONSE_IU_GENERAL_MANAGEMENT:
break;
+ case PQI_RESPONSE_IU_VENDOR_GENERAL:
+ io_request->status =
+ get_unaligned_le16(
+ &((struct pqi_vendor_general_response *)
+ response)->status);
+ break;
case PQI_RESPONSE_IU_TASK_MANAGEMENT:
io_request->status =
pqi_interpret_task_management_response(
@@ -2825,6 +3063,111 @@ static void pqi_acknowledge_event(struct pqi_ctrl_info *ctrl_info,
pqi_send_event_ack(ctrl_info, &request, sizeof(request));
}
+#define PQI_SOFT_RESET_STATUS_TIMEOUT_SECS 30
+#define PQI_SOFT_RESET_STATUS_POLL_INTERVAL_SECS 1
+
+static enum pqi_soft_reset_status pqi_poll_for_soft_reset_status(
+ struct pqi_ctrl_info *ctrl_info)
+{
+ unsigned long timeout;
+ u8 status;
+
+ timeout = (PQI_SOFT_RESET_STATUS_TIMEOUT_SECS * PQI_HZ) + jiffies;
+
+ while (1) {
+ status = pqi_read_soft_reset_status(ctrl_info);
+ if (status & PQI_SOFT_RESET_INITIATE)
+ return RESET_INITIATE_DRIVER;
+
+ if (status & PQI_SOFT_RESET_ABORT)
+ return RESET_ABORT;
+
+ if (time_after(jiffies, timeout)) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "timed out waiting for soft reset status\n");
+ return RESET_TIMEDOUT;
+ }
+
+ if (!sis_is_firmware_running(ctrl_info))
+ return RESET_NORESPONSE;
+
+ ssleep(PQI_SOFT_RESET_STATUS_POLL_INTERVAL_SECS);
+ }
+}
+
+static void pqi_process_soft_reset(struct pqi_ctrl_info *ctrl_info,
+ enum pqi_soft_reset_status reset_status)
+{
+ int rc;
+
+ switch (reset_status) {
+ case RESET_INITIATE_DRIVER:
+ /* fall through */
+ case RESET_TIMEDOUT:
+ dev_info(&ctrl_info->pci_dev->dev,
+ "resetting controller %u\n", ctrl_info->ctrl_id);
+ sis_soft_reset(ctrl_info);
+ /* fall through */
+ case RESET_INITIATE_FIRMWARE:
+ rc = pqi_ofa_ctrl_restart(ctrl_info);
+ pqi_ofa_free_host_buffer(ctrl_info);
+ dev_info(&ctrl_info->pci_dev->dev,
+ "Online Firmware Activation for controller %u: %s\n",
+ ctrl_info->ctrl_id, rc == 0 ? "SUCCESS" : "FAILED");
+ break;
+ case RESET_ABORT:
+ pqi_ofa_ctrl_unquiesce(ctrl_info);
+ dev_info(&ctrl_info->pci_dev->dev,
+ "Online Firmware Activation for controller %u: %s\n",
+ ctrl_info->ctrl_id, "ABORTED");
+ break;
+ case RESET_NORESPONSE:
+ pqi_ofa_free_host_buffer(ctrl_info);
+ pqi_take_ctrl_offline(ctrl_info);
+ break;
+ }
+}
+
+static void pqi_ofa_process_event(struct pqi_ctrl_info *ctrl_info,
+ struct pqi_event *event)
+{
+ u16 event_id;
+ enum pqi_soft_reset_status status;
+
+ event_id = get_unaligned_le16(&event->event_id);
+
+ mutex_lock(&ctrl_info->ofa_mutex);
+
+ if (event_id == PQI_EVENT_OFA_QUIESCE) {
+ dev_info(&ctrl_info->pci_dev->dev,
+ "Received Online Firmware Activation quiesce event for controller %u\n",
+ ctrl_info->ctrl_id);
+ pqi_ofa_ctrl_quiesce(ctrl_info);
+ pqi_acknowledge_event(ctrl_info, event);
+ if (ctrl_info->soft_reset_handshake_supported) {
+ status = pqi_poll_for_soft_reset_status(ctrl_info);
+ pqi_process_soft_reset(ctrl_info, status);
+ } else {
+ pqi_process_soft_reset(ctrl_info,
+ RESET_INITIATE_FIRMWARE);
+ }
+
+ } else if (event_id == PQI_EVENT_OFA_MEMORY_ALLOCATION) {
+ pqi_acknowledge_event(ctrl_info, event);
+ pqi_ofa_setup_host_buffer(ctrl_info,
+ le32_to_cpu(event->ofa_bytes_requested));
+ pqi_ofa_host_memory_update(ctrl_info);
+ } else if (event_id == PQI_EVENT_OFA_CANCELLED) {
+ pqi_ofa_free_host_buffer(ctrl_info);
+ pqi_acknowledge_event(ctrl_info, event);
+ dev_info(&ctrl_info->pci_dev->dev,
+ "Online Firmware Activation(%u) cancel reason : %u\n",
+ ctrl_info->ctrl_id, event->ofa_cancel_reason);
+ }
+
+ mutex_unlock(&ctrl_info->ofa_mutex);
+}
+
static void pqi_event_worker(struct work_struct *work)
{
unsigned int i;
@@ -2844,6 +3187,11 @@ static void pqi_event_worker(struct work_struct *work)
for (i = 0; i < PQI_NUM_SUPPORTED_EVENTS; i++) {
if (event->pending) {
event->pending = false;
+ if (event->event_type == PQI_EVENT_TYPE_OFA) {
+ pqi_ctrl_unbusy(ctrl_info);
+ pqi_ofa_process_event(ctrl_info, event);
+ return;
+ }
pqi_acknowledge_event(ctrl_info, event);
}
event++;
@@ -2853,7 +3201,7 @@ out:
pqi_ctrl_unbusy(ctrl_info);
}
-#define PQI_HEARTBEAT_TIMER_INTERVAL (10 * HZ)
+#define PQI_HEARTBEAT_TIMER_INTERVAL (10 * PQI_HZ)
static void pqi_heartbeat_timer_handler(struct timer_list *t)
{
@@ -2922,6 +3270,24 @@ static inline bool pqi_is_supported_event(unsigned int event_type)
return pqi_event_type_to_event_index(event_type) != -1;
}
+static void pqi_ofa_capture_event_payload(struct pqi_event *event,
+ struct pqi_event_response *response)
+{
+ u16 event_id;
+
+ event_id = get_unaligned_le16(&event->event_id);
+
+ if (event->event_type == PQI_EVENT_TYPE_OFA) {
+ if (event_id == PQI_EVENT_OFA_MEMORY_ALLOCATION) {
+ event->ofa_bytes_requested =
+ response->data.ofa_memory_allocation.bytes_requested;
+ } else if (event_id == PQI_EVENT_OFA_CANCELLED) {
+ event->ofa_cancel_reason =
+ response->data.ofa_cancelled.reason;
+ }
+ }
+}
+
static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
{
unsigned int num_events;
@@ -2956,6 +3322,7 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
event->event_id = response->event_id;
event->additional_event_id =
response->additional_event_id;
+ pqi_ofa_capture_event_payload(event, response);
}
}
@@ -3389,7 +3756,7 @@ static int pqi_alloc_admin_queues(struct pqi_ctrl_info *ctrl_info)
return 0;
}
-#define PQI_ADMIN_QUEUE_CREATE_TIMEOUT_JIFFIES HZ
+#define PQI_ADMIN_QUEUE_CREATE_TIMEOUT_JIFFIES PQI_HZ
#define PQI_ADMIN_QUEUE_CREATE_POLL_INTERVAL_MSECS 1
static int pqi_create_admin_queues(struct pqi_ctrl_info *ctrl_info)
@@ -3482,7 +3849,7 @@ static int pqi_poll_for_admin_response(struct pqi_ctrl_info *ctrl_info,
admin_queues = &ctrl_info->admin_queues;
oq_ci = admin_queues->oq_ci_copy;
- timeout = (PQI_ADMIN_REQUEST_TIMEOUT_SECS * HZ) + jiffies;
+ timeout = (PQI_ADMIN_REQUEST_TIMEOUT_SECS * PQI_HZ) + jiffies;
while (1) {
oq_pi = readl(admin_queues->oq_pi);
@@ -3597,7 +3964,7 @@ static int pqi_wait_for_completion_io(struct pqi_ctrl_info *ctrl_info,
while (1) {
if (wait_for_completion_io_timeout(wait,
- PQI_WAIT_FOR_COMPLETION_IO_TIMEOUT_SECS * HZ)) {
+ PQI_WAIT_FOR_COMPLETION_IO_TIMEOUT_SECS * PQI_HZ)) {
rc = 0;
break;
}
@@ -4927,7 +5294,17 @@ void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
{
struct pqi_scsi_dev *device;
+ if (!scmd->device) {
+ set_host_byte(scmd, DID_NO_CONNECT);
+ return;
+ }
+
device = scmd->device->hostdata;
+ if (!device) {
+ set_host_byte(scmd, DID_NO_CONNECT);
+ return;
+ }
+
atomic_dec(&device->scsi_cmds_outstanding);
}
@@ -4944,16 +5321,24 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost,
device = scmd->device->hostdata;
ctrl_info = shost_to_hba(shost);
+ if (!device) {
+ set_host_byte(scmd, DID_NO_CONNECT);
+ pqi_scsi_done(scmd);
+ return 0;
+ }
+
atomic_inc(&device->scsi_cmds_outstanding);
- if (pqi_ctrl_offline(ctrl_info)) {
+ if (pqi_ctrl_offline(ctrl_info) || pqi_device_in_remove(ctrl_info,
+ device)) {
set_host_byte(scmd, DID_NO_CONNECT);
pqi_scsi_done(scmd);
return 0;
}
pqi_ctrl_busy(ctrl_info);
- if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device)) {
+ if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device) ||
+ pqi_ctrl_in_ofa(ctrl_info)) {
rc = SCSI_MLQUEUE_HOST_BUSY;
goto out;
}
@@ -5098,25 +5483,75 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
}
}
+static void pqi_fail_io_queued_for_all_devices(struct pqi_ctrl_info *ctrl_info)
+{
+ unsigned int i;
+ unsigned int path;
+ struct pqi_queue_group *queue_group;
+ unsigned long flags;
+ struct pqi_io_request *io_request;
+ struct pqi_io_request *next;
+ struct scsi_cmnd *scmd;
+
+ for (i = 0; i < ctrl_info->num_queue_groups; i++) {
+ queue_group = &ctrl_info->queue_groups[i];
+
+ for (path = 0; path < 2; path++) {
+ spin_lock_irqsave(&queue_group->submit_lock[path],
+ flags);
+
+ list_for_each_entry_safe(io_request, next,
+ &queue_group->request_list[path],
+ request_list_entry) {
+
+ scmd = io_request->scmd;
+ if (!scmd)
+ continue;
+
+ list_del(&io_request->request_list_entry);
+ set_host_byte(scmd, DID_RESET);
+ pqi_scsi_done(scmd);
+ }
+
+ spin_unlock_irqrestore(
+ &queue_group->submit_lock[path], flags);
+ }
+ }
+}
+
static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
- struct pqi_scsi_dev *device)
+ struct pqi_scsi_dev *device, unsigned long timeout_secs)
{
+ unsigned long timeout;
+
+ timeout = (timeout_secs * PQI_HZ) + jiffies;
+
while (atomic_read(&device->scsi_cmds_outstanding)) {
pqi_check_ctrl_health(ctrl_info);
if (pqi_ctrl_offline(ctrl_info))
return -ENXIO;
+ if (timeout_secs != NO_TIMEOUT) {
+ if (time_after(jiffies, timeout)) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "timed out waiting for pending IO\n");
+ return -ETIMEDOUT;
+ }
+ }
usleep_range(1000, 2000);
}
return 0;
}
-static int pqi_ctrl_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info)
+static int pqi_ctrl_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
+ unsigned long timeout_secs)
{
bool io_pending;
unsigned long flags;
+ unsigned long timeout;
struct pqi_scsi_dev *device;
+ timeout = (timeout_secs * PQI_HZ) + jiffies;
while (1) {
io_pending = false;
@@ -5138,6 +5573,13 @@ static int pqi_ctrl_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info)
if (pqi_ctrl_offline(ctrl_info))
return -ENXIO;
+ if (timeout_secs != NO_TIMEOUT) {
+ if (time_after(jiffies, timeout)) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "timed out waiting for pending IO\n");
+ return -ETIMEDOUT;
+ }
+ }
usleep_range(1000, 2000);
}
@@ -5161,7 +5603,7 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
while (1) {
if (wait_for_completion_io_timeout(wait,
- PQI_LUN_RESET_TIMEOUT_SECS * HZ)) {
+ PQI_LUN_RESET_TIMEOUT_SECS * PQI_HZ)) {
rc = 0;
break;
}
@@ -5212,20 +5654,56 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info,
return rc;
}
+#define PQI_LUN_RESET_RETRIES 3
+#define PQI_LUN_RESET_RETRY_INTERVAL_MSECS 10000
/* Performs a reset at the LUN level. */
-static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info,
+static int _pqi_device_reset(struct pqi_ctrl_info *ctrl_info,
struct pqi_scsi_dev *device)
{
int rc;
+ unsigned int retries;
+ unsigned long timeout_secs;
- rc = pqi_lun_reset(ctrl_info, device);
- if (rc == 0)
- rc = pqi_device_wait_for_pending_io(ctrl_info, device);
+ for (retries = 0;;) {
+ rc = pqi_lun_reset(ctrl_info, device);
+ if (rc != -EAGAIN ||
+ ++retries > PQI_LUN_RESET_RETRIES)
+ break;
+ msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS);
+ }
+ timeout_secs = rc ? PQI_LUN_RESET_TIMEOUT_SECS : NO_TIMEOUT;
+
+ rc |= pqi_device_wait_for_pending_io(ctrl_info, device, timeout_secs);
return rc == 0 ? SUCCESS : FAILED;
}
+static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info,
+ struct pqi_scsi_dev *device)
+{
+ int rc;
+
+ mutex_lock(&ctrl_info->lun_reset_mutex);
+
+ pqi_ctrl_block_requests(ctrl_info);
+ pqi_ctrl_wait_until_quiesced(ctrl_info);
+ pqi_fail_io_queued_for_device(ctrl_info, device);
+ rc = pqi_wait_until_inbound_queues_empty(ctrl_info);
+ pqi_device_reset_start(device);
+ pqi_ctrl_unblock_requests(ctrl_info);
+
+ if (rc)
+ rc = FAILED;
+ else
+ rc = _pqi_device_reset(ctrl_info, device);
+
+ pqi_device_reset_done(device);
+
+ mutex_unlock(&ctrl_info->lun_reset_mutex);
+ return rc;
+}
+
static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
{
int rc;
@@ -5243,28 +5721,16 @@ static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
pqi_check_ctrl_health(ctrl_info);
if (pqi_ctrl_offline(ctrl_info)) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "controller %u offlined - cannot send device reset\n",
+ ctrl_info->ctrl_id);
rc = FAILED;
goto out;
}
- mutex_lock(&ctrl_info->lun_reset_mutex);
-
- pqi_ctrl_block_requests(ctrl_info);
- pqi_ctrl_wait_until_quiesced(ctrl_info);
- pqi_fail_io_queued_for_device(ctrl_info, device);
- rc = pqi_wait_until_inbound_queues_empty(ctrl_info);
- pqi_device_reset_start(device);
- pqi_ctrl_unblock_requests(ctrl_info);
-
- if (rc)
- rc = FAILED;
- else
- rc = pqi_device_reset(ctrl_info, device);
-
- pqi_device_reset_done(device);
-
- mutex_unlock(&ctrl_info->lun_reset_mutex);
+ pqi_wait_until_ofa_finished(ctrl_info);
+ rc = pqi_device_reset(ctrl_info, device);
out:
dev_err(&ctrl_info->pci_dev->dev,
"reset of scsi %d:%d:%d:%d: %s\n",
@@ -5308,6 +5774,10 @@ static int pqi_slave_alloc(struct scsi_device *sdev)
scsi_change_queue_depth(sdev,
device->advertised_queue_depth);
}
+ if (pqi_is_logical_device(device))
+ pqi_disable_write_same(sdev);
+ else
+ sdev->allow_restart = 1;
}
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
@@ -5319,7 +5789,8 @@ static int pqi_map_queues(struct Scsi_Host *shost)
{
struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost);
- return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev, 0);
+ return blk_mq_pci_map_queues(&shost->tag_set.map[0],
+ ctrl_info->pci_dev, 0);
}
static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info,
@@ -5579,6 +6050,9 @@ static int pqi_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
ctrl_info = shost_to_hba(sdev->host);
+ if (pqi_ctrl_in_ofa(ctrl_info))
+ return -EBUSY;
+
switch (cmd) {
case CCISS_DEREGDISK:
case CCISS_REGNEWDISK:
@@ -5683,6 +6157,150 @@ static struct device_attribute *pqi_shost_attrs[] = {
NULL
};
+static ssize_t pqi_unique_id_show(struct device *dev,
+ struct device_attribute *attr, char *buffer)
+{
+ struct pqi_ctrl_info *ctrl_info;
+ struct scsi_device *sdev;
+ struct pqi_scsi_dev *device;
+ unsigned long flags;
+ unsigned char uid[16];
+
+ sdev = to_scsi_device(dev);
+ ctrl_info = shost_to_hba(sdev->host);
+
+ spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags);
+
+ device = sdev->hostdata;
+ if (!device) {
+ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock,
+ flags);
+ return -ENODEV;
+ }
+ memcpy(uid, device->unique_id, sizeof(uid));
+
+ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+
+ return snprintf(buffer, PAGE_SIZE,
+ "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X\n",
+ uid[0], uid[1], uid[2], uid[3],
+ uid[4], uid[5], uid[6], uid[7],
+ uid[8], uid[9], uid[10], uid[11],
+ uid[12], uid[13], uid[14], uid[15]);
+}
+
+static ssize_t pqi_lunid_show(struct device *dev,
+ struct device_attribute *attr, char *buffer)
+{
+ struct pqi_ctrl_info *ctrl_info;
+ struct scsi_device *sdev;
+ struct pqi_scsi_dev *device;
+ unsigned long flags;
+ u8 lunid[8];
+
+ sdev = to_scsi_device(dev);
+ ctrl_info = shost_to_hba(sdev->host);
+
+ spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags);
+
+ device = sdev->hostdata;
+ if (!device) {
+ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock,
+ flags);
+ return -ENODEV;
+ }
+ memcpy(lunid, device->scsi3addr, sizeof(lunid));
+
+ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+
+ return snprintf(buffer, PAGE_SIZE, "0x%8phN\n", lunid);
+}
+
+#define MAX_PATHS 8
+static ssize_t pqi_path_info_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pqi_ctrl_info *ctrl_info;
+ struct scsi_device *sdev;
+ struct pqi_scsi_dev *device;
+ unsigned long flags;
+ int i;
+ int output_len = 0;
+ u8 box;
+ u8 bay;
+ u8 path_map_index = 0;
+ char *active;
+ unsigned char phys_connector[2];
+
+ sdev = to_scsi_device(dev);
+ ctrl_info = shost_to_hba(sdev->host);
+
+ spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags);
+
+ device = sdev->hostdata;
+ if (!device) {
+ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock,
+ flags);
+ return -ENODEV;
+ }
+
+ bay = device->bay;
+ for (i = 0; i < MAX_PATHS; i++) {
+ path_map_index = 1<<i;
+ if (i == device->active_path_index)
+ active = "Active";
+ else if (device->path_map & path_map_index)
+ active = "Inactive";
+ else
+ continue;
+
+ output_len += scnprintf(buf + output_len,
+ PAGE_SIZE - output_len,
+ "[%d:%d:%d:%d] %20.20s ",
+ ctrl_info->scsi_host->host_no,
+ device->bus, device->target,
+ device->lun,
+ scsi_device_type(device->devtype));
+
+ if (device->devtype == TYPE_RAID ||
+ pqi_is_logical_device(device))
+ goto end_buffer;
+
+ memcpy(&phys_connector, &device->phys_connector[i],
+ sizeof(phys_connector));
+ if (phys_connector[0] < '0')
+ phys_connector[0] = '0';
+ if (phys_connector[1] < '0')
+ phys_connector[1] = '0';
+
+ output_len += scnprintf(buf + output_len,
+ PAGE_SIZE - output_len,
+ "PORT: %.2s ", phys_connector);
+
+ box = device->box[i];
+ if (box != 0 && box != 0xFF)
+ output_len += scnprintf(buf + output_len,
+ PAGE_SIZE - output_len,
+ "BOX: %hhu ", box);
+
+ if ((device->devtype == TYPE_DISK ||
+ device->devtype == TYPE_ZBC) &&
+ pqi_expose_device(device))
+ output_len += scnprintf(buf + output_len,
+ PAGE_SIZE - output_len,
+ "BAY: %hhu ", bay);
+
+end_buffer:
+ output_len += scnprintf(buf + output_len,
+ PAGE_SIZE - output_len,
+ "%s\n", active);
+ }
+
+ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+ return output_len;
+}
+
+
static ssize_t pqi_sas_address_show(struct device *dev,
struct device_attribute *attr, char *buffer)
{
@@ -5759,12 +6377,18 @@ static ssize_t pqi_raid_level_show(struct device *dev,
return snprintf(buffer, PAGE_SIZE, "%s\n", raid_level);
}
+static DEVICE_ATTR(lunid, 0444, pqi_lunid_show, NULL);
+static DEVICE_ATTR(unique_id, 0444, pqi_unique_id_show, NULL);
+static DEVICE_ATTR(path_info, 0444, pqi_path_info_show, NULL);
static DEVICE_ATTR(sas_address, 0444, pqi_sas_address_show, NULL);
static DEVICE_ATTR(ssd_smart_path_enabled, 0444,
pqi_ssd_smart_path_enabled_show, NULL);
static DEVICE_ATTR(raid_level, 0444, pqi_raid_level_show, NULL);
static struct device_attribute *pqi_sdev_attrs[] = {
+ &dev_attr_lunid,
+ &dev_attr_unique_id,
+ &dev_attr_path_info,
&dev_attr_sas_address,
&dev_attr_ssd_smart_path_enabled,
&dev_attr_raid_level,
@@ -5779,7 +6403,6 @@ static struct scsi_host_template pqi_driver_template = {
.scan_start = pqi_scan_start,
.scan_finished = pqi_scan_finished,
.this_id = -1,
- .use_clustering = ENABLE_CLUSTERING,
.eh_device_reset_handler = pqi_eh_device_reset_handler,
.ioctl = pqi_ioctl,
.slave_alloc = pqi_slave_alloc,
@@ -5947,6 +6570,244 @@ out:
return rc;
}
+struct pqi_config_table_section_info {
+ struct pqi_ctrl_info *ctrl_info;
+ void *section;
+ u32 section_offset;
+ void __iomem *section_iomem_addr;
+};
+
+static inline bool pqi_is_firmware_feature_supported(
+ struct pqi_config_table_firmware_features *firmware_features,
+ unsigned int bit_position)
+{
+ unsigned int byte_index;
+
+ byte_index = bit_position / BITS_PER_BYTE;
+
+ if (byte_index >= le16_to_cpu(firmware_features->num_elements))
+ return false;
+
+ return firmware_features->features_supported[byte_index] &
+ (1 << (bit_position % BITS_PER_BYTE)) ? true : false;
+}
+
+static inline bool pqi_is_firmware_feature_enabled(
+ struct pqi_config_table_firmware_features *firmware_features,
+ void __iomem *firmware_features_iomem_addr,
+ unsigned int bit_position)
+{
+ unsigned int byte_index;
+ u8 __iomem *features_enabled_iomem_addr;
+
+ byte_index = (bit_position / BITS_PER_BYTE) +
+ (le16_to_cpu(firmware_features->num_elements) * 2);
+
+ features_enabled_iomem_addr = firmware_features_iomem_addr +
+ offsetof(struct pqi_config_table_firmware_features,
+ features_supported) + byte_index;
+
+ return *((__force u8 *)features_enabled_iomem_addr) &
+ (1 << (bit_position % BITS_PER_BYTE)) ? true : false;
+}
+
+static inline void pqi_request_firmware_feature(
+ struct pqi_config_table_firmware_features *firmware_features,
+ unsigned int bit_position)
+{
+ unsigned int byte_index;
+
+ byte_index = (bit_position / BITS_PER_BYTE) +
+ le16_to_cpu(firmware_features->num_elements);
+
+ firmware_features->features_supported[byte_index] |=
+ (1 << (bit_position % BITS_PER_BYTE));
+}
+
+static int pqi_config_table_update(struct pqi_ctrl_info *ctrl_info,
+ u16 first_section, u16 last_section)
+{
+ struct pqi_vendor_general_request request;
+
+ memset(&request, 0, sizeof(request));
+
+ request.header.iu_type = PQI_REQUEST_IU_VENDOR_GENERAL;
+ put_unaligned_le16(sizeof(request) - PQI_REQUEST_HEADER_LENGTH,
+ &request.header.iu_length);
+ put_unaligned_le16(PQI_VENDOR_GENERAL_CONFIG_TABLE_UPDATE,
+ &request.function_code);
+ put_unaligned_le16(first_section,
+ &request.data.config_table_update.first_section);
+ put_unaligned_le16(last_section,
+ &request.data.config_table_update.last_section);
+
+ return pqi_submit_raid_request_synchronous(ctrl_info, &request.header,
+ 0, NULL, NO_TIMEOUT);
+}
+
+static int pqi_enable_firmware_features(struct pqi_ctrl_info *ctrl_info,
+ struct pqi_config_table_firmware_features *firmware_features,
+ void __iomem *firmware_features_iomem_addr)
+{
+ void *features_requested;
+ void __iomem *features_requested_iomem_addr;
+
+ features_requested = firmware_features->features_supported +
+ le16_to_cpu(firmware_features->num_elements);
+
+ features_requested_iomem_addr = firmware_features_iomem_addr +
+ (features_requested - (void *)firmware_features);
+
+ memcpy_toio(features_requested_iomem_addr, features_requested,
+ le16_to_cpu(firmware_features->num_elements));
+
+ return pqi_config_table_update(ctrl_info,
+ PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES,
+ PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES);
+}
+
+struct pqi_firmware_feature {
+ char *feature_name;
+ unsigned int feature_bit;
+ bool supported;
+ bool enabled;
+ void (*feature_status)(struct pqi_ctrl_info *ctrl_info,
+ struct pqi_firmware_feature *firmware_feature);
+};
+
+static void pqi_firmware_feature_status(struct pqi_ctrl_info *ctrl_info,
+ struct pqi_firmware_feature *firmware_feature)
+{
+ if (!firmware_feature->supported) {
+ dev_info(&ctrl_info->pci_dev->dev, "%s not supported by controller\n",
+ firmware_feature->feature_name);
+ return;
+ }
+
+ if (firmware_feature->enabled) {
+ dev_info(&ctrl_info->pci_dev->dev,
+ "%s enabled\n", firmware_feature->feature_name);
+ return;
+ }
+
+ dev_err(&ctrl_info->pci_dev->dev, "failed to enable %s\n",
+ firmware_feature->feature_name);
+}
+
+static inline void pqi_firmware_feature_update(struct pqi_ctrl_info *ctrl_info,
+ struct pqi_firmware_feature *firmware_feature)
+{
+ if (firmware_feature->feature_status)
+ firmware_feature->feature_status(ctrl_info, firmware_feature);
+}
+
+static DEFINE_MUTEX(pqi_firmware_features_mutex);
+
+static struct pqi_firmware_feature pqi_firmware_features[] = {
+ {
+ .feature_name = "Online Firmware Activation",
+ .feature_bit = PQI_FIRMWARE_FEATURE_OFA,
+ .feature_status = pqi_firmware_feature_status,
+ },
+ {
+ .feature_name = "Serial Management Protocol",
+ .feature_bit = PQI_FIRMWARE_FEATURE_SMP,
+ .feature_status = pqi_firmware_feature_status,
+ },
+ {
+ .feature_name = "New Soft Reset Handshake",
+ .feature_bit = PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE,
+ .feature_status = pqi_firmware_feature_status,
+ },
+};
+
+static void pqi_process_firmware_features(
+ struct pqi_config_table_section_info *section_info)
+{
+ int rc;
+ struct pqi_ctrl_info *ctrl_info;
+ struct pqi_config_table_firmware_features *firmware_features;
+ void __iomem *firmware_features_iomem_addr;
+ unsigned int i;
+ unsigned int num_features_supported;
+
+ ctrl_info = section_info->ctrl_info;
+ firmware_features = section_info->section;
+ firmware_features_iomem_addr = section_info->section_iomem_addr;
+
+ for (i = 0, num_features_supported = 0;
+ i < ARRAY_SIZE(pqi_firmware_features); i++) {
+ if (pqi_is_firmware_feature_supported(firmware_features,
+ pqi_firmware_features[i].feature_bit)) {
+ pqi_firmware_features[i].supported = true;
+ num_features_supported++;
+ } else {
+ pqi_firmware_feature_update(ctrl_info,
+ &pqi_firmware_features[i]);
+ }
+ }
+
+ if (num_features_supported == 0)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) {
+ if (!pqi_firmware_features[i].supported)
+ continue;
+ pqi_request_firmware_feature(firmware_features,
+ pqi_firmware_features[i].feature_bit);
+ }
+
+ rc = pqi_enable_firmware_features(ctrl_info, firmware_features,
+ firmware_features_iomem_addr);
+ if (rc) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "failed to enable firmware features in PQI configuration table\n");
+ for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) {
+ if (!pqi_firmware_features[i].supported)
+ continue;
+ pqi_firmware_feature_update(ctrl_info,
+ &pqi_firmware_features[i]);
+ }
+ return;
+ }
+
+ ctrl_info->soft_reset_handshake_supported = false;
+ for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) {
+ if (!pqi_firmware_features[i].supported)
+ continue;
+ if (pqi_is_firmware_feature_enabled(firmware_features,
+ firmware_features_iomem_addr,
+ pqi_firmware_features[i].feature_bit)) {
+ pqi_firmware_features[i].enabled = true;
+ if (pqi_firmware_features[i].feature_bit ==
+ PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE)
+ ctrl_info->soft_reset_handshake_supported =
+ true;
+ }
+ pqi_firmware_feature_update(ctrl_info,
+ &pqi_firmware_features[i]);
+ }
+}
+
+static void pqi_init_firmware_features(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) {
+ pqi_firmware_features[i].supported = false;
+ pqi_firmware_features[i].enabled = false;
+ }
+}
+
+static void pqi_process_firmware_features_section(
+ struct pqi_config_table_section_info *section_info)
+{
+ mutex_lock(&pqi_firmware_features_mutex);
+ pqi_init_firmware_features();
+ pqi_process_firmware_features(section_info);
+ mutex_unlock(&pqi_firmware_features_mutex);
+}
+
static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info)
{
u32 table_length;
@@ -5954,8 +6815,11 @@ static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info)
void __iomem *table_iomem_addr;
struct pqi_config_table *config_table;
struct pqi_config_table_section_header *section;
+ struct pqi_config_table_section_info section_info;
table_length = ctrl_info->config_table_length;
+ if (table_length == 0)
+ return 0;
config_table = kmalloc(table_length, GFP_KERNEL);
if (!config_table) {
@@ -5972,13 +6836,22 @@ static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info)
ctrl_info->config_table_offset;
memcpy_fromio(config_table, table_iomem_addr, table_length);
+ section_info.ctrl_info = ctrl_info;
section_offset =
get_unaligned_le32(&config_table->first_section_offset);
while (section_offset) {
section = (void *)config_table + section_offset;
+ section_info.section = section;
+ section_info.section_offset = section_offset;
+ section_info.section_iomem_addr =
+ table_iomem_addr + section_offset;
+
switch (get_unaligned_le16(&section->section_id)) {
+ case PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES:
+ pqi_process_firmware_features_section(&section_info);
+ break;
case PQI_CONFIG_TABLE_SECTION_HEARTBEAT:
if (pqi_disable_heartbeat)
dev_warn(&ctrl_info->pci_dev->dev,
@@ -5991,6 +6864,13 @@ static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info)
struct pqi_config_table_heartbeat,
heartbeat_counter);
break;
+ case PQI_CONFIG_TABLE_SECTION_SOFT_RESET:
+ ctrl_info->soft_reset_status =
+ table_iomem_addr +
+ section_offset +
+ offsetof(struct pqi_config_table_soft_reset,
+ soft_reset_status);
+ break;
}
section_offset =
@@ -6123,10 +7003,6 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
ctrl_info->pqi_mode_enabled = true;
pqi_save_ctrl_mode(ctrl_info, PQI_MODE);
- rc = pqi_process_config_table(ctrl_info);
- if (rc)
- return rc;
-
rc = pqi_alloc_admin_queues(ctrl_info);
if (rc) {
dev_err(&ctrl_info->pci_dev->dev,
@@ -6188,6 +7064,11 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
pqi_change_irq_mode(ctrl_info, IRQ_MODE_MSIX);
ctrl_info->controller_online = true;
+
+ rc = pqi_process_config_table(ctrl_info);
+ if (rc)
+ return rc;
+
pqi_start_heartbeat_timer(ctrl_info);
rc = pqi_enable_events(ctrl_info);
@@ -6209,6 +7090,13 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
return rc;
}
+ rc = pqi_set_diag_rescan(ctrl_info);
+ if (rc) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "error enabling multi-lun rescan\n");
+ return rc;
+ }
+
rc = pqi_write_driver_version_to_host_wellness(ctrl_info);
if (rc) {
dev_err(&ctrl_info->pci_dev->dev,
@@ -6266,6 +7154,24 @@ static int pqi_ctrl_init_resume(struct pqi_ctrl_info *ctrl_info)
return rc;
/*
+ * Get the controller properties. This allows us to determine
+ * whether or not it supports PQI mode.
+ */
+ rc = sis_get_ctrl_properties(ctrl_info);
+ if (rc) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "error obtaining controller properties\n");
+ return rc;
+ }
+
+ rc = sis_get_pqi_capabilities(ctrl_info);
+ if (rc) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "error obtaining controller capabilities\n");
+ return rc;
+ }
+
+ /*
* If the function we are about to call succeeds, the
* controller will transition from legacy SIS mode
* into PQI mode.
@@ -6305,9 +7211,14 @@ static int pqi_ctrl_init_resume(struct pqi_ctrl_info *ctrl_info)
pqi_change_irq_mode(ctrl_info, IRQ_MODE_MSIX);
ctrl_info->controller_online = true;
- pqi_start_heartbeat_timer(ctrl_info);
pqi_ctrl_unblock_requests(ctrl_info);
+ rc = pqi_process_config_table(ctrl_info);
+ if (rc)
+ return rc;
+
+ pqi_start_heartbeat_timer(ctrl_info);
+
rc = pqi_enable_events(ctrl_info);
if (rc) {
dev_err(&ctrl_info->pci_dev->dev,
@@ -6315,6 +7226,20 @@ static int pqi_ctrl_init_resume(struct pqi_ctrl_info *ctrl_info)
return rc;
}
+ rc = pqi_get_ctrl_firmware_version(ctrl_info);
+ if (rc) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "error obtaining firmware version\n");
+ return rc;
+ }
+
+ rc = pqi_set_diag_rescan(ctrl_info);
+ if (rc) {
+ dev_err(&ctrl_info->pci_dev->dev,
+ "error enabling multi-lun rescan\n");
+ return rc;
+ }
+
rc = pqi_write_driver_version_to_host_wellness(ctrl_info);
if (rc) {
dev_err(&ctrl_info->pci_dev->dev,
@@ -6425,6 +7350,7 @@ static struct pqi_ctrl_info *pqi_alloc_ctrl_info(int numa_node)
mutex_init(&ctrl_info->scan_mutex);
mutex_init(&ctrl_info->lun_reset_mutex);
+ mutex_init(&ctrl_info->ofa_mutex);
INIT_LIST_HEAD(&ctrl_info->scsi_device_list);
spin_lock_init(&ctrl_info->scsi_device_list_lock);
@@ -6501,6 +7427,217 @@ static void pqi_remove_ctrl(struct pqi_ctrl_info *ctrl_info)
pqi_free_ctrl_resources(ctrl_info);
}
+static void pqi_ofa_ctrl_quiesce(struct pqi_ctrl_info *ctrl_info)
+{
+ pqi_cancel_update_time_worker(ctrl_info);
+ pqi_cancel_rescan_worker(ctrl_info);
+ pqi_wait_until_lun_reset_finished(ctrl_info);
+ pqi_wait_until_scan_finished(ctrl_info);
+ pqi_ctrl_ofa_start(ctrl_info);
+ pqi_ctrl_block_requests(ctrl_info);
+ pqi_ctrl_wait_until_quiesced(ctrl_info);
+ pqi_ctrl_wait_for_pending_io(ctrl_info, PQI_PENDING_IO_TIMEOUT_SECS);
+ pqi_fail_io_queued_for_all_devices(ctrl_info);
+ pqi_wait_until_inbound_queues_empty(ctrl_info);
+ pqi_stop_heartbeat_timer(ctrl_info);
+ ctrl_info->pqi_mode_enabled = false;
+ pqi_save_ctrl_mode(ctrl_info, SIS_MODE);
+}
+
+static void pqi_ofa_ctrl_unquiesce(struct pqi_ctrl_info *ctrl_info)
+{
+ pqi_ofa_free_host_buffer(ctrl_info);
+ ctrl_info->pqi_mode_enabled = true;
+ pqi_save_ctrl_mode(ctrl_info, PQI_MODE);
+ ctrl_info->controller_online = true;
+ pqi_ctrl_unblock_requests(ctrl_info);
+ pqi_start_heartbeat_timer(ctrl_info);
+ pqi_schedule_update_time_worker(ctrl_info);
+ pqi_clear_soft_reset_status(ctrl_info,
+ PQI_SOFT_RESET_ABORT);
+ pqi_scan_scsi_devices(ctrl_info);
+}
+
+static int pqi_ofa_alloc_mem(struct pqi_ctrl_info *ctrl_info,
+ u32 total_size, u32 chunk_size)
+{
+ u32 sg_count;
+ u32 size;
+ int i;
+ struct pqi_sg_descriptor *mem_descriptor = NULL;
+ struct device *dev;
+ struct pqi_ofa_memory *ofap;
+
+ dev = &ctrl_info->pci_dev->dev;
+
+ sg_count = (total_size + chunk_size - 1);
+ sg_count /= chunk_size;
+
+ ofap = ctrl_info->pqi_ofa_mem_virt_addr;
+
+ if (sg_count*chunk_size < total_size)
+ goto out;
+
+ ctrl_info->pqi_ofa_chunk_virt_addr =
+ kcalloc(sg_count, sizeof(void *), GFP_KERNEL);
+ if (!ctrl_info->pqi_ofa_chunk_virt_addr)
+ goto out;
+
+ for (size = 0, i = 0; size < total_size; size += chunk_size, i++) {
+ dma_addr_t dma_handle;
+
+ ctrl_info->pqi_ofa_chunk_virt_addr[i] =
+ dma_zalloc_coherent(dev, chunk_size, &dma_handle,
+ GFP_KERNEL);
+
+ if (!ctrl_info->pqi_ofa_chunk_virt_addr[i])
+ break;
+
+ mem_descriptor = &ofap->sg_descriptor[i];
+ put_unaligned_le64 ((u64) dma_handle, &mem_descriptor->address);
+ put_unaligned_le32 (chunk_size, &mem_descriptor->length);
+ }
+
+ if (!size || size < total_size)
+ goto out_free_chunks;
+
+ put_unaligned_le32(CISS_SG_LAST, &mem_descriptor->flags);
+ put_unaligned_le16(sg_count, &ofap->num_memory_descriptors);
+ put_unaligned_le32(size, &ofap->bytes_allocated);
+
+ return 0;
+
+out_free_chunks:
+ while (--i >= 0) {
+ mem_descriptor = &ofap->sg_descriptor[i];
+ dma_free_coherent(dev, chunk_size,
+ ctrl_info->pqi_ofa_chunk_virt_addr[i],
+ get_unaligned_le64(&mem_descriptor->address));
+ }
+ kfree(ctrl_info->pqi_ofa_chunk_virt_addr);
+
+out:
+ put_unaligned_le32 (0, &ofap->bytes_allocated);
+ return -ENOMEM;
+}
+
+static int pqi_ofa_alloc_host_buffer(struct pqi_ctrl_info *ctrl_info)
+{
+ u32 total_size;
+ u32 min_chunk_size;
+ u32 chunk_sz;
+
+ total_size = le32_to_cpu(
+ ctrl_info->pqi_ofa_mem_virt_addr->bytes_allocated);
+ min_chunk_size = total_size / PQI_OFA_MAX_SG_DESCRIPTORS;
+
+ for (chunk_sz = total_size; chunk_sz >= min_chunk_size; chunk_sz /= 2)
+ if (!pqi_ofa_alloc_mem(ctrl_info, total_size, chunk_sz))
+ return 0;
+
+ return -ENOMEM;
+}
+
+static void pqi_ofa_setup_host_buffer(struct pqi_ctrl_info *ctrl_info,
+ u32 bytes_requested)
+{
+ struct pqi_ofa_memory *pqi_ofa_memory;
+ struct device *dev;
+
+ dev = &ctrl_info->pci_dev->dev;
+ pqi_ofa_memory = dma_zalloc_coherent(dev,
+ PQI_OFA_MEMORY_DESCRIPTOR_LENGTH,
+ &ctrl_info->pqi_ofa_mem_dma_handle,
+ GFP_KERNEL);
+
+ if (!pqi_ofa_memory)
+ return;
+
+ put_unaligned_le16(PQI_OFA_VERSION, &pqi_ofa_memory->version);
+ memcpy(&pqi_ofa_memory->signature, PQI_OFA_SIGNATURE,
+ sizeof(pqi_ofa_memory->signature));
+ pqi_ofa_memory->bytes_allocated = cpu_to_le32(bytes_requested);
+
+ ctrl_info->pqi_ofa_mem_virt_addr = pqi_ofa_memory;
+
+ if (pqi_ofa_alloc_host_buffer(ctrl_info) < 0) {
+ dev_err(dev, "Failed to allocate host buffer of size = %u",
+ bytes_requested);
+ }
+}
+
+static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info)
+{
+ int i;
+ struct pqi_sg_descriptor *mem_descriptor;
+ struct pqi_ofa_memory *ofap;
+
+ ofap = ctrl_info->pqi_ofa_mem_virt_addr;
+
+ if (!ofap)
+ return;
+
+ if (!ofap->bytes_allocated)
+ goto out;
+
+ mem_descriptor = ofap->sg_descriptor;
+
+ for (i = 0; i < get_unaligned_le16(&ofap->num_memory_descriptors);
+ i++) {
+ dma_free_coherent(&ctrl_info->pci_dev->dev,
+ get_unaligned_le32(&mem_descriptor[i].length),
+ ctrl_info->pqi_ofa_chunk_virt_addr[i],
+ get_unaligned_le64(&mem_descriptor[i].address));
+ }
+ kfree(ctrl_info->pqi_ofa_chunk_virt_addr);
+
+out:
+ dma_free_coherent(&ctrl_info->pci_dev->dev,
+ PQI_OFA_MEMORY_DESCRIPTOR_LENGTH, ofap,
+ ctrl_info->pqi_ofa_mem_dma_handle);
+ ctrl_info->pqi_ofa_mem_virt_addr = NULL;
+}
+
+static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info)
+{
+ struct pqi_vendor_general_request request;
+ size_t size;
+ struct pqi_ofa_memory *ofap;
+
+ memset(&request, 0, sizeof(request));
+
+ ofap = ctrl_info->pqi_ofa_mem_virt_addr;
+
+ request.header.iu_type = PQI_REQUEST_IU_VENDOR_GENERAL;
+ put_unaligned_le16(sizeof(request) - PQI_REQUEST_HEADER_LENGTH,
+ &request.header.iu_length);
+ put_unaligned_le16(PQI_VENDOR_GENERAL_HOST_MEMORY_UPDATE,
+ &request.function_code);
+
+ if (ofap) {
+ size = offsetof(struct pqi_ofa_memory, sg_descriptor) +
+ get_unaligned_le16(&ofap->num_memory_descriptors) *
+ sizeof(struct pqi_sg_descriptor);
+
+ put_unaligned_le64((u64)ctrl_info->pqi_ofa_mem_dma_handle,
+ &request.data.ofa_memory_allocation.buffer_address);
+ put_unaligned_le32(size,
+ &request.data.ofa_memory_allocation.buffer_length);
+
+ }
+
+ return pqi_submit_raid_request_synchronous(ctrl_info, &request.header,
+ 0, NULL, NO_TIMEOUT);
+}
+
+#define PQI_POST_RESET_DELAY_B4_MSGU_READY 5000
+
+static int pqi_ofa_ctrl_restart(struct pqi_ctrl_info *ctrl_info)
+{
+ msleep(PQI_POST_RESET_DELAY_B4_MSGU_READY);
+ return pqi_ctrl_init_resume(ctrl_info);
+}
+
static void pqi_perform_lockup_action(void)
{
switch (pqi_lockup_action) {
@@ -6599,7 +7736,7 @@ static int pqi_pci_probe(struct pci_dev *pci_dev,
const struct pci_device_id *id)
{
int rc;
- int node;
+ int node, cp_node;
struct pqi_ctrl_info *ctrl_info;
pqi_print_ctrl_info(pci_dev, id);
@@ -6617,8 +7754,12 @@ static int pqi_pci_probe(struct pci_dev *pci_dev,
"controller device ID matched using wildcards\n");
node = dev_to_node(&pci_dev->dev);
- if (node == NUMA_NO_NODE)
- set_dev_node(&pci_dev->dev, 0);
+ if (node == NUMA_NO_NODE) {
+ cp_node = cpu_to_node(0);
+ if (cp_node == NUMA_NO_NODE)
+ cp_node = 0;
+ set_dev_node(&pci_dev->dev, cp_node);
+ }
ctrl_info = pqi_alloc_ctrl_info(node);
if (!ctrl_info) {
@@ -6653,6 +7794,8 @@ static void pqi_pci_remove(struct pci_dev *pci_dev)
if (!ctrl_info)
return;
+ ctrl_info->in_shutdown = true;
+
pqi_remove_ctrl(ctrl_info);
}
@@ -6670,6 +7813,7 @@ static void pqi_shutdown(struct pci_dev *pci_dev)
* storage.
*/
rc = pqi_flush_cache(ctrl_info, SHUTDOWN);
+ pqi_free_interrupts(ctrl_info);
pqi_reset(ctrl_info);
if (rc == 0)
return;
@@ -6714,11 +7858,12 @@ static __maybe_unused int pqi_suspend(struct pci_dev *pci_dev, pm_message_t stat
pqi_cancel_rescan_worker(ctrl_info);
pqi_wait_until_scan_finished(ctrl_info);
pqi_wait_until_lun_reset_finished(ctrl_info);
+ pqi_wait_until_ofa_finished(ctrl_info);
pqi_flush_cache(ctrl_info, SUSPEND);
pqi_ctrl_block_requests(ctrl_info);
pqi_ctrl_wait_until_quiesced(ctrl_info);
pqi_wait_until_inbound_queues_empty(ctrl_info);
- pqi_ctrl_wait_for_pending_io(ctrl_info);
+ pqi_ctrl_wait_for_pending_io(ctrl_info, NO_TIMEOUT);
pqi_stop_heartbeat_timer(ctrl_info);
if (state.event == PM_EVENT_FREEZE)
@@ -6804,6 +7949,14 @@ static const struct pci_device_id pqi_pci_id_table[] = {
},
{
PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+ 0x193d, 0xc460)
+ },
+ {
+ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+ 0x193d, 0xc461)
+ },
+ {
+ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
0x193d, 0xf460)
},
{
@@ -6840,6 +7993,30 @@ static const struct pci_device_id pqi_pci_id_table[] = {
},
{
PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+ 0x19e5, 0xd227)
+ },
+ {
+ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+ 0x19e5, 0xd228)
+ },
+ {
+ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+ 0x19e5, 0xd229)
+ },
+ {
+ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+ 0x19e5, 0xd22a)
+ },
+ {
+ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+ 0x19e5, 0xd22b)
+ },
+ {
+ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+ 0x19e5, 0xd22c)
+ },
+ {
+ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
PCI_VENDOR_ID_ADAPTEC2, 0x0110)
},
{
diff --git a/drivers/scsi/smartpqi/smartpqi_sas_transport.c b/drivers/scsi/smartpqi/smartpqi_sas_transport.c
index b209a35e482e..0e4ef215115f 100644
--- a/drivers/scsi/smartpqi/smartpqi_sas_transport.c
+++ b/drivers/scsi/smartpqi/smartpqi_sas_transport.c
@@ -17,9 +17,11 @@
*/
#include <linux/kernel.h>
+#include <linux/bsg-lib.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_transport_sas.h>
+#include <asm/unaligned.h>
#include "smartpqi.h"
static struct pqi_sas_phy *pqi_alloc_sas_phy(struct pqi_sas_port *pqi_sas_port)
@@ -97,14 +99,32 @@ static int pqi_sas_port_add_rphy(struct pqi_sas_port *pqi_sas_port,
identify = &rphy->identify;
identify->sas_address = pqi_sas_port->sas_address;
- identify->initiator_port_protocols = SAS_PROTOCOL_STP;
- identify->target_port_protocols = SAS_PROTOCOL_STP;
+
+ if (pqi_sas_port->device &&
+ pqi_sas_port->device->is_expander_smp_device) {
+ identify->initiator_port_protocols = SAS_PROTOCOL_SMP;
+ identify->target_port_protocols = SAS_PROTOCOL_SMP;
+ } else {
+ identify->initiator_port_protocols = SAS_PROTOCOL_STP;
+ identify->target_port_protocols = SAS_PROTOCOL_STP;
+ }
return sas_rphy_add(rphy);
}
+static struct sas_rphy *pqi_sas_rphy_alloc(struct pqi_sas_port *pqi_sas_port)
+{
+ if (pqi_sas_port->device &&
+ pqi_sas_port->device->is_expander_smp_device)
+ return sas_expander_alloc(pqi_sas_port->port,
+ SAS_FANOUT_EXPANDER_DEVICE);
+
+ return sas_end_device_alloc(pqi_sas_port->port);
+}
+
static struct pqi_sas_port *pqi_alloc_sas_port(
- struct pqi_sas_node *pqi_sas_node, u64 sas_address)
+ struct pqi_sas_node *pqi_sas_node, u64 sas_address,
+ struct pqi_scsi_dev *device)
{
int rc;
struct pqi_sas_port *pqi_sas_port;
@@ -127,6 +147,7 @@ static struct pqi_sas_port *pqi_alloc_sas_port(
pqi_sas_port->port = port;
pqi_sas_port->sas_address = sas_address;
+ pqi_sas_port->device = device;
list_add_tail(&pqi_sas_port->port_list_entry,
&pqi_sas_node->port_list_head);
@@ -146,7 +167,7 @@ static void pqi_free_sas_port(struct pqi_sas_port *pqi_sas_port)
struct pqi_sas_phy *next;
list_for_each_entry_safe(pqi_sas_phy, next,
- &pqi_sas_port->phy_list_head, phy_list_entry)
+ &pqi_sas_port->phy_list_head, phy_list_entry)
pqi_free_sas_phy(pqi_sas_phy);
sas_port_delete(pqi_sas_port->port);
@@ -176,7 +197,7 @@ static void pqi_free_sas_node(struct pqi_sas_node *pqi_sas_node)
return;
list_for_each_entry_safe(pqi_sas_port, next,
- &pqi_sas_node->port_list_head, port_list_entry)
+ &pqi_sas_node->port_list_head, port_list_entry)
pqi_free_sas_port(pqi_sas_port);
kfree(pqi_sas_node);
@@ -206,13 +227,14 @@ int pqi_add_sas_host(struct Scsi_Host *shost, struct pqi_ctrl_info *ctrl_info)
struct pqi_sas_port *pqi_sas_port;
struct pqi_sas_phy *pqi_sas_phy;
- parent_dev = &shost->shost_gendev;
+ parent_dev = &shost->shost_dev;
pqi_sas_node = pqi_alloc_sas_node(parent_dev);
if (!pqi_sas_node)
return -ENOMEM;
- pqi_sas_port = pqi_alloc_sas_port(pqi_sas_node, ctrl_info->sas_address);
+ pqi_sas_port = pqi_alloc_sas_port(pqi_sas_node,
+ ctrl_info->sas_address, NULL);
if (!pqi_sas_port) {
rc = -ENODEV;
goto free_sas_node;
@@ -254,11 +276,12 @@ int pqi_add_sas_device(struct pqi_sas_node *pqi_sas_node,
struct pqi_sas_port *pqi_sas_port;
struct sas_rphy *rphy;
- pqi_sas_port = pqi_alloc_sas_port(pqi_sas_node, device->sas_address);
+ pqi_sas_port = pqi_alloc_sas_port(pqi_sas_node,
+ device->sas_address, device);
if (!pqi_sas_port)
return -ENOMEM;
- rphy = sas_end_device_alloc(pqi_sas_port->port);
+ rphy = pqi_sas_rphy_alloc(pqi_sas_port);
if (!rphy) {
rc = -ENODEV;
goto free_sas_port;
@@ -329,6 +352,128 @@ static int pqi_sas_phy_speed(struct sas_phy *phy,
return -EINVAL;
}
+#define CSMI_IOCTL_TIMEOUT 60
+#define SMP_CRC_FIELD_LENGTH 4
+
+static struct bmic_csmi_smp_passthru_buffer *
+pqi_build_csmi_smp_passthru_buffer(struct sas_rphy *rphy,
+ struct bsg_job *job)
+{
+ struct bmic_csmi_smp_passthru_buffer *smp_buf;
+ struct bmic_csmi_ioctl_header *ioctl_header;
+ struct bmic_csmi_smp_passthru *parameters;
+ u32 req_size;
+ u32 resp_size;
+
+ smp_buf = kzalloc(sizeof(*smp_buf), GFP_KERNEL);
+ if (!smp_buf)
+ return NULL;
+
+ req_size = job->request_payload.payload_len;
+ resp_size = job->reply_payload.payload_len;
+
+ ioctl_header = &smp_buf->ioctl_header;
+ put_unaligned_le32(sizeof(smp_buf->ioctl_header),
+ &ioctl_header->header_length);
+ put_unaligned_le32(CSMI_IOCTL_TIMEOUT, &ioctl_header->timeout);
+ put_unaligned_le32(CSMI_CC_SAS_SMP_PASSTHRU,
+ &ioctl_header->control_code);
+ put_unaligned_le32(sizeof(smp_buf->parameters), &ioctl_header->length);
+
+ parameters = &smp_buf->parameters;
+ parameters->phy_identifier = rphy->identify.phy_identifier;
+ parameters->port_identifier = 0;
+ parameters->connection_rate = 0;
+ put_unaligned_be64(rphy->identify.sas_address,
+ &parameters->destination_sas_address);
+
+ if (req_size > SMP_CRC_FIELD_LENGTH)
+ req_size -= SMP_CRC_FIELD_LENGTH;
+
+ put_unaligned_le32(req_size, &parameters->request_length);
+
+ put_unaligned_le32(resp_size, &parameters->response_length);
+
+ sg_copy_to_buffer(job->request_payload.sg_list,
+ job->reply_payload.sg_cnt, &parameters->request,
+ req_size);
+
+ return smp_buf;
+}
+
+static unsigned int pqi_build_sas_smp_handler_reply(
+ struct bmic_csmi_smp_passthru_buffer *smp_buf, struct bsg_job *job,
+ struct pqi_raid_error_info *error_info)
+{
+ sg_copy_from_buffer(job->reply_payload.sg_list,
+ job->reply_payload.sg_cnt, &smp_buf->parameters.response,
+ le32_to_cpu(smp_buf->parameters.response_length));
+
+ job->reply_len = le16_to_cpu(error_info->sense_data_length);
+ memcpy(job->reply, error_info->data,
+ le16_to_cpu(error_info->sense_data_length));
+
+ return job->reply_payload.payload_len -
+ get_unaligned_le32(&error_info->data_in_transferred);
+}
+
+void pqi_sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
+ struct sas_rphy *rphy)
+{
+ int rc;
+ struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost);
+ struct bmic_csmi_smp_passthru_buffer *smp_buf;
+ struct pqi_raid_error_info error_info;
+ unsigned int reslen = 0;
+
+ pqi_ctrl_busy(ctrl_info);
+
+ if (job->reply_payload.payload_len == 0) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if (!rphy) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (rphy->identify.device_type != SAS_FANOUT_EXPANDER_DEVICE) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (job->request_payload.sg_cnt > 1 || job->reply_payload.sg_cnt > 1) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (pqi_ctrl_offline(ctrl_info)) {
+ rc = -ENXIO;
+ goto out;
+ }
+
+ if (pqi_ctrl_blocked(ctrl_info)) {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ smp_buf = pqi_build_csmi_smp_passthru_buffer(rphy, job);
+ if (!smp_buf) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = pqi_csmi_smp_passthru(ctrl_info, smp_buf, sizeof(*smp_buf),
+ &error_info);
+ if (rc)
+ goto out;
+
+ reslen = pqi_build_sas_smp_handler_reply(smp_buf, job, &error_info);
+out:
+ bsg_job_done(job, rc, reslen);
+ pqi_ctrl_unbusy(ctrl_info);
+}
struct sas_function_template pqi_sas_transport_functions = {
.get_linkerrors = pqi_sas_get_linkerrors,
.get_enclosure_identifier = pqi_sas_get_enclosure_identifier,
@@ -338,4 +483,5 @@ struct sas_function_template pqi_sas_transport_functions = {
.phy_setup = pqi_sas_phy_setup,
.phy_release = pqi_sas_phy_release,
.set_phy_speed = pqi_sas_phy_speed,
+ .smp_handler = pqi_sas_smp_handler,
};
diff --git a/drivers/scsi/smartpqi/smartpqi_sis.c b/drivers/scsi/smartpqi/smartpqi_sis.c
index ea91658c7060..dcd11c6418cc 100644
--- a/drivers/scsi/smartpqi/smartpqi_sis.c
+++ b/drivers/scsi/smartpqi/smartpqi_sis.c
@@ -34,6 +34,7 @@
#define SIS_REENABLE_SIS_MODE 0x1
#define SIS_ENABLE_MSIX 0x40
#define SIS_ENABLE_INTX 0x80
+#define SIS_SOFT_RESET 0x100
#define SIS_CMD_READY 0x200
#define SIS_TRIGGER_SHUTDOWN 0x800000
#define SIS_PQI_RESET_QUIESCE 0x1000000
@@ -59,7 +60,7 @@
#define SIS_CTRL_KERNEL_UP 0x80
#define SIS_CTRL_KERNEL_PANIC 0x100
-#define SIS_CTRL_READY_TIMEOUT_SECS 30
+#define SIS_CTRL_READY_TIMEOUT_SECS 180
#define SIS_CTRL_READY_RESUME_TIMEOUT_SECS 90
#define SIS_CTRL_READY_POLL_INTERVAL_MSECS 10
@@ -90,7 +91,7 @@ static int sis_wait_for_ctrl_ready_with_timeout(struct pqi_ctrl_info *ctrl_info,
unsigned long timeout;
u32 status;
- timeout = (timeout_secs * HZ) + jiffies;
+ timeout = (timeout_secs * PQI_HZ) + jiffies;
while (1) {
status = readl(&ctrl_info->registers->sis_firmware_status);
@@ -202,7 +203,7 @@ static int sis_send_sync_cmd(struct pqi_ctrl_info *ctrl_info,
* the top of the loop in order to give the controller time to start
* processing the command before we start polling.
*/
- timeout = (SIS_CMD_COMPLETE_TIMEOUT_SECS * HZ) + jiffies;
+ timeout = (SIS_CMD_COMPLETE_TIMEOUT_SECS * PQI_HZ) + jiffies;
while (1) {
msleep(SIS_CMD_COMPLETE_POLL_INTERVAL_MSECS);
doorbell = readl(&registers->sis_ctrl_to_host_doorbell);
@@ -348,7 +349,7 @@ static int sis_wait_for_doorbell_bit_to_clear(
u32 doorbell_register;
unsigned long timeout;
- timeout = (SIS_DOORBELL_BIT_CLEAR_TIMEOUT_SECS * HZ) + jiffies;
+ timeout = (SIS_DOORBELL_BIT_CLEAR_TIMEOUT_SECS * PQI_HZ) + jiffies;
while (1) {
doorbell_register =
@@ -420,6 +421,12 @@ u32 sis_read_driver_scratch(struct pqi_ctrl_info *ctrl_info)
return readl(&ctrl_info->registers->sis_driver_scratch);
}
+void sis_soft_reset(struct pqi_ctrl_info *ctrl_info)
+{
+ writel(SIS_SOFT_RESET,
+ &ctrl_info->registers->sis_host_to_ctrl_doorbell);
+}
+
static void __attribute__((unused)) verify_structures(void)
{
BUILD_BUG_ON(offsetof(struct sis_base_struct,
diff --git a/drivers/scsi/smartpqi/smartpqi_sis.h b/drivers/scsi/smartpqi/smartpqi_sis.h
index 2bf889dbf5ab..d018cb9c3f82 100644
--- a/drivers/scsi/smartpqi/smartpqi_sis.h
+++ b/drivers/scsi/smartpqi/smartpqi_sis.h
@@ -33,5 +33,6 @@ int sis_pqi_reset_quiesce(struct pqi_ctrl_info *ctrl_info);
int sis_reenable_sis_mode(struct pqi_ctrl_info *ctrl_info);
void sis_write_driver_scratch(struct pqi_ctrl_info *ctrl_info, u32 value);
u32 sis_read_driver_scratch(struct pqi_ctrl_info *ctrl_info);
+void sis_soft_reset(struct pqi_ctrl_info *ctrl_info);
#endif /* _SMARTPQI_SIS_H */
diff --git a/drivers/scsi/snic/snic_main.c b/drivers/scsi/snic/snic_main.c
index 5295277d6325..5e824fd6047a 100644
--- a/drivers/scsi/snic/snic_main.c
+++ b/drivers/scsi/snic/snic_main.c
@@ -127,7 +127,6 @@ static struct scsi_host_template snic_host_template = {
.this_id = -1,
.cmd_per_lun = SNIC_DFLT_QUEUE_DEPTH,
.can_queue = SNIC_MAX_IO_REQ,
- .use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = SNIC_MAX_SG_DESC_CNT,
.max_sectors = 0x800,
.shost_attrs = snic_attrs,
diff --git a/drivers/scsi/snic/snic_trc.c b/drivers/scsi/snic/snic_trc.c
index fc60c933d6c0..458eaba24c78 100644
--- a/drivers/scsi/snic/snic_trc.c
+++ b/drivers/scsi/snic/snic_trc.c
@@ -126,7 +126,7 @@ snic_trc_init(void)
int tbuf_sz = 0, ret;
tbuf_sz = (snic_trace_max_pages * PAGE_SIZE);
- tbuf = vmalloc(tbuf_sz);
+ tbuf = vzalloc(tbuf_sz);
if (!tbuf) {
SNIC_ERR("Failed to Allocate Trace Buffer Size. %d\n", tbuf_sz);
SNIC_ERR("Trace Facility not enabled.\n");
@@ -135,7 +135,6 @@ snic_trc_init(void)
return ret;
}
- memset(tbuf, 0, tbuf_sz);
trc->buf = (struct snic_trc_data *) tbuf;
spin_lock_init(&trc->lock);
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 54dd70ae9731..38ddbbfe5f3c 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -80,7 +80,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_WORM);
static DEFINE_MUTEX(sr_mutex);
static int sr_probe(struct device *);
static int sr_remove(struct device *);
-static int sr_init_command(struct scsi_cmnd *SCpnt);
+static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt);
static int sr_done(struct scsi_cmnd *);
static int sr_runtime_suspend(struct device *dev);
@@ -384,22 +384,22 @@ static int sr_done(struct scsi_cmnd *SCpnt)
return good_bytes;
}
-static int sr_init_command(struct scsi_cmnd *SCpnt)
+static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt)
{
int block = 0, this_count, s_size;
struct scsi_cd *cd;
struct request *rq = SCpnt->request;
- int ret;
+ blk_status_t ret;
ret = scsi_init_io(SCpnt);
- if (ret != BLKPREP_OK)
+ if (ret != BLK_STS_OK)
goto out;
WARN_ON_ONCE(SCpnt != rq->special);
cd = scsi_cd(rq->rq_disk);
/* from here on until we're complete, any goto out
* is used for a killable error condition */
- ret = BLKPREP_KILL;
+ ret = BLK_STS_IOERR;
SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt,
"Doing sr request, block = %d\n", block));
@@ -516,7 +516,7 @@ static int sr_init_command(struct scsi_cmnd *SCpnt)
* This indicates that the command is ready from our end to be
* queued.
*/
- ret = BLKPREP_OK;
+ ret = BLK_STS_OK;
out:
return ret;
}
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 307df2fa39a3..7ff22d3f03e3 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -530,7 +530,7 @@ static void st_scsi_execute_end(struct request *req, blk_status_t status)
complete(SRpnt->waiting);
blk_rq_unmap_user(tmp);
- __blk_put_request(req->q, req);
+ blk_put_request(req);
}
static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
index 9b20643ab49d..f6bef7ad65e7 100644
--- a/drivers/scsi/stex.c
+++ b/drivers/scsi/stex.c
@@ -1489,6 +1489,7 @@ static struct scsi_host_template driver_template = {
.eh_abort_handler = stex_abort,
.eh_host_reset_handler = stex_reset,
.this_id = -1,
+ .dma_boundary = PAGE_SIZE - 1,
};
static struct pci_device_id stex_pci_tbl[] = {
@@ -1617,19 +1618,6 @@ static struct st_card_info stex_card_info[] = {
},
};
-static int stex_set_dma_mask(struct pci_dev * pdev)
-{
- int ret;
-
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))
- && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
- return 0;
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (!ret)
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- return ret;
-}
-
static int stex_request_irq(struct st_hba *hba)
{
struct pci_dev *pdev = hba->pdev;
@@ -1710,7 +1698,9 @@ static int stex_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto out_release_regions;
}
- err = stex_set_dma_mask(pdev);
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err)
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
printk(KERN_ERR DRV_NAME "(%s): set dma mask failed\n",
pci_name(pdev));
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 8f88348ebe42..84380bae20f1 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1698,7 +1698,6 @@ static struct scsi_host_template scsi_driver = {
.slave_configure = storvsc_device_configure,
.cmd_per_lun = 2048,
.this_id = -1,
- .use_clustering = ENABLE_CLUSTERING,
/* Make sure we dont get a sg segment crosses a page boundary */
.dma_boundary = PAGE_SIZE-1,
.no_write_same = 1,
diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c
index 9492638296c8..95a7ea7eefa0 100644
--- a/drivers/scsi/sun3_scsi.c
+++ b/drivers/scsi/sun3_scsi.c
@@ -500,7 +500,7 @@ static struct scsi_host_template sun3_scsi_template = {
.this_id = 7,
.sg_tablesize = SG_NONE,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.cmd_size = NCR5380_CMD_SIZE,
};
diff --git a/drivers/scsi/sun_esp.c b/drivers/scsi/sun_esp.c
index a11efbcb7f8b..c71bd01fef94 100644
--- a/drivers/scsi/sun_esp.c
+++ b/drivers/scsi/sun_esp.c
@@ -529,11 +529,10 @@ static int esp_sbus_probe(struct platform_device *op)
int hme = 0;
int ret;
- if (dp->parent &&
- (!strcmp(dp->parent->name, "espdma") ||
- !strcmp(dp->parent->name, "dma")))
+ if (of_node_name_eq(dp->parent, "espdma") ||
+ of_node_name_eq(dp->parent, "dma"))
dma_node = dp->parent;
- else if (!strcmp(dp->name, "SUNW,fas")) {
+ else if (of_node_name_eq(dp, "SUNW,fas")) {
dma_node = op->dev.of_node;
hme = 1;
}
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 5f10aa9bad9b..57f6d63e4c40 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -1312,9 +1312,9 @@ static struct Scsi_Host *sym_attach(struct scsi_host_template *tpnt, int unit,
sprintf(np->s.inst_name, "sym%d", np->s.unit);
if ((SYM_CONF_DMA_ADDRESSING_MODE > 0) && (np->features & FE_DAC) &&
- !pci_set_dma_mask(pdev, DMA_DAC_MASK)) {
+ !dma_set_mask(&pdev->dev, DMA_DAC_MASK)) {
set_dac(np);
- } else if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
+ } else if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
printf_warning("%s: No suitable DMA available\n", sym_name(np));
goto attach_failed;
}
@@ -1660,7 +1660,6 @@ static struct scsi_host_template sym2_template = {
.eh_bus_reset_handler = sym53c8xx_eh_bus_reset_handler,
.eh_host_reset_handler = sym53c8xx_eh_host_reset_handler,
.this_id = 7,
- .use_clustering = ENABLE_CLUSTERING,
.max_sectors = 0xFFFF,
#ifdef SYM_LINUX_PROC_INFO_SUPPORT
.show_info = sym_show_info,
diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
index 2ddd426323e9..2ddbb26d9c26 100644
--- a/drivers/scsi/ufs/Kconfig
+++ b/drivers/scsi/ufs/Kconfig
@@ -80,6 +80,14 @@ config SCSI_UFSHCD_PLATFORM
If unsure, say N.
+config SCSI_UFS_CDNS_PLATFORM
+ tristate "Cadence UFS Controller platform driver"
+ depends on SCSI_UFSHCD_PLATFORM
+ help
+ This selects the Cadence-specific additions to UFSHCD platform driver.
+
+ If unsure, say N.
+
config SCSI_UFS_DWC_TC_PLATFORM
tristate "DesignWare platform support using a G210 Test Chip"
depends on SCSI_UFSHCD_PLATFORM
diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile
index aca481329828..a3bd70c3652c 100644
--- a/drivers/scsi/ufs/Makefile
+++ b/drivers/scsi/ufs/Makefile
@@ -2,6 +2,7 @@
# UFSHCD makefile
obj-$(CONFIG_SCSI_UFS_DWC_TC_PCI) += tc-dwc-g210-pci.o ufshcd-dwc.o tc-dwc-g210.o
obj-$(CONFIG_SCSI_UFS_DWC_TC_PLATFORM) += tc-dwc-g210-pltfrm.o ufshcd-dwc.o tc-dwc-g210.o
+obj-$(CONFIG_SCSI_UFS_CDNS_PLATFORM) += cdns-pltfrm.o
obj-$(CONFIG_SCSI_UFS_QCOM) += ufs-qcom.o
obj-$(CONFIG_SCSI_UFSHCD) += ufshcd-core.o
ufshcd-core-y += ufshcd.o ufs-sysfs.o
diff --git a/drivers/scsi/ufs/cdns-pltfrm.c b/drivers/scsi/ufs/cdns-pltfrm.c
new file mode 100644
index 000000000000..4a37b4f57164
--- /dev/null
+++ b/drivers/scsi/ufs/cdns-pltfrm.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Platform UFS Host driver for Cadence controller
+ *
+ * Copyright (C) 2018 Cadence Design Systems, Inc.
+ *
+ * Authors:
+ * Jan Kotas <jank@cadence.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/time.h>
+
+#include "ufshcd-pltfrm.h"
+
+#define CDNS_UFS_REG_HCLKDIV 0xFC
+
+/**
+ * Sets HCLKDIV register value based on the core_clk
+ * @hba: host controller instance
+ *
+ * Return zero for success and non-zero for failure
+ */
+static int cdns_ufs_set_hclkdiv(struct ufs_hba *hba)
+{
+ struct ufs_clk_info *clki;
+ struct list_head *head = &hba->clk_list_head;
+ unsigned long core_clk_rate = 0;
+ u32 core_clk_div = 0;
+
+ if (list_empty(head))
+ return 0;
+
+ list_for_each_entry(clki, head, list) {
+ if (IS_ERR_OR_NULL(clki->clk))
+ continue;
+ if (!strcmp(clki->name, "core_clk"))
+ core_clk_rate = clk_get_rate(clki->clk);
+ }
+
+ if (!core_clk_rate) {
+ dev_err(hba->dev, "%s: unable to find core_clk rate\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ core_clk_div = core_clk_rate / USEC_PER_SEC;
+
+ ufshcd_writel(hba, core_clk_div, CDNS_UFS_REG_HCLKDIV);
+ /**
+ * Make sure the register was updated,
+ * UniPro layer will not work with an incorrect value.
+ */
+ mb();
+
+ return 0;
+}
+
+/**
+ * Sets clocks used by the controller
+ * @hba: host controller instance
+ * @on: if true, enable clocks, otherwise disable
+ * @status: notify stage (pre, post change)
+ *
+ * Return zero for success and non-zero for failure
+ */
+static int cdns_ufs_setup_clocks(struct ufs_hba *hba, bool on,
+ enum ufs_notify_change_status status)
+{
+ if ((!on) || (status == PRE_CHANGE))
+ return 0;
+
+ return cdns_ufs_set_hclkdiv(hba);
+}
+
+static struct ufs_hba_variant_ops cdns_pltfm_hba_vops = {
+ .name = "cdns-ufs-pltfm",
+ .setup_clocks = cdns_ufs_setup_clocks,
+};
+
+/**
+ * cdns_ufs_pltfrm_probe - probe routine of the driver
+ * @pdev: pointer to platform device handle
+ *
+ * Return zero for success and non-zero for failure
+ */
+static int cdns_ufs_pltfrm_probe(struct platform_device *pdev)
+{
+ int err;
+ struct device *dev = &pdev->dev;
+
+ /* Perform generic probe */
+ err = ufshcd_pltfrm_init(pdev, &cdns_pltfm_hba_vops);
+ if (err)
+ dev_err(dev, "ufshcd_pltfrm_init() failed %d\n", err);
+
+ return err;
+}
+
+/**
+ * cdns_ufs_pltfrm_remove - removes the ufs driver
+ * @pdev: pointer to platform device handle
+ *
+ * Always returns 0
+ */
+static int cdns_ufs_pltfrm_remove(struct platform_device *pdev)
+{
+ struct ufs_hba *hba = platform_get_drvdata(pdev);
+
+ ufshcd_remove(hba);
+ return 0;
+}
+
+static const struct of_device_id cdns_ufs_of_match[] = {
+ { .compatible = "cdns,ufshc" },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, cdns_ufs_of_match);
+
+static const struct dev_pm_ops cdns_ufs_dev_pm_ops = {
+ .suspend = ufshcd_pltfrm_suspend,
+ .resume = ufshcd_pltfrm_resume,
+ .runtime_suspend = ufshcd_pltfrm_runtime_suspend,
+ .runtime_resume = ufshcd_pltfrm_runtime_resume,
+ .runtime_idle = ufshcd_pltfrm_runtime_idle,
+};
+
+static struct platform_driver cdns_ufs_pltfrm_driver = {
+ .probe = cdns_ufs_pltfrm_probe,
+ .remove = cdns_ufs_pltfrm_remove,
+ .driver = {
+ .name = "cdns-ufshcd",
+ .pm = &cdns_ufs_dev_pm_ops,
+ .of_match_table = cdns_ufs_of_match,
+ },
+};
+
+module_platform_driver(cdns_ufs_pltfrm_driver);
+
+MODULE_AUTHOR("Jan Kotas <jank@cadence.com>");
+MODULE_DESCRIPTION("Cadence UFS host controller platform driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(UFSHCD_DRIVER_VERSION);
diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 58087d3916d0..dd65fea07687 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -46,7 +46,7 @@
#define QUERY_DESC_HDR_SIZE 2
#define QUERY_OSF_SIZE (GENERAL_UPIU_REQUEST_SIZE - \
(sizeof(struct utp_upiu_header)))
-#define RESPONSE_UPIU_SENSE_DATA_LENGTH 18
+#define UFS_SENSE_SIZE 18
#define UPIU_HEADER_DWORD(byte3, byte2, byte1, byte0)\
cpu_to_be32((byte3 << 24) | (byte2 << 16) |\
@@ -378,6 +378,20 @@ enum query_opcode {
UPIU_QUERY_OPCODE_TOGGLE_FLAG = 0x8,
};
+/* bRefClkFreq attribute values */
+enum ufs_ref_clk_freq {
+ REF_CLK_FREQ_19_2_MHZ = 0,
+ REF_CLK_FREQ_26_MHZ = 1,
+ REF_CLK_FREQ_38_4_MHZ = 2,
+ REF_CLK_FREQ_52_MHZ = 3,
+ REF_CLK_FREQ_INVAL = -1,
+};
+
+struct ufs_ref_clk {
+ unsigned long freq_hz;
+ enum ufs_ref_clk_freq val;
+};
+
/* Query response result code */
enum {
QUERY_RESULT_SUCCESS = 0x00,
@@ -444,7 +458,7 @@ struct utp_cmd_rsp {
__be32 residual_transfer_count;
__be32 reserved[4];
__be16 sense_data_len;
- u8 sense_data[RESPONSE_UPIU_SENSE_DATA_LENGTH];
+ u8 sense_data[UFS_SENSE_SIZE];
};
/**
diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c
index e5f8e54bf644..775bb4e5e36e 100644
--- a/drivers/scsi/ufs/ufs_bsg.c
+++ b/drivers/scsi/ufs/ufs_bsg.c
@@ -157,7 +157,7 @@ void ufs_bsg_remove(struct ufs_hba *hba)
if (!hba->bsg_queue)
return;
- bsg_unregister_queue(hba->bsg_queue);
+ bsg_remove_queue(hba->bsg_queue);
device_del(bsg_dev);
put_device(bsg_dev);
@@ -193,7 +193,7 @@ int ufs_bsg_probe(struct ufs_hba *hba)
if (ret)
goto out;
- q = bsg_setup_queue(bsg_dev, dev_name(bsg_dev), ufs_bsg_request, 0);
+ q = bsg_setup_queue(bsg_dev, dev_name(bsg_dev), ufs_bsg_request, NULL, 0);
if (IS_ERR(q)) {
ret = PTR_ERR(q);
goto out;
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index f1c57cd33b5b..9ba7671b84f8 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -51,8 +51,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ufs.h>
-#define UFSHCD_REQ_SENSE_SIZE 18
-
#define UFSHCD_ENABLE_INTRS (UTP_TRANSFER_REQ_COMPL |\
UTP_TASK_REQ_COMPL |\
UFSHCD_ERROR_MASK)
@@ -1551,6 +1549,7 @@ start:
* currently running. Hence, fall through to cancel gating
* work and to enable clocks.
*/
+ /* fallthrough */
case CLKS_OFF:
ufshcd_scsi_block_requests(hba);
hba->clk_gating.state = REQ_CLKS_ON;
@@ -1562,6 +1561,7 @@ start:
* fall through to check if we should wait for this
* work to be done or not.
*/
+ /* fallthrough */
case REQ_CLKS_ON:
if (async) {
rc = -EAGAIN;
@@ -1890,11 +1890,10 @@ static inline void ufshcd_copy_sense_data(struct ufshcd_lrb *lrbp)
int len_to_copy;
len = be16_to_cpu(lrbp->ucd_rsp_ptr->sr.sense_data_len);
- len_to_copy = min_t(int, RESPONSE_UPIU_SENSE_DATA_LENGTH, len);
+ len_to_copy = min_t(int, UFS_SENSE_SIZE, len);
- memcpy(lrbp->sense_buffer,
- lrbp->ucd_rsp_ptr->sr.sense_data,
- min_t(int, len_to_copy, UFSHCD_REQ_SENSE_SIZE));
+ memcpy(lrbp->sense_buffer, lrbp->ucd_rsp_ptr->sr.sense_data,
+ len_to_copy);
}
}
@@ -2456,7 +2455,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
WARN_ON(lrbp->cmd);
lrbp->cmd = cmd;
- lrbp->sense_bufflen = UFSHCD_REQ_SENSE_SIZE;
+ lrbp->sense_bufflen = UFS_SENSE_SIZE;
lrbp->sense_buffer = cmd->sense_buffer;
lrbp->task_tag = tag;
lrbp->lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun);
@@ -4620,6 +4619,7 @@ ufshcd_scsi_cmd_status(struct ufshcd_lrb *lrbp, int scsi_status)
switch (scsi_status) {
case SAM_STAT_CHECK_CONDITION:
ufshcd_copy_sense_data(lrbp);
+ /* fallthrough */
case SAM_STAT_GOOD:
result |= DID_OK << 16 |
COMMAND_COMPLETE << 8 |
@@ -6701,6 +6701,74 @@ static void ufshcd_def_desc_sizes(struct ufs_hba *hba)
hba->desc_size.hlth_desc = QUERY_DESC_HEALTH_DEF_SIZE;
}
+static struct ufs_ref_clk ufs_ref_clk_freqs[] = {
+ {19200000, REF_CLK_FREQ_19_2_MHZ},
+ {26000000, REF_CLK_FREQ_26_MHZ},
+ {38400000, REF_CLK_FREQ_38_4_MHZ},
+ {52000000, REF_CLK_FREQ_52_MHZ},
+ {0, REF_CLK_FREQ_INVAL},
+};
+
+static enum ufs_ref_clk_freq
+ufs_get_bref_clk_from_hz(unsigned long freq)
+{
+ int i;
+
+ for (i = 0; ufs_ref_clk_freqs[i].freq_hz; i++)
+ if (ufs_ref_clk_freqs[i].freq_hz == freq)
+ return ufs_ref_clk_freqs[i].val;
+
+ return REF_CLK_FREQ_INVAL;
+}
+
+void ufshcd_parse_dev_ref_clk_freq(struct ufs_hba *hba, struct clk *refclk)
+{
+ unsigned long freq;
+
+ freq = clk_get_rate(refclk);
+
+ hba->dev_ref_clk_freq =
+ ufs_get_bref_clk_from_hz(freq);
+
+ if (hba->dev_ref_clk_freq == REF_CLK_FREQ_INVAL)
+ dev_err(hba->dev,
+ "invalid ref_clk setting = %ld\n", freq);
+}
+
+static int ufshcd_set_dev_ref_clk(struct ufs_hba *hba)
+{
+ int err;
+ u32 ref_clk;
+ u32 freq = hba->dev_ref_clk_freq;
+
+ err = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR,
+ QUERY_ATTR_IDN_REF_CLK_FREQ, 0, 0, &ref_clk);
+
+ if (err) {
+ dev_err(hba->dev, "failed reading bRefClkFreq. err = %d\n",
+ err);
+ goto out;
+ }
+
+ if (ref_clk == freq)
+ goto out; /* nothing to update */
+
+ err = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_WRITE_ATTR,
+ QUERY_ATTR_IDN_REF_CLK_FREQ, 0, 0, &freq);
+
+ if (err) {
+ dev_err(hba->dev, "bRefClkFreq setting to %lu Hz failed\n",
+ ufs_ref_clk_freqs[freq].freq_hz);
+ goto out;
+ }
+
+ dev_dbg(hba->dev, "bRefClkFreq setting to %lu Hz succeeded\n",
+ ufs_ref_clk_freqs[freq].freq_hz);
+
+out:
+ return err;
+}
+
/**
* ufshcd_probe_hba - probe hba to detect device and initialize
* @hba: per-adapter instance
@@ -6766,6 +6834,12 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
"%s: Failed getting max supported power mode\n",
__func__);
} else {
+ /*
+ * Set the right value to bRefClkFreq before attempting to
+ * switch to HS gears.
+ */
+ if (hba->dev_ref_clk_freq != REF_CLK_FREQ_INVAL)
+ ufshcd_set_dev_ref_clk(hba);
ret = ufshcd_config_pwr_mode(hba, &hba->max_pwr_info.info);
if (ret) {
dev_err(hba->dev, "%s: Failed setting power mode, err = %d\n",
@@ -6910,6 +6984,7 @@ static struct scsi_host_template ufshcd_driver_template = {
.max_host_blocked = 1,
.track_queue_depth = 1,
.sdev_groups = ufshcd_driver_groups,
+ .dma_boundary = PAGE_SIZE - 1,
};
static int ufshcd_config_vreg_load(struct device *dev, struct ufs_vreg *vreg,
@@ -7252,6 +7327,14 @@ static int ufshcd_init_clocks(struct ufs_hba *hba)
goto out;
}
+ /*
+ * Parse device ref clk freq as per device tree "ref_clk".
+ * Default dev_ref_clk_freq is set to REF_CLK_FREQ_INVAL
+ * in ufshcd_alloc_host().
+ */
+ if (!strcmp(clki->name, "ref_clk"))
+ ufshcd_parse_dev_ref_clk_freq(hba, clki->clk);
+
if (clki->max_freq) {
ret = clk_set_rate(clki->clk, clki->max_freq);
if (ret) {
@@ -7379,19 +7462,19 @@ ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp)
0,
0,
0,
- UFSHCD_REQ_SENSE_SIZE,
+ UFS_SENSE_SIZE,
0};
char *buffer;
int ret;
- buffer = kzalloc(UFSHCD_REQ_SENSE_SIZE, GFP_KERNEL);
+ buffer = kzalloc(UFS_SENSE_SIZE, GFP_KERNEL);
if (!buffer) {
ret = -ENOMEM;
goto out;
}
ret = scsi_execute(sdp, cmd, DMA_FROM_DEVICE, buffer,
- UFSHCD_REQ_SENSE_SIZE, NULL, NULL,
+ UFS_SENSE_SIZE, NULL, NULL,
msecs_to_jiffies(1000), 3, 0, RQF_PM, NULL);
if (ret)
pr_err("%s: failed with err %d\n", __func__, ret);
@@ -8105,6 +8188,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle)
hba->host = host;
hba->dev = dev;
*hba_handle = hba;
+ hba->dev_ref_clk_freq = REF_CLK_FREQ_INVAL;
INIT_LIST_HEAD(&hba->clk_list_head);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 1a1c2b487a4e..69ba7445d2b3 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -550,6 +550,7 @@ struct ufs_hba {
void *priv;
unsigned int irq;
bool is_irq_enabled;
+ enum ufs_ref_clk_freq dev_ref_clk_freq;
/* Interrupt aggregation support is broken */
#define UFSHCD_QUIRK_BROKEN_INTR_AGGR 0x1
@@ -768,6 +769,7 @@ void ufshcd_remove(struct ufs_hba *);
int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
u32 val, unsigned long interval_us,
unsigned long timeout_ms, bool can_sleep);
+void ufshcd_parse_dev_ref_clk_freq(struct ufs_hba *hba, struct clk *refclk);
static inline void check_upiu_size(void)
{
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 1c72db94270e..772b976e4ee4 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -68,33 +68,6 @@ struct virtio_scsi_vq {
struct virtqueue *vq;
};
-/*
- * Per-target queue state.
- *
- * This struct holds the data needed by the queue steering policy. When a
- * target is sent multiple requests, we need to drive them to the same queue so
- * that FIFO processing order is kept. However, if a target was idle, we can
- * choose a queue arbitrarily. In this case the queue is chosen according to
- * the current VCPU, so the driver expects the number of request queues to be
- * equal to the number of VCPUs. This makes it easy and fast to select the
- * queue, and also lets the driver optimize the IRQ affinity for the virtqueues
- * (each virtqueue's affinity is set to the CPU that "owns" the queue).
- *
- * tgt_seq is held to serialize reading and writing req_vq.
- *
- * Decrements of reqs are never concurrent with writes of req_vq: before the
- * decrement reqs will be != 0; after the decrement the virtqueue completion
- * routine will not use the req_vq so it can be changed by a new request.
- * Thus they can happen outside the tgt_seq, provided of course we make reqs
- * an atomic_t.
- */
-struct virtio_scsi_target_state {
- seqcount_t tgt_seq;
-
- /* Currently active virtqueue for requests sent to this target. */
- struct virtio_scsi_vq *req_vq;
-};
-
/* Driver instance state */
struct virtio_scsi {
struct virtio_device *vdev;
@@ -693,34 +666,12 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
return virtscsi_tmf(vscsi, cmd);
}
-static int virtscsi_target_alloc(struct scsi_target *starget)
-{
- struct Scsi_Host *sh = dev_to_shost(starget->dev.parent);
- struct virtio_scsi *vscsi = shost_priv(sh);
-
- struct virtio_scsi_target_state *tgt =
- kmalloc(sizeof(*tgt), GFP_KERNEL);
- if (!tgt)
- return -ENOMEM;
-
- seqcount_init(&tgt->tgt_seq);
- tgt->req_vq = &vscsi->req_vqs[0];
-
- starget->hostdata = tgt;
- return 0;
-}
-
-static void virtscsi_target_destroy(struct scsi_target *starget)
-{
- struct virtio_scsi_target_state *tgt = starget->hostdata;
- kfree(tgt);
-}
-
static int virtscsi_map_queues(struct Scsi_Host *shost)
{
struct virtio_scsi *vscsi = shost_priv(shost);
+ struct blk_mq_queue_map *qmap = &shost->tag_set.map[0];
- return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2);
+ return blk_mq_virtio_map_queues(qmap, vscsi->vdev, 2);
}
/*
@@ -747,9 +698,6 @@ static struct scsi_host_template virtscsi_host_template = {
.slave_alloc = virtscsi_device_alloc,
.dma_boundary = UINT_MAX,
- .use_clustering = ENABLE_CLUSTERING,
- .target_alloc = virtscsi_target_alloc,
- .target_destroy = virtscsi_target_destroy,
.map_queues = virtscsi_map_queues,
.track_queue_depth = 1,
.force_blk_mq = 1,
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index 0d6b2a88fc8e..ecee4b3ff073 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -1007,7 +1007,6 @@ static struct scsi_host_template pvscsi_template = {
.sg_tablesize = PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT,
.dma_boundary = UINT_MAX,
.max_sectors = 0xffff,
- .use_clustering = ENABLE_CLUSTERING,
.change_queue_depth = pvscsi_change_queue_depth,
.eh_abort_handler = pvscsi_abort,
.eh_device_reset_handler = pvscsi_device_reset,
diff --git a/drivers/scsi/wd719x.c b/drivers/scsi/wd719x.c
index 974bfb3f30f4..e3310e9488d2 100644
--- a/drivers/scsi/wd719x.c
+++ b/drivers/scsi/wd719x.c
@@ -153,8 +153,6 @@ static int wd719x_direct_cmd(struct wd719x *wd, u8 opcode, u8 dev, u8 lun,
static void wd719x_destroy(struct wd719x *wd)
{
- struct wd719x_scb *scb;
-
/* stop the RISC */
if (wd719x_direct_cmd(wd, WD719X_CMD_SLEEP, 0, 0, 0, 0,
WD719X_WAIT_FOR_RISC))
@@ -162,37 +160,35 @@ static void wd719x_destroy(struct wd719x *wd)
/* disable RISC */
wd719x_writeb(wd, WD719X_PCI_MODE_SELECT, 0);
- /* free all SCBs */
- list_for_each_entry(scb, &wd->active_scbs, list)
- pci_free_consistent(wd->pdev, sizeof(struct wd719x_scb), scb,
- scb->phys);
- list_for_each_entry(scb, &wd->free_scbs, list)
- pci_free_consistent(wd->pdev, sizeof(struct wd719x_scb), scb,
- scb->phys);
+ WARN_ON_ONCE(!list_empty(&wd->active_scbs));
+
/* free internal buffers */
- pci_free_consistent(wd->pdev, wd->fw_size, wd->fw_virt, wd->fw_phys);
+ dma_free_coherent(&wd->pdev->dev, wd->fw_size, wd->fw_virt,
+ wd->fw_phys);
wd->fw_virt = NULL;
- pci_free_consistent(wd->pdev, WD719X_HASH_TABLE_SIZE, wd->hash_virt,
- wd->hash_phys);
+ dma_free_coherent(&wd->pdev->dev, WD719X_HASH_TABLE_SIZE, wd->hash_virt,
+ wd->hash_phys);
wd->hash_virt = NULL;
- pci_free_consistent(wd->pdev, sizeof(struct wd719x_host_param),
- wd->params, wd->params_phys);
+ dma_free_coherent(&wd->pdev->dev, sizeof(struct wd719x_host_param),
+ wd->params, wd->params_phys);
wd->params = NULL;
free_irq(wd->pdev->irq, wd);
}
-/* finish a SCSI command, mark SCB (if any) as free, unmap buffers */
-static void wd719x_finish_cmd(struct scsi_cmnd *cmd, int result)
+/* finish a SCSI command, unmap buffers */
+static void wd719x_finish_cmd(struct wd719x_scb *scb, int result)
{
+ struct scsi_cmnd *cmd = scb->cmd;
struct wd719x *wd = shost_priv(cmd->device->host);
- struct wd719x_scb *scb = (struct wd719x_scb *) cmd->host_scribble;
- if (scb) {
- list_move(&scb->list, &wd->free_scbs);
- dma_unmap_single(&wd->pdev->dev, cmd->SCp.dma_handle,
- SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
- scsi_dma_unmap(cmd);
- }
+ list_del(&scb->list);
+
+ dma_unmap_single(&wd->pdev->dev, scb->phys,
+ sizeof(struct wd719x_scb), DMA_BIDIRECTIONAL);
+ scsi_dma_unmap(cmd);
+ dma_unmap_single(&wd->pdev->dev, cmd->SCp.dma_handle,
+ SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
+
cmd->result = result << 16;
cmd->scsi_done(cmd);
}
@@ -202,36 +198,10 @@ static int wd719x_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
{
int i, count_sg;
unsigned long flags;
- struct wd719x_scb *scb;
+ struct wd719x_scb *scb = scsi_cmd_priv(cmd);
struct wd719x *wd = shost_priv(sh);
- dma_addr_t phys;
- cmd->host_scribble = NULL;
-
- /* get a free SCB - either from existing ones or allocate a new one */
- spin_lock_irqsave(wd->sh->host_lock, flags);
- scb = list_first_entry_or_null(&wd->free_scbs, struct wd719x_scb, list);
- if (scb) {
- list_del(&scb->list);
- phys = scb->phys;
- } else {
- spin_unlock_irqrestore(wd->sh->host_lock, flags);
- scb = pci_alloc_consistent(wd->pdev, sizeof(struct wd719x_scb),
- &phys);
- spin_lock_irqsave(wd->sh->host_lock, flags);
- if (!scb) {
- dev_err(&wd->pdev->dev, "unable to allocate SCB\n");
- wd719x_finish_cmd(cmd, DID_ERROR);
- spin_unlock_irqrestore(wd->sh->host_lock, flags);
- return 0;
- }
- }
- memset(scb, 0, sizeof(struct wd719x_scb));
- list_add(&scb->list, &wd->active_scbs);
-
- scb->phys = phys;
scb->cmd = cmd;
- cmd->host_scribble = (char *) scb;
scb->CDB_tag = 0; /* Tagged queueing not supported yet */
scb->devid = cmd->device->id;
@@ -240,10 +210,19 @@ static int wd719x_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
/* copy the command */
memcpy(scb->CDB, cmd->cmnd, cmd->cmd_len);
+ /* map SCB */
+ scb->phys = dma_map_single(&wd->pdev->dev, scb, sizeof(*scb),
+ DMA_BIDIRECTIONAL);
+
+ if (dma_mapping_error(&wd->pdev->dev, scb->phys))
+ goto out_error;
+
/* map sense buffer */
scb->sense_buf_length = SCSI_SENSE_BUFFERSIZE;
cmd->SCp.dma_handle = dma_map_single(&wd->pdev->dev, cmd->sense_buffer,
SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&wd->pdev->dev, cmd->SCp.dma_handle))
+ goto out_unmap_scb;
scb->sense_buf = cpu_to_le32(cmd->SCp.dma_handle);
/* request autosense */
@@ -258,11 +237,8 @@ static int wd719x_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
/* Scather/gather */
count_sg = scsi_dma_map(cmd);
- if (count_sg < 0) {
- wd719x_finish_cmd(cmd, DID_ERROR);
- spin_unlock_irqrestore(wd->sh->host_lock, flags);
- return 0;
- }
+ if (count_sg < 0)
+ goto out_unmap_sense;
BUG_ON(count_sg > WD719X_SG);
if (count_sg) {
@@ -283,19 +259,33 @@ static int wd719x_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
scb->data_p = 0;
}
+ spin_lock_irqsave(wd->sh->host_lock, flags);
+
/* check if the Command register is free */
if (wd719x_readb(wd, WD719X_AMR_COMMAND) != WD719X_CMD_READY) {
spin_unlock_irqrestore(wd->sh->host_lock, flags);
return SCSI_MLQUEUE_HOST_BUSY;
}
+ list_add(&scb->list, &wd->active_scbs);
+
/* write pointer to the AMR */
wd719x_writel(wd, WD719X_AMR_SCB_IN, scb->phys);
/* send SCB opcode */
wd719x_writeb(wd, WD719X_AMR_COMMAND, WD719X_CMD_PROCESS_SCB);
spin_unlock_irqrestore(wd->sh->host_lock, flags);
+ return 0;
+out_unmap_sense:
+ dma_unmap_single(&wd->pdev->dev, cmd->SCp.dma_handle,
+ SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
+out_unmap_scb:
+ dma_unmap_single(&wd->pdev->dev, scb->phys, sizeof(*scb),
+ DMA_BIDIRECTIONAL);
+out_error:
+ cmd->result = DID_ERROR << 16;
+ cmd->scsi_done(cmd);
return 0;
}
@@ -327,8 +317,8 @@ static int wd719x_chip_init(struct wd719x *wd)
wd->fw_size = ALIGN(fw_wcs->size, 4) + fw_risc->size;
if (!wd->fw_virt)
- wd->fw_virt = pci_alloc_consistent(wd->pdev, wd->fw_size,
- &wd->fw_phys);
+ wd->fw_virt = dma_alloc_coherent(&wd->pdev->dev, wd->fw_size,
+ &wd->fw_phys, GFP_KERNEL);
if (!wd->fw_virt) {
ret = -ENOMEM;
goto wd719x_init_end;
@@ -464,7 +454,7 @@ static int wd719x_abort(struct scsi_cmnd *cmd)
{
int action, result;
unsigned long flags;
- struct wd719x_scb *scb = (struct wd719x_scb *)cmd->host_scribble;
+ struct wd719x_scb *scb = scsi_cmd_priv(cmd);
struct wd719x *wd = shost_priv(cmd->device->host);
dev_info(&wd->pdev->dev, "abort command, tag: %x\n", cmd->tag);
@@ -526,10 +516,8 @@ static int wd719x_host_reset(struct scsi_cmnd *cmd)
result = FAILED;
/* flush all SCBs */
- list_for_each_entry_safe(scb, tmp, &wd->active_scbs, list) {
- struct scsi_cmnd *tmp_cmd = scb->cmd;
- wd719x_finish_cmd(tmp_cmd, result);
- }
+ list_for_each_entry_safe(scb, tmp, &wd->active_scbs, list)
+ wd719x_finish_cmd(scb, result);
spin_unlock_irqrestore(wd->sh->host_lock, flags);
return result;
@@ -555,7 +543,6 @@ static inline void wd719x_interrupt_SCB(struct wd719x *wd,
union wd719x_regs regs,
struct wd719x_scb *scb)
{
- struct scsi_cmnd *cmd;
int result;
/* now have to find result from card */
@@ -643,9 +630,8 @@ static inline void wd719x_interrupt_SCB(struct wd719x *wd,
result = DID_ERROR;
break;
}
- cmd = scb->cmd;
- wd719x_finish_cmd(cmd, result);
+ wd719x_finish_cmd(scb, result);
}
static irqreturn_t wd719x_interrupt(int irq, void *dev_id)
@@ -809,7 +795,6 @@ static int wd719x_board_found(struct Scsi_Host *sh)
int ret;
INIT_LIST_HEAD(&wd->active_scbs);
- INIT_LIST_HEAD(&wd->free_scbs);
sh->base = pci_resource_start(wd->pdev, 0);
@@ -820,17 +805,18 @@ static int wd719x_board_found(struct Scsi_Host *sh)
wd->fw_virt = NULL;
/* memory area for host (EEPROM) parameters */
- wd->params = pci_alloc_consistent(wd->pdev,
- sizeof(struct wd719x_host_param),
- &wd->params_phys);
+ wd->params = dma_alloc_coherent(&wd->pdev->dev,
+ sizeof(struct wd719x_host_param),
+ &wd->params_phys, GFP_KERNEL);
if (!wd->params) {
dev_warn(&wd->pdev->dev, "unable to allocate parameter buffer\n");
return -ENOMEM;
}
/* memory area for the RISC for hash table of outstanding requests */
- wd->hash_virt = pci_alloc_consistent(wd->pdev, WD719X_HASH_TABLE_SIZE,
- &wd->hash_phys);
+ wd->hash_virt = dma_alloc_coherent(&wd->pdev->dev,
+ WD719X_HASH_TABLE_SIZE,
+ &wd->hash_phys, GFP_KERNEL);
if (!wd->hash_virt) {
dev_warn(&wd->pdev->dev, "unable to allocate hash buffer\n");
ret = -ENOMEM;
@@ -862,10 +848,10 @@ static int wd719x_board_found(struct Scsi_Host *sh)
fail_free_irq:
free_irq(wd->pdev->irq, wd);
fail_free_hash:
- pci_free_consistent(wd->pdev, WD719X_HASH_TABLE_SIZE, wd->hash_virt,
+ dma_free_coherent(&wd->pdev->dev, WD719X_HASH_TABLE_SIZE, wd->hash_virt,
wd->hash_phys);
fail_free_params:
- pci_free_consistent(wd->pdev, sizeof(struct wd719x_host_param),
+ dma_free_coherent(&wd->pdev->dev, sizeof(struct wd719x_host_param),
wd->params, wd->params_phys);
return ret;
@@ -874,6 +860,7 @@ fail_free_params:
static struct scsi_host_template wd719x_template = {
.module = THIS_MODULE,
.name = "Western Digital 719x",
+ .cmd_size = sizeof(struct wd719x_scb),
.queuecommand = wd719x_queuecommand,
.eh_abort_handler = wd719x_abort,
.eh_device_reset_handler = wd719x_dev_reset,
@@ -884,7 +871,6 @@ static struct scsi_host_template wd719x_template = {
.can_queue = 255,
.this_id = 7,
.sg_tablesize = WD719X_SG,
- .use_clustering = ENABLE_CLUSTERING,
};
static int wd719x_pci_probe(struct pci_dev *pdev, const struct pci_device_id *d)
@@ -897,7 +883,7 @@ static int wd719x_pci_probe(struct pci_dev *pdev, const struct pci_device_id *d)
if (err)
goto fail;
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
+ if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
dev_warn(&pdev->dev, "Unable to set 32-bit DMA mask\n");
goto disable_device;
}
diff --git a/drivers/scsi/wd719x.h b/drivers/scsi/wd719x.h
index 0455b1633ca7..abaabd419a54 100644
--- a/drivers/scsi/wd719x.h
+++ b/drivers/scsi/wd719x.h
@@ -74,7 +74,6 @@ struct wd719x {
void *hash_virt; /* hash table CPU address */
dma_addr_t hash_phys; /* hash table bus address */
struct list_head active_scbs;
- struct list_head free_scbs;
};
/* timeout delays in microsecs */
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index 61389bdc7926..f0068e96a177 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -696,7 +696,6 @@ static struct scsi_host_template scsifront_sht = {
.this_id = -1,
.cmd_size = sizeof(struct vscsifrnt_shadow),
.sg_tablesize = VSCSIIF_SG_TABLESIZE,
- .use_clustering = DISABLE_CLUSTERING,
.proc_name = "scsifront",
};
@@ -1112,7 +1111,7 @@ static void scsifront_backend_changed(struct xenbus_device *dev,
case XenbusStateClosed:
if (dev->state == XenbusStateClosed)
break;
- /* Missed the backend's Closing state -- fallthrough */
+ /* fall through - Missed the backend's Closing state */
case XenbusStateClosing:
scsifront_disconnect(info);
break;
diff --git a/drivers/staging/rts5208/rtsx.c b/drivers/staging/rts5208/rtsx.c
index 69e6abe14abf..c57d66a7405f 100644
--- a/drivers/staging/rts5208/rtsx.c
+++ b/drivers/staging/rts5208/rtsx.c
@@ -237,12 +237,6 @@ static struct scsi_host_template rtsx_host_template = {
/* limit the total size of a transfer to 120 KB */
.max_sectors = 240,
- /* merge commands... this seems to help performance, but
- * periodically someone should test to see which setting is more
- * optimal.
- */
- .use_clustering = 1,
-
/* emulated HBA */
.emulated = 1,
diff --git a/drivers/staging/unisys/visorhba/visorhba_main.c b/drivers/staging/unisys/visorhba/visorhba_main.c
index 4fc521c51c0e..5cf93e8eb77c 100644
--- a/drivers/staging/unisys/visorhba/visorhba_main.c
+++ b/drivers/staging/unisys/visorhba/visorhba_main.c
@@ -645,7 +645,6 @@ static struct scsi_host_template visorhba_driver_template = {
.this_id = -1,
.slave_alloc = visorhba_slave_alloc,
.slave_destroy = visorhba_slave_destroy,
- .use_clustering = ENABLE_CLUSTERING,
};
/*
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index c1d5a173553d..984941e036c8 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1493,8 +1493,6 @@ __iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf,
if (hdr->flags & ISCSI_FLAG_CMD_FINAL)
iscsit_stop_dataout_timer(cmd);
- transport_check_aborted_status(se_cmd,
- (hdr->flags & ISCSI_FLAG_CMD_FINAL));
return iscsit_dump_data_payload(conn, payload_length, 1);
}
} else {
@@ -1509,12 +1507,9 @@ __iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf,
* TASK_ABORTED status.
*/
if (se_cmd->transport_state & CMD_T_ABORTED) {
- if (hdr->flags & ISCSI_FLAG_CMD_FINAL)
- if (--cmd->outstanding_r2ts < 1) {
- iscsit_stop_dataout_timer(cmd);
- transport_check_aborted_status(
- se_cmd, 1);
- }
+ if (hdr->flags & ISCSI_FLAG_CMD_FINAL &&
+ --cmd->outstanding_r2ts < 1)
+ iscsit_stop_dataout_timer(cmd);
return iscsit_dump_data_payload(conn, payload_length, 1);
}
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 95d0a22b2ad6..a5481dfeae8d 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1343,11 +1343,6 @@ static struct configfs_attribute *lio_target_discovery_auth_attrs[] = {
/* Start functions for target_core_fabric_ops */
-static char *iscsi_get_fabric_name(void)
-{
- return "iSCSI";
-}
-
static int iscsi_get_cmd_state(struct se_cmd *se_cmd)
{
struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
@@ -1549,9 +1544,9 @@ static void lio_release_cmd(struct se_cmd *se_cmd)
const struct target_core_fabric_ops iscsi_ops = {
.module = THIS_MODULE,
- .name = "iscsi",
+ .fabric_alias = "iscsi",
+ .fabric_name = "iSCSI",
.node_acl_size = sizeof(struct iscsi_node_acl),
- .get_fabric_name = iscsi_get_fabric_name,
.tpg_get_wwn = lio_tpg_get_endpoint_wwn,
.tpg_get_tag = lio_tpg_get_tag,
.tpg_get_default_depth = lio_tpg_get_default_depth,
@@ -1596,4 +1591,6 @@ const struct target_core_fabric_ops iscsi_ops = {
.tfc_tpg_nacl_attrib_attrs = lio_target_nacl_attrib_attrs,
.tfc_tpg_nacl_auth_attrs = lio_target_nacl_auth_attrs,
.tfc_tpg_nacl_param_attrs = lio_target_nacl_param_attrs,
+
+ .write_pending_must_be_called = true,
};
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
index a211e8154f4c..1b54a9c70851 100644
--- a/drivers/target/iscsi/iscsi_target_erl1.c
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -943,20 +943,8 @@ int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo)
return 0;
}
spin_unlock_bh(&cmd->istate_lock);
- /*
- * Determine if delayed TASK_ABORTED status for WRITEs
- * should be sent now if no unsolicited data out
- * payloads are expected, or if the delayed status
- * should be sent after unsolicited data out with
- * ISCSI_FLAG_CMD_FINAL set in iscsi_handle_data_out()
- */
- if (transport_check_aborted_status(se_cmd,
- (cmd->unsolicited_data == 0)) != 0)
+ if (cmd->se_cmd.transport_state & CMD_T_ABORTED)
return 0;
- /*
- * Otherwise send CHECK_CONDITION and sense for
- * exception
- */
return transport_send_check_condition_and_sense(se_cmd,
cmd->sense_reason, 0);
}
@@ -974,13 +962,7 @@ int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo)
if (!(cmd->cmd_flags &
ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) {
- /*
- * Send the delayed TASK_ABORTED status for
- * WRITEs if no more unsolicitied data is
- * expected.
- */
- if (transport_check_aborted_status(se_cmd, 1)
- != 0)
+ if (cmd->se_cmd.transport_state & CMD_T_ABORTED)
return 0;
iscsit_set_dataout_sequence_values(cmd);
@@ -995,11 +977,7 @@ int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo)
if ((cmd->data_direction == DMA_TO_DEVICE) &&
!(cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) {
- /*
- * Send the delayed TASK_ABORTED status for WRITEs if
- * no more nsolicitied data is expected.
- */
- if (transport_check_aborted_status(se_cmd, 1) != 0)
+ if (cmd->se_cmd.transport_state & CMD_T_ABORTED)
return 0;
iscsit_set_unsoliticed_dataout(cmd);
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 36b742932c72..86987da86dd6 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -150,24 +150,26 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd)
static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup)
{
int tag = -1;
- DEFINE_WAIT(wait);
+ DEFINE_SBQ_WAIT(wait);
struct sbq_wait_state *ws;
+ struct sbitmap_queue *sbq;
if (state == TASK_RUNNING)
return tag;
- ws = &se_sess->sess_tag_pool.ws[0];
+ sbq = &se_sess->sess_tag_pool;
+ ws = &sbq->ws[0];
for (;;) {
- prepare_to_wait_exclusive(&ws->wait, &wait, state);
+ sbitmap_prepare_to_wait(sbq, ws, &wait, state);
if (signal_pending_state(state, current))
break;
- tag = sbitmap_queue_get(&se_sess->sess_tag_pool, cpup);
+ tag = sbitmap_queue_get(sbq, cpup);
if (tag >= 0)
break;
schedule();
}
- finish_wait(&ws->wait, &wait);
+ sbitmap_finish_wait(sbq, ws, &wait);
return tag;
}
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index bc8918f382e4..7bd7c0c0db6f 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -324,7 +324,7 @@ static struct scsi_host_template tcm_loop_driver_template = {
.sg_tablesize = 256,
.cmd_per_lun = 1024,
.max_sectors = 0xFFFF,
- .use_clustering = DISABLE_CLUSTERING,
+ .dma_boundary = PAGE_SIZE - 1,
.slave_alloc = tcm_loop_slave_alloc,
.module = THIS_MODULE,
.track_queue_depth = 1,
@@ -460,11 +460,6 @@ static void tcm_loop_release_core_bus(void)
pr_debug("Releasing TCM Loop Core BUS\n");
}
-static char *tcm_loop_get_fabric_name(void)
-{
- return "loopback";
-}
-
static inline struct tcm_loop_tpg *tl_tpg(struct se_portal_group *se_tpg)
{
return container_of(se_tpg, struct tcm_loop_tpg, tl_se_tpg);
@@ -1149,8 +1144,7 @@ static struct configfs_attribute *tcm_loop_wwn_attrs[] = {
static const struct target_core_fabric_ops loop_ops = {
.module = THIS_MODULE,
- .name = "loopback",
- .get_fabric_name = tcm_loop_get_fabric_name,
+ .fabric_name = "loopback",
.tpg_get_wwn = tcm_loop_get_endpoint_wwn,
.tpg_get_tag = tcm_loop_get_tag,
.tpg_check_demo_mode = tcm_loop_check_demo_mode,
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 3d10189ecedc..08cee13dfb9a 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -1694,11 +1694,6 @@ static int sbp_check_false(struct se_portal_group *se_tpg)
return 0;
}
-static char *sbp_get_fabric_name(void)
-{
- return "sbp";
-}
-
static char *sbp_get_fabric_wwn(struct se_portal_group *se_tpg)
{
struct sbp_tpg *tpg = container_of(se_tpg, struct sbp_tpg, se_tpg);
@@ -2323,8 +2318,7 @@ static struct configfs_attribute *sbp_tpg_attrib_attrs[] = {
static const struct target_core_fabric_ops sbp_ops = {
.module = THIS_MODULE,
- .name = "sbp",
- .get_fabric_name = sbp_get_fabric_name,
+ .fabric_name = "sbp",
.tpg_get_wwn = sbp_get_fabric_wwn,
.tpg_get_tag = sbp_get_tag,
.tpg_check_demo_mode = sbp_check_true,
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 4f134b0c3e29..6b0d9beacf90 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -451,7 +451,7 @@ static inline void set_ascq(struct se_cmd *cmd, u8 alua_ascq)
pr_debug("[%s]: ALUA TG Port not available, "
"SenseKey: NOT_READY, ASC/ASCQ: "
"0x04/0x%02x\n",
- cmd->se_tfo->get_fabric_name(), alua_ascq);
+ cmd->se_tfo->fabric_name, alua_ascq);
cmd->scsi_asc = 0x04;
cmd->scsi_ascq = alua_ascq;
@@ -1229,13 +1229,13 @@ static int core_alua_update_tpg_secondary_metadata(struct se_lun *lun)
if (se_tpg->se_tpg_tfo->tpg_get_tag != NULL) {
path = kasprintf(GFP_KERNEL, "%s/alua/%s/%s+%hu/lun_%llu",
- db_root, se_tpg->se_tpg_tfo->get_fabric_name(),
+ db_root, se_tpg->se_tpg_tfo->fabric_name,
se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg),
se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg),
lun->unpacked_lun);
} else {
path = kasprintf(GFP_KERNEL, "%s/alua/%s/%s/lun_%llu",
- db_root, se_tpg->se_tpg_tfo->get_fabric_name(),
+ db_root, se_tpg->se_tpg_tfo->fabric_name,
se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg),
lun->unpacked_lun);
}
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index f6b1549f4142..72016d0dfca5 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -172,7 +172,10 @@ static struct target_fabric_configfs *target_core_get_fabric(
mutex_lock(&g_tf_lock);
list_for_each_entry(tf, &g_tf_list, tf_list) {
- if (!strcmp(tf->tf_ops->name, name)) {
+ const char *cmp_name = tf->tf_ops->fabric_alias;
+ if (!cmp_name)
+ cmp_name = tf->tf_ops->fabric_name;
+ if (!strcmp(cmp_name, name)) {
atomic_inc(&tf->tf_access_cnt);
mutex_unlock(&g_tf_lock);
return tf;
@@ -249,7 +252,7 @@ static struct config_group *target_core_register_fabric(
return ERR_PTR(-EINVAL);
}
pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:"
- " %s\n", tf->tf_ops->name);
+ " %s\n", tf->tf_ops->fabric_name);
/*
* On a successful target_core_get_fabric() look, the returned
* struct target_fabric_configfs *tf will contain a usage reference.
@@ -282,7 +285,7 @@ static void target_core_deregister_fabric(
" tf list\n", config_item_name(item));
pr_debug("Target_Core_ConfigFS: DEREGISTER -> located fabric:"
- " %s\n", tf->tf_ops->name);
+ " %s\n", tf->tf_ops->fabric_name);
atomic_dec(&tf->tf_access_cnt);
pr_debug("Target_Core_ConfigFS: DEREGISTER -> Releasing ci"
@@ -342,17 +345,20 @@ EXPORT_SYMBOL(target_undepend_item);
static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo)
{
- if (!tfo->name) {
- pr_err("Missing tfo->name\n");
- return -EINVAL;
+ if (tfo->fabric_alias) {
+ if (strlen(tfo->fabric_alias) >= TARGET_FABRIC_NAME_SIZE) {
+ pr_err("Passed alias: %s exceeds "
+ "TARGET_FABRIC_NAME_SIZE\n", tfo->fabric_alias);
+ return -EINVAL;
+ }
}
- if (strlen(tfo->name) >= TARGET_FABRIC_NAME_SIZE) {
- pr_err("Passed name: %s exceeds TARGET_FABRIC"
- "_NAME_SIZE\n", tfo->name);
+ if (!tfo->fabric_name) {
+ pr_err("Missing tfo->fabric_name\n");
return -EINVAL;
}
- if (!tfo->get_fabric_name) {
- pr_err("Missing tfo->get_fabric_name()\n");
+ if (strlen(tfo->fabric_name) >= TARGET_FABRIC_NAME_SIZE) {
+ pr_err("Passed name: %s exceeds "
+ "TARGET_FABRIC_NAME_SIZE\n", tfo->fabric_name);
return -EINVAL;
}
if (!tfo->tpg_get_wwn) {
@@ -486,7 +492,7 @@ void target_unregister_template(const struct target_core_fabric_ops *fo)
mutex_lock(&g_tf_lock);
list_for_each_entry(t, &g_tf_list, tf_list) {
- if (!strcmp(t->tf_ops->name, fo->name)) {
+ if (!strcmp(t->tf_ops->fabric_name, fo->fabric_name)) {
BUG_ON(atomic_read(&t->tf_access_cnt));
list_del(&t->tf_list);
mutex_unlock(&g_tf_lock);
@@ -532,9 +538,9 @@ DEF_CONFIGFS_ATTRIB_SHOW(emulate_tpu);
DEF_CONFIGFS_ATTRIB_SHOW(emulate_tpws);
DEF_CONFIGFS_ATTRIB_SHOW(emulate_caw);
DEF_CONFIGFS_ATTRIB_SHOW(emulate_3pc);
+DEF_CONFIGFS_ATTRIB_SHOW(emulate_pr);
DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_type);
DEF_CONFIGFS_ATTRIB_SHOW(hw_pi_prot_type);
-DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_format);
DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_verify);
DEF_CONFIGFS_ATTRIB_SHOW(enforce_pr_isids);
DEF_CONFIGFS_ATTRIB_SHOW(is_nonrot);
@@ -592,6 +598,7 @@ static ssize_t _name##_store(struct config_item *item, const char *page, \
DEF_CONFIGFS_ATTRIB_STORE_BOOL(emulate_fua_write);
DEF_CONFIGFS_ATTRIB_STORE_BOOL(emulate_caw);
DEF_CONFIGFS_ATTRIB_STORE_BOOL(emulate_3pc);
+DEF_CONFIGFS_ATTRIB_STORE_BOOL(emulate_pr);
DEF_CONFIGFS_ATTRIB_STORE_BOOL(enforce_pr_isids);
DEF_CONFIGFS_ATTRIB_STORE_BOOL(is_nonrot);
@@ -613,12 +620,17 @@ static void dev_set_t10_wwn_model_alias(struct se_device *dev)
const char *configname;
configname = config_item_name(&dev->dev_group.cg_item);
- if (strlen(configname) >= 16) {
+ if (strlen(configname) >= INQUIRY_MODEL_LEN) {
pr_warn("dev[%p]: Backstore name '%s' is too long for "
- "INQUIRY_MODEL, truncating to 16 bytes\n", dev,
+ "INQUIRY_MODEL, truncating to 15 characters\n", dev,
configname);
}
- snprintf(&dev->t10_wwn.model[0], 16, "%s", configname);
+ /*
+ * XXX We can't use sizeof(dev->t10_wwn.model) (INQUIRY_MODEL_LEN + 1)
+ * here without potentially breaking existing setups, so continue to
+ * truncate one byte shorter than what can be carried in INQUIRY.
+ */
+ strlcpy(dev->t10_wwn.model, configname, INQUIRY_MODEL_LEN);
}
static ssize_t emulate_model_alias_store(struct config_item *item,
@@ -640,11 +652,12 @@ static ssize_t emulate_model_alias_store(struct config_item *item,
if (ret < 0)
return ret;
+ BUILD_BUG_ON(sizeof(dev->t10_wwn.model) != INQUIRY_MODEL_LEN + 1);
if (flag) {
dev_set_t10_wwn_model_alias(dev);
} else {
- strncpy(&dev->t10_wwn.model[0],
- dev->transport->inquiry_prod, 16);
+ strlcpy(dev->t10_wwn.model, dev->transport->inquiry_prod,
+ sizeof(dev->t10_wwn.model));
}
da->emulate_model_alias = flag;
return count;
@@ -1116,9 +1129,10 @@ CONFIGFS_ATTR(, emulate_tpu);
CONFIGFS_ATTR(, emulate_tpws);
CONFIGFS_ATTR(, emulate_caw);
CONFIGFS_ATTR(, emulate_3pc);
+CONFIGFS_ATTR(, emulate_pr);
CONFIGFS_ATTR(, pi_prot_type);
CONFIGFS_ATTR_RO(, hw_pi_prot_type);
-CONFIGFS_ATTR(, pi_prot_format);
+CONFIGFS_ATTR_WO(, pi_prot_format);
CONFIGFS_ATTR(, pi_prot_verify);
CONFIGFS_ATTR(, enforce_pr_isids);
CONFIGFS_ATTR(, is_nonrot);
@@ -1156,6 +1170,7 @@ struct configfs_attribute *sbc_attrib_attrs[] = {
&attr_emulate_tpws,
&attr_emulate_caw,
&attr_emulate_3pc,
+ &attr_emulate_pr,
&attr_pi_prot_type,
&attr_hw_pi_prot_type,
&attr_pi_prot_format,
@@ -1211,6 +1226,74 @@ static struct t10_wwn *to_t10_wwn(struct config_item *item)
}
/*
+ * STANDARD and VPD page 0x83 T10 Vendor Identification
+ */
+static ssize_t target_wwn_vendor_id_show(struct config_item *item,
+ char *page)
+{
+ return sprintf(page, "%s\n", &to_t10_wwn(item)->vendor[0]);
+}
+
+static ssize_t target_wwn_vendor_id_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct t10_wwn *t10_wwn = to_t10_wwn(item);
+ struct se_device *dev = t10_wwn->t10_dev;
+ /* +2 to allow for a trailing (stripped) '\n' and null-terminator */
+ unsigned char buf[INQUIRY_VENDOR_LEN + 2];
+ char *stripped = NULL;
+ size_t len;
+ int i;
+
+ len = strlcpy(buf, page, sizeof(buf));
+ if (len < sizeof(buf)) {
+ /* Strip any newline added from userspace. */
+ stripped = strstrip(buf);
+ len = strlen(stripped);
+ }
+ if (len > INQUIRY_VENDOR_LEN) {
+ pr_err("Emulated T10 Vendor Identification exceeds"
+ " INQUIRY_VENDOR_LEN: " __stringify(INQUIRY_VENDOR_LEN)
+ "\n");
+ return -EOVERFLOW;
+ }
+
+ /*
+ * SPC 4.3.1:
+ * ASCII data fields shall contain only ASCII printable characters (i.e.,
+ * code values 20h to 7Eh) and may be terminated with one or more ASCII
+ * null (00h) characters.
+ */
+ for (i = 0; i < len; i++) {
+ if ((stripped[i] < 0x20) || (stripped[i] > 0x7E)) {
+ pr_err("Emulated T10 Vendor Identification contains"
+ " non-ASCII-printable characters\n");
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * Check to see if any active exports exist. If they do exist, fail
+ * here as changing this information on the fly (underneath the
+ * initiator side OS dependent multipath code) could cause negative
+ * effects.
+ */
+ if (dev->export_count) {
+ pr_err("Unable to set T10 Vendor Identification while"
+ " active %d exports exist\n", dev->export_count);
+ return -EINVAL;
+ }
+
+ BUILD_BUG_ON(sizeof(dev->t10_wwn.vendor) != INQUIRY_VENDOR_LEN + 1);
+ strlcpy(dev->t10_wwn.vendor, stripped, sizeof(dev->t10_wwn.vendor));
+
+ pr_debug("Target_Core_ConfigFS: Set emulated T10 Vendor Identification:"
+ " %s\n", dev->t10_wwn.vendor);
+
+ return count;
+}
+
+/*
* VPD page 0x80 Unit serial
*/
static ssize_t target_wwn_vpd_unit_serial_show(struct config_item *item,
@@ -1356,6 +1439,7 @@ DEF_DEV_WWN_ASSOC_SHOW(vpd_assoc_target_port, 0x10);
/* VPD page 0x83 Association: SCSI Target Device */
DEF_DEV_WWN_ASSOC_SHOW(vpd_assoc_scsi_target_device, 0x20);
+CONFIGFS_ATTR(target_wwn_, vendor_id);
CONFIGFS_ATTR(target_wwn_, vpd_unit_serial);
CONFIGFS_ATTR_RO(target_wwn_, vpd_protocol_identifier);
CONFIGFS_ATTR_RO(target_wwn_, vpd_assoc_logical_unit);
@@ -1363,6 +1447,7 @@ CONFIGFS_ATTR_RO(target_wwn_, vpd_assoc_target_port);
CONFIGFS_ATTR_RO(target_wwn_, vpd_assoc_scsi_target_device);
static struct configfs_attribute *target_core_dev_wwn_attrs[] = {
+ &target_wwn_attr_vendor_id,
&target_wwn_attr_vpd_unit_serial,
&target_wwn_attr_vpd_protocol_identifier,
&target_wwn_attr_vpd_assoc_logical_unit,
@@ -1400,7 +1485,7 @@ static ssize_t target_core_dev_pr_show_spc3_res(struct se_device *dev,
core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN);
return sprintf(page, "SPC-3 Reservation: %s Initiator: %s%s\n",
- se_nacl->se_tpg->se_tpg_tfo->get_fabric_name(),
+ se_nacl->se_tpg->se_tpg_tfo->fabric_name,
se_nacl->initiatorname, i_buf);
}
@@ -1414,7 +1499,7 @@ static ssize_t target_core_dev_pr_show_spc2_res(struct se_device *dev,
if (se_nacl) {
len = sprintf(page,
"SPC-2 Reservation: %s Initiator: %s\n",
- se_nacl->se_tpg->se_tpg_tfo->get_fabric_name(),
+ se_nacl->se_tpg->se_tpg_tfo->fabric_name,
se_nacl->initiatorname);
} else {
len = sprintf(page, "No SPC-2 Reservation holder\n");
@@ -1427,6 +1512,9 @@ static ssize_t target_pr_res_holder_show(struct config_item *item, char *page)
struct se_device *dev = pr_to_dev(item);
int ret;
+ if (!dev->dev_attrib.emulate_pr)
+ return sprintf(page, "SPC_RESERVATIONS_DISABLED\n");
+
if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
return sprintf(page, "Passthrough\n");
@@ -1489,13 +1577,13 @@ static ssize_t target_pr_res_pr_holder_tg_port_show(struct config_item *item,
tfo = se_tpg->se_tpg_tfo;
len += sprintf(page+len, "SPC-3 Reservation: %s"
- " Target Node Endpoint: %s\n", tfo->get_fabric_name(),
+ " Target Node Endpoint: %s\n", tfo->fabric_name,
tfo->tpg_get_wwn(se_tpg));
len += sprintf(page+len, "SPC-3 Reservation: Relative Port"
" Identifier Tag: %hu %s Portal Group Tag: %hu"
" %s Logical Unit: %llu\n", pr_reg->tg_pt_sep_rtpi,
- tfo->get_fabric_name(), tfo->tpg_get_tag(se_tpg),
- tfo->get_fabric_name(), pr_reg->pr_aptpl_target_lun);
+ tfo->fabric_name, tfo->tpg_get_tag(se_tpg),
+ tfo->fabric_name, pr_reg->pr_aptpl_target_lun);
out_unlock:
spin_unlock(&dev->dev_reservation_lock);
@@ -1526,7 +1614,7 @@ static ssize_t target_pr_res_pr_registered_i_pts_show(struct config_item *item,
core_pr_dump_initiator_port(pr_reg, i_buf,
PR_REG_ISID_ID_LEN);
sprintf(buf, "%s Node: %s%s Key: 0x%016Lx PRgen: 0x%08x\n",
- tfo->get_fabric_name(),
+ tfo->fabric_name,
pr_reg->pr_reg_nacl->initiatorname, i_buf, pr_reg->pr_res_key,
pr_reg->pr_res_generation);
@@ -1567,12 +1655,14 @@ static ssize_t target_pr_res_type_show(struct config_item *item, char *page)
{
struct se_device *dev = pr_to_dev(item);
+ if (!dev->dev_attrib.emulate_pr)
+ return sprintf(page, "SPC_RESERVATIONS_DISABLED\n");
if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
return sprintf(page, "SPC_PASSTHROUGH\n");
- else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
+ if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
return sprintf(page, "SPC2_RESERVATIONS\n");
- else
- return sprintf(page, "SPC3_PERSISTENT_RESERVATIONS\n");
+
+ return sprintf(page, "SPC3_PERSISTENT_RESERVATIONS\n");
}
static ssize_t target_pr_res_aptpl_active_show(struct config_item *item,
@@ -1580,7 +1670,8 @@ static ssize_t target_pr_res_aptpl_active_show(struct config_item *item,
{
struct se_device *dev = pr_to_dev(item);
- if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
+ if (!dev->dev_attrib.emulate_pr ||
+ (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR))
return 0;
return sprintf(page, "APTPL Bit Status: %s\n",
@@ -1592,7 +1683,8 @@ static ssize_t target_pr_res_aptpl_metadata_show(struct config_item *item,
{
struct se_device *dev = pr_to_dev(item);
- if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
+ if (!dev->dev_attrib.emulate_pr ||
+ (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR))
return 0;
return sprintf(page, "Ready to process PR APTPL metadata..\n");
@@ -1638,7 +1730,8 @@ static ssize_t target_pr_res_aptpl_metadata_store(struct config_item *item,
u16 tpgt = 0;
u8 type = 0;
- if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
+ if (!dev->dev_attrib.emulate_pr ||
+ (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR))
return count;
if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
return count;
@@ -2746,7 +2839,7 @@ static ssize_t target_tg_pt_gp_members_show(struct config_item *item,
struct se_portal_group *tpg = lun->lun_tpg;
cur_len = snprintf(buf, TG_PT_GROUP_NAME_BUF, "%s/%s/tpgt_%hu"
- "/%s\n", tpg->se_tpg_tfo->get_fabric_name(),
+ "/%s\n", tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_wwn(tpg),
tpg->se_tpg_tfo->tpg_get_tag(tpg),
config_item_name(&lun->lun_group.cg_item));
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 47b5ef153135..93c56f4a9911 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -95,7 +95,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
deve->lun_access_ro) {
pr_err("TARGET_CORE[%s]: Detected WRITE_PROTECTED LUN"
" Access for 0x%08llx\n",
- se_cmd->se_tfo->get_fabric_name(),
+ se_cmd->se_tfo->fabric_name,
unpacked_lun);
rcu_read_unlock();
ret = TCM_WRITE_PROTECTED;
@@ -114,7 +114,7 @@ out_unlock:
if (unpacked_lun != 0) {
pr_err("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN"
" Access for 0x%08llx\n",
- se_cmd->se_tfo->get_fabric_name(),
+ se_cmd->se_tfo->fabric_name,
unpacked_lun);
return TCM_NON_EXISTENT_LUN;
}
@@ -188,7 +188,7 @@ out_unlock:
if (!se_lun) {
pr_debug("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN"
" Access for 0x%08llx\n",
- se_cmd->se_tfo->get_fabric_name(),
+ se_cmd->se_tfo->fabric_name,
unpacked_lun);
return -ENODEV;
}
@@ -237,7 +237,7 @@ struct se_dev_entry *core_get_se_deve_from_rtpi(
if (!lun) {
pr_err("%s device entries device pointer is"
" NULL, but Initiator has access.\n",
- tpg->se_tpg_tfo->get_fabric_name());
+ tpg->se_tpg_tfo->fabric_name);
continue;
}
if (lun->lun_rtpi != rtpi)
@@ -571,9 +571,9 @@ int core_dev_add_lun(
return rc;
pr_debug("%s_TPG[%u]_LUN[%llu] - Activated %s Logical Unit from"
- " CORE HBA: %u\n", tpg->se_tpg_tfo->get_fabric_name(),
+ " CORE HBA: %u\n", tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun,
- tpg->se_tpg_tfo->get_fabric_name(), dev->se_hba->hba_id);
+ tpg->se_tpg_tfo->fabric_name, dev->se_hba->hba_id);
/*
* Update LUN maps for dynamically added initiators when
* generate_node_acl is enabled.
@@ -604,9 +604,9 @@ void core_dev_del_lun(
struct se_lun *lun)
{
pr_debug("%s_TPG[%u]_LUN[%llu] - Deactivating %s Logical Unit from"
- " device object\n", tpg->se_tpg_tfo->get_fabric_name(),
+ " device object\n", tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun,
- tpg->se_tpg_tfo->get_fabric_name());
+ tpg->se_tpg_tfo->fabric_name);
core_tpg_remove_lun(tpg, lun);
}
@@ -621,7 +621,7 @@ struct se_lun_acl *core_dev_init_initiator_node_lun_acl(
if (strlen(nacl->initiatorname) >= TRANSPORT_IQN_LEN) {
pr_err("%s InitiatorName exceeds maximum size.\n",
- tpg->se_tpg_tfo->get_fabric_name());
+ tpg->se_tpg_tfo->fabric_name);
*ret = -EOVERFLOW;
return NULL;
}
@@ -664,7 +664,7 @@ int core_dev_add_initiator_node_lun_acl(
return -EINVAL;
pr_debug("%s_TPG[%hu]_LUN[%llu->%llu] - Added %s ACL for "
- " InitiatorNode: %s\n", tpg->se_tpg_tfo->get_fabric_name(),
+ " InitiatorNode: %s\n", tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->mapped_lun,
lun_access_ro ? "RO" : "RW",
nacl->initiatorname);
@@ -697,7 +697,7 @@ int core_dev_del_initiator_node_lun_acl(
pr_debug("%s_TPG[%hu]_LUN[%llu] - Removed ACL for"
" InitiatorNode: %s Mapped LUN: %llu\n",
- tpg->se_tpg_tfo->get_fabric_name(),
+ tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun,
nacl->initiatorname, lacl->mapped_lun);
@@ -709,9 +709,9 @@ void core_dev_free_initiator_node_lun_acl(
struct se_lun_acl *lacl)
{
pr_debug("%s_TPG[%hu] - Freeing ACL for %s InitiatorNode: %s"
- " Mapped LUN: %llu\n", tpg->se_tpg_tfo->get_fabric_name(),
+ " Mapped LUN: %llu\n", tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg),
- tpg->se_tpg_tfo->get_fabric_name(),
+ tpg->se_tpg_tfo->fabric_name,
lacl->se_lun_nacl->initiatorname, lacl->mapped_lun);
kfree(lacl);
@@ -720,36 +720,17 @@ void core_dev_free_initiator_node_lun_acl(
static void scsi_dump_inquiry(struct se_device *dev)
{
struct t10_wwn *wwn = &dev->t10_wwn;
- char buf[17];
- int i, device_type;
+ int device_type = dev->transport->get_device_type(dev);
+
/*
* Print Linux/SCSI style INQUIRY formatting to the kernel ring buffer
*/
- for (i = 0; i < 8; i++)
- if (wwn->vendor[i] >= 0x20)
- buf[i] = wwn->vendor[i];
- else
- buf[i] = ' ';
- buf[i] = '\0';
- pr_debug(" Vendor: %s\n", buf);
-
- for (i = 0; i < 16; i++)
- if (wwn->model[i] >= 0x20)
- buf[i] = wwn->model[i];
- else
- buf[i] = ' ';
- buf[i] = '\0';
- pr_debug(" Model: %s\n", buf);
-
- for (i = 0; i < 4; i++)
- if (wwn->revision[i] >= 0x20)
- buf[i] = wwn->revision[i];
- else
- buf[i] = ' ';
- buf[i] = '\0';
- pr_debug(" Revision: %s\n", buf);
-
- device_type = dev->transport->get_device_type(dev);
+ pr_debug(" Vendor: %-" __stringify(INQUIRY_VENDOR_LEN) "s\n",
+ wwn->vendor);
+ pr_debug(" Model: %-" __stringify(INQUIRY_MODEL_LEN) "s\n",
+ wwn->model);
+ pr_debug(" Revision: %-" __stringify(INQUIRY_REVISION_LEN) "s\n",
+ wwn->revision);
pr_debug(" Type: %s ", scsi_device_type(device_type));
}
@@ -805,6 +786,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
dev->dev_attrib.emulate_tpws = DA_EMULATE_TPWS;
dev->dev_attrib.emulate_caw = DA_EMULATE_CAW;
dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC;
+ dev->dev_attrib.emulate_pr = DA_EMULATE_PR;
dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE0_PROT;
dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS;
dev->dev_attrib.force_pr_aptpl = DA_FORCE_PR_APTPL;
@@ -822,13 +804,19 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
xcopy_lun = &dev->xcopy_lun;
rcu_assign_pointer(xcopy_lun->lun_se_dev, dev);
- init_completion(&xcopy_lun->lun_ref_comp);
init_completion(&xcopy_lun->lun_shutdown_comp);
INIT_LIST_HEAD(&xcopy_lun->lun_deve_list);
INIT_LIST_HEAD(&xcopy_lun->lun_dev_link);
mutex_init(&xcopy_lun->lun_tg_pt_md_mutex);
xcopy_lun->lun_tpg = &xcopy_pt_tpg;
+ /* Preload the default INQUIRY const values */
+ strlcpy(dev->t10_wwn.vendor, "LIO-ORG", sizeof(dev->t10_wwn.vendor));
+ strlcpy(dev->t10_wwn.model, dev->transport->inquiry_prod,
+ sizeof(dev->t10_wwn.model));
+ strlcpy(dev->t10_wwn.revision, dev->transport->inquiry_rev,
+ sizeof(dev->t10_wwn.revision));
+
return dev;
}
@@ -987,35 +975,10 @@ int target_configure_device(struct se_device *dev)
goto out_destroy_device;
/*
- * Startup the struct se_device processing thread
- */
- dev->tmr_wq = alloc_workqueue("tmr-%s", WQ_MEM_RECLAIM | WQ_UNBOUND, 1,
- dev->transport->name);
- if (!dev->tmr_wq) {
- pr_err("Unable to create tmr workqueue for %s\n",
- dev->transport->name);
- ret = -ENOMEM;
- goto out_free_alua;
- }
-
- /*
* Setup work_queue for QUEUE_FULL
*/
INIT_WORK(&dev->qf_work_queue, target_qf_do_work);
- /*
- * Preload the initial INQUIRY const values if we are doing
- * anything virtual (IBLOCK, FILEIO, RAMDISK), but not for TCM/pSCSI
- * passthrough because this is being provided by the backend LLD.
- */
- if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)) {
- strncpy(&dev->t10_wwn.vendor[0], "LIO-ORG", 8);
- strncpy(&dev->t10_wwn.model[0],
- dev->transport->inquiry_prod, 16);
- strncpy(&dev->t10_wwn.revision[0],
- dev->transport->inquiry_rev, 4);
- }
-
scsi_dump_inquiry(dev);
spin_lock(&hba->device_lock);
@@ -1026,8 +989,6 @@ int target_configure_device(struct se_device *dev)
return 0;
-out_free_alua:
- core_alua_free_lu_gp_mem(dev);
out_destroy_device:
dev->transport->destroy_device(dev);
out_free_index:
@@ -1046,8 +1007,6 @@ void target_free_device(struct se_device *dev)
WARN_ON(!list_empty(&dev->dev_sep_list));
if (target_dev_configured(dev)) {
- destroy_workqueue(dev->tmr_wq);
-
dev->transport->destroy_device(dev);
mutex_lock(&device_mutex);
@@ -1159,6 +1118,18 @@ passthrough_parse_cdb(struct se_cmd *cmd,
}
/*
+ * With emulate_pr disabled, all reservation requests should fail,
+ * regardless of whether or not TRANSPORT_FLAG_PASSTHROUGH_PGR is set.
+ */
+ if (!dev->dev_attrib.emulate_pr &&
+ ((cdb[0] == PERSISTENT_RESERVE_IN) ||
+ (cdb[0] == PERSISTENT_RESERVE_OUT) ||
+ (cdb[0] == RELEASE || cdb[0] == RELEASE_10) ||
+ (cdb[0] == RESERVE || cdb[0] == RESERVE_10))) {
+ return TCM_UNSUPPORTED_SCSI_OPCODE;
+ }
+
+ /*
* For PERSISTENT RESERVE IN/OUT, RELEASE, and RESERVE we need to
* emulate the response, since tcmu does not have the information
* required to process these commands.
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index aa2f4f632ebe..9a6e20a2af7d 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -203,7 +203,7 @@ static ssize_t target_fabric_mappedlun_write_protect_store(
pr_debug("%s_ConfigFS: Changed Initiator ACL: %s"
" Mapped LUN: %llu Write Protect bit to %s\n",
- se_tpg->se_tpg_tfo->get_fabric_name(),
+ se_tpg->se_tpg_tfo->fabric_name,
se_nacl->initiatorname, lacl->mapped_lun, (wp) ? "ON" : "OFF");
return count;
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 0c6635587930..853344415963 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -138,7 +138,6 @@ int init_se_kmem_caches(void);
void release_se_kmem_caches(void);
u32 scsi_get_new_index(scsi_index_t);
void transport_subsystem_check_init(void);
-int transport_cmd_finish_abort(struct se_cmd *);
unsigned char *transport_dump_cmd_direction(struct se_cmd *);
void transport_dump_dev_state(struct se_device *, char *, int *);
void transport_dump_dev_info(struct se_device *, struct se_lun *,
@@ -148,7 +147,6 @@ int transport_dump_vpd_assoc(struct t10_vpd *, unsigned char *, int);
int transport_dump_vpd_ident_type(struct t10_vpd *, unsigned char *, int);
int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int);
void transport_clear_lun_ref(struct se_lun *);
-void transport_send_task_abort(struct se_cmd *);
sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size);
void target_qf_do_work(struct work_struct *work);
bool target_check_wce(struct se_device *dev);
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 10db5656fd5d..397f38cb7f4e 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -235,7 +235,7 @@ target_scsi2_reservation_release(struct se_cmd *cmd)
tpg = sess->se_tpg;
pr_debug("SCSI-2 Released reservation for %s LUN: %llu ->"
" MAPPED LUN: %llu for %s\n",
- tpg->se_tpg_tfo->get_fabric_name(),
+ tpg->se_tpg_tfo->fabric_name,
cmd->se_lun->unpacked_lun, cmd->orig_fe_lun,
sess->se_node_acl->initiatorname);
@@ -278,7 +278,7 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd)
if (dev->dev_reserved_node_acl &&
(dev->dev_reserved_node_acl != sess->se_node_acl)) {
pr_err("SCSI-2 RESERVATION CONFLIFT for %s fabric\n",
- tpg->se_tpg_tfo->get_fabric_name());
+ tpg->se_tpg_tfo->fabric_name);
pr_err("Original reserver LUN: %llu %s\n",
cmd->se_lun->unpacked_lun,
dev->dev_reserved_node_acl->initiatorname);
@@ -297,7 +297,7 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd)
dev->dev_reservation_flags |= DRF_SPC2_RESERVATIONS_WITH_ISID;
}
pr_debug("SCSI-2 Reserved %s LUN: %llu -> MAPPED LUN: %llu"
- " for %s\n", tpg->se_tpg_tfo->get_fabric_name(),
+ " for %s\n", tpg->se_tpg_tfo->fabric_name,
cmd->se_lun->unpacked_lun, cmd->orig_fe_lun,
sess->se_node_acl->initiatorname);
@@ -914,11 +914,11 @@ static void core_scsi3_aptpl_reserve(
pr_debug("SPC-3 PR [%s] Service Action: APTPL RESERVE created"
" new reservation holder TYPE: %s ALL_TG_PT: %d\n",
- tpg->se_tpg_tfo->get_fabric_name(),
+ tpg->se_tpg_tfo->fabric_name,
core_scsi3_pr_dump_type(pr_reg->pr_res_type),
(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
pr_debug("SPC-3 PR [%s] RESERVE Node: %s%s\n",
- tpg->se_tpg_tfo->get_fabric_name(), node_acl->initiatorname,
+ tpg->se_tpg_tfo->fabric_name, node_acl->initiatorname,
i_buf);
}
@@ -1036,19 +1036,19 @@ static void __core_scsi3_dump_registration(
core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN);
pr_debug("SPC-3 PR [%s] Service Action: REGISTER%s Initiator"
- " Node: %s%s\n", tfo->get_fabric_name(), (register_type == REGISTER_AND_MOVE) ?
+ " Node: %s%s\n", tfo->fabric_name, (register_type == REGISTER_AND_MOVE) ?
"_AND_MOVE" : (register_type == REGISTER_AND_IGNORE_EXISTING_KEY) ?
"_AND_IGNORE_EXISTING_KEY" : "", nacl->initiatorname,
i_buf);
pr_debug("SPC-3 PR [%s] registration on Target Port: %s,0x%04x\n",
- tfo->get_fabric_name(), tfo->tpg_get_wwn(se_tpg),
+ tfo->fabric_name, tfo->tpg_get_wwn(se_tpg),
tfo->tpg_get_tag(se_tpg));
pr_debug("SPC-3 PR [%s] for %s TCM Subsystem %s Object Target"
- " Port(s)\n", tfo->get_fabric_name(),
+ " Port(s)\n", tfo->fabric_name,
(pr_reg->pr_reg_all_tg_pt) ? "ALL" : "SINGLE",
dev->transport->name);
pr_debug("SPC-3 PR [%s] SA Res Key: 0x%016Lx PRgeneration:"
- " 0x%08x APTPL: %d\n", tfo->get_fabric_name(),
+ " 0x%08x APTPL: %d\n", tfo->fabric_name,
pr_reg->pr_res_key, pr_reg->pr_res_generation,
pr_reg->pr_reg_aptpl);
}
@@ -1329,7 +1329,7 @@ static void __core_scsi3_free_registration(
*/
while (atomic_read(&pr_reg->pr_res_holders) != 0) {
pr_debug("SPC-3 PR [%s] waiting for pr_res_holders\n",
- tfo->get_fabric_name());
+ tfo->fabric_name);
cpu_relax();
}
@@ -1341,15 +1341,15 @@ static void __core_scsi3_free_registration(
spin_lock(&pr_tmpl->registration_lock);
pr_debug("SPC-3 PR [%s] Service Action: UNREGISTER Initiator"
- " Node: %s%s\n", tfo->get_fabric_name(),
+ " Node: %s%s\n", tfo->fabric_name,
pr_reg->pr_reg_nacl->initiatorname,
i_buf);
pr_debug("SPC-3 PR [%s] for %s TCM Subsystem %s Object Target"
- " Port(s)\n", tfo->get_fabric_name(),
+ " Port(s)\n", tfo->fabric_name,
(pr_reg->pr_reg_all_tg_pt) ? "ALL" : "SINGLE",
dev->transport->name);
pr_debug("SPC-3 PR [%s] SA Res Key: 0x%016Lx PRgeneration:"
- " 0x%08x\n", tfo->get_fabric_name(), pr_reg->pr_res_key,
+ " 0x%08x\n", tfo->fabric_name, pr_reg->pr_res_key,
pr_reg->pr_res_generation);
if (!preempt_and_abort_list) {
@@ -1645,7 +1645,7 @@ core_scsi3_decode_spec_i_port(
dest_tpg = tmp_tpg;
pr_debug("SPC-3 PR SPEC_I_PT: Located %s Node:"
" %s Port RTPI: %hu\n",
- dest_tpg->se_tpg_tfo->get_fabric_name(),
+ dest_tpg->se_tpg_tfo->fabric_name,
dest_node_acl->initiatorname, dest_rtpi);
spin_lock(&dev->se_port_lock);
@@ -1662,7 +1662,7 @@ core_scsi3_decode_spec_i_port(
pr_debug("SPC-3 PR SPEC_I_PT: Got %s data_length: %u tpdl: %u"
" tid_len: %d for %s + %s\n",
- dest_tpg->se_tpg_tfo->get_fabric_name(), cmd->data_length,
+ dest_tpg->se_tpg_tfo->fabric_name, cmd->data_length,
tpdl, tid_len, i_str, iport_ptr);
if (tid_len > tpdl) {
@@ -1683,7 +1683,7 @@ core_scsi3_decode_spec_i_port(
if (!dest_se_deve) {
pr_err("Unable to locate %s dest_se_deve"
" from destination RTPI: %hu\n",
- dest_tpg->se_tpg_tfo->get_fabric_name(),
+ dest_tpg->se_tpg_tfo->fabric_name,
dest_rtpi);
core_scsi3_nodeacl_undepend_item(dest_node_acl);
@@ -1704,7 +1704,7 @@ core_scsi3_decode_spec_i_port(
pr_debug("SPC-3 PR SPEC_I_PT: Located %s Node: %s"
" dest_se_deve mapped_lun: %llu\n",
- dest_tpg->se_tpg_tfo->get_fabric_name(),
+ dest_tpg->se_tpg_tfo->fabric_name,
dest_node_acl->initiatorname, dest_se_deve->mapped_lun);
/*
@@ -1815,7 +1815,7 @@ core_scsi3_decode_spec_i_port(
pr_debug("SPC-3 PR [%s] SPEC_I_PT: Successfully"
" registered Transport ID for Node: %s%s Mapped LUN:"
- " %llu\n", dest_tpg->se_tpg_tfo->get_fabric_name(),
+ " %llu\n", dest_tpg->se_tpg_tfo->fabric_name,
dest_node_acl->initiatorname, i_buf, (dest_se_deve) ?
dest_se_deve->mapped_lun : 0);
@@ -1913,7 +1913,7 @@ static int core_scsi3_update_aptpl_buf(
"res_holder=1\nres_type=%02x\n"
"res_scope=%02x\nres_all_tg_pt=%d\n"
"mapped_lun=%llu\n", reg_count,
- tpg->se_tpg_tfo->get_fabric_name(),
+ tpg->se_tpg_tfo->fabric_name,
pr_reg->pr_reg_nacl->initiatorname, isid_buf,
pr_reg->pr_res_key, pr_reg->pr_res_type,
pr_reg->pr_res_scope, pr_reg->pr_reg_all_tg_pt,
@@ -1923,7 +1923,7 @@ static int core_scsi3_update_aptpl_buf(
"initiator_fabric=%s\ninitiator_node=%s\n%s"
"sa_res_key=%llu\nres_holder=0\n"
"res_all_tg_pt=%d\nmapped_lun=%llu\n",
- reg_count, tpg->se_tpg_tfo->get_fabric_name(),
+ reg_count, tpg->se_tpg_tfo->fabric_name,
pr_reg->pr_reg_nacl->initiatorname, isid_buf,
pr_reg->pr_res_key, pr_reg->pr_reg_all_tg_pt,
pr_reg->pr_res_mapped_lun);
@@ -1942,7 +1942,7 @@ static int core_scsi3_update_aptpl_buf(
*/
snprintf(tmp, 512, "target_fabric=%s\ntarget_node=%s\n"
"tpgt=%hu\nport_rtpi=%hu\ntarget_lun=%llu\nPR_REG_END:"
- " %d\n", tpg->se_tpg_tfo->get_fabric_name(),
+ " %d\n", tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_wwn(tpg),
tpg->se_tpg_tfo->tpg_get_tag(tpg),
pr_reg->tg_pt_sep_rtpi, pr_reg->pr_aptpl_target_lun,
@@ -2168,7 +2168,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key,
pr_reg->pr_res_key = sa_res_key;
pr_debug("SPC-3 PR [%s] REGISTER%s: Changed Reservation"
" Key for %s to: 0x%016Lx PRgeneration:"
- " 0x%08x\n", cmd->se_tfo->get_fabric_name(),
+ " 0x%08x\n", cmd->se_tfo->fabric_name,
(register_type == REGISTER_AND_IGNORE_EXISTING_KEY) ? "_AND_IGNORE_EXISTING_KEY" : "",
pr_reg->pr_reg_nacl->initiatorname,
pr_reg->pr_res_key, pr_reg->pr_res_generation);
@@ -2356,9 +2356,9 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key)
pr_err("SPC-3 PR: Attempted RESERVE from"
" [%s]: %s while reservation already held by"
" [%s]: %s, returning RESERVATION_CONFLICT\n",
- cmd->se_tfo->get_fabric_name(),
+ cmd->se_tfo->fabric_name,
se_sess->se_node_acl->initiatorname,
- pr_res_nacl->se_tpg->se_tpg_tfo->get_fabric_name(),
+ pr_res_nacl->se_tpg->se_tpg_tfo->fabric_name,
pr_res_holder->pr_reg_nacl->initiatorname);
spin_unlock(&dev->dev_reservation_lock);
@@ -2379,9 +2379,9 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key)
" [%s]: %s trying to change TYPE and/or SCOPE,"
" while reservation already held by [%s]: %s,"
" returning RESERVATION_CONFLICT\n",
- cmd->se_tfo->get_fabric_name(),
+ cmd->se_tfo->fabric_name,
se_sess->se_node_acl->initiatorname,
- pr_res_nacl->se_tpg->se_tpg_tfo->get_fabric_name(),
+ pr_res_nacl->se_tpg->se_tpg_tfo->fabric_name,
pr_res_holder->pr_reg_nacl->initiatorname);
spin_unlock(&dev->dev_reservation_lock);
@@ -2414,10 +2414,10 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key)
pr_debug("SPC-3 PR [%s] Service Action: RESERVE created new"
" reservation holder TYPE: %s ALL_TG_PT: %d\n",
- cmd->se_tfo->get_fabric_name(), core_scsi3_pr_dump_type(type),
+ cmd->se_tfo->fabric_name, core_scsi3_pr_dump_type(type),
(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
pr_debug("SPC-3 PR [%s] RESERVE Node: %s%s\n",
- cmd->se_tfo->get_fabric_name(),
+ cmd->se_tfo->fabric_name,
se_sess->se_node_acl->initiatorname,
i_buf);
spin_unlock(&dev->dev_reservation_lock);
@@ -2506,12 +2506,12 @@ out:
if (!dev->dev_pr_res_holder) {
pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared"
" reservation holder TYPE: %s ALL_TG_PT: %d\n",
- tfo->get_fabric_name(), (explicit) ? "explicit" :
+ tfo->fabric_name, (explicit) ? "explicit" :
"implicit", core_scsi3_pr_dump_type(pr_res_type),
(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
}
pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n",
- tfo->get_fabric_name(), se_nacl->initiatorname,
+ tfo->fabric_name, se_nacl->initiatorname,
i_buf);
/*
* Clear TYPE and SCOPE for the next PROUT Service Action: RESERVE
@@ -2609,9 +2609,9 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope,
" reservation from [%s]: %s with different TYPE "
"and/or SCOPE while reservation already held by"
" [%s]: %s, returning RESERVATION_CONFLICT\n",
- cmd->se_tfo->get_fabric_name(),
+ cmd->se_tfo->fabric_name,
se_sess->se_node_acl->initiatorname,
- pr_res_nacl->se_tpg->se_tpg_tfo->get_fabric_name(),
+ pr_res_nacl->se_tpg->se_tpg_tfo->fabric_name,
pr_res_holder->pr_reg_nacl->initiatorname);
spin_unlock(&dev->dev_reservation_lock);
@@ -2752,7 +2752,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key)
spin_unlock(&pr_tmpl->registration_lock);
pr_debug("SPC-3 PR [%s] Service Action: CLEAR complete\n",
- cmd->se_tfo->get_fabric_name());
+ cmd->se_tfo->fabric_name);
core_scsi3_update_and_write_aptpl(cmd->se_dev, false);
@@ -2791,11 +2791,11 @@ static void __core_scsi3_complete_pro_preempt(
pr_debug("SPC-3 PR [%s] Service Action: PREEMPT%s created new"
" reservation holder TYPE: %s ALL_TG_PT: %d\n",
- tfo->get_fabric_name(), (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "",
+ tfo->fabric_name, (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "",
core_scsi3_pr_dump_type(type),
(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
pr_debug("SPC-3 PR [%s] PREEMPT%s from Node: %s%s\n",
- tfo->get_fabric_name(), (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "",
+ tfo->fabric_name, (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "",
nacl->initiatorname, i_buf);
/*
* For PREEMPT_AND_ABORT, add the preempting reservation's
@@ -3282,7 +3282,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
" proto_ident: 0x%02x does not match ident: 0x%02x"
" from fabric: %s\n", proto_ident,
dest_se_tpg->proto_id,
- dest_tf_ops->get_fabric_name());
+ dest_tf_ops->fabric_name);
ret = TCM_INVALID_PARAMETER_LIST;
goto out;
}
@@ -3299,7 +3299,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
buf = NULL;
pr_debug("SPC-3 PR [%s] Extracted initiator %s identifier: %s"
- " %s\n", dest_tf_ops->get_fabric_name(), (iport_ptr != NULL) ?
+ " %s\n", dest_tf_ops->fabric_name, (iport_ptr != NULL) ?
"port" : "device", initiator_str, (iport_ptr != NULL) ?
iport_ptr : "");
/*
@@ -3344,7 +3344,7 @@ after_iport_check:
if (!dest_node_acl) {
pr_err("Unable to locate %s dest_node_acl for"
- " TransportID%s\n", dest_tf_ops->get_fabric_name(),
+ " TransportID%s\n", dest_tf_ops->fabric_name,
initiator_str);
ret = TCM_INVALID_PARAMETER_LIST;
goto out;
@@ -3360,7 +3360,7 @@ after_iport_check:
}
pr_debug("SPC-3 PR REGISTER_AND_MOVE: Found %s dest_node_acl:"
- " %s from TransportID\n", dest_tf_ops->get_fabric_name(),
+ " %s from TransportID\n", dest_tf_ops->fabric_name,
dest_node_acl->initiatorname);
/*
@@ -3370,7 +3370,7 @@ after_iport_check:
dest_se_deve = core_get_se_deve_from_rtpi(dest_node_acl, rtpi);
if (!dest_se_deve) {
pr_err("Unable to locate %s dest_se_deve from RTPI:"
- " %hu\n", dest_tf_ops->get_fabric_name(), rtpi);
+ " %hu\n", dest_tf_ops->fabric_name, rtpi);
ret = TCM_INVALID_PARAMETER_LIST;
goto out;
}
@@ -3385,7 +3385,7 @@ after_iport_check:
pr_debug("SPC-3 PR REGISTER_AND_MOVE: Located %s node %s LUN"
" ACL for dest_se_deve->mapped_lun: %llu\n",
- dest_tf_ops->get_fabric_name(), dest_node_acl->initiatorname,
+ dest_tf_ops->fabric_name, dest_node_acl->initiatorname,
dest_se_deve->mapped_lun);
/*
@@ -3501,13 +3501,13 @@ after_iport_check:
pr_debug("SPC-3 PR [%s] Service Action: REGISTER_AND_MOVE"
" created new reservation holder TYPE: %s on object RTPI:"
- " %hu PRGeneration: 0x%08x\n", dest_tf_ops->get_fabric_name(),
+ " %hu PRGeneration: 0x%08x\n", dest_tf_ops->fabric_name,
core_scsi3_pr_dump_type(type), rtpi,
dest_pr_reg->pr_res_generation);
pr_debug("SPC-3 PR Successfully moved reservation from"
" %s Fabric Node: %s%s -> %s Fabric Node: %s %s\n",
- tf_ops->get_fabric_name(), pr_reg_nacl->initiatorname,
- i_buf, dest_tf_ops->get_fabric_name(),
+ tf_ops->fabric_name, pr_reg_nacl->initiatorname,
+ i_buf, dest_tf_ops->fabric_name,
dest_node_acl->initiatorname, (iport_ptr != NULL) ?
iport_ptr : "");
/*
@@ -4095,6 +4095,8 @@ target_check_reservation(struct se_cmd *cmd)
return 0;
if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
return 0;
+ if (!dev->dev_attrib.emulate_pr)
+ return 0;
if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)
return 0;
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 47d76c862014..b5388a106567 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -179,20 +179,20 @@ out_free:
static void
pscsi_set_inquiry_info(struct scsi_device *sdev, struct t10_wwn *wwn)
{
- unsigned char *buf;
-
if (sdev->inquiry_len < INQUIRY_LEN)
return;
-
- buf = sdev->inquiry;
- if (!buf)
- return;
/*
- * Use sdev->inquiry from drivers/scsi/scsi_scan.c:scsi_alloc_sdev()
+ * Use sdev->inquiry data from drivers/scsi/scsi_scan.c:scsi_add_lun()
*/
- memcpy(&wwn->vendor[0], &buf[8], sizeof(wwn->vendor));
- memcpy(&wwn->model[0], &buf[16], sizeof(wwn->model));
- memcpy(&wwn->revision[0], &buf[32], sizeof(wwn->revision));
+ BUILD_BUG_ON(sizeof(wwn->vendor) != INQUIRY_VENDOR_LEN + 1);
+ snprintf(wwn->vendor, sizeof(wwn->vendor),
+ "%." __stringify(INQUIRY_VENDOR_LEN) "s", sdev->vendor);
+ BUILD_BUG_ON(sizeof(wwn->model) != INQUIRY_MODEL_LEN + 1);
+ snprintf(wwn->model, sizeof(wwn->model),
+ "%." __stringify(INQUIRY_MODEL_LEN) "s", sdev->model);
+ BUILD_BUG_ON(sizeof(wwn->revision) != INQUIRY_REVISION_LEN + 1);
+ snprintf(wwn->revision, sizeof(wwn->revision),
+ "%." __stringify(INQUIRY_REVISION_LEN) "s", sdev->rev);
}
static int
@@ -811,7 +811,6 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b)
struct scsi_device *sd = pdv->pdv_sd;
unsigned char host_id[16];
ssize_t bl;
- int i;
if (phv->phv_mode == PHV_VIRTUAL_HOST_ID)
snprintf(host_id, 16, "%d", pdv->pdv_host_id);
@@ -824,29 +823,12 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b)
host_id);
if (sd) {
- bl += sprintf(b + bl, " ");
- bl += sprintf(b + bl, "Vendor: ");
- for (i = 0; i < 8; i++) {
- if (ISPRINT(sd->vendor[i])) /* printable character? */
- bl += sprintf(b + bl, "%c", sd->vendor[i]);
- else
- bl += sprintf(b + bl, " ");
- }
- bl += sprintf(b + bl, " Model: ");
- for (i = 0; i < 16; i++) {
- if (ISPRINT(sd->model[i])) /* printable character ? */
- bl += sprintf(b + bl, "%c", sd->model[i]);
- else
- bl += sprintf(b + bl, " ");
- }
- bl += sprintf(b + bl, " Rev: ");
- for (i = 0; i < 4; i++) {
- if (ISPRINT(sd->rev[i])) /* printable character ? */
- bl += sprintf(b + bl, "%c", sd->rev[i]);
- else
- bl += sprintf(b + bl, " ");
- }
- bl += sprintf(b + bl, "\n");
+ bl += sprintf(b + bl, " Vendor: %."
+ __stringify(INQUIRY_VENDOR_LEN) "s", sd->vendor);
+ bl += sprintf(b + bl, " Model: %."
+ __stringify(INQUIRY_MODEL_LEN) "s", sd->model);
+ bl += sprintf(b + bl, " Rev: %."
+ __stringify(INQUIRY_REVISION_LEN) "s\n", sd->rev);
}
return bl;
}
@@ -1094,7 +1076,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status)
break;
}
- __blk_put_request(req->q, req);
+ blk_put_request(req);
kfree(pt);
}
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index f459118bc11b..47094ae01c04 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -108,12 +108,19 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf)
buf[7] = 0x2; /* CmdQue=1 */
- memcpy(&buf[8], "LIO-ORG ", 8);
- memset(&buf[16], 0x20, 16);
+ /*
+ * ASCII data fields described as being left-aligned shall have any
+ * unused bytes at the end of the field (i.e., highest offset) and the
+ * unused bytes shall be filled with ASCII space characters (20h).
+ */
+ memset(&buf[8], 0x20,
+ INQUIRY_VENDOR_LEN + INQUIRY_MODEL_LEN + INQUIRY_REVISION_LEN);
+ memcpy(&buf[8], dev->t10_wwn.vendor,
+ strnlen(dev->t10_wwn.vendor, INQUIRY_VENDOR_LEN));
memcpy(&buf[16], dev->t10_wwn.model,
- min_t(size_t, strlen(dev->t10_wwn.model), 16));
+ strnlen(dev->t10_wwn.model, INQUIRY_MODEL_LEN));
memcpy(&buf[32], dev->t10_wwn.revision,
- min_t(size_t, strlen(dev->t10_wwn.revision), 4));
+ strnlen(dev->t10_wwn.revision, INQUIRY_REVISION_LEN));
buf[4] = 31; /* Set additional length to 31 */
return 0;
@@ -251,7 +258,10 @@ check_t10_vend_desc:
buf[off] = 0x2; /* ASCII */
buf[off+1] = 0x1; /* T10 Vendor ID */
buf[off+2] = 0x0;
- memcpy(&buf[off+4], "LIO-ORG", 8);
+ /* left align Vendor ID and pad with spaces */
+ memset(&buf[off+4], 0x20, INQUIRY_VENDOR_LEN);
+ memcpy(&buf[off+4], dev->t10_wwn.vendor,
+ strnlen(dev->t10_wwn.vendor, INQUIRY_VENDOR_LEN));
/* Extra Byte for NULL Terminator */
id_len++;
/* Identifier Length */
@@ -1281,6 +1291,14 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
struct se_device *dev = cmd->se_dev;
unsigned char *cdb = cmd->t_task_cdb;
+ if (!dev->dev_attrib.emulate_pr &&
+ ((cdb[0] == PERSISTENT_RESERVE_IN) ||
+ (cdb[0] == PERSISTENT_RESERVE_OUT) ||
+ (cdb[0] == RELEASE || cdb[0] == RELEASE_10) ||
+ (cdb[0] == RESERVE || cdb[0] == RESERVE_10))) {
+ return TCM_UNSUPPORTED_SCSI_OPCODE;
+ }
+
switch (cdb[0]) {
case MODE_SELECT:
*size = cdb[4];
diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c
index f0db91ebd735..8d9ceedfd455 100644
--- a/drivers/target/target_core_stat.c
+++ b/drivers/target/target_core_stat.c
@@ -246,43 +246,25 @@ static ssize_t target_stat_lu_lu_name_show(struct config_item *item, char *page)
static ssize_t target_stat_lu_vend_show(struct config_item *item, char *page)
{
struct se_device *dev = to_stat_lu_dev(item);
- int i;
- char str[sizeof(dev->t10_wwn.vendor)+1];
- /* scsiLuVendorId */
- for (i = 0; i < sizeof(dev->t10_wwn.vendor); i++)
- str[i] = ISPRINT(dev->t10_wwn.vendor[i]) ?
- dev->t10_wwn.vendor[i] : ' ';
- str[i] = '\0';
- return snprintf(page, PAGE_SIZE, "%s\n", str);
+ return snprintf(page, PAGE_SIZE, "%-" __stringify(INQUIRY_VENDOR_LEN)
+ "s\n", dev->t10_wwn.vendor);
}
static ssize_t target_stat_lu_prod_show(struct config_item *item, char *page)
{
struct se_device *dev = to_stat_lu_dev(item);
- int i;
- char str[sizeof(dev->t10_wwn.model)+1];
- /* scsiLuProductId */
- for (i = 0; i < sizeof(dev->t10_wwn.model); i++)
- str[i] = ISPRINT(dev->t10_wwn.model[i]) ?
- dev->t10_wwn.model[i] : ' ';
- str[i] = '\0';
- return snprintf(page, PAGE_SIZE, "%s\n", str);
+ return snprintf(page, PAGE_SIZE, "%-" __stringify(INQUIRY_MODEL_LEN)
+ "s\n", dev->t10_wwn.model);
}
static ssize_t target_stat_lu_rev_show(struct config_item *item, char *page)
{
struct se_device *dev = to_stat_lu_dev(item);
- int i;
- char str[sizeof(dev->t10_wwn.revision)+1];
- /* scsiLuRevisionId */
- for (i = 0; i < sizeof(dev->t10_wwn.revision); i++)
- str[i] = ISPRINT(dev->t10_wwn.revision[i]) ?
- dev->t10_wwn.revision[i] : ' ';
- str[i] = '\0';
- return snprintf(page, PAGE_SIZE, "%s\n", str);
+ return snprintf(page, PAGE_SIZE, "%-" __stringify(INQUIRY_REVISION_LEN)
+ "s\n", dev->t10_wwn.revision);
}
static ssize_t target_stat_lu_dev_type_show(struct config_item *item, char *page)
@@ -612,7 +594,7 @@ static ssize_t target_stat_tgt_port_name_show(struct config_item *item,
dev = rcu_dereference(lun->lun_se_dev);
if (dev)
ret = snprintf(page, PAGE_SIZE, "%sPort#%u\n",
- tpg->se_tpg_tfo->get_fabric_name(),
+ tpg->se_tpg_tfo->fabric_name,
lun->lun_rtpi);
rcu_read_unlock();
return ret;
@@ -767,7 +749,7 @@ static ssize_t target_stat_transport_device_show(struct config_item *item,
if (dev) {
/* scsiTransportType */
ret = snprintf(page, PAGE_SIZE, "scsiTransport%s\n",
- tpg->se_tpg_tfo->get_fabric_name());
+ tpg->se_tpg_tfo->fabric_name);
}
rcu_read_unlock();
return ret;
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 6d1179a7f043..ad0061e09d4c 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -163,18 +163,23 @@ void core_tmr_abort_task(
continue;
printk("ABORT_TASK: Found referenced %s task_tag: %llu\n",
- se_cmd->se_tfo->get_fabric_name(), ref_tag);
+ se_cmd->se_tfo->fabric_name, ref_tag);
- if (!__target_check_io_state(se_cmd, se_sess, 0))
+ if (!__target_check_io_state(se_cmd, se_sess,
+ dev->dev_attrib.emulate_tas))
continue;
spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
- cancel_work_sync(&se_cmd->work);
- transport_wait_for_tasks(se_cmd);
+ /*
+ * Ensure that this ABORT request is visible to the LU RESET
+ * code.
+ */
+ if (!tmr->tmr_dev)
+ WARN_ON_ONCE(transport_lookup_tmr_lun(tmr->task_cmd,
+ se_cmd->orig_fe_lun) < 0);
- if (!transport_cmd_finish_abort(se_cmd))
- target_put_sess_cmd(se_cmd);
+ target_put_cmd_and_wait(se_cmd);
printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
" ref_tag: %llu\n", ref_tag);
@@ -268,14 +273,28 @@ static void core_tmr_drain_tmr_list(
(preempt_and_abort_list) ? "Preempt" : "", tmr_p,
tmr_p->function, tmr_p->response, cmd->t_state);
- cancel_work_sync(&cmd->work);
- transport_wait_for_tasks(cmd);
-
- if (!transport_cmd_finish_abort(cmd))
- target_put_sess_cmd(cmd);
+ target_put_cmd_and_wait(cmd);
}
}
+/**
+ * core_tmr_drain_state_list() - abort SCSI commands associated with a device
+ *
+ * @dev: Device for which to abort outstanding SCSI commands.
+ * @prout_cmd: Pointer to the SCSI PREEMPT AND ABORT if this function is called
+ * to realize the PREEMPT AND ABORT functionality.
+ * @tmr_sess: Session through which the LUN RESET has been received.
+ * @tas: Task Aborted Status (TAS) bit from the SCSI control mode page.
+ * A quote from SPC-4, paragraph "7.5.10 Control mode page":
+ * "A task aborted status (TAS) bit set to zero specifies that
+ * aborted commands shall be terminated by the device server
+ * without any response to the application client. A TAS bit set
+ * to one specifies that commands aborted by the actions of an I_T
+ * nexus other than the I_T nexus on which the command was
+ * received shall be completed with TASK ABORTED status."
+ * @preempt_and_abort_list: For the PREEMPT AND ABORT functionality, a list
+ * with registrations that will be preempted.
+ */
static void core_tmr_drain_state_list(
struct se_device *dev,
struct se_cmd *prout_cmd,
@@ -350,18 +369,7 @@ static void core_tmr_drain_state_list(
cmd->tag, (preempt_and_abort_list) ? "preempt" : "",
cmd->pr_res_key);
- /*
- * If the command may be queued onto a workqueue cancel it now.
- *
- * This is equivalent to removal from the execute queue in the
- * loop above, but we do it down here given that
- * cancel_work_sync may block.
- */
- cancel_work_sync(&cmd->work);
- transport_wait_for_tasks(cmd);
-
- if (!transport_cmd_finish_abort(cmd))
- target_put_sess_cmd(cmd);
+ target_put_cmd_and_wait(cmd);
}
}
@@ -398,7 +406,7 @@ int core_tmr_lun_reset(
if (tmr_nacl && tmr_tpg) {
pr_debug("LUN_RESET: TMR caller fabric: %s"
" initiator port %s\n",
- tmr_tpg->se_tpg_tfo->get_fabric_name(),
+ tmr_tpg->se_tpg_tfo->fabric_name,
tmr_nacl->initiatorname);
}
}
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 02e8a5d86658..e2ace1059437 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -151,7 +151,7 @@ void core_tpg_add_node_to_devs(
pr_debug("TARGET_CORE[%s]->TPG[%u]_LUN[%llu] - Adding %s"
" access for LUN in Demo Mode\n",
- tpg->se_tpg_tfo->get_fabric_name(),
+ tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun,
lun_access_ro ? "READ-ONLY" : "READ-WRITE");
@@ -176,7 +176,7 @@ target_set_nacl_queue_depth(struct se_portal_group *tpg,
if (!acl->queue_depth) {
pr_warn("Queue depth for %s Initiator Node: %s is 0,"
- "defaulting to 1.\n", tpg->se_tpg_tfo->get_fabric_name(),
+ "defaulting to 1.\n", tpg->se_tpg_tfo->fabric_name,
acl->initiatorname);
acl->queue_depth = 1;
}
@@ -227,11 +227,11 @@ static void target_add_node_acl(struct se_node_acl *acl)
pr_debug("%s_TPG[%hu] - Added %s ACL with TCQ Depth: %d for %s"
" Initiator Node: %s\n",
- tpg->se_tpg_tfo->get_fabric_name(),
+ tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg),
acl->dynamic_node_acl ? "DYNAMIC" : "",
acl->queue_depth,
- tpg->se_tpg_tfo->get_fabric_name(),
+ tpg->se_tpg_tfo->fabric_name,
acl->initiatorname);
}
@@ -313,7 +313,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
if (acl->dynamic_node_acl) {
acl->dynamic_node_acl = 0;
pr_debug("%s_TPG[%u] - Replacing dynamic ACL"
- " for %s\n", tpg->se_tpg_tfo->get_fabric_name(),
+ " for %s\n", tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname);
mutex_unlock(&tpg->acl_node_mutex);
return acl;
@@ -321,7 +321,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
pr_err("ACL entry for %s Initiator"
" Node %s already exists for TPG %u, ignoring"
- " request.\n", tpg->se_tpg_tfo->get_fabric_name(),
+ " request.\n", tpg->se_tpg_tfo->fabric_name,
initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
mutex_unlock(&tpg->acl_node_mutex);
return ERR_PTR(-EEXIST);
@@ -380,9 +380,9 @@ void core_tpg_del_initiator_node_acl(struct se_node_acl *acl)
core_free_device_list_for_node(acl, tpg);
pr_debug("%s_TPG[%hu] - Deleted ACL with TCQ Depth: %d for %s"
- " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(),
+ " Initiator Node: %s\n", tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth,
- tpg->se_tpg_tfo->get_fabric_name(), acl->initiatorname);
+ tpg->se_tpg_tfo->fabric_name, acl->initiatorname);
kfree(acl);
}
@@ -418,7 +418,7 @@ int core_tpg_set_initiator_node_queue_depth(
pr_debug("Successfully changed queue depth to: %d for Initiator"
" Node: %s on %s Target Portal Group: %u\n", acl->queue_depth,
- acl->initiatorname, tpg->se_tpg_tfo->get_fabric_name(),
+ acl->initiatorname, tpg->se_tpg_tfo->fabric_name,
tpg->se_tpg_tfo->tpg_get_tag(tpg));
return 0;
@@ -512,7 +512,7 @@ int core_tpg_register(
spin_unlock_bh(&tpg_lock);
pr_debug("TARGET_CORE[%s]: Allocated portal_group for endpoint: %s, "
- "Proto: %d, Portal Tag: %u\n", se_tpg->se_tpg_tfo->get_fabric_name(),
+ "Proto: %d, Portal Tag: %u\n", se_tpg->se_tpg_tfo->fabric_name,
se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) ?
se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) : NULL,
se_tpg->proto_id, se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg));
@@ -528,7 +528,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
LIST_HEAD(node_list);
pr_debug("TARGET_CORE[%s]: Deallocating portal_group for endpoint: %s, "
- "Proto: %d, Portal Tag: %u\n", tfo->get_fabric_name(),
+ "Proto: %d, Portal Tag: %u\n", tfo->fabric_name,
tfo->tpg_get_wwn(se_tpg) ? tfo->tpg_get_wwn(se_tpg) : NULL,
se_tpg->proto_id, tfo->tpg_get_tag(se_tpg));
@@ -577,7 +577,6 @@ struct se_lun *core_tpg_alloc_lun(
}
lun->unpacked_lun = unpacked_lun;
atomic_set(&lun->lun_acl_count, 0);
- init_completion(&lun->lun_ref_comp);
init_completion(&lun->lun_shutdown_comp);
INIT_LIST_HEAD(&lun->lun_deve_list);
INIT_LIST_HEAD(&lun->lun_dev_link);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 2cfd61d62e97..ef9e75b359d4 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -224,19 +224,28 @@ void transport_subsystem_check_init(void)
sub_api_initialized = 1;
}
+static void target_release_sess_cmd_refcnt(struct percpu_ref *ref)
+{
+ struct se_session *sess = container_of(ref, typeof(*sess), cmd_count);
+
+ wake_up(&sess->cmd_list_wq);
+}
+
/**
* transport_init_session - initialize a session object
* @se_sess: Session object pointer.
*
* The caller must have zero-initialized @se_sess before calling this function.
*/
-void transport_init_session(struct se_session *se_sess)
+int transport_init_session(struct se_session *se_sess)
{
INIT_LIST_HEAD(&se_sess->sess_list);
INIT_LIST_HEAD(&se_sess->sess_acl_list);
INIT_LIST_HEAD(&se_sess->sess_cmd_list);
spin_lock_init(&se_sess->sess_cmd_lock);
init_waitqueue_head(&se_sess->cmd_list_wq);
+ return percpu_ref_init(&se_sess->cmd_count,
+ target_release_sess_cmd_refcnt, 0, GFP_KERNEL);
}
EXPORT_SYMBOL(transport_init_session);
@@ -247,6 +256,7 @@ EXPORT_SYMBOL(transport_init_session);
struct se_session *transport_alloc_session(enum target_prot_op sup_prot_ops)
{
struct se_session *se_sess;
+ int ret;
se_sess = kmem_cache_zalloc(se_sess_cache, GFP_KERNEL);
if (!se_sess) {
@@ -254,7 +264,11 @@ struct se_session *transport_alloc_session(enum target_prot_op sup_prot_ops)
" se_sess_cache\n");
return ERR_PTR(-ENOMEM);
}
- transport_init_session(se_sess);
+ ret = transport_init_session(se_sess);
+ if (ret < 0) {
+ kmem_cache_free(se_sess_cache, se_sess);
+ return ERR_PTR(ret);
+ }
se_sess->sup_prot_ops = sup_prot_ops;
return se_sess;
@@ -273,14 +287,11 @@ int transport_alloc_session_tags(struct se_session *se_sess,
{
int rc;
- se_sess->sess_cmd_map = kcalloc(tag_size, tag_num,
- GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
+ se_sess->sess_cmd_map = kvcalloc(tag_size, tag_num,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!se_sess->sess_cmd_map) {
- se_sess->sess_cmd_map = vzalloc(array_size(tag_size, tag_num));
- if (!se_sess->sess_cmd_map) {
- pr_err("Unable to allocate se_sess->sess_cmd_map\n");
- return -ENOMEM;
- }
+ pr_err("Unable to allocate se_sess->sess_cmd_map\n");
+ return -ENOMEM;
}
rc = sbitmap_queue_init_node(&se_sess->sess_tag_pool, tag_num, -1,
@@ -397,7 +408,7 @@ void __transport_register_session(
list_add_tail(&se_sess->sess_list, &se_tpg->tpg_sess_list);
pr_debug("TARGET_CORE[%s]: Registered fabric_sess_ptr: %p\n",
- se_tpg->se_tpg_tfo->get_fabric_name(), se_sess->fabric_sess_ptr);
+ se_tpg->se_tpg_tfo->fabric_name, se_sess->fabric_sess_ptr);
}
EXPORT_SYMBOL(__transport_register_session);
@@ -581,6 +592,7 @@ void transport_free_session(struct se_session *se_sess)
sbitmap_queue_free(&se_sess->sess_tag_pool);
kvfree(se_sess->sess_cmd_map);
}
+ percpu_ref_exit(&se_sess->cmd_count);
kmem_cache_free(se_sess_cache, se_sess);
}
EXPORT_SYMBOL(transport_free_session);
@@ -602,7 +614,7 @@ void transport_deregister_session(struct se_session *se_sess)
spin_unlock_irqrestore(&se_tpg->session_lock, flags);
pr_debug("TARGET_CORE[%s]: Deregistered fabric_sess\n",
- se_tpg->se_tpg_tfo->get_fabric_name());
+ se_tpg->se_tpg_tfo->fabric_name);
/*
* If last kref is dropping now for an explicit NodeACL, awake sleeping
* ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group
@@ -695,32 +707,6 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
percpu_ref_put(&lun->lun_ref);
}
-int transport_cmd_finish_abort(struct se_cmd *cmd)
-{
- bool send_tas = cmd->transport_state & CMD_T_TAS;
- bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
- int ret = 0;
-
- if (send_tas)
- transport_send_task_abort(cmd);
-
- if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
- transport_lun_remove_cmd(cmd);
- /*
- * Allow the fabric driver to unmap any resources before
- * releasing the descriptor via TFO->release_cmd()
- */
- if (!send_tas)
- cmd->se_tfo->aborted_task(cmd);
-
- if (transport_cmd_check_stop_to_fabric(cmd))
- return 1;
- if (!send_tas && ack_kref)
- ret = target_put_sess_cmd(cmd);
-
- return ret;
-}
-
static void target_complete_failure_work(struct work_struct *work)
{
struct se_cmd *cmd = container_of(work, struct se_cmd, work);
@@ -770,12 +756,88 @@ void transport_copy_sense_to_cmd(struct se_cmd *cmd, unsigned char *sense)
}
EXPORT_SYMBOL(transport_copy_sense_to_cmd);
+static void target_handle_abort(struct se_cmd *cmd)
+{
+ bool tas = cmd->transport_state & CMD_T_TAS;
+ bool ack_kref = cmd->se_cmd_flags & SCF_ACK_KREF;
+ int ret;
+
+ pr_debug("tag %#llx: send_abort_response = %d\n", cmd->tag, tas);
+
+ if (tas) {
+ if (!(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
+ cmd->scsi_status = SAM_STAT_TASK_ABORTED;
+ pr_debug("Setting SAM_STAT_TASK_ABORTED status for CDB: 0x%02x, ITT: 0x%08llx\n",
+ cmd->t_task_cdb[0], cmd->tag);
+ trace_target_cmd_complete(cmd);
+ ret = cmd->se_tfo->queue_status(cmd);
+ if (ret) {
+ transport_handle_queue_full(cmd, cmd->se_dev,
+ ret, false);
+ return;
+ }
+ } else {
+ cmd->se_tmr_req->response = TMR_FUNCTION_REJECTED;
+ cmd->se_tfo->queue_tm_rsp(cmd);
+ }
+ } else {
+ /*
+ * Allow the fabric driver to unmap any resources before
+ * releasing the descriptor via TFO->release_cmd().
+ */
+ cmd->se_tfo->aborted_task(cmd);
+ if (ack_kref)
+ WARN_ON_ONCE(target_put_sess_cmd(cmd) != 0);
+ /*
+ * To do: establish a unit attention condition on the I_T
+ * nexus associated with cmd. See also the paragraph "Aborting
+ * commands" in SAM.
+ */
+ }
+
+ WARN_ON_ONCE(kref_read(&cmd->cmd_kref) == 0);
+
+ transport_lun_remove_cmd(cmd);
+
+ transport_cmd_check_stop_to_fabric(cmd);
+}
+
+static void target_abort_work(struct work_struct *work)
+{
+ struct se_cmd *cmd = container_of(work, struct se_cmd, work);
+
+ target_handle_abort(cmd);
+}
+
+static bool target_cmd_interrupted(struct se_cmd *cmd)
+{
+ int post_ret;
+
+ if (cmd->transport_state & CMD_T_ABORTED) {
+ if (cmd->transport_complete_callback)
+ cmd->transport_complete_callback(cmd, false, &post_ret);
+ INIT_WORK(&cmd->work, target_abort_work);
+ queue_work(target_completion_wq, &cmd->work);
+ return true;
+ } else if (cmd->transport_state & CMD_T_STOP) {
+ if (cmd->transport_complete_callback)
+ cmd->transport_complete_callback(cmd, false, &post_ret);
+ complete_all(&cmd->t_transport_stop_comp);
+ return true;
+ }
+
+ return false;
+}
+
+/* May be called from interrupt context so must not sleep. */
void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
{
- struct se_device *dev = cmd->se_dev;
int success;
unsigned long flags;
+ if (target_cmd_interrupted(cmd))
+ return;
+
cmd->scsi_status = scsi_status;
spin_lock_irqsave(&cmd->t_state_lock, flags);
@@ -791,34 +853,12 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
break;
}
- /*
- * Check for case where an explicit ABORT_TASK has been received
- * and transport_wait_for_tasks() will be waiting for completion..
- */
- if (cmd->transport_state & CMD_T_ABORTED ||
- cmd->transport_state & CMD_T_STOP) {
- spin_unlock_irqrestore(&cmd->t_state_lock, flags);
- /*
- * If COMPARE_AND_WRITE was stopped by __transport_wait_for_tasks(),
- * release se_device->caw_sem obtained by sbc_compare_and_write()
- * since target_complete_ok_work() or target_complete_failure_work()
- * won't be called to invoke the normal CAW completion callbacks.
- */
- if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) {
- up(&dev->caw_sem);
- }
- complete_all(&cmd->t_transport_stop_comp);
- return;
- } else if (!success) {
- INIT_WORK(&cmd->work, target_complete_failure_work);
- } else {
- INIT_WORK(&cmd->work, target_complete_ok_work);
- }
-
cmd->t_state = TRANSPORT_COMPLETE;
cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE);
spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+ INIT_WORK(&cmd->work, success ? target_complete_ok_work :
+ target_complete_failure_work);
if (cmd->se_cmd_flags & SCF_USE_CPUID)
queue_work_on(cmd->cpuid, target_completion_wq, &cmd->work);
else
@@ -880,7 +920,7 @@ void target_qf_do_work(struct work_struct *work)
atomic_dec_mb(&dev->dev_qf_count);
pr_debug("Processing %s cmd: %p QUEUE_FULL in work queue"
- " context: %s\n", cmd->se_tfo->get_fabric_name(), cmd,
+ " context: %s\n", cmd->se_tfo->fabric_name, cmd,
(cmd->t_state == TRANSPORT_COMPLETE_QF_OK) ? "COMPLETE_OK" :
(cmd->t_state == TRANSPORT_COMPLETE_QF_WP) ? "WRITE_PENDING"
: "UNKNOWN");
@@ -1244,7 +1284,7 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size)
} else if (size != cmd->data_length) {
pr_warn_ratelimited("TARGET_CORE[%s]: Expected Transfer Length:"
" %u does not match SCSI CDB Length: %u for SAM Opcode:"
- " 0x%02x\n", cmd->se_tfo->get_fabric_name(),
+ " 0x%02x\n", cmd->se_tfo->fabric_name,
cmd->data_length, size, cmd->t_task_cdb[0]);
if (cmd->data_direction == DMA_TO_DEVICE) {
@@ -1316,7 +1356,8 @@ void transport_init_se_cmd(
INIT_LIST_HEAD(&cmd->se_cmd_list);
INIT_LIST_HEAD(&cmd->state_list);
init_completion(&cmd->t_transport_stop_comp);
- cmd->compl = NULL;
+ cmd->free_compl = NULL;
+ cmd->abrt_compl = NULL;
spin_lock_init(&cmd->t_state_lock);
INIT_WORK(&cmd->work, NULL);
kref_init(&cmd->cmd_kref);
@@ -1396,7 +1437,7 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb)
ret = dev->transport->parse_cdb(cmd);
if (ret == TCM_UNSUPPORTED_SCSI_OPCODE)
pr_warn_ratelimited("%s/%s: Unsupported SCSI Opcode 0x%02x, sending CHECK_CONDITION.\n",
- cmd->se_tfo->get_fabric_name(),
+ cmd->se_tfo->fabric_name,
cmd->se_sess->se_node_acl->initiatorname,
cmd->t_task_cdb[0]);
if (ret)
@@ -1792,8 +1833,11 @@ void transport_generic_request_failure(struct se_cmd *cmd,
if (cmd->transport_complete_callback)
cmd->transport_complete_callback(cmd, false, &post_ret);
- if (transport_check_aborted_status(cmd, 1))
+ if (cmd->transport_state & CMD_T_ABORTED) {
+ INIT_WORK(&cmd->work, target_abort_work);
+ queue_work(target_completion_wq, &cmd->work);
return;
+ }
switch (sense_reason) {
case TCM_NON_EXISTENT_LUN:
@@ -1999,8 +2043,6 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
return true;
}
-static int __transport_check_aborted_status(struct se_cmd *, int);
-
void target_execute_cmd(struct se_cmd *cmd)
{
/*
@@ -2009,20 +2051,10 @@ void target_execute_cmd(struct se_cmd *cmd)
*
* If the received CDB has already been aborted stop processing it here.
*/
- spin_lock_irq(&cmd->t_state_lock);
- if (__transport_check_aborted_status(cmd, 1)) {
- spin_unlock_irq(&cmd->t_state_lock);
- return;
- }
- if (cmd->transport_state & CMD_T_STOP) {
- pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n",
- __func__, __LINE__, cmd->tag);
-
- spin_unlock_irq(&cmd->t_state_lock);
- complete_all(&cmd->t_transport_stop_comp);
+ if (target_cmd_interrupted(cmd))
return;
- }
+ spin_lock_irq(&cmd->t_state_lock);
cmd->t_state = TRANSPORT_PROCESSING;
cmd->transport_state &= ~CMD_T_PRE_EXECUTE;
cmd->transport_state |= CMD_T_ACTIVE | CMD_T_SENT;
@@ -2571,7 +2603,8 @@ transport_generic_new_cmd(struct se_cmd *cmd)
* Determine if frontend context caller is requesting the stopping of
* this command for frontend exceptions.
*/
- if (cmd->transport_state & CMD_T_STOP) {
+ if (cmd->transport_state & CMD_T_STOP &&
+ !cmd->se_tfo->write_pending_must_be_called) {
pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n",
__func__, __LINE__, cmd->tag);
@@ -2635,13 +2668,29 @@ static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas)
}
/*
+ * Call target_put_sess_cmd() and wait until target_release_cmd_kref(@cmd) has
+ * finished.
+ */
+void target_put_cmd_and_wait(struct se_cmd *cmd)
+{
+ DECLARE_COMPLETION_ONSTACK(compl);
+
+ WARN_ON_ONCE(cmd->abrt_compl);
+ cmd->abrt_compl = &compl;
+ target_put_sess_cmd(cmd);
+ wait_for_completion(&compl);
+}
+
+/*
* This function is called by frontend drivers after processing of a command
* has finished.
*
- * The protocol for ensuring that either the regular flow or the TMF
- * code drops one reference is as follows:
+ * The protocol for ensuring that either the regular frontend command
+ * processing flow or target_handle_abort() code drops one reference is as
+ * follows:
* - Calling .queue_data_in(), .queue_status() or queue_tm_rsp() will cause
- * the frontend driver to drop one reference, synchronously or asynchronously.
+ * the frontend driver to call this function synchronously or asynchronously.
+ * That will cause one reference to be dropped.
* - During regular command processing the target core sets CMD_T_COMPLETE
* before invoking one of the .queue_*() functions.
* - The code that aborts commands skips commands and TMFs for which
@@ -2653,7 +2702,7 @@ static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas)
* - For aborted commands for which CMD_T_TAS has been set .queue_status() will
* be called and will drop a reference.
* - For aborted commands for which CMD_T_TAS has not been set .aborted_task()
- * will be called. transport_cmd_finish_abort() will drop the final reference.
+ * will be called. target_handle_abort() will drop the final reference.
*/
int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
{
@@ -2677,9 +2726,8 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
transport_lun_remove_cmd(cmd);
}
if (aborted)
- cmd->compl = &compl;
- if (!aborted || tas)
- ret = target_put_sess_cmd(cmd);
+ cmd->free_compl = &compl;
+ ret = target_put_sess_cmd(cmd);
if (aborted) {
pr_debug("Detected CMD_T_ABORTED for ITT: %llu\n", cmd->tag);
wait_for_completion(&compl);
@@ -2719,6 +2767,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref)
}
se_cmd->transport_state |= CMD_T_PRE_EXECUTE;
list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list);
+ percpu_ref_get(&se_sess->cmd_count);
out:
spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
@@ -2743,21 +2792,24 @@ static void target_release_cmd_kref(struct kref *kref)
{
struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
struct se_session *se_sess = se_cmd->se_sess;
- struct completion *compl = se_cmd->compl;
+ struct completion *free_compl = se_cmd->free_compl;
+ struct completion *abrt_compl = se_cmd->abrt_compl;
unsigned long flags;
if (se_sess) {
spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
list_del_init(&se_cmd->se_cmd_list);
- if (se_sess->sess_tearing_down && list_empty(&se_sess->sess_cmd_list))
- wake_up(&se_sess->cmd_list_wq);
spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
}
target_free_cmd_mem(se_cmd);
se_cmd->se_tfo->release_cmd(se_cmd);
- if (compl)
- complete(compl);
+ if (free_compl)
+ complete(free_compl);
+ if (abrt_compl)
+ complete(abrt_compl);
+
+ percpu_ref_put(&se_sess->cmd_count);
}
/**
@@ -2886,6 +2938,8 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess)
spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
se_sess->sess_tearing_down = 1;
spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+
+ percpu_ref_kill(&se_sess->cmd_count);
}
EXPORT_SYMBOL(target_sess_cmd_list_set_waiting);
@@ -2900,52 +2954,24 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
WARN_ON_ONCE(!se_sess->sess_tearing_down);
- spin_lock_irq(&se_sess->sess_cmd_lock);
do {
- ret = wait_event_lock_irq_timeout(
- se_sess->cmd_list_wq,
- list_empty(&se_sess->sess_cmd_list),
- se_sess->sess_cmd_lock, 180 * HZ);
+ ret = wait_event_timeout(se_sess->cmd_list_wq,
+ percpu_ref_is_zero(&se_sess->cmd_count),
+ 180 * HZ);
list_for_each_entry(cmd, &se_sess->sess_cmd_list, se_cmd_list)
target_show_cmd("session shutdown: still waiting for ",
cmd);
} while (ret <= 0);
- spin_unlock_irq(&se_sess->sess_cmd_lock);
}
EXPORT_SYMBOL(target_wait_for_sess_cmds);
-static void target_lun_confirm(struct percpu_ref *ref)
-{
- struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
-
- complete(&lun->lun_ref_comp);
-}
-
+/*
+ * Prevent that new percpu_ref_tryget_live() calls succeed and wait until
+ * all references to the LUN have been released. Called during LUN shutdown.
+ */
void transport_clear_lun_ref(struct se_lun *lun)
{
- /*
- * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop
- * the initial reference and schedule confirm kill to be
- * executed after one full RCU grace period has completed.
- */
- percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm);
- /*
- * The first completion waits for percpu_ref_switch_to_atomic_rcu()
- * to call target_lun_confirm after lun->lun_ref has been marked
- * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t
- * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref
- * fails for all new incoming I/O.
- */
- wait_for_completion(&lun->lun_ref_comp);
- /*
- * The second completion waits for percpu_ref_put_many() to
- * invoke ->release() after lun->lun_ref has switched to
- * atomic_t mode, and lun->lun_ref.count has reached zero.
- *
- * At this point all target-core lun->lun_ref references have
- * been dropped via transport_lun_remove_cmd(), and it's safe
- * to proceed with the remaining LUN shutdown.
- */
+ percpu_ref_kill(&lun->lun_ref);
wait_for_completion(&lun->lun_shutdown_comp);
}
@@ -3229,6 +3255,8 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd,
{
unsigned long flags;
+ WARN_ON_ONCE(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB);
+
spin_lock_irqsave(&cmd->t_state_lock, flags);
if (cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) {
spin_unlock_irqrestore(&cmd->t_state_lock, flags);
@@ -3245,114 +3273,15 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd,
}
EXPORT_SYMBOL(transport_send_check_condition_and_sense);
-static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status)
- __releases(&cmd->t_state_lock)
- __acquires(&cmd->t_state_lock)
-{
- int ret;
-
- assert_spin_locked(&cmd->t_state_lock);
- WARN_ON_ONCE(!irqs_disabled());
-
- if (!(cmd->transport_state & CMD_T_ABORTED))
- return 0;
- /*
- * If cmd has been aborted but either no status is to be sent or it has
- * already been sent, just return
- */
- if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) {
- if (send_status)
- cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
- return 1;
- }
-
- pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB:"
- " 0x%02x ITT: 0x%08llx\n", cmd->t_task_cdb[0], cmd->tag);
-
- cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS;
- cmd->scsi_status = SAM_STAT_TASK_ABORTED;
- trace_target_cmd_complete(cmd);
-
- spin_unlock_irq(&cmd->t_state_lock);
- ret = cmd->se_tfo->queue_status(cmd);
- if (ret)
- transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
- spin_lock_irq(&cmd->t_state_lock);
-
- return 1;
-}
-
-int transport_check_aborted_status(struct se_cmd *cmd, int send_status)
-{
- int ret;
-
- spin_lock_irq(&cmd->t_state_lock);
- ret = __transport_check_aborted_status(cmd, send_status);
- spin_unlock_irq(&cmd->t_state_lock);
-
- return ret;
-}
-EXPORT_SYMBOL(transport_check_aborted_status);
-
-void transport_send_task_abort(struct se_cmd *cmd)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&cmd->t_state_lock, flags);
- if (cmd->se_cmd_flags & (SCF_SENT_CHECK_CONDITION)) {
- spin_unlock_irqrestore(&cmd->t_state_lock, flags);
- return;
- }
- spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-
- /*
- * If there are still expected incoming fabric WRITEs, we wait
- * until until they have completed before sending a TASK_ABORTED
- * response. This response with TASK_ABORTED status will be
- * queued back to fabric module by transport_check_aborted_status().
- */
- if (cmd->data_direction == DMA_TO_DEVICE) {
- if (cmd->se_tfo->write_pending_status(cmd) != 0) {
- spin_lock_irqsave(&cmd->t_state_lock, flags);
- if (cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS) {
- spin_unlock_irqrestore(&cmd->t_state_lock, flags);
- goto send_abort;
- }
- cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
- spin_unlock_irqrestore(&cmd->t_state_lock, flags);
- return;
- }
- }
-send_abort:
- cmd->scsi_status = SAM_STAT_TASK_ABORTED;
-
- transport_lun_remove_cmd(cmd);
-
- pr_debug("Setting SAM_STAT_TASK_ABORTED status for CDB: 0x%02x, ITT: 0x%08llx\n",
- cmd->t_task_cdb[0], cmd->tag);
-
- trace_target_cmd_complete(cmd);
- ret = cmd->se_tfo->queue_status(cmd);
- if (ret)
- transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
-}
-
static void target_tmr_work(struct work_struct *work)
{
struct se_cmd *cmd = container_of(work, struct se_cmd, work);
struct se_device *dev = cmd->se_dev;
struct se_tmr_req *tmr = cmd->se_tmr_req;
- unsigned long flags;
int ret;
- spin_lock_irqsave(&cmd->t_state_lock, flags);
- if (cmd->transport_state & CMD_T_ABORTED) {
- tmr->response = TMR_FUNCTION_REJECTED;
- spin_unlock_irqrestore(&cmd->t_state_lock, flags);
- goto check_stop;
- }
- spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+ if (cmd->transport_state & CMD_T_ABORTED)
+ goto aborted;
switch (tmr->function) {
case TMR_ABORT_TASK:
@@ -3386,18 +3315,16 @@ static void target_tmr_work(struct work_struct *work)
break;
}
- spin_lock_irqsave(&cmd->t_state_lock, flags);
- if (cmd->transport_state & CMD_T_ABORTED) {
- spin_unlock_irqrestore(&cmd->t_state_lock, flags);
- goto check_stop;
- }
- spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+ if (cmd->transport_state & CMD_T_ABORTED)
+ goto aborted;
cmd->se_tfo->queue_tm_rsp(cmd);
-check_stop:
- transport_lun_remove_cmd(cmd);
transport_cmd_check_stop_to_fabric(cmd);
+ return;
+
+aborted:
+ target_handle_abort(cmd);
}
int transport_generic_handle_tmr(
@@ -3416,16 +3343,15 @@ int transport_generic_handle_tmr(
spin_unlock_irqrestore(&cmd->t_state_lock, flags);
if (aborted) {
- pr_warn_ratelimited("handle_tmr caught CMD_T_ABORTED TMR %d"
- "ref_tag: %llu tag: %llu\n", cmd->se_tmr_req->function,
- cmd->se_tmr_req->ref_task_tag, cmd->tag);
- transport_lun_remove_cmd(cmd);
- transport_cmd_check_stop_to_fabric(cmd);
+ pr_warn_ratelimited("handle_tmr caught CMD_T_ABORTED TMR %d ref_tag: %llu tag: %llu\n",
+ cmd->se_tmr_req->function,
+ cmd->se_tmr_req->ref_task_tag, cmd->tag);
+ target_handle_abort(cmd);
return 0;
}
INIT_WORK(&cmd->work, target_tmr_work);
- queue_work(cmd->se_dev->tmr_wq, &cmd->work);
+ schedule_work(&cmd->work);
return 0;
}
EXPORT_SYMBOL(transport_generic_handle_tmr);
diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c
index c8ac242ce888..ced1c10364eb 100644
--- a/drivers/target/target_core_ua.c
+++ b/drivers/target/target_core_ua.c
@@ -266,7 +266,7 @@ bool core_scsi3_ua_for_check_condition(struct se_cmd *cmd, u8 *key, u8 *asc,
pr_debug("[%s]: %s UNIT ATTENTION condition with"
" INTLCK_CTRL: %d, mapped LUN: %llu, got CDB: 0x%02x"
" reported ASC: 0x%02x, ASCQ: 0x%02x\n",
- nacl->se_tpg->se_tpg_tfo->get_fabric_name(),
+ nacl->se_tpg->se_tpg_tfo->fabric_name,
(dev->dev_attrib.emulate_ua_intlck_ctrl != 0) ? "Reporting" :
"Releasing", dev->dev_attrib.emulate_ua_intlck_ctrl,
cmd->orig_fe_lun, cmd->t_task_cdb[0], *asc, *ascq);
@@ -327,7 +327,7 @@ int core_scsi3_ua_clear_for_request_sense(
pr_debug("[%s]: Released UNIT ATTENTION condition, mapped"
" LUN: %llu, got REQUEST_SENSE reported ASC: 0x%02x,"
- " ASCQ: 0x%02x\n", nacl->se_tpg->se_tpg_tfo->get_fabric_name(),
+ " ASCQ: 0x%02x\n", nacl->se_tpg->se_tpg_tfo->fabric_name,
cmd->orig_fe_lun, *asc, *ascq);
return (head) ? -EPERM : 0;
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 9cd404acdb82..1e6d24943565 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -958,7 +958,7 @@ static int add_to_cmdr_queue(struct tcmu_cmd *tcmu_cmd)
* 0 success
* 1 internally queued to wait for ring memory to free.
*/
-static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err)
+static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err)
{
struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 70adcfdca8d1..c2e1fc927fdf 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -399,11 +399,6 @@ struct se_portal_group xcopy_pt_tpg;
static struct se_session xcopy_pt_sess;
static struct se_node_acl xcopy_pt_nacl;
-static char *xcopy_pt_get_fabric_name(void)
-{
- return "xcopy-pt";
-}
-
static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd)
{
return 0;
@@ -463,7 +458,7 @@ static int xcopy_pt_queue_status(struct se_cmd *se_cmd)
}
static const struct target_core_fabric_ops xcopy_pt_tfo = {
- .get_fabric_name = xcopy_pt_get_fabric_name,
+ .fabric_name = "xcopy-pt",
.get_cmd_state = xcopy_pt_get_cmd_state,
.release_cmd = xcopy_pt_release_cmd,
.check_stop_free = xcopy_pt_check_stop_free,
@@ -479,6 +474,8 @@ static const struct target_core_fabric_ops xcopy_pt_tfo = {
int target_xcopy_setup_pt(void)
{
+ int ret;
+
xcopy_wq = alloc_workqueue("xcopy_wq", WQ_MEM_RECLAIM, 0);
if (!xcopy_wq) {
pr_err("Unable to allocate xcopy_wq\n");
@@ -496,7 +493,9 @@ int target_xcopy_setup_pt(void)
INIT_LIST_HEAD(&xcopy_pt_nacl.acl_list);
INIT_LIST_HEAD(&xcopy_pt_nacl.acl_sess_list);
memset(&xcopy_pt_sess, 0, sizeof(struct se_session));
- transport_init_session(&xcopy_pt_sess);
+ ret = transport_init_session(&xcopy_pt_sess);
+ if (ret < 0)
+ return ret;
xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg;
xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess;
diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c
index e55c4d537592..1ce49518d440 100644
--- a/drivers/target/tcm_fc/tfc_conf.c
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -392,11 +392,6 @@ static inline struct ft_tpg *ft_tpg(struct se_portal_group *se_tpg)
return container_of(se_tpg, struct ft_tpg, se_tpg);
}
-static char *ft_get_fabric_name(void)
-{
- return "fc";
-}
-
static char *ft_get_fabric_wwn(struct se_portal_group *se_tpg)
{
return ft_tpg(se_tpg)->lport_wwn->name;
@@ -427,9 +422,8 @@ static u32 ft_tpg_get_inst_index(struct se_portal_group *se_tpg)
static const struct target_core_fabric_ops ft_fabric_ops = {
.module = THIS_MODULE,
- .name = "fc",
+ .fabric_name = "fc",
.node_acl_size = sizeof(struct ft_node_acl),
- .get_fabric_name = ft_get_fabric_name,
.tpg_get_wwn = ft_get_fabric_wwn,
.tpg_get_tag = ft_get_tag,
.tpg_check_demo_mode = ft_check_false,
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index 106988a6661a..34f5982cab78 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1256,11 +1256,6 @@ static int usbg_check_false(struct se_portal_group *se_tpg)
return 0;
}
-static char *usbg_get_fabric_name(void)
-{
- return "usb_gadget";
-}
-
static char *usbg_get_fabric_wwn(struct se_portal_group *se_tpg)
{
struct usbg_tpg *tpg = container_of(se_tpg,
@@ -1718,8 +1713,7 @@ static int usbg_check_stop_free(struct se_cmd *se_cmd)
static const struct target_core_fabric_ops usbg_ops = {
.module = THIS_MODULE,
- .name = "usb_gadget",
- .get_fabric_name = usbg_get_fabric_name,
+ .fabric_name = "usb_gadget",
.tpg_get_wwn = usbg_get_fabric_wwn,
.tpg_get_tag = usbg_get_tag,
.tpg_check_demo_mode = usbg_check_true,
diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c
index 9f2f563c82ed..607be1f4fe27 100644
--- a/drivers/usb/image/microtek.c
+++ b/drivers/usb/image/microtek.c
@@ -632,7 +632,6 @@ static struct scsi_host_template mts_scsi_host_template = {
.sg_tablesize = SG_ALL,
.can_queue = 1,
.this_id = -1,
- .use_clustering = 1,
.emulated = 1,
.slave_alloc = mts_slave_alloc,
.slave_configure = mts_slave_configure,
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index e227bb5b794f..fde2e71a6ade 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -639,13 +639,6 @@ static const struct scsi_host_template usb_stor_host_template = {
*/
.max_sectors = 240,
- /*
- * merge commands... this seems to help performance, but
- * periodically someone should test to see which setting is more
- * optimal.
- */
- .use_clustering = 1,
-
/* emulated HBA */
.emulated = 1,
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 1f7b401c4d04..36742e8e7edc 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -879,6 +879,7 @@ static struct scsi_host_template uas_host_template = {
.this_id = -1,
.sg_tablesize = SG_NONE,
.skip_settle_delay = 1,
+ .dma_boundary = PAGE_SIZE - 1,
};
#define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 50dffe83714c..a08472ae5b1b 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -285,11 +285,6 @@ static int vhost_scsi_check_false(struct se_portal_group *se_tpg)
return 0;
}
-static char *vhost_scsi_get_fabric_name(void)
-{
- return "vhost";
-}
-
static char *vhost_scsi_get_fabric_wwn(struct se_portal_group *se_tpg)
{
struct vhost_scsi_tpg *tpg = container_of(se_tpg,
@@ -2289,8 +2284,7 @@ static struct configfs_attribute *vhost_scsi_wwn_attrs[] = {
static const struct target_core_fabric_ops vhost_scsi_ops = {
.module = THIS_MODULE,
- .name = "vhost",
- .get_fabric_name = vhost_scsi_get_fabric_name,
+ .fabric_name = "vhost",
.tpg_get_wwn = vhost_scsi_get_fabric_wwn,
.tpg_get_tag = vhost_scsi_get_tpgt,
.tpg_check_demo_mode = vhost_scsi_check_true,
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 2a7f545bd0b5..989cf872b98c 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -53,8 +53,6 @@
* API.
*/
-#define XEN_SWIOTLB_ERROR_CODE (~(dma_addr_t)0x0)
-
static char *xen_io_tlb_start, *xen_io_tlb_end;
static unsigned long xen_io_tlb_nslabs;
/*
@@ -405,8 +403,8 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir,
attrs);
- if (map == SWIOTLB_MAP_ERROR)
- return XEN_SWIOTLB_ERROR_CODE;
+ if (map == DMA_MAPPING_ERROR)
+ return DMA_MAPPING_ERROR;
dev_addr = xen_phys_to_bus(map);
xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT),
@@ -421,7 +419,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
- return XEN_SWIOTLB_ERROR_CODE;
+ return DMA_MAPPING_ERROR;
}
/*
@@ -443,21 +441,8 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs);
/* NOTE: We use dev_addr here, not paddr! */
- if (is_xen_swiotlb_buffer(dev_addr)) {
+ if (is_xen_swiotlb_buffer(dev_addr))
swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
- return;
- }
-
- if (dir != DMA_FROM_DEVICE)
- return;
-
- /*
- * phys_to_virt doesn't work with hihgmem page but we could
- * call dma_mark_clean() with hihgmem page here. However, we
- * are fine since dma_mark_clean() is null on POWERPC. We can
- * make dma_mark_clean() take a physical address if necessary.
- */
- dma_mark_clean(phys_to_virt(paddr), size);
}
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
@@ -495,11 +480,6 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
if (target == SYNC_FOR_DEVICE)
xen_dma_sync_single_for_device(hwdev, dev_addr, size, dir);
-
- if (dir != DMA_FROM_DEVICE)
- return;
-
- dma_mark_clean(phys_to_virt(paddr), size);
}
void
@@ -574,7 +554,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
sg_phys(sg),
sg->length,
dir, attrs);
- if (map == SWIOTLB_MAP_ERROR) {
+ if (map == DMA_MAPPING_ERROR) {
dev_warn(hwdev, "swiotlb buffer is full\n");
/* Don't panic here, we expect map_sg users
to do proper error handling. */
@@ -700,11 +680,6 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs);
}
-static int xen_swiotlb_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == XEN_SWIOTLB_ERROR_CODE;
-}
-
const struct dma_map_ops xen_swiotlb_dma_ops = {
.alloc = xen_swiotlb_alloc_coherent,
.free = xen_swiotlb_free_coherent,
@@ -719,5 +694,4 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
.dma_supported = xen_swiotlb_dma_supported,
.mmap = xen_swiotlb_dma_mmap,
.get_sgtable = xen_swiotlb_get_sgtable,
- .mapping_error = xen_swiotlb_mapping_error,
};
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 14a3d4cbc2a7..c9e23a126218 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -1712,11 +1712,6 @@ static struct configfs_attribute *scsiback_wwn_attrs[] = {
NULL,
};
-static char *scsiback_get_fabric_name(void)
-{
- return "xen-pvscsi";
-}
-
static int scsiback_port_link(struct se_portal_group *se_tpg,
struct se_lun *lun)
{
@@ -1810,8 +1805,7 @@ static int scsiback_check_false(struct se_portal_group *se_tpg)
static const struct target_core_fabric_ops scsiback_ops = {
.module = THIS_MODULE,
- .name = "xen-pvscsi",
- .get_fabric_name = scsiback_get_fabric_name,
+ .fabric_name = "xen-pvscsi",
.tpg_get_wwn = scsiback_get_fabric_wwn,
.tpg_get_tag = scsiback_get_tag,
.tpg_check_demo_mode = scsiback_check_true,
diff --git a/fs/aio.c b/fs/aio.c
index bc401d5bcdc2..b906ff70c90f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -70,6 +70,12 @@ struct aio_ring {
struct io_event io_events[0];
}; /* 128 bytes + ring size */
+/*
+ * Plugging is meant to work with larger batches of IOs. If we don't
+ * have more than the below, then don't bother setting up a plug.
+ */
+#define AIO_PLUG_THRESHOLD 2
+
#define AIO_RING_PAGES 8
struct kioctx_table {
@@ -902,7 +908,7 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
local_irq_restore(flags);
}
-static bool get_reqs_available(struct kioctx *ctx)
+static bool __get_reqs_available(struct kioctx *ctx)
{
struct kioctx_cpu *kcpu;
bool ret = false;
@@ -994,6 +1000,14 @@ static void user_refill_reqs_available(struct kioctx *ctx)
spin_unlock_irq(&ctx->completion_lock);
}
+static bool get_reqs_available(struct kioctx *ctx)
+{
+ if (__get_reqs_available(ctx))
+ return true;
+ user_refill_reqs_available(ctx);
+ return __get_reqs_available(ctx);
+}
+
/* aio_get_req
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
@@ -1002,24 +1016,16 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
struct aio_kiocb *req;
- if (!get_reqs_available(ctx)) {
- user_refill_reqs_available(ctx);
- if (!get_reqs_available(ctx))
- return NULL;
- }
-
- req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
+ req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
if (unlikely(!req))
- goto out_put;
+ return NULL;
percpu_ref_get(&ctx->reqs);
+ req->ki_ctx = ctx;
INIT_LIST_HEAD(&req->ki_list);
refcount_set(&req->ki_refcnt, 0);
- req->ki_ctx = ctx;
+ req->ki_eventfd = NULL;
return req;
-out_put:
- put_reqs_available(ctx, 1);
- return NULL;
}
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
@@ -1059,6 +1065,15 @@ static inline void iocb_put(struct aio_kiocb *iocb)
}
}
+static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
+ long res, long res2)
+{
+ ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
+ ev->data = iocb->ki_user_data;
+ ev->res = res;
+ ev->res2 = res2;
+}
+
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
@@ -1086,10 +1101,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
- event->data = iocb->ki_user_data;
- event->res = res;
- event->res2 = res2;
+ aio_fill_event(event, iocb, res, res2);
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
@@ -1416,7 +1428,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
aio_complete(iocb, res, res2);
}
-static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
{
int ret;
@@ -1438,21 +1450,26 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
ret = ioprio_check_cap(iocb->aio_reqprio);
if (ret) {
pr_debug("aio ioprio check cap error: %d\n", ret);
- fput(req->ki_filp);
- return ret;
+ goto out_fput;
}
req->ki_ioprio = iocb->aio_reqprio;
} else
- req->ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+ req->ki_ioprio = get_current_ioprio();
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
- fput(req->ki_filp);
+ goto out_fput;
+
+ req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
+ return 0;
+
+out_fput:
+ fput(req->ki_filp);
return ret;
}
-static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
+static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
bool vectored, bool compat, struct iov_iter *iter)
{
void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
@@ -1487,12 +1504,12 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
ret = -EINTR;
/*FALLTHRU*/
default:
- aio_complete_rw(req, ret, 0);
+ req->ki_complete(req, ret, 0);
}
}
-static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
- bool compat)
+static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
+ bool vectored, bool compat)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1524,8 +1541,8 @@ out_fput:
return ret;
}
-static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
- bool compat)
+static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
+ bool vectored, bool compat)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1580,7 +1597,8 @@ static void aio_fsync_work(struct work_struct *work)
aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
}
-static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
+ bool datasync)
{
if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
iocb->aio_rw_flags))
@@ -1708,7 +1726,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
add_wait_queue(head, &pt->iocb->poll.wait);
}
-static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
{
struct kioctx *ctx = aiocb->ki_ctx;
struct poll_iocb *req = &aiocb->poll;
@@ -1728,6 +1746,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
if (unlikely(!req->file))
return -EBADF;
+ req->head = NULL;
+ req->woken = false;
+ req->cancelled = false;
+
apt.pt._qproc = aio_poll_queue_proc;
apt.pt._key = req->events;
apt.iocb = aiocb;
@@ -1776,44 +1798,44 @@ out:
return 0;
}
-static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- bool compat)
+static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
+ struct iocb __user *user_iocb, bool compat)
{
struct aio_kiocb *req;
- struct iocb iocb;
ssize_t ret;
- if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
- return -EFAULT;
-
/* enforce forwards compatibility on users */
- if (unlikely(iocb.aio_reserved2)) {
+ if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
/* prevent overflows */
if (unlikely(
- (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
- (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
- ((ssize_t)iocb.aio_nbytes < 0)
+ (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
+ (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
+ ((ssize_t)iocb->aio_nbytes < 0)
)) {
pr_debug("EINVAL: overflow check\n");
return -EINVAL;
}
+ if (!get_reqs_available(ctx))
+ return -EAGAIN;
+
+ ret = -EAGAIN;
req = aio_get_req(ctx);
if (unlikely(!req))
- return -EAGAIN;
+ goto out_put_reqs_available;
- if (iocb.aio_flags & IOCB_FLAG_RESFD) {
+ if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
* instance of the file* now. The file descriptor must be
* an eventfd() fd, and will be signaled for each completed
* event using the eventfd_signal() function.
*/
- req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
+ req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
req->ki_eventfd = NULL;
@@ -1828,32 +1850,32 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
}
req->ki_user_iocb = user_iocb;
- req->ki_user_data = iocb.aio_data;
+ req->ki_user_data = iocb->aio_data;
- switch (iocb.aio_lio_opcode) {
+ switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
- ret = aio_read(&req->rw, &iocb, false, compat);
+ ret = aio_read(&req->rw, iocb, false, compat);
break;
case IOCB_CMD_PWRITE:
- ret = aio_write(&req->rw, &iocb, false, compat);
+ ret = aio_write(&req->rw, iocb, false, compat);
break;
case IOCB_CMD_PREADV:
- ret = aio_read(&req->rw, &iocb, true, compat);
+ ret = aio_read(&req->rw, iocb, true, compat);
break;
case IOCB_CMD_PWRITEV:
- ret = aio_write(&req->rw, &iocb, true, compat);
+ ret = aio_write(&req->rw, iocb, true, compat);
break;
case IOCB_CMD_FSYNC:
- ret = aio_fsync(&req->fsync, &iocb, false);
+ ret = aio_fsync(&req->fsync, iocb, false);
break;
case IOCB_CMD_FDSYNC:
- ret = aio_fsync(&req->fsync, &iocb, true);
+ ret = aio_fsync(&req->fsync, iocb, true);
break;
case IOCB_CMD_POLL:
- ret = aio_poll(req, &iocb);
+ ret = aio_poll(req, iocb);
break;
default:
- pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
+ pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
ret = -EINVAL;
break;
}
@@ -1867,14 +1889,25 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
return 0;
out_put_req:
- put_reqs_available(ctx, 1);
- percpu_ref_put(&ctx->reqs);
if (req->ki_eventfd)
eventfd_ctx_put(req->ki_eventfd);
- kmem_cache_free(kiocb_cachep, req);
+ iocb_put(req);
+out_put_reqs_available:
+ put_reqs_available(ctx, 1);
return ret;
}
+static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+ bool compat)
+{
+ struct iocb iocb;
+
+ if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+ return -EFAULT;
+
+ return __io_submit_one(ctx, &iocb, user_iocb, compat);
+}
+
/* sys_io_submit:
* Queue the nr iocbs pointed to by iocbpp for processing. Returns
* the number of iocbs queued. May return -EINVAL if the aio_context
@@ -1907,7 +1940,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
if (nr > ctx->nr_events)
nr = ctx->nr_events;
- blk_start_plug(&plug);
+ if (nr > AIO_PLUG_THRESHOLD)
+ blk_start_plug(&plug);
for (i = 0; i < nr; i++) {
struct iocb __user *user_iocb;
@@ -1920,7 +1954,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
if (ret)
break;
}
- blk_finish_plug(&plug);
+ if (nr > AIO_PLUG_THRESHOLD)
+ blk_finish_plug(&plug);
percpu_ref_put(&ctx->users);
return i ? i : ret;
@@ -1947,7 +1982,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
if (nr > ctx->nr_events)
nr = ctx->nr_events;
- blk_start_plug(&plug);
+ if (nr > AIO_PLUG_THRESHOLD)
+ blk_start_plug(&plug);
for (i = 0; i < nr; i++) {
compat_uptr_t user_iocb;
@@ -1960,7 +1996,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
if (ret)
break;
}
- blk_finish_plug(&plug);
+ if (nr > AIO_PLUG_THRESHOLD)
+ blk_finish_plug(&plug);
percpu_ref_put(&ctx->users);
return i ? i : ret;
@@ -2065,11 +2102,13 @@ static long do_io_getevents(aio_context_t ctx_id,
* specifies an infinite timeout. Note that the timeout pointed to by
* timeout is relative. Will fail with -ENOSYS if not implemented.
*/
+#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+
SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
long, min_nr,
long, nr,
struct io_event __user *, events,
- struct timespec __user *, timeout)
+ struct __kernel_timespec __user *, timeout)
{
struct timespec64 ts;
int ret;
@@ -2083,6 +2122,8 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
return ret;
}
+#endif
+
struct __aio_sigset {
const sigset_t __user *sigmask;
size_t sigsetsize;
@@ -2093,7 +2134,7 @@ SYSCALL_DEFINE6(io_pgetevents,
long, min_nr,
long, nr,
struct io_event __user *, events,
- struct timespec __user *, timeout,
+ struct __kernel_timespec __user *, timeout,
const struct __aio_sigset __user *, usig)
{
struct __aio_sigset ksig = { NULL, };
@@ -2107,33 +2148,56 @@ SYSCALL_DEFINE6(io_pgetevents,
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- if (ksig.sigmask) {
- if (ksig.sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
- return -EFAULT;
- sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ if (ret)
+ return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
- if (signal_pending(current)) {
- if (ksig.sigmask) {
- current->saved_sigmask = sigsaved;
- set_restore_sigmask();
- }
+ restore_user_sigmask(ksig.sigmask, &sigsaved);
+ if (signal_pending(current) && !ret)
+ ret = -ERESTARTNOHAND;
- if (!ret)
- ret = -ERESTARTNOHAND;
- } else {
- if (ksig.sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- }
+ return ret;
+}
+
+#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
+
+SYSCALL_DEFINE6(io_pgetevents_time32,
+ aio_context_t, ctx_id,
+ long, min_nr,
+ long, nr,
+ struct io_event __user *, events,
+ struct old_timespec32 __user *, timeout,
+ const struct __aio_sigset __user *, usig)
+{
+ struct __aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 ts;
+ int ret;
+
+ if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
+
+
+ ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ if (ret)
+ return ret;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ restore_user_sigmask(ksig.sigmask, &sigsaved);
+ if (signal_pending(current) && !ret)
+ ret = -ERESTARTNOHAND;
return ret;
}
-#ifdef CONFIG_COMPAT
+#endif
+
+#if defined(CONFIG_COMPAT_32BIT_TIME)
+
COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
compat_long_t, min_nr,
compat_long_t, nr,
@@ -2152,12 +2216,17 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
return ret;
}
+#endif
+
+#ifdef CONFIG_COMPAT
struct __compat_aio_sigset {
compat_sigset_t __user *sigmask;
compat_size_t sigsetsize;
};
+#if defined(CONFIG_COMPAT_32BIT_TIME)
+
COMPAT_SYSCALL_DEFINE6(io_pgetevents,
compat_aio_context_t, ctx_id,
compat_long_t, min_nr,
@@ -2177,27 +2246,47 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- if (ksig.sigmask) {
- if (ksig.sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (get_compat_sigset(&ksigmask, ksig.sigmask))
- return -EFAULT;
- sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ if (ret)
+ return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- if (signal_pending(current)) {
- if (ksig.sigmask) {
- current->saved_sigmask = sigsaved;
- set_restore_sigmask();
- }
- if (!ret)
- ret = -ERESTARTNOHAND;
- } else {
- if (ksig.sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- }
+ restore_user_sigmask(ksig.sigmask, &sigsaved);
+ if (signal_pending(current) && !ret)
+ ret = -ERESTARTNOHAND;
+
+ return ret;
+}
+
+#endif
+
+COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
+ compat_aio_context_t, ctx_id,
+ compat_long_t, min_nr,
+ compat_long_t, nr,
+ struct io_event __user *, events,
+ struct __kernel_timespec __user *, timeout,
+ const struct __compat_aio_sigset __user *, usig)
+{
+ struct __compat_aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 t;
+ int ret;
+
+ if (timeout && get_timespec64(&t, timeout))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
+
+ ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize);
+ if (ret)
+ return ret;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ restore_user_sigmask(ksig.sigmask, &sigsaved);
+ if (signal_pending(current) && !ret)
+ ret = -ERESTARTNOHAND;
return ret;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index de2135178e62..450be88cffef 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -181,7 +181,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio)
struct task_struct *waiter = bio->bi_private;
WRITE_ONCE(bio->bi_private, NULL);
- wake_up_process(waiter);
+ blk_wake_io_task(waiter);
}
static ssize_t
@@ -232,14 +232,18 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio.bi_opf = dio_bio_write_op(iocb);
task_io_account_write(ret);
}
+ if (iocb->ki_flags & IOCB_HIPRI)
+ bio.bi_opf |= REQ_HIPRI;
qc = submit_bio(&bio);
for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+
if (!READ_ONCE(bio.bi_private))
break;
+
if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc))
+ !blk_poll(bdev_get_queue(bdev), qc, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -298,12 +302,13 @@ static void blkdev_bio_end_io(struct bio *bio)
}
dio->iocb->ki_complete(iocb, ret, 0);
- bio_put(&dio->bio);
+ if (dio->multi_bio)
+ bio_put(&dio->bio);
} else {
struct task_struct *waiter = dio->waiter;
WRITE_ONCE(dio->waiter, NULL);
- wake_up_process(waiter);
+ blk_wake_io_task(waiter);
}
}
@@ -328,6 +333,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
struct blk_plug plug;
struct blkdev_dio *dio;
struct bio *bio;
+ bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
@@ -338,20 +344,27 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
return -EINVAL;
bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
- bio_get(bio); /* extra ref for the completion handler */
dio = container_of(bio, struct blkdev_dio, bio);
dio->is_sync = is_sync = is_sync_kiocb(iocb);
- if (dio->is_sync)
+ if (dio->is_sync) {
dio->waiter = current;
- else
+ bio_get(bio);
+ } else {
dio->iocb = iocb;
+ }
dio->size = 0;
dio->multi_bio = false;
dio->should_dirty = is_read && iter_is_iovec(iter);
- blk_start_plug(&plug);
+ /*
+ * Don't plug for HIPRI/polled IO, as those should go straight
+ * to issue
+ */
+ if (!is_poll)
+ blk_start_plug(&plug);
+
for (;;) {
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> 9;
@@ -381,11 +394,21 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
if (!nr_pages) {
+ if (iocb->ki_flags & IOCB_HIPRI)
+ bio->bi_opf |= REQ_HIPRI;
+
qc = submit_bio(bio);
break;
}
if (!dio->multi_bio) {
+ /*
+ * AIO needs an extra reference to ensure the dio
+ * structure which is embedded into the first bio
+ * stays around.
+ */
+ if (!is_sync)
+ bio_get(bio);
dio->multi_bio = true;
atomic_set(&dio->ref, 2);
} else {
@@ -395,18 +418,21 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
submit_bio(bio);
bio = bio_alloc(GFP_KERNEL, nr_pages);
}
- blk_finish_plug(&plug);
+
+ if (!is_poll)
+ blk_finish_plug(&plug);
if (!is_sync)
return -EIOCBQUEUED;
for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+
if (!READ_ONCE(dio->waiter))
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc))
+ !blk_poll(bdev_get_queue(bdev), qc, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
diff --git a/fs/buffer.c b/fs/buffer.c
index 1286c2b95498..d60d61e8ed7d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3060,11 +3060,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
*/
bio = bio_alloc(GFP_NOIO, 1);
- if (wbc) {
- wbc_init_bio(wbc, bio);
- wbc_account_io(wbc, bh->b_page, bh->b_size);
- }
-
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
@@ -3084,6 +3079,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
op_flags |= REQ_PRIO;
bio_set_op_attrs(bio, op, op_flags);
+ if (wbc) {
+ wbc_init_bio(wbc, bio);
+ wbc_account_io(wbc, bh->b_page, bh->b_size);
+ }
+
submit_bio(bio);
return 0;
}
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index e94a8d1d08a3..a568dac7b3a1 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1724,7 +1724,7 @@ static struct smbd_connection *_smbd_get_connection(
info->responder_resources);
/* Need to send IRD/ORD in private data for iWARP */
- info->id->device->get_port_immutable(
+ info->id->device->ops.get_port_immutable(
info->id->device, info->id->port_num, &port_immutable);
if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
ird_ord_hdr[0] = info->responder_resources;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 41a0e97252ae..dbc1a1f080ce 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -518,7 +518,7 @@ static struct bio *dio_await_one(struct dio *dio)
dio->waiter = current;
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(dio->bio_disk->queue, dio->bio_cookie))
+ !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true))
io_schedule();
/* wake up sets us TASK_RUNNING */
spin_lock_irqsave(&dio->bio_lock, flags);
@@ -1265,6 +1265,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
} else {
dio->op = REQ_OP_READ;
}
+ if (iocb->ki_flags & IOCB_HIPRI)
+ dio->op_flags |= REQ_HIPRI;
/*
* For AIO O_(D)SYNC writes we need to defer completions to a workqueue
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 42bbe6824b4b..8a5a1010886b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2223,31 +2223,13 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
* If the caller wants a certain signal mask to be set during the wait,
* we apply it here.
*/
- if (sigmask) {
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
- return -EFAULT;
- sigsaved = current->blocked;
- set_current_blocked(&ksigmask);
- }
+ error = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (error)
+ return error;
error = do_epoll_wait(epfd, events, maxevents, timeout);
- /*
- * If we changed the signal mask, we need to restore the original one.
- * In case we've got a signal while waiting, we do not restore the
- * signal mask yet, and we allow do_signal() to deliver the signal on
- * the way back to userspace, before the signal mask is restored.
- */
- if (sigmask) {
- if (error == -EINTR) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- } else
- set_current_blocked(&sigsaved);
- }
+ restore_user_sigmask(sigmask, &sigsaved);
return error;
}
@@ -2266,31 +2248,13 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
* If the caller wants a certain signal mask to be set during the wait,
* we apply it here.
*/
- if (sigmask) {
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (get_compat_sigset(&ksigmask, sigmask))
- return -EFAULT;
- sigsaved = current->blocked;
- set_current_blocked(&ksigmask);
- }
+ err = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (err)
+ return err;
err = do_epoll_wait(epfd, events, maxevents, timeout);
- /*
- * If we changed the signal mask, we need to restore the original one.
- * In case we've got a signal while waiting, we do not restore the
- * signal mask yet, and we allow do_signal() to deliver the signal on
- * the way back to userspace, before the signal mask is restored.
- */
- if (sigmask) {
- if (err == -EINTR) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- } else
- set_current_blocked(&sigsaved);
- }
+ restore_user_sigmask(sigmask, &sigsaved);
return err;
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index db7590178dfc..2aa62d58d8dd 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
if (!bio)
return -ENOMEM;
- wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
io->io_next_block = bh->b_blocknr;
+ wbc_init_bio(io->io_wbc, bio);
return 0;
}
diff --git a/fs/inode.c b/fs/inode.c
index 35d2108d567c..0cd47fe0dbe5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2149,7 +2149,9 @@ EXPORT_SYMBOL(timespec64_trunc);
*/
struct timespec64 current_time(struct inode *inode)
{
- struct timespec64 now = current_kernel_time64();
+ struct timespec64 now;
+
+ ktime_get_coarse_real_ts64(&now);
if (unlikely(!inode->i_sb)) {
WARN(1, "current_time() called with uninitialized super_block in the inode");
diff --git a/fs/iomap.c b/fs/iomap.c
index 2d9b93cc4930..3a0cd557b4cf 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1543,7 +1543,7 @@ static void iomap_dio_bio_end_io(struct bio *bio)
if (dio->wait_for_completion) {
struct task_struct *waiter = dio->submit.waiter;
WRITE_ONCE(dio->submit.waiter, NULL);
- wake_up_process(waiter);
+ blk_wake_io_task(waiter);
} else if (dio->flags & IOMAP_DIO_WRITE) {
struct inode *inode = file_inode(dio->iocb->ki_filp);
@@ -1571,6 +1571,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
unsigned len)
{
struct page *page = ZERO_PAGE(0);
+ int flags = REQ_SYNC | REQ_IDLE;
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, 1);
@@ -1579,9 +1580,12 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
+ if (dio->iocb->ki_flags & IOCB_HIPRI)
+ flags |= REQ_HIPRI;
+
get_page(page);
__bio_add_page(bio, page, len, 0);
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
atomic_inc(&dio->ref);
return submit_bio(bio);
@@ -1687,6 +1691,9 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
bio_set_pages_dirty(bio);
}
+ if (dio->iocb->ki_flags & IOCB_HIPRI)
+ bio->bi_opf |= REQ_HIPRI;
+
iov_iter_advance(dio->submit.iter, n);
dio->size += n;
@@ -1914,14 +1921,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
return -EIOCBQUEUED;
for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+
if (!READ_ONCE(dio->submit.waiter))
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
!dio->submit.last_queue ||
!blk_poll(dio->submit.last_queue,
- dio->submit.cookie))
+ dio->submit.cookie, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
diff --git a/fs/select.c b/fs/select.c
index 22b3bf89f051..4c8652390c94 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -287,12 +287,18 @@ int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
return 0;
}
+enum poll_time_type {
+ PT_TIMEVAL = 0,
+ PT_OLD_TIMEVAL = 1,
+ PT_TIMESPEC = 2,
+ PT_OLD_TIMESPEC = 3,
+};
+
static int poll_select_copy_remaining(struct timespec64 *end_time,
void __user *p,
- int timeval, int ret)
+ enum poll_time_type pt_type, int ret)
{
struct timespec64 rts;
- struct timeval rtv;
if (!p)
return ret;
@@ -310,18 +316,40 @@ static int poll_select_copy_remaining(struct timespec64 *end_time,
rts.tv_sec = rts.tv_nsec = 0;
- if (timeval) {
- if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
- memset(&rtv, 0, sizeof(rtv));
- rtv.tv_sec = rts.tv_sec;
- rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
+ switch (pt_type) {
+ case PT_TIMEVAL:
+ {
+ struct timeval rtv;
- if (!copy_to_user(p, &rtv, sizeof(rtv)))
+ if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
+ memset(&rtv, 0, sizeof(rtv));
+ rtv.tv_sec = rts.tv_sec;
+ rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
+ if (!copy_to_user(p, &rtv, sizeof(rtv)))
+ return ret;
+ }
+ break;
+ case PT_OLD_TIMEVAL:
+ {
+ struct old_timeval32 rtv;
+
+ rtv.tv_sec = rts.tv_sec;
+ rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
+ if (!copy_to_user(p, &rtv, sizeof(rtv)))
+ return ret;
+ }
+ break;
+ case PT_TIMESPEC:
+ if (!put_timespec64(&rts, p))
return ret;
-
- } else if (!put_timespec64(&rts, p))
- return ret;
-
+ break;
+ case PT_OLD_TIMESPEC:
+ if (!put_old_timespec32(&rts, p))
+ return ret;
+ break;
+ default:
+ BUG();
+ }
/*
* If an application puts its timeval in read-only memory, we
* don't want the Linux-specific update to the timeval to
@@ -689,7 +717,7 @@ static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
}
ret = core_sys_select(n, inp, outp, exp, to);
- ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
+ ret = poll_select_copy_remaining(&end_time, tvp, PT_TIMEVAL, ret);
return ret;
}
@@ -701,49 +729,41 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
}
static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
- fd_set __user *exp, struct timespec __user *tsp,
- const sigset_t __user *sigmask, size_t sigsetsize)
+ fd_set __user *exp, void __user *tsp,
+ const sigset_t __user *sigmask, size_t sigsetsize,
+ enum poll_time_type type)
{
sigset_t ksigmask, sigsaved;
struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (get_timespec64(&ts, tsp))
- return -EFAULT;
+ switch (type) {
+ case PT_TIMESPEC:
+ if (get_timespec64(&ts, tsp))
+ return -EFAULT;
+ break;
+ case PT_OLD_TIMESPEC:
+ if (get_old_timespec32(&ts, tsp))
+ return -EFAULT;
+ break;
+ default:
+ BUG();
+ }
to = &end_time;
if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
return -EINVAL;
}
- if (sigmask) {
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
- return -EFAULT;
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
ret = core_sys_select(n, inp, outp, exp, to);
- ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
+ ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
- if (ret == -ERESTARTNOHAND) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ restore_user_sigmask(sigmask, &sigsaved);
return ret;
}
@@ -755,7 +775,7 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
* the sigset size.
*/
SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
- fd_set __user *, exp, struct timespec __user *, tsp,
+ fd_set __user *, exp, struct __kernel_timespec __user *, tsp,
void __user *, sig)
{
size_t sigsetsize = 0;
@@ -769,9 +789,31 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
return -EFAULT;
}
- return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
+ return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize, PT_TIMESPEC);
}
+#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
+
+SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp,
+ fd_set __user *, exp, struct old_timespec32 __user *, tsp,
+ void __user *, sig)
+{
+ size_t sigsetsize = 0;
+ sigset_t __user *up = NULL;
+
+ if (sig) {
+ if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
+ || __get_user(up, (sigset_t __user * __user *)sig)
+ || __get_user(sigsetsize,
+ (size_t __user *)(sig+sizeof(void *))))
+ return -EFAULT;
+ }
+
+ return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize, PT_OLD_TIMESPEC);
+}
+
+#endif
+
#ifdef __ARCH_WANT_SYS_OLD_SELECT
struct sel_arg_struct {
unsigned long n;
@@ -1045,7 +1087,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
}
SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
- struct timespec __user *, tsp, const sigset_t __user *, sigmask,
+ struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask,
size_t, sigsetsize)
{
sigset_t ksigmask, sigsaved;
@@ -1061,89 +1103,62 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
return -EINVAL;
}
- if (sigmask) {
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
- return -EFAULT;
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
ret = do_sys_poll(ufds, nfds, to);
+ restore_user_sigmask(sigmask, &sigsaved);
+
/* We can restart this syscall, usually */
- if (ret == -EINTR) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
+ if (ret == -EINTR)
ret = -ERESTARTNOHAND;
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
+ ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret);
return ret;
}
-#ifdef CONFIG_COMPAT
-#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
+#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
-static
-int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p,
- int timeval, int ret)
+SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
+ struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask,
+ size_t, sigsetsize)
{
- struct timespec64 ts;
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 ts, end_time, *to = NULL;
+ int ret;
- if (!p)
- return ret;
+ if (tsp) {
+ if (get_old_timespec32(&ts, tsp))
+ return -EFAULT;
- if (current->personality & STICKY_TIMEOUTS)
- goto sticky;
+ to = &end_time;
+ if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+ return -EINVAL;
+ }
- /* No update for zero timeout */
- if (!end_time->tv_sec && !end_time->tv_nsec)
+ ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
return ret;
- ktime_get_ts64(&ts);
- ts = timespec64_sub(*end_time, ts);
- if (ts.tv_sec < 0)
- ts.tv_sec = ts.tv_nsec = 0;
+ ret = do_sys_poll(ufds, nfds, to);
- if (timeval) {
- struct old_timeval32 rtv;
+ restore_user_sigmask(sigmask, &sigsaved);
- rtv.tv_sec = ts.tv_sec;
- rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
+ /* We can restart this syscall, usually */
+ if (ret == -EINTR)
+ ret = -ERESTARTNOHAND;
- if (!copy_to_user(p, &rtv, sizeof(rtv)))
- return ret;
- } else {
- if (!put_old_timespec32(&ts, p))
- return ret;
- }
- /*
- * If an application puts its timeval in read-only memory, we
- * don't want the Linux-specific update to the timeval to
- * cause a fault after the select has completed
- * successfully. However, because we're not updating the
- * timeval, we can't restart the system call.
- */
+ ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret);
-sticky:
- if (ret == -ERESTARTNOHAND)
- ret = -EINTR;
return ret;
}
+#endif
+
+#ifdef CONFIG_COMPAT
+#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
/*
* Ooo, nasty. We need here to frob 32-bit unsigned longs to
@@ -1275,7 +1290,7 @@ static int do_compat_select(int n, compat_ulong_t __user *inp,
}
ret = compat_core_sys_select(n, inp, outp, exp, to);
- ret = compat_poll_select_copy_remaining(&end_time, tvp, 1, ret);
+ ret = poll_select_copy_remaining(&end_time, tvp, PT_OLD_TIMEVAL, ret);
return ret;
}
@@ -1307,52 +1322,66 @@ COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
static long do_compat_pselect(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
- struct old_timespec32 __user *tsp, compat_sigset_t __user *sigmask,
- compat_size_t sigsetsize)
+ void __user *tsp, compat_sigset_t __user *sigmask,
+ compat_size_t sigsetsize, enum poll_time_type type)
{
sigset_t ksigmask, sigsaved;
struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (get_old_timespec32(&ts, tsp))
- return -EFAULT;
+ switch (type) {
+ case PT_OLD_TIMESPEC:
+ if (get_old_timespec32(&ts, tsp))
+ return -EFAULT;
+ break;
+ case PT_TIMESPEC:
+ if (get_timespec64(&ts, tsp))
+ return -EFAULT;
+ break;
+ default:
+ BUG();
+ }
to = &end_time;
if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
return -EINVAL;
}
- if (sigmask) {
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (get_compat_sigset(&ksigmask, sigmask))
- return -EFAULT;
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
+ ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
ret = compat_core_sys_select(n, inp, outp, exp, to);
- ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
+ ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
- if (ret == -ERESTARTNOHAND) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ restore_user_sigmask(sigmask, &sigsaved);
return ret;
}
+COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp,
+ compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
+ struct __kernel_timespec __user *, tsp, void __user *, sig)
+{
+ compat_size_t sigsetsize = 0;
+ compat_uptr_t up = 0;
+
+ if (sig) {
+ if (!access_ok(VERIFY_READ, sig,
+ sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
+ __get_user(up, (compat_uptr_t __user *)sig) ||
+ __get_user(sigsetsize,
+ (compat_size_t __user *)(sig+sizeof(up))))
+ return -EFAULT;
+ }
+
+ return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
+ sigsetsize, PT_TIMESPEC);
+}
+
+#if defined(CONFIG_COMPAT_32BIT_TIME)
+
COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
struct old_timespec32 __user *, tsp, void __user *, sig)
@@ -1368,10 +1397,14 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
(compat_size_t __user *)(sig+sizeof(up))))
return -EFAULT;
}
+
return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
- sigsetsize);
+ sigsetsize, PT_OLD_TIMESPEC);
}
+#endif
+
+#if defined(CONFIG_COMPAT_32BIT_TIME)
COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
unsigned int, nfds, struct old_timespec32 __user *, tsp,
const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
@@ -1389,36 +1422,57 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
return -EINVAL;
}
- if (sigmask) {
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (get_compat_sigset(&ksigmask, sigmask))
+ ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
+
+ ret = do_sys_poll(ufds, nfds, to);
+
+ restore_user_sigmask(sigmask, &sigsaved);
+
+ /* We can restart this syscall, usually */
+ if (ret == -EINTR)
+ ret = -ERESTARTNOHAND;
+
+ ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret);
+
+ return ret;
+}
+#endif
+
+/* New compat syscall for 64 bit time_t*/
+COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
+ unsigned int, nfds, struct __kernel_timespec __user *, tsp,
+ const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
+{
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 ts, end_time, *to = NULL;
+ int ret;
+
+ if (tsp) {
+ if (get_timespec64(&ts, tsp))
return -EFAULT;
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ to = &end_time;
+ if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+ return -EINVAL;
}
+ ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ if (ret)
+ return ret;
+
ret = do_sys_poll(ufds, nfds, to);
+ restore_user_sigmask(sigmask, &sigsaved);
+
/* We can restart this syscall, usually */
- if (ret == -EINTR) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
+ if (ret == -EINTR)
ret = -ERESTARTNOHAND;
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
+ ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret);
return ret;
}
+
#endif
diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h
index 880a292d792f..c13f46109e88 100644
--- a/include/asm-generic/dma-mapping.h
+++ b/include/asm-generic/dma-mapping.h
@@ -4,7 +4,7 @@
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
- return &dma_direct_ops;
+ return NULL;
}
#endif /* _ASM_GENERIC_DMA_MAPPING_H */
diff --git a/include/linux/alcor_pci.h b/include/linux/alcor_pci.h
new file mode 100644
index 000000000000..da973e8a2da8
--- /dev/null
+++ b/include/linux/alcor_pci.h
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2018 Oleksij Rempel <linux@rempel-privat.de>
+ *
+ * Driver for Alcor Micro AU6601 and AU6621 controllers
+ */
+
+#ifndef __ALCOR_PCI_H
+#define __ALCOR_PCI_H
+
+#define ALCOR_SD_CARD 0
+#define ALCOR_MS_CARD 1
+
+#define DRV_NAME_ALCOR_PCI_SDMMC "alcor_sdmmc"
+#define DRV_NAME_ALCOR_PCI_MS "alcor_ms"
+
+#define PCI_ID_ALCOR_MICRO 0x1AEA
+#define PCI_ID_AU6601 0x6601
+#define PCI_ID_AU6621 0x6621
+
+#define MHZ_TO_HZ(freq) ((freq) * 1000 * 1000)
+
+#define AU6601_BASE_CLOCK 31000000
+#define AU6601_MIN_CLOCK 150000
+#define AU6601_MAX_CLOCK 208000000
+#define AU6601_MAX_DMA_SEGMENTS 1
+#define AU6601_MAX_PIO_SEGMENTS 1
+#define AU6601_MAX_DMA_BLOCK_SIZE 0x1000
+#define AU6601_MAX_PIO_BLOCK_SIZE 0x200
+#define AU6601_MAX_DMA_BLOCKS 1
+#define AU6601_DMA_LOCAL_SEGMENTS 1
+
+/* registers spotter by reverse engineering but still
+ * with unknown functionality:
+ * 0x10 - ADMA phy address. AU6621 only?
+ * 0x51 - LED ctrl?
+ * 0x52 - unknown
+ * 0x61 - LED related? Always toggled BIT0
+ * 0x63 - Same as 0x61?
+ * 0x77 - unknown
+ */
+
+/* SDMA phy address. Higher then 0x0800.0000?
+ * The au6601 and au6621 have different DMA engines with different issues. One
+ * For example au6621 engine is triggered by addr change. No other interaction
+ * is needed. This means, if we get two buffers with same address, then engine
+ * will stall.
+ */
+#define AU6601_REG_SDMA_ADDR 0x00
+#define AU6601_SDMA_MASK 0xffffffff
+
+#define AU6601_DMA_BOUNDARY 0x05
+#define AU6621_DMA_PAGE_CNT 0x05
+/* PIO */
+#define AU6601_REG_BUFFER 0x08
+/* ADMA ctrl? AU6621 only. */
+#define AU6621_DMA_CTRL 0x0c
+#define AU6621_DMA_ENABLE BIT(0)
+/* CMD index */
+#define AU6601_REG_CMD_OPCODE 0x23
+/* CMD parametr */
+#define AU6601_REG_CMD_ARG 0x24
+/* CMD response 4x4 Bytes */
+#define AU6601_REG_CMD_RSP0 0x30
+#define AU6601_REG_CMD_RSP1 0x34
+#define AU6601_REG_CMD_RSP2 0x38
+#define AU6601_REG_CMD_RSP3 0x3C
+/* default timeout set to 125: 125 * 40ms = 5 sec
+ * how exactly it is calculated?
+ */
+#define AU6601_TIME_OUT_CTRL 0x69
+/* Block size for SDMA or PIO */
+#define AU6601_REG_BLOCK_SIZE 0x6c
+/* Some power related reg, used together with AU6601_OUTPUT_ENABLE */
+#define AU6601_POWER_CONTROL 0x70
+
+/* PLL ctrl */
+#define AU6601_CLK_SELECT 0x72
+#define AU6601_CLK_OVER_CLK 0x80
+#define AU6601_CLK_384_MHZ 0x30
+#define AU6601_CLK_125_MHZ 0x20
+#define AU6601_CLK_48_MHZ 0x10
+#define AU6601_CLK_EXT_PLL 0x04
+#define AU6601_CLK_X2_MODE 0x02
+#define AU6601_CLK_ENABLE 0x01
+#define AU6601_CLK_31_25_MHZ 0x00
+
+#define AU6601_CLK_DIVIDER 0x73
+
+#define AU6601_INTERFACE_MODE_CTRL 0x74
+#define AU6601_DLINK_MODE 0x80
+#define AU6601_INTERRUPT_DELAY_TIME 0x40
+#define AU6601_SIGNAL_REQ_CTRL 0x30
+#define AU6601_MS_CARD_WP BIT(3)
+#define AU6601_SD_CARD_WP BIT(0)
+
+/* same register values are used for:
+ * - AU6601_OUTPUT_ENABLE
+ * - AU6601_POWER_CONTROL
+ */
+#define AU6601_ACTIVE_CTRL 0x75
+#define AU6601_XD_CARD BIT(4)
+/* AU6601_MS_CARD_ACTIVE - will cativate MS card section? */
+#define AU6601_MS_CARD BIT(3)
+#define AU6601_SD_CARD BIT(0)
+
+/* card slot state. It should automatically detect type of
+ * the card
+ */
+#define AU6601_DETECT_STATUS 0x76
+#define AU6601_DETECT_EN BIT(7)
+#define AU6601_MS_DETECTED BIT(3)
+#define AU6601_SD_DETECTED BIT(0)
+#define AU6601_DETECT_STATUS_M 0xf
+
+#define AU6601_REG_SW_RESET 0x79
+#define AU6601_BUF_CTRL_RESET BIT(7)
+#define AU6601_RESET_DATA BIT(3)
+#define AU6601_RESET_CMD BIT(0)
+
+#define AU6601_OUTPUT_ENABLE 0x7a
+
+#define AU6601_PAD_DRIVE0 0x7b
+#define AU6601_PAD_DRIVE1 0x7c
+#define AU6601_PAD_DRIVE2 0x7d
+/* read EEPROM? */
+#define AU6601_FUNCTION 0x7f
+
+#define AU6601_CMD_XFER_CTRL 0x81
+#define AU6601_CMD_17_BYTE_CRC 0xc0
+#define AU6601_CMD_6_BYTE_WO_CRC 0x80
+#define AU6601_CMD_6_BYTE_CRC 0x40
+#define AU6601_CMD_START_XFER 0x20
+#define AU6601_CMD_STOP_WAIT_RDY 0x10
+#define AU6601_CMD_NO_RESP 0x00
+
+#define AU6601_REG_BUS_CTRL 0x82
+#define AU6601_BUS_WIDTH_4BIT 0x20
+#define AU6601_BUS_WIDTH_8BIT 0x10
+#define AU6601_BUS_WIDTH_1BIT 0x00
+
+#define AU6601_DATA_XFER_CTRL 0x83
+#define AU6601_DATA_WRITE BIT(7)
+#define AU6601_DATA_DMA_MODE BIT(6)
+#define AU6601_DATA_START_XFER BIT(0)
+
+#define AU6601_DATA_PIN_STATE 0x84
+#define AU6601_BUS_STAT_CMD BIT(15)
+/* BIT(4) - BIT(7) are permanently 1.
+ * May be reserved or not attached DAT4-DAT7
+ */
+#define AU6601_BUS_STAT_DAT3 BIT(3)
+#define AU6601_BUS_STAT_DAT2 BIT(2)
+#define AU6601_BUS_STAT_DAT1 BIT(1)
+#define AU6601_BUS_STAT_DAT0 BIT(0)
+#define AU6601_BUS_STAT_DAT_MASK 0xf
+
+#define AU6601_OPT 0x85
+#define AU6601_OPT_CMD_LINE_LEVEL 0x80
+#define AU6601_OPT_NCRC_16_CLK BIT(4)
+#define AU6601_OPT_CMD_NWT BIT(3)
+#define AU6601_OPT_STOP_CLK BIT(2)
+#define AU6601_OPT_DDR_MODE BIT(1)
+#define AU6601_OPT_SD_18V BIT(0)
+
+#define AU6601_CLK_DELAY 0x86
+#define AU6601_CLK_DATA_POSITIVE_EDGE 0x80
+#define AU6601_CLK_CMD_POSITIVE_EDGE 0x40
+#define AU6601_CLK_POSITIVE_EDGE_ALL (AU6601_CLK_CMD_POSITIVE_EDGE \
+ | AU6601_CLK_DATA_POSITIVE_EDGE)
+
+
+#define AU6601_REG_INT_STATUS 0x90
+#define AU6601_REG_INT_ENABLE 0x94
+#define AU6601_INT_DATA_END_BIT_ERR BIT(22)
+#define AU6601_INT_DATA_CRC_ERR BIT(21)
+#define AU6601_INT_DATA_TIMEOUT_ERR BIT(20)
+#define AU6601_INT_CMD_INDEX_ERR BIT(19)
+#define AU6601_INT_CMD_END_BIT_ERR BIT(18)
+#define AU6601_INT_CMD_CRC_ERR BIT(17)
+#define AU6601_INT_CMD_TIMEOUT_ERR BIT(16)
+#define AU6601_INT_ERROR BIT(15)
+#define AU6601_INT_OVER_CURRENT_ERR BIT(8)
+#define AU6601_INT_CARD_INSERT BIT(7)
+#define AU6601_INT_CARD_REMOVE BIT(6)
+#define AU6601_INT_READ_BUF_RDY BIT(5)
+#define AU6601_INT_WRITE_BUF_RDY BIT(4)
+#define AU6601_INT_DMA_END BIT(3)
+#define AU6601_INT_DATA_END BIT(1)
+#define AU6601_INT_CMD_END BIT(0)
+
+#define AU6601_INT_NORMAL_MASK 0x00007FFF
+#define AU6601_INT_ERROR_MASK 0xFFFF8000
+
+#define AU6601_INT_CMD_MASK (AU6601_INT_CMD_END | \
+ AU6601_INT_CMD_TIMEOUT_ERR | AU6601_INT_CMD_CRC_ERR | \
+ AU6601_INT_CMD_END_BIT_ERR | AU6601_INT_CMD_INDEX_ERR)
+#define AU6601_INT_DATA_MASK (AU6601_INT_DATA_END | AU6601_INT_DMA_END | \
+ AU6601_INT_READ_BUF_RDY | AU6601_INT_WRITE_BUF_RDY | \
+ AU6601_INT_DATA_TIMEOUT_ERR | AU6601_INT_DATA_CRC_ERR | \
+ AU6601_INT_DATA_END_BIT_ERR)
+#define AU6601_INT_ALL_MASK ((u32)-1)
+
+/* MS_CARD mode registers */
+
+#define AU6601_MS_STATUS 0xa0
+
+#define AU6601_MS_BUS_MODE_CTRL 0xa1
+#define AU6601_MS_BUS_8BIT_MODE 0x03
+#define AU6601_MS_BUS_4BIT_MODE 0x01
+#define AU6601_MS_BUS_1BIT_MODE 0x00
+
+#define AU6601_MS_TPC_CMD 0xa2
+#define AU6601_MS_TPC_READ_PAGE_DATA 0x02
+#define AU6601_MS_TPC_READ_REG 0x04
+#define AU6601_MS_TPC_GET_INT 0x07
+#define AU6601_MS_TPC_WRITE_PAGE_DATA 0x0D
+#define AU6601_MS_TPC_WRITE_REG 0x0B
+#define AU6601_MS_TPC_SET_RW_REG_ADRS 0x08
+#define AU6601_MS_TPC_SET_CMD 0x0E
+#define AU6601_MS_TPC_EX_SET_CMD 0x09
+#define AU6601_MS_TPC_READ_SHORT_DATA 0x03
+#define AU6601_MS_TPC_WRITE_SHORT_DATA 0x0C
+
+#define AU6601_MS_TRANSFER_MODE 0xa3
+#define AU6601_MS_XFER_INT_TIMEOUT_CHK BIT(2)
+#define AU6601_MS_XFER_DMA_ENABLE BIT(1)
+#define AU6601_MS_XFER_START BIT(0)
+
+#define AU6601_MS_DATA_PIN_STATE 0xa4
+
+#define AU6601_MS_INT_STATUS 0xb0
+#define AU6601_MS_INT_ENABLE 0xb4
+#define AU6601_MS_INT_OVER_CURRENT_ERROR BIT(23)
+#define AU6601_MS_INT_DATA_CRC_ERROR BIT(21)
+#define AU6601_MS_INT_INT_TIMEOUT BIT(20)
+#define AU6601_MS_INT_INT_RESP_ERROR BIT(19)
+#define AU6601_MS_INT_CED_ERROR BIT(18)
+#define AU6601_MS_INT_TPC_TIMEOUT BIT(16)
+#define AU6601_MS_INT_ERROR BIT(15)
+#define AU6601_MS_INT_CARD_INSERT BIT(7)
+#define AU6601_MS_INT_CARD_REMOVE BIT(6)
+#define AU6601_MS_INT_BUF_READ_RDY BIT(5)
+#define AU6601_MS_INT_BUF_WRITE_RDY BIT(4)
+#define AU6601_MS_INT_DMA_END BIT(3)
+#define AU6601_MS_INT_TPC_END BIT(1)
+
+#define AU6601_MS_INT_DATA_MASK 0x00000038
+#define AU6601_MS_INT_TPC_MASK 0x003d8002
+#define AU6601_MS_INT_TPC_ERROR 0x003d0000
+
+#define ALCOR_PCIE_LINK_CTRL_OFFSET 0x10
+#define ALCOR_PCIE_LINK_CAP_OFFSET 0x0c
+#define ALCOR_CAP_START_OFFSET 0x34
+
+struct alcor_dev_cfg {
+ u8 dma;
+};
+
+struct alcor_pci_priv {
+ struct pci_dev *pdev;
+ struct pci_dev *parent_pdev;
+ struct device *dev;
+ void __iomem *iobase;
+ unsigned int irq;
+
+ unsigned long id; /* idr id */
+
+ struct alcor_dev_cfg *cfg;
+
+ /* PCI ASPM related vars */
+ int pdev_cap_off;
+ u8 pdev_aspm_cap;
+ int parent_cap_off;
+ u8 parent_aspm_cap;
+ u8 ext_config_dev_aspm;
+};
+
+void alcor_write8(struct alcor_pci_priv *priv, u8 val, unsigned int addr);
+void alcor_write16(struct alcor_pci_priv *priv, u16 val, unsigned int addr);
+void alcor_write32(struct alcor_pci_priv *priv, u32 val, unsigned int addr);
+void alcor_write32be(struct alcor_pci_priv *priv, u32 val, unsigned int addr);
+u8 alcor_read8(struct alcor_pci_priv *priv, unsigned int addr);
+u32 alcor_read32(struct alcor_pci_priv *priv, unsigned int addr);
+u32 alcor_read32be(struct alcor_pci_priv *priv, unsigned int addr);
+#endif
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 056fb627edb3..7380b094dcca 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -491,35 +491,40 @@ do { \
bio_clear_flag(bio, BIO_THROTTLED);\
(bio)->bi_disk = (bdev)->bd_disk; \
(bio)->bi_partno = (bdev)->bd_partno; \
+ bio_associate_blkg(bio); \
} while (0)
#define bio_copy_dev(dst, src) \
do { \
(dst)->bi_disk = (src)->bi_disk; \
(dst)->bi_partno = (src)->bi_partno; \
+ bio_clone_blkg_association(dst, src); \
} while (0)
#define bio_dev(bio) \
disk_devt((bio)->bi_disk)
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
+void bio_associate_blkg_from_page(struct bio *bio, struct page *page);
#else
-static inline int bio_associate_blkcg_from_page(struct bio *bio,
- struct page *page) { return 0; }
+static inline void bio_associate_blkg_from_page(struct bio *bio,
+ struct page *page) { }
#endif
#ifdef CONFIG_BLK_CGROUP
-int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
-void bio_disassociate_task(struct bio *bio);
-void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
+void bio_disassociate_blkg(struct bio *bio);
+void bio_associate_blkg(struct bio *bio);
+void bio_associate_blkg_from_css(struct bio *bio,
+ struct cgroup_subsys_state *css);
+void bio_clone_blkg_association(struct bio *dst, struct bio *src);
#else /* CONFIG_BLK_CGROUP */
-static inline int bio_associate_blkcg(struct bio *bio,
- struct cgroup_subsys_state *blkcg_css) { return 0; }
-static inline void bio_disassociate_task(struct bio *bio) { }
-static inline void bio_clone_blkcg_association(struct bio *dst,
- struct bio *src) { }
+static inline void bio_disassociate_blkg(struct bio *bio) { }
+static inline void bio_associate_blkg(struct bio *bio) { }
+static inline void bio_associate_blkg_from_css(struct bio *bio,
+ struct cgroup_subsys_state *css)
+{ }
+static inline void bio_clone_blkg_association(struct bio *dst,
+ struct bio *src) { }
#endif /* CONFIG_BLK_CGROUP */
#ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6d766a19f2bb..f025fd1e22e6 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -21,6 +21,7 @@
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>
+#include <linux/fs.h>
/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
@@ -122,11 +123,8 @@ struct blkcg_gq {
/* all non-root blkcg_gq's are guaranteed to have access to parent */
struct blkcg_gq *parent;
- /* request allocation list for this blkcg-q pair */
- struct request_list rl;
-
/* reference count */
- atomic_t refcnt;
+ struct percpu_ref refcnt;
/* is this blkg online? protected by both blkcg and q locks */
bool online;
@@ -184,6 +182,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
struct request_queue *q, bool update_hint);
+struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
+ struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
@@ -230,22 +230,62 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static inline struct cgroup_subsys_state *blkcg_css(void)
+{
+ struct cgroup_subsys_state *css;
+
+ css = kthread_blkcg();
+ if (css)
+ return css;
+ return task_css(current, io_cgrp_id);
+}
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct blkcg, css) : NULL;
}
-static inline struct blkcg *bio_blkcg(struct bio *bio)
+/**
+ * __bio_blkcg - internal, inconsistent version to get blkcg
+ *
+ * DO NOT USE.
+ * This function is inconsistent and consequently is dangerous to use. The
+ * first part of the function returns a blkcg where a reference is owned by the
+ * bio. This means it does not need to be rcu protected as it cannot go away
+ * with the bio owning a reference to it. However, the latter potentially gets
+ * it from task_css(). This can race against task migration and the cgroup
+ * dying. It is also semantically different as it must be called rcu protected
+ * and is susceptible to failure when trying to get a reference to it.
+ * Therefore, it is not ok to assume that *_get() will always succeed on the
+ * blkcg returned here.
+ */
+static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
- struct cgroup_subsys_state *css;
+ if (bio && bio->bi_blkg)
+ return bio->bi_blkg->blkcg;
+ return css_to_blkcg(blkcg_css());
+}
- if (bio && bio->bi_css)
- return css_to_blkcg(bio->bi_css);
- css = kthread_blkcg();
- if (css)
- return css_to_blkcg(css);
- return css_to_blkcg(task_css(current, io_cgrp_id));
+/**
+ * bio_blkcg - grab the blkcg associated with a bio
+ * @bio: target bio
+ *
+ * This returns the blkcg associated with a bio, %NULL if not associated.
+ * Callers are expected to either handle %NULL or know association has been
+ * done prior to calling this.
+ */
+static inline struct blkcg *bio_blkcg(struct bio *bio)
+{
+ if (bio && bio->bi_blkg)
+ return bio->bi_blkg->blkcg;
+ return NULL;
}
static inline bool blk_cgroup_congested(void)
@@ -328,16 +368,12 @@ static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
* @q: request_queue of interest
*
* Lookup blkg for the @blkcg - @q pair. This function should be called
- * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
- * - see blk_queue_bypass_start() for details.
+ * under RCU read loc.
*/
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
struct request_queue *q)
{
WARN_ON_ONCE(!rcu_read_lock_held());
-
- if (unlikely(blk_queue_bypass(q)))
- return NULL;
return __blkg_lookup(blkcg, q, false);
}
@@ -451,26 +487,35 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
*/
static inline void blkg_get(struct blkcg_gq *blkg)
{
- WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
- atomic_inc(&blkg->refcnt);
+ percpu_ref_get(&blkg->refcnt);
}
/**
- * blkg_try_get - try and get a blkg reference
+ * blkg_tryget - try and get a blkg reference
* @blkg: blkg to get
*
* This is for use when doing an RCU lookup of the blkg. We may be in the midst
* of freeing this blkg, so we can only use it if the refcnt is not zero.
*/
-static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
+static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
- if (atomic_inc_not_zero(&blkg->refcnt))
- return blkg;
- return NULL;
+ return percpu_ref_tryget(&blkg->refcnt);
}
+/**
+ * blkg_tryget_closest - try and get a blkg ref on the closet blkg
+ * @blkg: blkg to get
+ *
+ * This walks up the blkg tree to find the closest non-dying blkg and returns
+ * the blkg that it did association with as it may not be the passed in blkg.
+ */
+static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
+{
+ while (blkg && !percpu_ref_tryget(&blkg->refcnt))
+ blkg = blkg->parent;
-void __blkg_release_rcu(struct rcu_head *rcu);
+ return blkg;
+}
/**
* blkg_put - put a blkg reference
@@ -478,9 +523,7 @@ void __blkg_release_rcu(struct rcu_head *rcu);
*/
static inline void blkg_put(struct blkcg_gq *blkg)
{
- WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
- if (atomic_dec_and_test(&blkg->refcnt))
- call_rcu(&blkg->rcu_head, __blkg_release_rcu);
+ percpu_ref_put(&blkg->refcnt);
}
/**
@@ -515,94 +558,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))
-/**
- * blk_get_rl - get request_list to use
- * @q: request_queue of interest
- * @bio: bio which will be attached to the allocated request (may be %NULL)
- *
- * The caller wants to allocate a request from @q to use for @bio. Find
- * the request_list to use and obtain a reference on it. Should be called
- * under queue_lock. This function is guaranteed to return non-%NULL
- * request_list.
- */
-static inline struct request_list *blk_get_rl(struct request_queue *q,
- struct bio *bio)
-{
- struct blkcg *blkcg;
- struct blkcg_gq *blkg;
-
- rcu_read_lock();
-
- blkcg = bio_blkcg(bio);
-
- /* bypass blkg lookup and use @q->root_rl directly for root */
- if (blkcg == &blkcg_root)
- goto root_rl;
-
- /*
- * Try to use blkg->rl. blkg lookup may fail under memory pressure
- * or if either the blkcg or queue is going away. Fall back to
- * root_rl in such cases.
- */
- blkg = blkg_lookup(blkcg, q);
- if (unlikely(!blkg))
- goto root_rl;
-
- blkg_get(blkg);
- rcu_read_unlock();
- return &blkg->rl;
-root_rl:
- rcu_read_unlock();
- return &q->root_rl;
-}
-
-/**
- * blk_put_rl - put request_list
- * @rl: request_list to put
- *
- * Put the reference acquired by blk_get_rl(). Should be called under
- * queue_lock.
- */
-static inline void blk_put_rl(struct request_list *rl)
-{
- if (rl->blkg->blkcg != &blkcg_root)
- blkg_put(rl->blkg);
-}
-
-/**
- * blk_rq_set_rl - associate a request with a request_list
- * @rq: request of interest
- * @rl: target request_list
- *
- * Associate @rq with @rl so that accounting and freeing can know the
- * request_list @rq came from.
- */
-static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
-{
- rq->rl = rl;
-}
-
-/**
- * blk_rq_rl - return the request_list a request came from
- * @rq: request of interest
- *
- * Return the request_list @rq is allocated from.
- */
-static inline struct request_list *blk_rq_rl(struct request *rq)
-{
- return rq->rl;
-}
-
-struct request_list *__blk_queue_next_rl(struct request_list *rl,
- struct request_queue *q);
-/**
- * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
- *
- * Should be used under queue_lock.
- */
-#define blk_queue_for_each_rl(rl, q) \
- for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
-
static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
{
int ret;
@@ -797,32 +752,34 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
struct bio *bio) { return false; }
#endif
+
+static inline void blkcg_bio_issue_init(struct bio *bio)
+{
+ bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+}
+
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio)
{
- struct blkcg *blkcg;
struct blkcg_gq *blkg;
bool throtl = false;
rcu_read_lock();
- blkcg = bio_blkcg(bio);
-
- /* associate blkcg if bio hasn't attached one */
- bio_associate_blkcg(bio, &blkcg->css);
-
- blkg = blkg_lookup(blkcg, q);
- if (unlikely(!blkg)) {
- spin_lock_irq(q->queue_lock);
- blkg = blkg_lookup_create(blkcg, q);
- if (IS_ERR(blkg))
- blkg = NULL;
- spin_unlock_irq(q->queue_lock);
+
+ if (!bio->bi_blkg) {
+ char b[BDEVNAME_SIZE];
+
+ WARN_ONCE(1,
+ "no blkg associated for bio on block-device: %s\n",
+ bio_devname(bio, b));
+ bio_associate_blkg(bio);
}
+ blkg = bio->bi_blkg;
+
throtl = blk_throtl_bio(q, blkg, bio);
if (!throtl) {
- blkg = blkg ?: q->root_blkg;
/*
* If the bio is flagged with BIO_QUEUE_ENTERED it means this
* is a split bio and we would have already accounted for the
@@ -834,6 +791,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
}
+ blkcg_bio_issue_init(bio);
+
rcu_read_unlock();
return !throtl;
}
@@ -930,6 +889,7 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
+static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -939,12 +899,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
-static inline struct request_list *blk_get_rl(struct request_queue *q,
- struct bio *bio) { return &q->root_rl; }
-static inline void blk_put_rl(struct request_list *rl) { }
-static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
-static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
-
+static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio) { return true; }
diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h
index 9f4c17f0d2d8..0b1f45c62623 100644
--- a/include/linux/blk-mq-pci.h
+++ b/include/linux/blk-mq-pci.h
@@ -2,10 +2,10 @@
#ifndef _LINUX_BLK_MQ_PCI_H
#define _LINUX_BLK_MQ_PCI_H
-struct blk_mq_tag_set;
+struct blk_mq_queue_map;
struct pci_dev;
-int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev,
+int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev,
int offset);
#endif /* _LINUX_BLK_MQ_PCI_H */
diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h
index b4ade198007d..7b6ecf9ac4c3 100644
--- a/include/linux/blk-mq-rdma.h
+++ b/include/linux/blk-mq-rdma.h
@@ -4,7 +4,7 @@
struct blk_mq_tag_set;
struct ib_device;
-int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set,
+int blk_mq_rdma_map_queues(struct blk_mq_queue_map *map,
struct ib_device *dev, int first_vec);
#endif /* _LINUX_BLK_MQ_RDMA_H */
diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h
index 69b4da262c45..687ae287e1dc 100644
--- a/include/linux/blk-mq-virtio.h
+++ b/include/linux/blk-mq-virtio.h
@@ -2,10 +2,10 @@
#ifndef _LINUX_BLK_MQ_VIRTIO_H
#define _LINUX_BLK_MQ_VIRTIO_H
-struct blk_mq_tag_set;
+struct blk_mq_queue_map;
struct virtio_device;
-int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set,
+int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap,
struct virtio_device *vdev, int first_vec);
#endif /* _LINUX_BLK_MQ_VIRTIO_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2286dc12c6bc..0e030f5f76b6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -37,7 +37,8 @@ struct blk_mq_hw_ctx {
struct blk_mq_ctx *dispatch_from;
unsigned int dispatch_busy;
- unsigned int nr_ctx;
+ unsigned short type;
+ unsigned short nr_ctx;
struct blk_mq_ctx **ctxs;
spinlock_t dispatch_wait_lock;
@@ -74,10 +75,31 @@ struct blk_mq_hw_ctx {
struct srcu_struct srcu[0];
};
+struct blk_mq_queue_map {
+ unsigned int *mq_map;
+ unsigned int nr_queues;
+ unsigned int queue_offset;
+};
+
+enum hctx_type {
+ HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */
+ HCTX_TYPE_READ, /* just for READ I/O */
+ HCTX_TYPE_POLL, /* polled I/O of any kind */
+
+ HCTX_MAX_TYPES,
+};
+
struct blk_mq_tag_set {
- unsigned int *mq_map;
+ /*
+ * map[] holds ctx -> hctx mappings, one map exists for each type
+ * that the driver wishes to support. There are no restrictions
+ * on maps being of the same size, and it's perfectly legal to
+ * share maps between types.
+ */
+ struct blk_mq_queue_map map[HCTX_MAX_TYPES];
+ unsigned int nr_maps; /* nr entries in map[] */
const struct blk_mq_ops *ops;
- unsigned int nr_hw_queues;
+ unsigned int nr_hw_queues; /* nr hw queues across maps */
unsigned int queue_depth; /* max hw supported */
unsigned int reserved_tags;
unsigned int cmd_size; /* per-request extra data */
@@ -99,6 +121,7 @@ struct blk_mq_queue_data {
typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
const struct blk_mq_queue_data *);
+typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *);
typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
@@ -109,11 +132,13 @@ typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *,
typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *,
unsigned int);
-typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
+typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
bool);
-typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
-typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int);
+typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
+typedef int (poll_fn)(struct blk_mq_hw_ctx *);
typedef int (map_queues_fn)(struct blk_mq_tag_set *set);
+typedef bool (busy_fn)(struct request_queue *);
+typedef void (complete_fn)(struct request *);
struct blk_mq_ops {
@@ -123,6 +148,15 @@ struct blk_mq_ops {
queue_rq_fn *queue_rq;
/*
+ * If a driver uses bd->last to judge when to submit requests to
+ * hardware, it must define this function. In case of errors that
+ * make us stop issuing further requests, this hook serves the
+ * purpose of kicking the hardware (which the last request otherwise
+ * would have done).
+ */
+ commit_rqs_fn *commit_rqs;
+
+ /*
* Reserve budget before queue request, once .queue_rq is
* run, it is driver's responsibility to release the
* reserved budget. Also we have to handle failure case
@@ -141,7 +175,7 @@ struct blk_mq_ops {
*/
poll_fn *poll;
- softirq_done_fn *complete;
+ complete_fn *complete;
/*
* Called when the block layer side of a hardware queue has been
@@ -165,6 +199,11 @@ struct blk_mq_ops {
/* Called from inside blk_get_request() */
void (*initialize_rq_fn)(struct request *rq);
+ /*
+ * If set, returns whether or not this queue currently is busy
+ */
+ busy_fn *busy;
+
map_queues_fn *map_queues;
#ifdef CONFIG_BLK_DEBUG_FS
@@ -218,6 +257,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
void blk_mq_free_request(struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
+bool blk_mq_queue_inflight(struct request_queue *q);
+
enum {
/* return when out of requests */
BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0),
@@ -264,7 +305,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
-void blk_mq_complete_request(struct request *rq);
+bool blk_mq_complete_request(struct request *rq);
bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
struct bio *bio);
bool blk_mq_queue_stopped(struct request_queue *q);
@@ -288,24 +329,12 @@ void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
-int blk_mq_map_queues(struct blk_mq_tag_set *set);
+int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
void blk_mq_quiesce_queue_nowait(struct request_queue *q);
-/**
- * blk_mq_mark_complete() - Set request state to complete
- * @rq: request to set to complete state
- *
- * Returns true if request state was successfully set to complete. If
- * successful, the caller is responsibile for seeing this request is ended, as
- * blk_mq_complete_request will not work again.
- */
-static inline bool blk_mq_mark_complete(struct request *rq)
-{
- return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) ==
- MQ_RQ_IN_FLIGHT;
-}
+unsigned int blk_mq_rq_cpu(struct request *rq);
/*
* Driver command data is immediately after the request. So subtract request
@@ -328,4 +357,14 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
for ((i) = 0; (i) < (hctx)->nr_ctx && \
({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
+static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx,
+ struct request *rq)
+{
+ if (rq->tag != -1)
+ return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT);
+
+ return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) |
+ BLK_QC_T_INTERNAL;
+}
+
#endif
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1dcf652ba0aa..5c7e7f859a24 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -174,11 +174,11 @@ struct bio {
void *bi_private;
#ifdef CONFIG_BLK_CGROUP
/*
- * Optional ioc and css associated with this bio. Put on bio
- * release. Read comment on top of bio_associate_current().
+ * Represents the association of the css and request_queue for the bio.
+ * If a bio goes direct to device, it will not have a blkg as it will
+ * not have a request_queue associated with it. The reference is put
+ * on release of the bio.
*/
- struct io_context *bi_ioc;
- struct cgroup_subsys_state *bi_css;
struct blkcg_gq *bi_blkg;
struct bio_issue bi_issue;
#endif
@@ -228,6 +228,7 @@ struct bio {
#define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion
* of this bio. */
#define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */
+#define BIO_TRACKED 12 /* set if bio goes through the rq_qos path */
/* See BVEC_POOL_OFFSET below before adding new flags */
@@ -323,6 +324,8 @@ enum req_flag_bits {
/* command specific flags for REQ_OP_WRITE_ZEROES: */
__REQ_NOUNMAP, /* do not free blocks when zeroing */
+ __REQ_HIPRI,
+
/* for driver use */
__REQ_DRV,
__REQ_SWAP, /* swapping request. */
@@ -343,8 +346,8 @@ enum req_flag_bits {
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
-
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
+#define REQ_HIPRI (1ULL << __REQ_HIPRI)
#define REQ_DRV (1ULL << __REQ_DRV)
#define REQ_SWAP (1ULL << __REQ_SWAP)
@@ -422,17 +425,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie)
return cookie != BLK_QC_T_NONE;
}
-static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num,
- bool internal)
-{
- blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT);
-
- if (internal)
- ret |= BLK_QC_T_INTERNAL;
-
- return ret;
-}
-
static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
{
return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4293dc1cd160..338604dff7d0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -58,25 +58,6 @@ struct blk_stat_callback;
typedef void (rq_end_io_fn)(struct request *, blk_status_t);
-#define BLK_RL_SYNCFULL (1U << 0)
-#define BLK_RL_ASYNCFULL (1U << 1)
-
-struct request_list {
- struct request_queue *q; /* the queue this rl belongs to */
-#ifdef CONFIG_BLK_CGROUP
- struct blkcg_gq *blkg; /* blkg this request pool belongs to */
-#endif
- /*
- * count[], starved[], and wait[] are indexed by
- * BLK_RW_SYNC/BLK_RW_ASYNC
- */
- int count[2];
- int starved[2];
- mempool_t *rq_pool;
- wait_queue_head_t wait[2];
- unsigned int flags;
-};
-
/*
* request flags */
typedef __u32 __bitwise req_flags_t;
@@ -85,8 +66,6 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_SORTED ((__force req_flags_t)(1 << 0))
/* drive already may have started this one */
#define RQF_STARTED ((__force req_flags_t)(1 << 1))
-/* uses tagged queueing */
-#define RQF_QUEUED ((__force req_flags_t)(1 << 2))
/* may not be passed by ioscheduler */
#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3))
/* request for flush sequence */
@@ -150,8 +129,8 @@ enum mq_rq_state {
struct request {
struct request_queue *q;
struct blk_mq_ctx *mq_ctx;
+ struct blk_mq_hw_ctx *mq_hctx;
- int cpu;
unsigned int cmd_flags; /* op and common flags */
req_flags_t rq_flags;
@@ -245,11 +224,7 @@ struct request {
refcount_t ref;
unsigned int timeout;
-
- /* access through blk_rq_set_deadline, blk_rq_deadline */
- unsigned long __deadline;
-
- struct list_head timeout_list;
+ unsigned long deadline;
union {
struct __call_single_data csd;
@@ -264,10 +239,6 @@ struct request {
/* for bidi */
struct request *next_rq;
-
-#ifdef CONFIG_BLK_CGROUP
- struct request_list *rl; /* rl this rq is alloced from */
-#endif
};
static inline bool blk_op_is_scsi(unsigned int op)
@@ -311,41 +282,21 @@ static inline unsigned short req_get_ioprio(struct request *req)
struct blk_queue_ctx;
-typedef void (request_fn_proc) (struct request_queue *q);
typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
-typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t);
-typedef int (prep_rq_fn) (struct request_queue *, struct request *);
-typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
struct bio_vec;
-typedef void (softirq_done_fn)(struct request *);
typedef int (dma_drain_needed_fn)(struct request *);
-typedef int (lld_busy_fn) (struct request_queue *q);
-typedef int (bsg_job_fn) (struct bsg_job *);
-typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t);
-typedef void (exit_rq_fn)(struct request_queue *, struct request *);
enum blk_eh_timer_return {
BLK_EH_DONE, /* drivers has completed the command */
BLK_EH_RESET_TIMER, /* reset timer and try again */
};
-typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
-
enum blk_queue_state {
Queue_down,
Queue_up,
};
-struct blk_queue_tag {
- struct request **tag_index; /* map of busy tags */
- unsigned long *tag_map; /* bit map of free/busy tags */
- int max_depth; /* what we will send to device */
- int real_max_depth; /* what the array can hold */
- atomic_t refcnt; /* map can be shared */
- int alloc_policy; /* tag allocation policy */
- int next_tag; /* next tag */
-};
#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
@@ -389,7 +340,6 @@ struct queue_limits {
unsigned char misaligned;
unsigned char discard_misaligned;
- unsigned char cluster;
unsigned char raid_partial_stripes_expensive;
enum blk_zoned_model zoned;
};
@@ -444,40 +394,15 @@ struct request_queue {
struct list_head queue_head;
struct request *last_merge;
struct elevator_queue *elevator;
- int nr_rqs[2]; /* # allocated [a]sync rqs */
- int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
struct blk_queue_stats *stats;
struct rq_qos *rq_qos;
- /*
- * If blkcg is not used, @q->root_rl serves all requests. If blkcg
- * is used, root blkg allocates from @q->root_rl and all other
- * blkgs from their own blkg->rl. Which one to use should be
- * determined using bio_request_list().
- */
- struct request_list root_rl;
-
- request_fn_proc *request_fn;
make_request_fn *make_request_fn;
- poll_q_fn *poll_fn;
- prep_rq_fn *prep_rq_fn;
- unprep_rq_fn *unprep_rq_fn;
- softirq_done_fn *softirq_done_fn;
- rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
- lld_busy_fn *lld_busy_fn;
- /* Called just after a request is allocated */
- init_rq_fn *init_rq_fn;
- /* Called just before a request is freed */
- exit_rq_fn *exit_rq_fn;
- /* Called from inside blk_get_request() */
- void (*initialize_rq_fn)(struct request *rq);
const struct blk_mq_ops *mq_ops;
- unsigned int *mq_map;
-
/* sw queues */
struct blk_mq_ctx __percpu *queue_ctx;
unsigned int nr_queues;
@@ -488,17 +413,6 @@ struct request_queue {
struct blk_mq_hw_ctx **queue_hw_ctx;
unsigned int nr_hw_queues;
- /*
- * Dispatch queue sorting
- */
- sector_t end_sector;
- struct request *boundary_rq;
-
- /*
- * Delayed queue handling
- */
- struct delayed_work delay_work;
-
struct backing_dev_info *backing_dev_info;
/*
@@ -529,13 +443,7 @@ struct request_queue {
*/
gfp_t bounce_gfp;
- /*
- * protects queue structures from reentrancy. ->__queue_lock should
- * _never_ be used directly, it is queue private. always use
- * ->queue_lock.
- */
- spinlock_t __queue_lock;
- spinlock_t *queue_lock;
+ spinlock_t queue_lock;
/*
* queue kobject
@@ -545,7 +453,7 @@ struct request_queue {
/*
* mq queue kobject
*/
- struct kobject mq_kobj;
+ struct kobject *mq_kobj;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity integrity;
@@ -561,27 +469,12 @@ struct request_queue {
* queue settings
*/
unsigned long nr_requests; /* Max # of requests */
- unsigned int nr_congestion_on;
- unsigned int nr_congestion_off;
- unsigned int nr_batching;
unsigned int dma_drain_size;
void *dma_drain_buffer;
unsigned int dma_pad_mask;
unsigned int dma_alignment;
- struct blk_queue_tag *queue_tags;
-
- unsigned int nr_sorted;
- unsigned int in_flight[2];
-
- /*
- * Number of active block driver functions for which blk_drain_queue()
- * must wait. Must be incremented around functions that unlock the
- * queue_lock internally, e.g. scsi_request_fn().
- */
- unsigned int request_fn_active;
-
unsigned int rq_timeout;
int poll_nsec;
@@ -590,7 +483,6 @@ struct request_queue {
struct timer_list timeout;
struct work_struct timeout_work;
- struct list_head timeout_list;
struct list_head icq_list;
#ifdef CONFIG_BLK_CGROUP
@@ -645,11 +537,9 @@ struct request_queue {
struct mutex sysfs_lock;
- int bypass_depth;
atomic_t mq_freeze_depth;
#if defined(CONFIG_BLK_DEV_BSG)
- bsg_job_fn *bsg_job_fn;
struct bsg_class_device bsg_dev;
#endif
@@ -669,12 +559,12 @@ struct request_queue {
#ifdef CONFIG_BLK_DEBUG_FS
struct dentry *debugfs_dir;
struct dentry *sched_debugfs_dir;
+ struct dentry *rqos_debugfs_dir;
#endif
bool mq_sysfs_init_done;
size_t cmd_size;
- void *rq_alloc_data;
struct work_struct release_work;
@@ -682,10 +572,8 @@ struct request_queue {
u64 write_hints[BLK_MAX_WRITE_HINTS];
};
-#define QUEUE_FLAG_QUEUED 0 /* uses generic tag queueing */
#define QUEUE_FLAG_STOPPED 1 /* queue is stopped */
#define QUEUE_FLAG_DYING 2 /* queue being torn down */
-#define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */
#define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */
@@ -718,19 +606,15 @@ struct request_queue {
(1 << QUEUE_FLAG_ADD_RANDOM))
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
- (1 << QUEUE_FLAG_SAME_COMP) | \
- (1 << QUEUE_FLAG_POLL))
+ (1 << QUEUE_FLAG_SAME_COMP))
void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
-bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
-#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
-#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
@@ -757,37 +641,20 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
-static inline int queue_in_flight(struct request_queue *q)
-{
- return q->in_flight[0] + q->in_flight[1];
-}
-
static inline bool blk_account_rq(struct request *rq)
{
return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
}
-#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
#define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
-/* rq->queuelist of dequeued request must be list_empty() */
-#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist))
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
-/*
- * Driver can handle struct request, if it either has an old style
- * request_fn defined, or is blk-mq based.
- */
-static inline bool queue_is_rq_based(struct request_queue *q)
-{
- return q->request_fn || q->mq_ops;
-}
-
-static inline unsigned int blk_queue_cluster(struct request_queue *q)
+static inline bool queue_is_mq(struct request_queue *q)
{
- return q->limits.cluster;
+ return q->mq_ops;
}
static inline enum blk_zoned_model
@@ -845,27 +712,6 @@ static inline bool rq_is_sync(struct request *rq)
return op_is_sync(rq->cmd_flags);
}
-static inline bool blk_rl_full(struct request_list *rl, bool sync)
-{
- unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
-
- return rl->flags & flag;
-}
-
-static inline void blk_set_rl_full(struct request_list *rl, bool sync)
-{
- unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
-
- rl->flags |= flag;
-}
-
-static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
-{
- unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
-
- rl->flags &= ~flag;
-}
-
static inline bool rq_mergeable(struct request *rq)
{
if (blk_rq_is_passthrough(rq))
@@ -902,16 +748,6 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
return q->nr_requests;
}
-/*
- * q->prep_rq_fn return values
- */
-enum {
- BLKPREP_OK, /* serve it */
- BLKPREP_KILL, /* fatal error, kill, return -EIO */
- BLKPREP_DEFER, /* leave on queue */
- BLKPREP_INVALID, /* invalid command, kill, return -EREMOTEIO */
-};
-
extern unsigned long blk_max_low_pfn, blk_max_pfn;
/*
@@ -983,10 +819,8 @@ extern blk_qc_t direct_make_request(struct bio *bio);
extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
extern void blk_put_request(struct request *);
-extern void __blk_put_request(struct request_queue *, struct request *);
extern struct request *blk_get_request(struct request_queue *, unsigned int op,
blk_mq_req_flags_t flags);
-extern void blk_requeue_request(struct request_queue *, struct request *);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
struct bio_set *bs, gfp_t gfp_mask,
@@ -996,7 +830,6 @@ extern void blk_rq_unprep_clone(struct request *rq);
extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
struct request *rq);
extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
-extern void blk_delay_queue(struct request_queue *, unsigned long);
extern void blk_queue_split(struct request_queue *, struct bio **);
extern void blk_recount_segments(struct request_queue *, struct bio *);
extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
@@ -1009,15 +842,7 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
extern void blk_queue_exit(struct request_queue *q);
-extern void blk_start_queue(struct request_queue *q);
-extern void blk_start_queue_async(struct request_queue *q);
-extern void blk_stop_queue(struct request_queue *q);
extern void blk_sync_queue(struct request_queue *q);
-extern void __blk_stop_queue(struct request_queue *q);
-extern void __blk_run_queue(struct request_queue *q);
-extern void __blk_run_queue_uncond(struct request_queue *q);
-extern void blk_run_queue(struct request_queue *);
-extern void blk_run_queue_async(struct request_queue *q);
extern int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long,
gfp_t);
@@ -1034,7 +859,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
int blk_status_to_errno(blk_status_t status);
blk_status_t errno_to_blk_status(int errno);
-bool blk_poll(struct request_queue *q, blk_qc_t cookie);
+int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{
@@ -1172,13 +997,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq)
return nr_bios;
}
-/*
- * Request issue related functions.
- */
-extern struct request *blk_peek_request(struct request_queue *q);
-extern void blk_start_request(struct request *rq);
-extern struct request *blk_fetch_request(struct request_queue *q);
-
void blk_steal_bios(struct bio_list *list, struct request *rq);
/*
@@ -1196,27 +1014,18 @@ void blk_steal_bios(struct bio_list *list, struct request *rq);
*/
extern bool blk_update_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
-extern void blk_finish_request(struct request *rq, blk_status_t error);
-extern bool blk_end_request(struct request *rq, blk_status_t error,
- unsigned int nr_bytes);
extern void blk_end_request_all(struct request *rq, blk_status_t error);
extern bool __blk_end_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
extern void __blk_end_request_all(struct request *rq, blk_status_t error);
extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
-extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
extern void blk_abort_request(struct request *);
-extern void blk_unprep_request(struct request *);
/*
* Access functions for manipulating queue properties
*/
-extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
- spinlock_t *lock, int node_id);
-extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
-extern int blk_init_allocated_queue(struct request_queue *);
extern void blk_cleanup_queue(struct request_queue *);
extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
@@ -1255,15 +1064,10 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern int blk_queue_dma_drain(struct request_queue *q,
dma_drain_needed_fn *dma_drain_needed,
void *buf, unsigned int size);
-extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
-extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
-extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
extern void blk_queue_dma_alignment(struct request_queue *, int);
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
-extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
-extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
@@ -1299,8 +1103,7 @@ extern long nr_blockdev_pages(void);
bool __must_check blk_get_queue(struct request_queue *);
struct request_queue *blk_alloc_queue(gfp_t);
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
- spinlock_t *lock);
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id);
extern void blk_put_queue(struct request_queue *);
extern void blk_set_queue_dying(struct request_queue *);
@@ -1317,9 +1120,10 @@ extern void blk_set_queue_dying(struct request_queue *);
* schedule() where blk_schedule_flush_plug() is called.
*/
struct blk_plug {
- struct list_head list; /* requests */
struct list_head mq_list; /* blk-mq requests */
struct list_head cb_list; /* md requires an unplug callback */
+ unsigned short rq_count;
+ bool multiple_queues;
};
#define BLK_MAX_REQUEST_COUNT 16
#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
@@ -1358,31 +1162,10 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
struct blk_plug *plug = tsk->plug;
return plug &&
- (!list_empty(&plug->list) ||
- !list_empty(&plug->mq_list) ||
+ (!list_empty(&plug->mq_list) ||
!list_empty(&plug->cb_list));
}
-/*
- * tag stuff
- */
-extern int blk_queue_start_tag(struct request_queue *, struct request *);
-extern struct request *blk_queue_find_tag(struct request_queue *, int);
-extern void blk_queue_end_tag(struct request_queue *, struct request *);
-extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
-extern void blk_queue_free_tags(struct request_queue *);
-extern int blk_queue_resize_tags(struct request_queue *, int);
-extern struct blk_queue_tag *blk_init_tags(int, int);
-extern void blk_free_tags(struct blk_queue_tag *);
-
-static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
- int tag)
-{
- if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
- return NULL;
- return bqt->tag_index[tag];
-}
-
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
@@ -1982,4 +1765,17 @@ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
#endif /* CONFIG_BLOCK */
+static inline void blk_wake_io_task(struct task_struct *waiter)
+{
+ /*
+ * If we're polling, the task itself is doing the completions. For
+ * that case, we don't need to signal a wakeup, it's enough to just
+ * mark us as RUNNING.
+ */
+ if (waiter == current)
+ __set_current_state(TASK_RUNNING);
+ else
+ wake_up_process(waiter);
+}
+
#endif
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 6aeaf6472665..b356e0006731 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -31,6 +31,9 @@ struct device;
struct scatterlist;
struct request_queue;
+typedef int (bsg_job_fn) (struct bsg_job *);
+typedef enum blk_eh_timer_return (bsg_timeout_fn)(struct request *);
+
struct bsg_buffer {
unsigned int payload_len;
int sg_cnt;
@@ -72,7 +75,8 @@ struct bsg_job {
void bsg_job_done(struct bsg_job *job, int result,
unsigned int reply_payload_rcv_len);
struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
- bsg_job_fn *job_fn, int dd_job_size);
+ bsg_job_fn *job_fn, bsg_timeout_fn *timeout, int dd_job_size);
+void bsg_remove_queue(struct request_queue *q);
void bsg_job_put(struct bsg_job *job);
int __must_check bsg_job_get(struct bsg_job *job);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 9d12757a65b0..9968332cceed 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,6 +93,8 @@ extern struct css_set init_css_set;
bool css_has_online_children(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
+struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
+ struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
struct cgroup_subsys *ss);
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 88720b443cd6..056be0d03722 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -169,6 +169,10 @@ typedef struct {
compat_sigset_word sig[_COMPAT_NSIG_WORDS];
} compat_sigset_t;
+int set_compat_user_sigmask(const compat_sigset_t __user *usigmask,
+ sigset_t *set, sigset_t *oldset,
+ size_t sigsetsize);
+
struct compat_sigaction {
#ifndef __ARCH_HAS_IRIX_SIGACTION
compat_uptr_t sa_handler;
@@ -558,6 +562,12 @@ asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
struct io_event __user *events,
struct old_timespec32 __user *timeout,
const struct __compat_aio_sigset __user *usig);
+asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id,
+ compat_long_t min_nr,
+ compat_long_t nr,
+ struct io_event __user *events,
+ struct __kernel_timespec __user *timeout,
+ const struct __compat_aio_sigset __user *usig);
/* fs/cookies.c */
asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);
@@ -643,11 +653,21 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *exp,
struct old_timespec32 __user *tsp,
void __user *sig);
+asmlinkage long compat_sys_pselect6_time64(int n, compat_ulong_t __user *inp,
+ compat_ulong_t __user *outp,
+ compat_ulong_t __user *exp,
+ struct __kernel_timespec __user *tsp,
+ void __user *sig);
asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
unsigned int nfds,
struct old_timespec32 __user *tsp,
const compat_sigset_t __user *sigmask,
compat_size_t sigsetsize);
+asmlinkage long compat_sys_ppoll_time64(struct pollfd __user *ufds,
+ unsigned int nfds,
+ struct __kernel_timespec __user *tsp,
+ const compat_sigset_t __user *sigmask,
+ compat_size_t sigsetsize);
/* fs/signalfd.c */
asmlinkage long compat_sys_signalfd4(int ufd,
@@ -768,6 +788,9 @@ asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset,
asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese,
struct compat_siginfo __user *uinfo,
struct old_timespec32 __user *uts, compat_size_t sigsetsize);
+asmlinkage long compat_sys_rt_sigtimedwait_time64(compat_sigset_t __user *uthese,
+ struct compat_siginfo __user *uinfo,
+ struct __kernel_timespec __user *uts, compat_size_t sigsetsize);
asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig,
struct compat_siginfo __user *uinfo);
/* No generic prototype for rt_sigreturn */
@@ -873,6 +896,9 @@ asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages,
asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid,
compat_pid_t pid, int sig,
struct compat_siginfo __user *uinfo);
+asmlinkage long compat_sys_recvmmsg_time64(int fd, struct compat_mmsghdr __user *mmsg,
+ unsigned vlen, unsigned int flags,
+ struct __kernel_timespec __user *timeout);
asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
unsigned vlen, unsigned int flags,
struct old_timespec32 __user *timeout);
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 30213adbb6b9..2ad5c363d7d5 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -30,8 +30,6 @@ struct bus_type;
extern void dma_debug_add_bus(struct bus_type *bus);
-extern int dma_debug_resize_entries(u32 num_entries);
-
extern void debug_dma_map_single(struct device *dev, const void *addr,
unsigned long len);
@@ -72,17 +70,6 @@ extern void debug_dma_sync_single_for_device(struct device *dev,
dma_addr_t dma_handle,
size_t size, int direction);
-extern void debug_dma_sync_single_range_for_cpu(struct device *dev,
- dma_addr_t dma_handle,
- unsigned long offset,
- size_t size,
- int direction);
-
-extern void debug_dma_sync_single_range_for_device(struct device *dev,
- dma_addr_t dma_handle,
- unsigned long offset,
- size_t size, int direction);
-
extern void debug_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg,
int nelems, int direction);
@@ -101,11 +88,6 @@ static inline void dma_debug_add_bus(struct bus_type *bus)
{
}
-static inline int dma_debug_resize_entries(u32 num_entries)
-{
- return 0;
-}
-
static inline void debug_dma_map_single(struct device *dev, const void *addr,
unsigned long len)
{
@@ -174,22 +156,6 @@ static inline void debug_dma_sync_single_for_device(struct device *dev,
{
}
-static inline void debug_dma_sync_single_range_for_cpu(struct device *dev,
- dma_addr_t dma_handle,
- unsigned long offset,
- size_t size,
- int direction)
-{
-}
-
-static inline void debug_dma_sync_single_range_for_device(struct device *dev,
- dma_addr_t dma_handle,
- unsigned long offset,
- size_t size,
- int direction)
-{
-}
-
static inline void debug_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg,
int nelems, int direction)
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 9e66bfe369aa..b7338702592a 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,8 +5,6 @@
#include <linux/dma-mapping.h>
#include <linux/mem_encrypt.h>
-#define DIRECT_MAPPING_ERROR (~(dma_addr_t)0)
-
#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
#include <asm/dma-direct.h>
#else
@@ -50,14 +48,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
return __sme_clr(__dma_to_phys(dev, daddr));
}
-#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN
-void dma_mark_clean(void *addr, size_t size);
-#else
-static inline void dma_mark_clean(void *addr, size_t size)
-{
-}
-#endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */
-
u64 dma_direct_get_required_mask(struct device *dev);
void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs);
@@ -67,11 +57,8 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs);
-dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size, enum dma_data_direction dir,
- unsigned long attrs);
-int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
- enum dma_data_direction dir, unsigned long attrs);
+struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
+void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page);
int dma_direct_supported(struct device *dev, u64 mask);
-int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr);
#endif /* _LINUX_DMA_DIRECT_H */
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index e8ca5e654277..e760dc5d1fa8 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -69,7 +69,6 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir, unsigned long attrs);
void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs);
-int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
/* The DMA API isn't _quite_ the whole story, though... */
void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index d327bdd53716..ba521d5506c9 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -128,13 +128,14 @@ struct dma_map_ops {
enum dma_data_direction dir);
void (*cache_sync)(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction);
- int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
int (*dma_supported)(struct device *dev, u64 mask);
u64 (*get_required_mask)(struct device *dev);
};
-extern const struct dma_map_ops dma_direct_ops;
+#define DMA_MAPPING_ERROR (~(dma_addr_t)0)
+
extern const struct dma_map_ops dma_virt_ops;
+extern const struct dma_map_ops dma_dummy_ops;
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
@@ -220,6 +221,69 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
}
#endif
+static inline bool dma_is_direct(const struct dma_map_ops *ops)
+{
+ return likely(!ops);
+}
+
+/*
+ * All the dma_direct_* declarations are here just for the indirect call bypass,
+ * and must not be used directly drivers!
+ */
+dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
+int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+ enum dma_data_direction dir, unsigned long attrs);
+
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
+ defined(CONFIG_SWIOTLB)
+void dma_direct_sync_single_for_device(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir);
+void dma_direct_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir);
+#else
+static inline void dma_direct_sync_single_for_device(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+}
+static inline void dma_direct_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+}
+#endif
+
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
+ defined(CONFIG_SWIOTLB)
+void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs);
+void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs);
+void dma_direct_sync_single_for_cpu(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir);
+void dma_direct_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir);
+#else
+static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+}
+static inline void dma_direct_unmap_sg(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+}
+static inline void dma_direct_sync_single_for_cpu(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+}
+static inline void dma_direct_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+}
+#endif
+
static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
size_t size,
enum dma_data_direction dir,
@@ -230,9 +294,12 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
BUG_ON(!valid_dma_direction(dir));
debug_dma_map_single(dev, ptr, size);
- addr = ops->map_page(dev, virt_to_page(ptr),
- offset_in_page(ptr), size,
- dir, attrs);
+ if (dma_is_direct(ops))
+ addr = dma_direct_map_page(dev, virt_to_page(ptr),
+ offset_in_page(ptr), size, dir, attrs);
+ else
+ addr = ops->map_page(dev, virt_to_page(ptr),
+ offset_in_page(ptr), size, dir, attrs);
debug_dma_map_page(dev, virt_to_page(ptr),
offset_in_page(ptr), size,
dir, addr, true);
@@ -247,11 +314,19 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (ops->unmap_page)
+ if (dma_is_direct(ops))
+ dma_direct_unmap_page(dev, addr, size, dir, attrs);
+ else if (ops->unmap_page)
ops->unmap_page(dev, addr, size, dir, attrs);
debug_dma_unmap_page(dev, addr, size, dir, true);
}
+static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ return dma_unmap_single_attrs(dev, addr, size, dir, attrs);
+}
+
/*
* dma_maps_sg_attrs returns 0 on error and > 0 on success.
* It should never return a value < 0.
@@ -264,7 +339,10 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
int ents;
BUG_ON(!valid_dma_direction(dir));
- ents = ops->map_sg(dev, sg, nents, dir, attrs);
+ if (dma_is_direct(ops))
+ ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
+ else
+ ents = ops->map_sg(dev, sg, nents, dir, attrs);
BUG_ON(ents < 0);
debug_dma_map_sg(dev, sg, nents, ents, dir);
@@ -279,7 +357,9 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg
BUG_ON(!valid_dma_direction(dir));
debug_dma_unmap_sg(dev, sg, nents, dir);
- if (ops->unmap_sg)
+ if (dma_is_direct(ops))
+ dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
+ else if (ops->unmap_sg)
ops->unmap_sg(dev, sg, nents, dir, attrs);
}
@@ -293,25 +373,15 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev,
dma_addr_t addr;
BUG_ON(!valid_dma_direction(dir));
- addr = ops->map_page(dev, page, offset, size, dir, attrs);
+ if (dma_is_direct(ops))
+ addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+ else
+ addr = ops->map_page(dev, page, offset, size, dir, attrs);
debug_dma_map_page(dev, page, offset, size, dir, addr, false);
return addr;
}
-static inline void dma_unmap_page_attrs(struct device *dev,
- dma_addr_t addr, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->unmap_page)
- ops->unmap_page(dev, addr, size, dir, attrs);
- debug_dma_unmap_page(dev, addr, size, dir, false);
-}
-
static inline dma_addr_t dma_map_resource(struct device *dev,
phys_addr_t phys_addr,
size_t size,
@@ -327,7 +397,7 @@ static inline dma_addr_t dma_map_resource(struct device *dev,
BUG_ON(pfn_valid(PHYS_PFN(phys_addr)));
addr = phys_addr;
- if (ops->map_resource)
+ if (ops && ops->map_resource)
addr = ops->map_resource(dev, phys_addr, size, dir, attrs);
debug_dma_map_resource(dev, phys_addr, size, dir, addr);
@@ -342,7 +412,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (ops->unmap_resource)
+ if (ops && ops->unmap_resource)
ops->unmap_resource(dev, addr, size, dir, attrs);
debug_dma_unmap_resource(dev, addr, size, dir);
}
@@ -354,11 +424,20 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_single_for_cpu)
+ if (dma_is_direct(ops))
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+ else if (ops->sync_single_for_cpu)
ops->sync_single_for_cpu(dev, addr, size, dir);
debug_dma_sync_single_for_cpu(dev, addr, size, dir);
}
+static inline void dma_sync_single_range_for_cpu(struct device *dev,
+ dma_addr_t addr, unsigned long offset, size_t size,
+ enum dma_data_direction dir)
+{
+ return dma_sync_single_for_cpu(dev, addr + offset, size, dir);
+}
+
static inline void dma_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size,
enum dma_data_direction dir)
@@ -366,37 +445,18 @@ static inline void dma_sync_single_for_device(struct device *dev,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_single_for_device)
+ if (dma_is_direct(ops))
+ dma_direct_sync_single_for_device(dev, addr, size, dir);
+ else if (ops->sync_single_for_device)
ops->sync_single_for_device(dev, addr, size, dir);
debug_dma_sync_single_for_device(dev, addr, size, dir);
}
-static inline void dma_sync_single_range_for_cpu(struct device *dev,
- dma_addr_t addr,
- unsigned long offset,
- size_t size,
- enum dma_data_direction dir)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_single_for_cpu)
- ops->sync_single_for_cpu(dev, addr + offset, size, dir);
- debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir);
-}
-
static inline void dma_sync_single_range_for_device(struct device *dev,
- dma_addr_t addr,
- unsigned long offset,
- size_t size,
- enum dma_data_direction dir)
+ dma_addr_t addr, unsigned long offset, size_t size,
+ enum dma_data_direction dir)
{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_single_for_device)
- ops->sync_single_for_device(dev, addr + offset, size, dir);
- debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir);
+ return dma_sync_single_for_device(dev, addr + offset, size, dir);
}
static inline void
@@ -406,7 +466,9 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_sg_for_cpu)
+ if (dma_is_direct(ops))
+ dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir);
+ else if (ops->sync_sg_for_cpu)
ops->sync_sg_for_cpu(dev, sg, nelems, dir);
debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
}
@@ -418,7 +480,9 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_sg_for_device)
+ if (dma_is_direct(ops))
+ dma_direct_sync_sg_for_device(dev, sg, nelems, dir);
+ else if (ops->sync_sg_for_device)
ops->sync_sg_for_device(dev, sg, nelems, dir);
debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
@@ -431,16 +495,8 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
#define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0)
#define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)
-static inline void
-dma_cache_sync(struct device *dev, void *vaddr, size_t size,
- enum dma_data_direction dir)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->cache_sync)
- ops->cache_sync(dev, vaddr, size, dir);
-}
+void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction dir);
extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
@@ -455,107 +511,29 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
-/**
- * dma_mmap_attrs - map a coherent DMA allocation into user space
- * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
- * @vma: vm_area_struct describing requested user mapping
- * @cpu_addr: kernel CPU-view address returned from dma_alloc_attrs
- * @handle: device-view address returned from dma_alloc_attrs
- * @size: size of memory originally requested in dma_alloc_attrs
- * @attrs: attributes of mapping properties requested in dma_alloc_attrs
- *
- * Map a coherent DMA buffer previously allocated by dma_alloc_attrs
- * into user space. The coherent DMA buffer must not be freed by the
- * driver until the user space mapping has been released.
- */
-static inline int
-dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr,
- dma_addr_t dma_addr, size_t size, unsigned long attrs)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev);
- BUG_ON(!ops);
- if (ops->mmap)
- return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
- return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
-}
+int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot);
+bool dma_in_atomic_pool(void *start, size_t size);
+void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
+bool dma_free_from_pool(void *start, size_t size);
+int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs);
#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0)
int
dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr,
dma_addr_t dma_addr, size_t size, unsigned long attrs);
-static inline int
-dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr,
- dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev);
- BUG_ON(!ops);
- if (ops->get_sgtable)
- return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
- attrs);
- return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
- attrs);
-}
-
+int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs);
#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
-#ifndef arch_dma_alloc_attrs
-#define arch_dma_alloc_attrs(dev) (true)
-#endif
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev);
- void *cpu_addr;
-
- BUG_ON(!ops);
- WARN_ON_ONCE(dev && !dev->coherent_dma_mask);
-
- if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
- return cpu_addr;
-
- /* let the implementation decide on the zone to allocate from: */
- flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
-
- if (!arch_dma_alloc_attrs(&dev))
- return NULL;
- if (!ops->alloc)
- return NULL;
-
- cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
- return cpu_addr;
-}
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!ops);
-
- if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
- return;
- /*
- * On non-coherent platforms which implement DMA-coherent buffers via
- * non-cacheable remaps, ops->free() may call vunmap(). Thus getting
- * this far in IRQ context is a) at risk of a BUG_ON() or trying to
- * sleep on some machines, and b) an indication that the driver is
- * probably misusing the coherent API anyway.
- */
- WARN_ON(irqs_disabled());
-
- if (!ops->free || !cpu_addr)
- return;
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
+void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t flag, unsigned long attrs);
+void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle, unsigned long attrs);
static inline void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp)
@@ -573,43 +551,16 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
debug_dma_mapping_error(dev, dma_addr);
- if (ops->mapping_error)
- return ops->mapping_error(dev, dma_addr);
- return 0;
-}
-
-static inline void dma_check_mask(struct device *dev, u64 mask)
-{
- if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1)))
- dev_warn(dev, "SME is active, device will require DMA bounce buffers\n");
-}
-
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (!ops)
- return 0;
- if (!ops->dma_supported)
- return 1;
- return ops->dma_supported(dev, mask);
-}
-
-#ifndef HAVE_ARCH_DMA_SET_MASK
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
- dma_check_mask(dev, mask);
-
- *dev->dma_mask = mask;
+ if (dma_addr == DMA_MAPPING_ERROR)
+ return -ENOMEM;
return 0;
}
-#endif
+
+int dma_supported(struct device *dev, u64 mask);
+int dma_set_mask(struct device *dev, u64 mask);
+int dma_set_coherent_mask(struct device *dev, u64 mask);
static inline u64 dma_get_mask(struct device *dev)
{
@@ -618,21 +569,6 @@ static inline u64 dma_get_mask(struct device *dev)
return DMA_BIT_MASK(32);
}
-#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
-int dma_set_coherent_mask(struct device *dev, u64 mask);
-#else
-static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
-{
- if (!dma_supported(dev, mask))
- return -EIO;
-
- dma_check_mask(dev, mask);
-
- dev->coherent_dma_mask = mask;
- return 0;
-}
-#endif
-
/*
* Set both the DMA mask and the coherent DMA mask to the same thing.
* Note that we don't check the return value from dma_set_coherent_mask()
@@ -676,8 +612,7 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev)
return SZ_64K;
}
-static inline unsigned int dma_set_max_seg_size(struct device *dev,
- unsigned int size)
+static inline int dma_set_max_seg_size(struct device *dev, unsigned int size)
{
if (dev->dma_parms) {
dev->dma_parms->max_segment_size = size;
@@ -709,12 +644,13 @@ static inline unsigned long dma_max_pfn(struct device *dev)
}
#endif
+/*
+ * Please always use dma_alloc_coherent instead as it already zeroes the memory!
+ */
static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag)
{
- void *ret = dma_alloc_coherent(dev, size, dma_handle,
- flag | __GFP_ZERO);
- return ret;
+ return dma_alloc_coherent(dev, size, dma_handle, flag);
}
static inline int dma_get_cache_alignment(void)
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 9051b055beec..69b36ed31a99 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -38,7 +38,10 @@ pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction);
#else
-#define arch_dma_cache_sync NULL
+static inline void arch_dma_cache_sync(struct device *dev, void *vaddr,
+ size_t size, enum dma_data_direction direction)
+{
+}
#endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */
#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE
@@ -69,4 +72,6 @@ static inline void arch_sync_dma_for_cpu_all(struct device *dev)
}
#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */
+void arch_dma_prep_coherent(struct page *page, size_t size);
+
#endif /* _LINUX_DMA_NONCOHERENT_H */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 015bb59c0331..2e9e2763bf47 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -23,74 +23,6 @@ enum elv_merge {
ELEVATOR_DISCARD_MERGE = 3,
};
-typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **,
- struct bio *);
-
-typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *);
-
-typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge);
-
-typedef int (elevator_allow_bio_merge_fn) (struct request_queue *,
- struct request *, struct bio *);
-
-typedef int (elevator_allow_rq_merge_fn) (struct request_queue *,
- struct request *, struct request *);
-
-typedef void (elevator_bio_merged_fn) (struct request_queue *,
- struct request *, struct bio *);
-
-typedef int (elevator_dispatch_fn) (struct request_queue *, int);
-
-typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
-typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
-typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
-typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int);
-
-typedef void (elevator_init_icq_fn) (struct io_cq *);
-typedef void (elevator_exit_icq_fn) (struct io_cq *);
-typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
- struct bio *, gfp_t);
-typedef void (elevator_put_req_fn) (struct request *);
-typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
-typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
-
-typedef int (elevator_init_fn) (struct request_queue *,
- struct elevator_type *e);
-typedef void (elevator_exit_fn) (struct elevator_queue *);
-typedef void (elevator_registered_fn) (struct request_queue *);
-
-struct elevator_ops
-{
- elevator_merge_fn *elevator_merge_fn;
- elevator_merged_fn *elevator_merged_fn;
- elevator_merge_req_fn *elevator_merge_req_fn;
- elevator_allow_bio_merge_fn *elevator_allow_bio_merge_fn;
- elevator_allow_rq_merge_fn *elevator_allow_rq_merge_fn;
- elevator_bio_merged_fn *elevator_bio_merged_fn;
-
- elevator_dispatch_fn *elevator_dispatch_fn;
- elevator_add_req_fn *elevator_add_req_fn;
- elevator_activate_req_fn *elevator_activate_req_fn;
- elevator_deactivate_req_fn *elevator_deactivate_req_fn;
-
- elevator_completed_req_fn *elevator_completed_req_fn;
-
- elevator_request_list_fn *elevator_former_req_fn;
- elevator_request_list_fn *elevator_latter_req_fn;
-
- elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */
- elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */
-
- elevator_set_req_fn *elevator_set_req_fn;
- elevator_put_req_fn *elevator_put_req_fn;
-
- elevator_may_queue_fn *elevator_may_queue_fn;
-
- elevator_init_fn *elevator_init_fn;
- elevator_exit_fn *elevator_exit_fn;
- elevator_registered_fn *elevator_registered_fn;
-};
-
struct blk_mq_alloc_data;
struct blk_mq_hw_ctx;
@@ -137,17 +69,14 @@ struct elevator_type
struct kmem_cache *icq_cache;
/* fields provided by elevator implementation */
- union {
- struct elevator_ops sq;
- struct elevator_mq_ops mq;
- } ops;
+ struct elevator_mq_ops ops;
+
size_t icq_size; /* see iocontext.h */
size_t icq_align; /* ditto */
struct elv_fs_entry *elevator_attrs;
char elevator_name[ELV_NAME_MAX];
const char *elevator_alias;
struct module *elevator_owner;
- bool uses_mq;
#ifdef CONFIG_BLK_DEBUG_FS
const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
const struct blk_mq_debugfs_attr *hctx_debugfs_attrs;
@@ -175,40 +104,25 @@ struct elevator_queue
struct kobject kobj;
struct mutex sysfs_lock;
unsigned int registered:1;
- unsigned int uses_mq:1;
DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
};
/*
* block elevator interface
*/
-extern void elv_dispatch_sort(struct request_queue *, struct request *);
-extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
-extern void elv_add_request(struct request_queue *, struct request *, int);
-extern void __elv_add_request(struct request_queue *, struct request *, int);
extern enum elv_merge elv_merge(struct request_queue *, struct request **,
struct bio *);
extern void elv_merge_requests(struct request_queue *, struct request *,
struct request *);
extern void elv_merged_request(struct request_queue *, struct request *,
enum elv_merge);
-extern void elv_bio_merged(struct request_queue *q, struct request *,
- struct bio *);
extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
-extern void elv_requeue_request(struct request_queue *, struct request *);
extern struct request *elv_former_request(struct request_queue *, struct request *);
extern struct request *elv_latter_request(struct request_queue *, struct request *);
-extern int elv_may_queue(struct request_queue *, unsigned int);
-extern void elv_completed_request(struct request_queue *, struct request *);
-extern int elv_set_request(struct request_queue *q, struct request *rq,
- struct bio *bio, gfp_t gfp_mask);
-extern void elv_put_request(struct request_queue *, struct request *);
-extern void elv_drain_elevator(struct request_queue *);
/*
* io scheduler registration
*/
-extern void __init load_default_elevator_module(void);
extern int elv_register(struct elevator_type *);
extern void elv_unregister(struct elevator_type *);
@@ -260,9 +174,5 @@ enum {
#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist)
-#else /* CONFIG_BLOCK */
-
-static inline void load_default_elevator_module(void) { }
-
#endif /* CONFIG_BLOCK */
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1cda6648a41f..811c77743dad 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2026,7 +2026,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
.ki_filp = filp,
.ki_flags = iocb_flags(filp),
.ki_hint = ki_hint_validate(file_write_hint(filp)),
- .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0),
+ .ki_ioprio = get_current_ioprio(),
};
}
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 821ae502d3d8..ccaef0097785 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -9,9 +9,6 @@ struct inode;
struct mm_struct;
struct task_struct;
-extern int
-handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
-
/*
* Futexes are matched on equal values of this key.
* The key type depends on whether it's a shared or private mapping.
@@ -55,11 +52,6 @@ extern void exit_robust_list(struct task_struct *curr);
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
-#define futex_cmpxchg_enabled 1
-#else
-extern int futex_cmpxchg_enabled;
-#endif
#else
static inline void exit_robust_list(struct task_struct *curr)
{
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 70fc838e6773..06c0fd594097 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -17,6 +17,7 @@
#include <linux/percpu-refcount.h>
#include <linux/uuid.h>
#include <linux/blk_types.h>
+#include <asm/local.h>
#ifdef CONFIG_BLOCK
@@ -89,6 +90,7 @@ struct disk_stats {
unsigned long merges[NR_STAT_GROUPS];
unsigned long io_ticks;
unsigned long time_in_queue;
+ local_t in_flight[2];
};
#define PARTITION_META_INFO_VOLNAMELTH 64
@@ -122,14 +124,13 @@ struct hd_struct {
int make_it_fail;
#endif
unsigned long stamp;
- atomic_t in_flight[2];
#ifdef CONFIG_SMP
struct disk_stats __percpu *dkstats;
#else
struct disk_stats dkstats;
#endif
struct percpu_ref ref;
- struct rcu_head rcu_head;
+ struct rcu_work rcu_work;
};
#define GENHD_FL_REMOVABLE 1
@@ -295,8 +296,11 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); })
#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0)
-#define __part_stat_add(cpu, part, field, addnd) \
- (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd))
+#define part_stat_get_cpu(part, field, cpu) \
+ (per_cpu_ptr((part)->dkstats, (cpu))->field)
+
+#define part_stat_get(part, field) \
+ part_stat_get_cpu(part, field, smp_processor_id())
#define part_stat_read(part, field) \
({ \
@@ -333,10 +337,9 @@ static inline void free_part_stats(struct hd_struct *part)
#define part_stat_lock() ({ rcu_read_lock(); 0; })
#define part_stat_unlock() rcu_read_unlock()
-#define __part_stat_add(cpu, part, field, addnd) \
- ((part)->dkstats.field += addnd)
-
-#define part_stat_read(part, field) ((part)->dkstats.field)
+#define part_stat_get(part, field) ((part)->dkstats.field)
+#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field)
+#define part_stat_read(part, field) part_stat_get(part, field)
static inline void part_stat_set_all(struct hd_struct *part, int value)
{
@@ -362,22 +365,33 @@ static inline void free_part_stats(struct hd_struct *part)
part_stat_read(part, field[STAT_WRITE]) + \
part_stat_read(part, field[STAT_DISCARD]))
-#define part_stat_add(cpu, part, field, addnd) do { \
- __part_stat_add((cpu), (part), field, addnd); \
+#define __part_stat_add(part, field, addnd) \
+ (part_stat_get(part, field) += (addnd))
+
+#define part_stat_add(part, field, addnd) do { \
+ __part_stat_add((part), field, addnd); \
if ((part)->partno) \
- __part_stat_add((cpu), &part_to_disk((part))->part0, \
+ __part_stat_add(&part_to_disk((part))->part0, \
field, addnd); \
} while (0)
-#define part_stat_dec(cpu, gendiskp, field) \
- part_stat_add(cpu, gendiskp, field, -1)
-#define part_stat_inc(cpu, gendiskp, field) \
- part_stat_add(cpu, gendiskp, field, 1)
-#define part_stat_sub(cpu, gendiskp, field, subnd) \
- part_stat_add(cpu, gendiskp, field, -subnd)
-
-void part_in_flight(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2]);
+#define part_stat_dec(gendiskp, field) \
+ part_stat_add(gendiskp, field, -1)
+#define part_stat_inc(gendiskp, field) \
+ part_stat_add(gendiskp, field, 1)
+#define part_stat_sub(gendiskp, field, subnd) \
+ part_stat_add(gendiskp, field, -subnd)
+
+#define part_stat_local_dec(gendiskp, field) \
+ local_dec(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_inc(gendiskp, field) \
+ local_inc(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_read(gendiskp, field) \
+ local_read(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_read_cpu(gendiskp, field, cpu) \
+ local_read(&(part_stat_get_cpu(gendiskp, field, cpu)))
+
+unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part);
void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2]);
void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
@@ -398,8 +412,7 @@ static inline void free_part_info(struct hd_struct *part)
kfree(part->info);
}
-/* block/blk-core.c */
-extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part);
+void update_io_ticks(struct hd_struct *part, unsigned long now);
/* block/genhd.c */
extern void device_add_disk(struct device *parent, struct gendisk *disk,
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c74b0321922a..e7d29ae633cd 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -10,7 +10,7 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/ata.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
@@ -50,6 +50,7 @@ struct ide_request {
struct scsi_request sreq;
u8 sense[SCSI_SENSE_BUFFERSIZE];
u8 type;
+ void *special;
};
static inline struct ide_request *ide_req(struct request *rq)
@@ -529,6 +530,10 @@ struct ide_drive_s {
struct request_queue *queue; /* request queue */
+ bool (*prep_rq)(struct ide_drive_s *, struct request *);
+
+ struct blk_mq_tag_set tag_set;
+
struct request *rq; /* current request */
void *driver_data; /* extra driver data */
u16 *id; /* identification info */
@@ -612,6 +617,10 @@ struct ide_drive_s {
bool sense_rq_armed;
struct request *sense_rq;
struct request_sense sense_data;
+
+ /* async sense insertion */
+ struct work_struct rq_work;
+ struct list_head rq_list;
};
typedef struct ide_drive_s ide_drive_t;
@@ -1089,6 +1098,7 @@ extern int ide_pci_clk;
int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
void ide_kill_rq(ide_drive_t *, struct request *);
+void ide_insert_request_head(ide_drive_t *, struct request *);
void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
void ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
@@ -1208,7 +1218,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
extern void ide_timer_expiry(struct timer_list *t);
extern irqreturn_t ide_intr(int irq, void *dev_id);
-extern void do_ide_request(struct request_queue *);
+extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq);
void ide_init_disk(struct gendisk *, ide_drive_t *);
diff --git a/include/linux/init.h b/include/linux/init.h
index 9c2aba1dbabf..5255069f5a9f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -146,7 +146,6 @@ extern unsigned int reset_devices;
/* used by init/main.c */
void setup_arch(char **);
void prepare_namespace(void);
-void __init load_default_modules(void);
int __init init_rootfs(void);
#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX)
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 9e30ed6443db..e9bfe6972aed 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -71,6 +71,19 @@ static inline int task_nice_ioclass(struct task_struct *task)
}
/*
+ * If the calling process has set an I/O priority, use that. Otherwise, return
+ * the default I/O priority.
+ */
+static inline int get_current_ioprio(void)
+{
+ struct io_context *ioc = current->io_context;
+
+ if (ioc)
+ return ioc->ioprio;
+ return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+}
+
+/*
* For inheritance, return the highest of the two given priorities
*/
extern int ioprio_best(unsigned short aprio, unsigned short bprio);
diff --git a/include/linux/key.h b/include/linux/key.h
index e58ee10f6e58..7099985e35a9 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -346,6 +346,9 @@ static inline key_serial_t key_serial(const struct key *key)
extern void key_set_timeout(struct key *, unsigned);
+extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
+ key_perm_t perm);
+
/*
* The permissions required on a key that we're looking up.
*/
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 38c95d66ab12..68133842e6d7 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -135,7 +135,6 @@ enum {
ATA_SHT_EMULATED = 1,
ATA_SHT_THIS_ID = -1,
- ATA_SHT_USE_CLUSTERING = 1,
/* struct ata_taskfile flags */
ATA_TFLAG_LBA48 = (1 << 0), /* enable 48-bit LBA and "HOB" */
@@ -1360,7 +1359,6 @@ extern struct device_attribute *ata_common_sdev_attrs[];
.tag_alloc_policy = BLK_TAG_ALLOC_RR, \
.this_id = ATA_SHT_THIS_ID, \
.emulated = ATA_SHT_EMULATED, \
- .use_clustering = ATA_SHT_USE_CLUSTERING, \
.proc_name = drv_name, \
.slave_configure = ata_scsi_slave_config, \
.slave_destroy = ata_scsi_slave_destroy, \
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 097072c5a852..5440f11b0907 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -38,6 +38,10 @@ enum {
NDD_UNARMED = 1,
/* locked memory devices should not be accessed */
NDD_LOCKED = 2,
+ /* memory under security wipes should not be accessed */
+ NDD_SECURITY_OVERWRITE = 3,
+ /* tracking whether or not there is a pending device reference */
+ NDD_WORK_PENDING = 4,
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
@@ -87,7 +91,7 @@ struct nvdimm_bus_descriptor {
ndctl_fn ndctl;
int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc,
- struct nvdimm *nvdimm, unsigned int cmd);
+ struct nvdimm *nvdimm, unsigned int cmd, void *data);
};
struct nd_cmd_desc {
@@ -155,6 +159,46 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
}
+enum nvdimm_security_state {
+ NVDIMM_SECURITY_DISABLED,
+ NVDIMM_SECURITY_UNLOCKED,
+ NVDIMM_SECURITY_LOCKED,
+ NVDIMM_SECURITY_FROZEN,
+ NVDIMM_SECURITY_OVERWRITE,
+};
+
+#define NVDIMM_PASSPHRASE_LEN 32
+#define NVDIMM_KEY_DESC_LEN 22
+
+struct nvdimm_key_data {
+ u8 data[NVDIMM_PASSPHRASE_LEN];
+};
+
+enum nvdimm_passphrase_type {
+ NVDIMM_USER,
+ NVDIMM_MASTER,
+};
+
+struct nvdimm_security_ops {
+ enum nvdimm_security_state (*state)(struct nvdimm *nvdimm,
+ enum nvdimm_passphrase_type pass_type);
+ int (*freeze)(struct nvdimm *nvdimm);
+ int (*change_key)(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *old_data,
+ const struct nvdimm_key_data *new_data,
+ enum nvdimm_passphrase_type pass_type);
+ int (*unlock)(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data);
+ int (*disable)(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data);
+ int (*erase)(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data,
+ enum nvdimm_passphrase_type pass_type);
+ int (*overwrite)(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data);
+ int (*query_overwrite)(struct nvdimm *nvdimm);
+};
+
void badrange_init(struct badrange *badrange);
int badrange_add(struct badrange *badrange, u64 addr, u64 length);
void badrange_forget(struct badrange *badrange, phys_addr_t start,
@@ -165,6 +209,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nfit_desc);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
+struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm);
struct nvdimm *to_nvdimm(struct device *dev);
struct nd_region *to_nd_region(struct device *dev);
struct device *nd_region_dev(struct nd_region *nd_region);
@@ -175,10 +220,21 @@ const char *nvdimm_name(struct nvdimm *nvdimm);
struct kobject *nvdimm_kobj(struct nvdimm *nvdimm);
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
void *nvdimm_provider_data(struct nvdimm *nvdimm);
-struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
- const struct attribute_group **groups, unsigned long flags,
- unsigned long cmd_mask, int num_flush,
- struct resource *flush_wpq);
+struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
+ void *provider_data, const struct attribute_group **groups,
+ unsigned long flags, unsigned long cmd_mask, int num_flush,
+ struct resource *flush_wpq, const char *dimm_id,
+ const struct nvdimm_security_ops *sec_ops);
+static inline struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus,
+ void *provider_data, const struct attribute_group **groups,
+ unsigned long flags, unsigned long cmd_mask, int num_flush,
+ struct resource *flush_wpq)
+{
+ return __nvdimm_create(nvdimm_bus, provider_data, groups, flags,
+ cmd_mask, num_flush, flush_wpq, NULL, NULL);
+}
+
+int nvdimm_security_setup_events(struct nvdimm *nvdimm);
const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
@@ -204,6 +260,16 @@ u64 nd_fletcher64(void *addr, size_t len, bool le);
void nvdimm_flush(struct nd_region *nd_region);
int nvdimm_has_flush(struct nd_region *nd_region);
int nvdimm_has_cache(struct nd_region *nd_region);
+int nvdimm_in_overwrite(struct nvdimm *nvdimm);
+
+static inline int nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+ unsigned int buf_len, int *cmd_rc)
+{
+ struct nvdimm_bus *nvdimm_bus = nvdimm_to_bus(nvdimm);
+ struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+
+ return nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, cmd_rc);
+}
#ifdef CONFIG_ARCH_HAS_PMEM_API
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 2fdeac1a420d..5d865a5d5cdc 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
struct nvm_chk_meta *);
typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
+typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
typedef void (nvm_destroy_dma_pool_fn)(void *);
typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
dma_addr_t *);
@@ -357,6 +357,7 @@ struct nvm_geo {
u32 clba; /* sectors per chunk */
u16 csecs; /* sector size */
u16 sos; /* out-of-band area size */
+ bool ext; /* metadata in extended data buffer */
/* device write constrains */
u32 ws_min; /* minimum write size */
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 1e70060c92ce..e2687a30e5a1 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -54,12 +54,8 @@
* idle before writing to some registers.
*/
#define TMIO_MMC_HAS_IDLE_WAIT BIT(4)
-/*
- * A GPIO is used for card hotplug detection. We need an extra flag for this,
- * because 0 is a valid GPIO number too, and requiring users to specify
- * cd_gpio < 0 to disable GPIO hotplug would break backwards compatibility.
- */
-#define TMIO_MMC_USE_GPIO_CD BIT(5)
+
+/* BIT(5) is unused */
/*
* Some controllers have CMD12 automatically
@@ -104,7 +100,6 @@ struct tmio_mmc_data {
unsigned long capabilities2;
unsigned long flags;
u32 ocr_mask; /* available voltages */
- unsigned int cd_gpio;
int alignment_shift;
dma_addr_t dma_rx_offset;
unsigned int max_blk_count;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 4d16ba04790e..54299251d40d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -751,8 +751,8 @@ struct mlx5_hca_vport_context {
u64 node_guid;
u32 cap_mask1;
u32 cap_mask1_perm;
- u32 cap_mask2;
- u32 cap_mask2_perm;
+ u16 cap_mask2;
+ u16 cap_mask2_perm;
u16 lid;
u8 init_type_reply; /* bitmask: see ib spec 14.2.5.6 InitTypeReply */
u8 lmc;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 821b751485fb..35fe5217b244 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -76,13 +76,7 @@ enum {
};
enum {
- MLX5_GENERAL_OBJ_TYPES_CAP_UCTX = (1ULL << 4),
- MLX5_GENERAL_OBJ_TYPES_CAP_UMEM = (1ULL << 5),
-};
-
-enum {
- MLX5_OBJ_TYPE_UCTX = 0x0004,
- MLX5_OBJ_TYPE_UMEM = 0x0005,
+ MLX5_SHARED_RESOURCE_UID = 0xffff,
};
enum {
@@ -144,6 +138,9 @@ enum {
MLX5_CMD_OP_DESTROY_XRQ = 0x718,
MLX5_CMD_OP_QUERY_XRQ = 0x719,
MLX5_CMD_OP_ARM_XRQ = 0x71a,
+ MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725,
+ MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726,
+ MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727,
MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750,
MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751,
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
@@ -247,6 +244,7 @@ enum {
MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c,
MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d,
MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e,
+ MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT = 0x93f,
MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942,
@@ -259,9 +257,19 @@ enum {
MLX5_CMD_OP_MODIFY_GENERAL_OBJECT = 0xa01,
MLX5_CMD_OP_QUERY_GENERAL_OBJECT = 0xa02,
MLX5_CMD_OP_DESTROY_GENERAL_OBJECT = 0xa03,
+ MLX5_CMD_OP_CREATE_UCTX = 0xa04,
+ MLX5_CMD_OP_DESTROY_UCTX = 0xa06,
+ MLX5_CMD_OP_CREATE_UMEM = 0xa08,
+ MLX5_CMD_OP_DESTROY_UMEM = 0xa0a,
MLX5_CMD_OP_MAX
};
+/* Valid range for general commands that don't work over an object */
+enum {
+ MLX5_CMD_OP_GENERAL_START = 0xb00,
+ MLX5_CMD_OP_GENERAL_END = 0xd00,
+};
+
struct mlx5_ifc_flow_table_fields_supported_bits {
u8 outer_dmac[0x1];
u8 outer_smac[0x1];
@@ -1179,7 +1187,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_440[0x20];
- u8 reserved_at_460[0x10];
+ u8 reserved_at_460[0x3];
+ u8 log_max_uctx[0x5];
+ u8 reserved_at_468[0x3];
+ u8 log_max_umem[0x5];
u8 max_num_eqs[0x10];
u8 reserved_at_480[0x3];
@@ -6694,7 +6705,7 @@ struct mlx5_ifc_dealloc_transport_domain_out_bits {
struct mlx5_ifc_dealloc_transport_domain_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -7547,7 +7558,7 @@ struct mlx5_ifc_alloc_transport_domain_out_bits {
struct mlx5_ifc_alloc_transport_domain_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -7569,7 +7580,7 @@ struct mlx5_ifc_alloc_q_counter_out_bits {
struct mlx5_ifc_alloc_q_counter_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -9390,9 +9401,9 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
};
struct mlx5_ifc_umem_bits {
- u8 modify_field_select[0x40];
+ u8 reserved_at_0[0x80];
- u8 reserved_at_40[0x5b];
+ u8 reserved_at_80[0x1b];
u8 log_page_size[0x5];
u8 page_offset[0x20];
@@ -9403,21 +9414,46 @@ struct mlx5_ifc_umem_bits {
};
struct mlx5_ifc_uctx_bits {
- u8 modify_field_select[0x40];
-
u8 cap[0x20];
- u8 reserved_at_60[0x1a0];
+ u8 reserved_at_20[0x160];
};
struct mlx5_ifc_create_umem_in_bits {
- struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
- struct mlx5_ifc_umem_bits umem;
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_umem_bits umem;
};
struct mlx5_ifc_create_uctx_in_bits {
- struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
- struct mlx5_ifc_uctx_bits uctx;
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_uctx_bits uctx;
+};
+
+struct mlx5_ifc_destroy_uctx_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_60[0x20];
};
struct mlx5_ifc_mtrc_string_db_param_bits {
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 2a5fe75dd082..4d35ff36ceff 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -147,6 +147,9 @@ struct mmc_host_ops {
/* Prepare HS400 target operating frequency depending host driver */
int (*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
+ /* Prepare switch to DDR during the HS400 init sequence */
+ int (*hs400_prepare_ddr)(struct mmc_host *host);
+
/* Prepare for switching from HS400 to HS200 */
void (*hs400_downgrade)(struct mmc_host *host);
@@ -331,7 +334,7 @@ struct mmc_host {
#define MMC_CAP_UHS (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | \
MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | \
MMC_CAP_UHS_DDR50)
-/* (1 << 21) is free for reuse */
+#define MMC_CAP_SYNC_RUNTIME_PM (1 << 21) /* Synced runtime PM suspends. */
#define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */
#define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */
#define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */
diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index 06607c59c4d0..feebd7aa6f5c 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -17,12 +17,7 @@
struct mmc_host;
int mmc_gpio_get_ro(struct mmc_host *host);
-int mmc_gpio_request_ro(struct mmc_host *host, unsigned int gpio);
-
int mmc_gpio_get_cd(struct mmc_host *host);
-int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio,
- unsigned int debounce);
-
int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id,
unsigned int idx, bool override_active_level,
unsigned int debounce, bool *gpio_invert);
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 496ff759f84c..91745cc3704c 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -403,7 +403,6 @@ struct nvme_fc_port_template {
void **handle);
void (*delete_queue)(struct nvme_fc_local_port *,
unsigned int qidx, void *handle);
- void (*poll_queue)(struct nvme_fc_local_port *, void *handle);
int (*ls_req)(struct nvme_fc_local_port *,
struct nvme_fc_remote_port *,
struct nvmefc_ls_req *);
@@ -649,22 +648,6 @@ enum {
* sequence in one LLDD operation. Errors during Data
* sequence transmit must not allow RSP sequence to be sent.
*/
- NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1),
- /* Bit 2: When 0, the LLDD is calling the cmd rcv handler
- * in a non-isr context, allowing the transport to finish
- * op completion in the calling context. When 1, the LLDD
- * is calling the cmd rcv handler in an ISR context,
- * requiring the transport to transition to a workqueue
- * for op completion.
- */
- NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2),
- /* Bit 3: When 0, the LLDD is calling the op done handler
- * in a non-isr context, allowing the transport to finish
- * op completion in the calling context. When 1, the LLDD
- * is calling the op done handler in an ISR context,
- * requiring the transport to transition to a workqueue
- * for op completion.
- */
};
diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
new file mode 100644
index 000000000000..03d87c0550a9
--- /dev/null
+++ b/include/linux/nvme-tcp.h
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe over Fabrics TCP protocol header.
+ * Copyright (c) 2018 Lightbits Labs. All rights reserved.
+ */
+
+#ifndef _LINUX_NVME_TCP_H
+#define _LINUX_NVME_TCP_H
+
+#include <linux/nvme.h>
+
+#define NVME_TCP_DISC_PORT 8009
+#define NVME_TCP_ADMIN_CCSZ SZ_8K
+#define NVME_TCP_DIGEST_LENGTH 4
+
+enum nvme_tcp_pfv {
+ NVME_TCP_PFV_1_0 = 0x0,
+};
+
+enum nvme_tcp_fatal_error_status {
+ NVME_TCP_FES_INVALID_PDU_HDR = 0x01,
+ NVME_TCP_FES_PDU_SEQ_ERR = 0x02,
+ NVME_TCP_FES_HDR_DIGEST_ERR = 0x03,
+ NVME_TCP_FES_DATA_OUT_OF_RANGE = 0x04,
+ NVME_TCP_FES_R2T_LIMIT_EXCEEDED = 0x05,
+ NVME_TCP_FES_DATA_LIMIT_EXCEEDED = 0x05,
+ NVME_TCP_FES_UNSUPPORTED_PARAM = 0x06,
+};
+
+enum nvme_tcp_digest_option {
+ NVME_TCP_HDR_DIGEST_ENABLE = (1 << 0),
+ NVME_TCP_DATA_DIGEST_ENABLE = (1 << 1),
+};
+
+enum nvme_tcp_pdu_type {
+ nvme_tcp_icreq = 0x0,
+ nvme_tcp_icresp = 0x1,
+ nvme_tcp_h2c_term = 0x2,
+ nvme_tcp_c2h_term = 0x3,
+ nvme_tcp_cmd = 0x4,
+ nvme_tcp_rsp = 0x5,
+ nvme_tcp_h2c_data = 0x6,
+ nvme_tcp_c2h_data = 0x7,
+ nvme_tcp_r2t = 0x9,
+};
+
+enum nvme_tcp_pdu_flags {
+ NVME_TCP_F_HDGST = (1 << 0),
+ NVME_TCP_F_DDGST = (1 << 1),
+ NVME_TCP_F_DATA_LAST = (1 << 2),
+ NVME_TCP_F_DATA_SUCCESS = (1 << 3),
+};
+
+/**
+ * struct nvme_tcp_hdr - nvme tcp pdu common header
+ *
+ * @type: pdu type
+ * @flags: pdu specific flags
+ * @hlen: pdu header length
+ * @pdo: pdu data offset
+ * @plen: pdu wire byte length
+ */
+struct nvme_tcp_hdr {
+ __u8 type;
+ __u8 flags;
+ __u8 hlen;
+ __u8 pdo;
+ __le32 plen;
+};
+
+/**
+ * struct nvme_tcp_icreq_pdu - nvme tcp initialize connection request pdu
+ *
+ * @hdr: pdu generic header
+ * @pfv: pdu version format
+ * @hpda: host pdu data alignment (dwords, 0's based)
+ * @digest: digest types enabled
+ * @maxr2t: maximum r2ts per request supported
+ */
+struct nvme_tcp_icreq_pdu {
+ struct nvme_tcp_hdr hdr;
+ __le16 pfv;
+ __u8 hpda;
+ __u8 digest;
+ __le32 maxr2t;
+ __u8 rsvd2[112];
+};
+
+/**
+ * struct nvme_tcp_icresp_pdu - nvme tcp initialize connection response pdu
+ *
+ * @hdr: pdu common header
+ * @pfv: pdu version format
+ * @cpda: controller pdu data alignment (dowrds, 0's based)
+ * @digest: digest types enabled
+ * @maxdata: maximum data capsules per r2t supported
+ */
+struct nvme_tcp_icresp_pdu {
+ struct nvme_tcp_hdr hdr;
+ __le16 pfv;
+ __u8 cpda;
+ __u8 digest;
+ __le32 maxdata;
+ __u8 rsvd[112];
+};
+
+/**
+ * struct nvme_tcp_term_pdu - nvme tcp terminate connection pdu
+ *
+ * @hdr: pdu common header
+ * @fes: fatal error status
+ * @fei: fatal error information
+ */
+struct nvme_tcp_term_pdu {
+ struct nvme_tcp_hdr hdr;
+ __le16 fes;
+ __le32 fei;
+ __u8 rsvd[8];
+};
+
+/**
+ * struct nvme_tcp_cmd_pdu - nvme tcp command capsule pdu
+ *
+ * @hdr: pdu common header
+ * @cmd: nvme command
+ */
+struct nvme_tcp_cmd_pdu {
+ struct nvme_tcp_hdr hdr;
+ struct nvme_command cmd;
+};
+
+/**
+ * struct nvme_tcp_rsp_pdu - nvme tcp response capsule pdu
+ *
+ * @hdr: pdu common header
+ * @hdr: nvme-tcp generic header
+ * @cqe: nvme completion queue entry
+ */
+struct nvme_tcp_rsp_pdu {
+ struct nvme_tcp_hdr hdr;
+ struct nvme_completion cqe;
+};
+
+/**
+ * struct nvme_tcp_r2t_pdu - nvme tcp ready-to-transfer pdu
+ *
+ * @hdr: pdu common header
+ * @command_id: nvme command identifier which this relates to
+ * @ttag: transfer tag (controller generated)
+ * @r2t_offset: offset from the start of the command data
+ * @r2t_length: length the host is allowed to send
+ */
+struct nvme_tcp_r2t_pdu {
+ struct nvme_tcp_hdr hdr;
+ __u16 command_id;
+ __u16 ttag;
+ __le32 r2t_offset;
+ __le32 r2t_length;
+ __u8 rsvd[4];
+};
+
+/**
+ * struct nvme_tcp_data_pdu - nvme tcp data pdu
+ *
+ * @hdr: pdu common header
+ * @command_id: nvme command identifier which this relates to
+ * @ttag: transfer tag (controller generated)
+ * @data_offset: offset from the start of the command data
+ * @data_length: length of the data stream
+ */
+struct nvme_tcp_data_pdu {
+ struct nvme_tcp_hdr hdr;
+ __u16 command_id;
+ __u16 ttag;
+ __le32 data_offset;
+ __le32 data_length;
+ __u8 rsvd[4];
+};
+
+union nvme_tcp_pdu {
+ struct nvme_tcp_icreq_pdu icreq;
+ struct nvme_tcp_icresp_pdu icresp;
+ struct nvme_tcp_cmd_pdu cmd;
+ struct nvme_tcp_rsp_pdu rsp;
+ struct nvme_tcp_r2t_pdu r2t;
+ struct nvme_tcp_data_pdu data;
+};
+
+#endif /* _LINUX_NVME_TCP_H */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 818dbe9331be..bbcc83886899 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -52,15 +52,20 @@ enum {
enum {
NVMF_TRTYPE_RDMA = 1, /* RDMA */
NVMF_TRTYPE_FC = 2, /* Fibre Channel */
+ NVMF_TRTYPE_TCP = 3, /* TCP/IP */
NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */
NVMF_TRTYPE_MAX,
};
/* Transport Requirements codes for Discovery Log Page entry TREQ field */
enum {
- NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */
- NVMF_TREQ_REQUIRED = 1, /* Required */
- NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */
+ NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */
+ NVMF_TREQ_REQUIRED = 1, /* Required */
+ NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */
+#define NVME_TREQ_SECURE_CHANNEL_MASK \
+ (NVMF_TREQ_REQUIRED | NVMF_TREQ_NOT_REQUIRED)
+
+ NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* Supports SQ flow control disable */
};
/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
@@ -198,6 +203,11 @@ enum {
NVME_PS_FLAGS_NON_OP_STATE = 1 << 1,
};
+enum nvme_ctrl_attr {
+ NVME_CTRL_ATTR_HID_128_BIT = (1 << 0),
+ NVME_CTRL_ATTR_TBKAS = (1 << 6),
+};
+
struct nvme_id_ctrl {
__le16 vid;
__le16 ssvid;
@@ -214,7 +224,11 @@ struct nvme_id_ctrl {
__le32 rtd3e;
__le32 oaes;
__le32 ctratt;
- __u8 rsvd100[156];
+ __u8 rsvd100[28];
+ __le16 crdt1;
+ __le16 crdt2;
+ __le16 crdt3;
+ __u8 rsvd134[122];
__le16 oacs;
__u8 acl;
__u8 aerl;
@@ -481,12 +495,21 @@ enum {
NVME_AER_NOTICE_NS_CHANGED = 0x00,
NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
NVME_AER_NOTICE_ANA = 0x03,
+ NVME_AER_NOTICE_DISC_CHANGED = 0xf0,
};
enum {
- NVME_AEN_CFG_NS_ATTR = 1 << 8,
- NVME_AEN_CFG_FW_ACT = 1 << 9,
- NVME_AEN_CFG_ANA_CHANGE = 1 << 11,
+ NVME_AEN_BIT_NS_ATTR = 8,
+ NVME_AEN_BIT_FW_ACT = 9,
+ NVME_AEN_BIT_ANA_CHANGE = 11,
+ NVME_AEN_BIT_DISC_CHANGE = 31,
+};
+
+enum {
+ NVME_AEN_CFG_NS_ATTR = 1 << NVME_AEN_BIT_NS_ATTR,
+ NVME_AEN_CFG_FW_ACT = 1 << NVME_AEN_BIT_FW_ACT,
+ NVME_AEN_CFG_ANA_CHANGE = 1 << NVME_AEN_BIT_ANA_CHANGE,
+ NVME_AEN_CFG_DISC_CHANGE = 1 << NVME_AEN_BIT_DISC_CHANGE,
};
struct nvme_lba_range_type {
@@ -639,7 +662,12 @@ struct nvme_common_command {
__le32 cdw2[2];
__le64 metadata;
union nvme_data_ptr dptr;
- __le32 cdw10[6];
+ __le32 cdw10;
+ __le32 cdw11;
+ __le32 cdw12;
+ __le32 cdw13;
+ __le32 cdw14;
+ __le32 cdw15;
};
struct nvme_rw_command {
@@ -738,6 +766,15 @@ enum {
NVME_HOST_MEM_RETURN = (1 << 1),
};
+struct nvme_feat_host_behavior {
+ __u8 acre;
+ __u8 resv1[511];
+};
+
+enum {
+ NVME_ENABLE_ACRE = 1,
+};
+
/* Admin commands */
enum nvme_admin_opcode {
@@ -792,6 +829,7 @@ enum {
NVME_FEAT_RRL = 0x12,
NVME_FEAT_PLM_CONFIG = 0x13,
NVME_FEAT_PLM_WINDOW = 0x14,
+ NVME_FEAT_HOST_BEHAVIOR = 0x16,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82,
@@ -1030,6 +1068,10 @@ struct nvmf_disc_rsp_page_hdr {
struct nvmf_disc_rsp_page_entry entries[0];
};
+enum {
+ NVME_CONNECT_DISABLE_SQFLOW = (1 << 2),
+};
+
struct nvmf_connect_command {
__u8 opcode;
__u8 resv1;
@@ -1126,6 +1168,20 @@ struct nvme_command {
};
};
+struct nvme_error_slot {
+ __le64 error_count;
+ __le16 sqid;
+ __le16 cmdid;
+ __le16 status_field;
+ __le16 param_error_location;
+ __le64 lba;
+ __le32 nsid;
+ __u8 vs;
+ __u8 resv[3];
+ __le64 cs;
+ __u8 resv2[24];
+};
+
static inline bool nvme_is_write(struct nvme_command *cmd)
{
/*
@@ -1243,6 +1299,7 @@ enum {
NVME_SC_ANA_TRANSITION = 0x303,
NVME_SC_HOST_PATH_ERROR = 0x370,
+ NVME_SC_CRD = 0x1800,
NVME_SC_DNR = 0x4000,
};
diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h
index 640dec8b5b0c..b606ca4197df 100644
--- a/include/linux/platform_data/mmc-esdhc-imx.h
+++ b/include/linux/platform_data/mmc-esdhc-imx.h
@@ -30,15 +30,11 @@ enum cd_types {
*
* ESDHC_WP(CD)_CONTROLLER type is not available on i.MX25/35.
*
- * @wp_gpio: gpio for write_protect
- * @cd_gpio: gpio for card_detect interrupt
* @wp_type: type of write_protect method (see wp_types enum above)
* @cd_type: type of card_detect method (see cd_types enum above)
*/
struct esdhc_platform_data {
- unsigned int wp_gpio;
- unsigned int cd_gpio;
enum wp_types wp_type;
enum cd_types cd_type;
int max_bus_width;
diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h
index 752f97c62ef2..7e44e84e7150 100644
--- a/include/linux/platform_data/mmc-pxamci.h
+++ b/include/linux/platform_data/mmc-pxamci.h
@@ -15,11 +15,7 @@ struct pxamci_platform_data {
int (*get_ro)(struct device *);
int (*setpower)(struct device *, unsigned int);
void (*exit)(struct device *, void *);
- int gpio_card_detect; /* gpio detecting card insertion */
- int gpio_card_ro; /* gpio detecting read only toggle */
bool gpio_card_ro_invert; /* gpio ro is inverted */
- int gpio_power; /* gpio powering up MMC bus */
- bool gpio_power_invert; /* gpio power is inverted */
};
extern void pxa_set_mci_info(struct pxamci_platform_data *info);
diff --git a/include/linux/platform_data/mmc-s3cmci.h b/include/linux/platform_data/mmc-s3cmci.h
index b68d9f0bdd9e..33310b11cbdd 100644
--- a/include/linux/platform_data/mmc-s3cmci.h
+++ b/include/linux/platform_data/mmc-s3cmci.h
@@ -7,7 +7,6 @@
* @no_wprotect: Set this to indicate there is no write-protect switch.
* @no_detect: Set this if there is no detect switch.
* @wprotect_invert: Invert the default sense of the write protect switch.
- * @detect_invert: Invert the default sense of the write protect switch.
* @use_dma: Set to allow the use of DMA.
* @gpio_detect: GPIO number for the card detect line.
* @gpio_wprotect: GPIO number for the write protect line.
@@ -31,11 +30,8 @@ struct s3c24xx_mci_pdata {
unsigned int no_wprotect:1;
unsigned int no_detect:1;
unsigned int wprotect_invert:1;
- unsigned int detect_invert:1; /* set => detect active high */
unsigned int use_dma:1;
- unsigned int gpio_detect;
- unsigned int gpio_wprotect;
unsigned long ocr_avail;
void (*set_power)(unsigned char power_mode,
unsigned short vdd);
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 804a50983ec5..14d558146aea 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -30,14 +30,24 @@ struct seq_file;
*/
struct sbitmap_word {
/**
- * @word: The bitmap word itself.
+ * @depth: Number of bits being used in @word/@cleared
*/
- unsigned long word;
+ unsigned long depth;
/**
- * @depth: Number of bits being used in @word.
+ * @word: word holding free bits
*/
- unsigned long depth;
+ unsigned long word ____cacheline_aligned_in_smp;
+
+ /**
+ * @cleared: word holding cleared bits
+ */
+ unsigned long cleared ____cacheline_aligned_in_smp;
+
+ /**
+ * @swap_lock: Held while swapping word <-> cleared
+ */
+ spinlock_t swap_lock;
} ____cacheline_aligned_in_smp;
/**
@@ -125,6 +135,11 @@ struct sbitmap_queue {
*/
struct sbq_wait_state *ws;
+ /*
+ * @ws_active: count of currently active ws waitqueues
+ */
+ atomic_t ws_active;
+
/**
* @round_robin: Allocate bits in strict round-robin order.
*/
@@ -250,12 +265,14 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb,
nr = SB_NR_TO_BIT(sb, start);
while (scanned < sb->depth) {
- struct sbitmap_word *word = &sb->map[index];
- unsigned int depth = min_t(unsigned int, word->depth - nr,
+ unsigned long word;
+ unsigned int depth = min_t(unsigned int,
+ sb->map[index].depth - nr,
sb->depth - scanned);
scanned += depth;
- if (!word->word)
+ word = sb->map[index].word & ~sb->map[index].cleared;
+ if (!word)
goto next;
/*
@@ -265,7 +282,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb,
*/
depth += nr;
while (1) {
- nr = find_next_bit(&word->word, depth, nr);
+ nr = find_next_bit(&word, depth, nr);
if (nr >= depth)
break;
if (!fn(sb, (index << sb->shift) + nr, data))
@@ -310,6 +327,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}
+/*
+ * This one is special, since it doesn't actually clear the bit, rather it
+ * sets the corresponding bit in the ->cleared mask instead. Paired with
+ * the caller doing sbitmap_batch_clear() if a given index is full, which
+ * will clear the previously freed entries in the corresponding ->word.
+ */
+static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
+{
+ unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;
+
+ set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
+}
+
static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb,
unsigned int bitnr)
{
@@ -321,8 +351,6 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}
-unsigned int sbitmap_weight(const struct sbitmap *sb);
-
/**
* sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
* @sb: Bitmap to show.
@@ -531,4 +559,45 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
*/
void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
+struct sbq_wait {
+ struct sbitmap_queue *sbq; /* if set, sbq_wait is accounted */
+ struct wait_queue_entry wait;
+};
+
+#define DEFINE_SBQ_WAIT(name) \
+ struct sbq_wait name = { \
+ .sbq = NULL, \
+ .wait = { \
+ .private = current, \
+ .func = autoremove_wake_function, \
+ .entry = LIST_HEAD_INIT((name).wait.entry), \
+ } \
+ }
+
+/*
+ * Wrapper around prepare_to_wait_exclusive(), which maintains some extra
+ * internal state.
+ */
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+ struct sbq_wait_state *ws,
+ struct sbq_wait *sbq_wait, int state);
+
+/*
+ * Must be paired with sbitmap_prepare_to_wait().
+ */
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+ struct sbq_wait *sbq_wait);
+
+/*
+ * Wrapper around add_wait_queue(), which maintains some extra internal state
+ */
+void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
+ struct sbq_wait_state *ws,
+ struct sbq_wait *sbq_wait);
+
+/*
+ * Must be paired with sbitmap_add_wait_queue()
+ */
+void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait);
+
#endif /* __LINUX_SCALE_BITMAP_H */
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 093aa57120b0..b96f0d0b5b8f 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -324,10 +324,10 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
* Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit
* is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
*/
-#ifdef CONFIG_ARCH_HAS_SG_CHAIN
-#define SG_MAX_SEGMENTS 2048
-#else
+#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define SG_MAX_SEGMENTS SG_CHUNK_SIZE
+#else
+#define SG_MAX_SEGMENTS 2048
#endif
#ifdef CONFIG_SG_POOL
diff --git a/include/linux/signal.h b/include/linux/signal.h
index f428e86f4800..cc7e2c1cd444 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -273,6 +273,10 @@ extern int group_send_sig_info(int sig, struct kernel_siginfo *info,
struct task_struct *p, enum pid_type type);
extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *);
extern int sigprocmask(int, sigset_t *, sigset_t *);
+extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set,
+ sigset_t *oldset, size_t sigsetsize);
+extern void restore_user_sigmask(const void __user *usigmask,
+ sigset_t *sigsaved);
extern void set_current_blocked(sigset_t *);
extern void __set_current_blocked(const sigset_t *);
extern int show_unhandled_signals;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2a57a365c711..93f56fddd92a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3339,6 +3339,9 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
}
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
struct msghdr *msg);
+int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
+ struct iov_iter *to, int len,
+ struct ahash_request *hash);
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
struct iov_iter *from, int len);
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 84c48a3c0227..ab2041a00e01 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -349,7 +349,8 @@ struct ucred {
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
-struct timespec64;
+struct __kernel_timespec;
+struct old_timespec32;
/* The __sys_...msg variants allow MSG_CMSG_COMPAT iff
* forbid_cmsg_compat==false
@@ -358,8 +359,10 @@ extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg,
unsigned int flags, bool forbid_cmsg_compat);
extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg,
unsigned int flags, bool forbid_cmsg_compat);
-extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
- unsigned int flags, struct timespec64 *timeout);
+extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags,
+ struct __kernel_timespec __user *timeout,
+ struct old_timespec32 __user *timeout32);
extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags,
bool forbid_cmsg_compat);
diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h
index bfde741a543d..778ae8eb1f3e 100644
--- a/include/linux/spi/mmc_spi.h
+++ b/include/linux/spi/mmc_spi.h
@@ -8,11 +8,6 @@
struct device;
struct mmc_host;
-#define MMC_SPI_USE_CD_GPIO (1 << 0)
-#define MMC_SPI_USE_RO_GPIO (1 << 1)
-#define MMC_SPI_CD_GPIO_ACTIVE_LOW (1 << 2)
-#define MMC_SPI_RO_GPIO_ACTIVE_LOW (1 << 3)
-
/* Put this in platform_data of a device being used to manage an MMC/SD
* card slot. (Modeled after PXA mmc glue; see that for usage examples.)
*
@@ -27,16 +22,6 @@ struct mmc_spi_platform_data {
void *);
void (*exit)(struct device *, void *);
- /*
- * Card Detect and Read Only GPIOs. To enable debouncing on the card
- * detect GPIO, set the cd_debounce to the debounce time in
- * microseconds.
- */
- unsigned int flags;
- unsigned int cd_gpio;
- unsigned int cd_debounce;
- unsigned int ro_gpio;
-
/* Capabilities to pass into mmc core (e.g. MMC_CAP_NEEDS_POLL). */
unsigned long caps;
unsigned long caps2;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index a387b59640a4..7c007ed7505f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -16,8 +16,6 @@ enum swiotlb_force {
SWIOTLB_NO_FORCE, /* swiotlb=noforce */
};
-extern enum swiotlb_force swiotlb_force;
-
/*
* Maximum allowable number of contiguous slabs to map,
* must be a power of 2. What is the appropriate value ?
@@ -46,9 +44,6 @@ enum dma_sync_target {
SYNC_FOR_DEVICE = 1,
};
-/* define the last possible byte of physical address space as a mapping error */
-#define SWIOTLB_MAP_ERROR (~(phys_addr_t)0x0)
-
extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
dma_addr_t tbl_dma_addr,
phys_addr_t phys, size_t size,
@@ -65,56 +60,44 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
size_t size, enum dma_data_direction dir,
enum dma_sync_target target);
-/* Accessory functions. */
-
-extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs);
-extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs);
-
-extern int
-swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir,
- unsigned long attrs);
-
-extern void
-swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
- int nelems, enum dma_data_direction dir,
- unsigned long attrs);
-
-extern void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir);
-
-extern void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir);
-
-extern void
-swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir);
-
-extern void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir);
-
extern int
swiotlb_dma_supported(struct device *hwdev, u64 mask);
#ifdef CONFIG_SWIOTLB
-extern void __init swiotlb_exit(void);
+extern enum swiotlb_force swiotlb_force;
+extern phys_addr_t io_tlb_start, io_tlb_end;
+
+static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+{
+ return paddr >= io_tlb_start && paddr < io_tlb_end;
+}
+
+bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs);
+void __init swiotlb_exit(void);
unsigned int swiotlb_max_segment(void);
#else
-static inline void swiotlb_exit(void) { }
-static inline unsigned int swiotlb_max_segment(void) { return 0; }
-#endif
+#define swiotlb_force SWIOTLB_NO_FORCE
+static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+{
+ return false;
+}
+static inline bool swiotlb_map(struct device *dev, phys_addr_t *phys,
+ dma_addr_t *dma_addr, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ return false;
+}
+static inline void swiotlb_exit(void)
+{
+}
+static inline unsigned int swiotlb_max_segment(void)
+{
+ return 0;
+}
+#endif /* CONFIG_SWIOTLB */
extern void swiotlb_print_info(void);
extern void swiotlb_set_max_segment(unsigned int);
-extern const struct dma_map_ops swiotlb_dma_ops;
-
#endif /* __LINUX_SWIOTLB_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2ac3d13a915b..251979d2e709 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -296,12 +296,18 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id,
long min_nr,
long nr,
struct io_event __user *events,
- struct timespec __user *timeout);
+ struct __kernel_timespec __user *timeout);
asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
long min_nr,
long nr,
struct io_event __user *events,
- struct timespec __user *timeout,
+ struct __kernel_timespec __user *timeout,
+ const struct __aio_sigset *sig);
+asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id,
+ long min_nr,
+ long nr,
+ struct io_event __user *events,
+ struct old_timespec32 __user *timeout,
const struct __aio_sigset *sig);
/* fs/xattr.c */
@@ -466,10 +472,16 @@ asmlinkage long sys_sendfile64(int out_fd, int in_fd,
/* fs/select.c */
asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
- fd_set __user *, struct timespec __user *,
+ fd_set __user *, struct __kernel_timespec __user *,
+ void __user *);
+asmlinkage long sys_pselect6_time32(int, fd_set __user *, fd_set __user *,
+ fd_set __user *, struct old_timespec32 __user *,
void __user *);
asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int,
- struct timespec __user *, const sigset_t __user *,
+ struct __kernel_timespec __user *, const sigset_t __user *,
+ size_t);
+asmlinkage long sys_ppoll_time32(struct pollfd __user *, unsigned int,
+ struct old_timespec32 __user *, const sigset_t __user *,
size_t);
/* fs/signalfd.c */
@@ -541,7 +553,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags);
/* kernel/futex.c */
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
- struct timespec __user *utime, u32 __user *uaddr2,
+ struct __kernel_timespec __user *utime, u32 __user *uaddr2,
u32 val3);
asmlinkage long sys_get_robust_list(int pid,
struct robust_list_head __user * __user *head_ptr,
@@ -637,6 +649,10 @@ asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese,
siginfo_t __user *uinfo,
const struct __kernel_timespec __user *uts,
size_t sigsetsize);
+asmlinkage long sys_rt_sigtimedwait_time32(const sigset_t __user *uthese,
+ siginfo_t __user *uinfo,
+ const struct old_timespec32 __user *uts,
+ size_t sigsetsize);
asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo);
/* kernel/sys.c */
@@ -831,6 +847,9 @@ asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg,
unsigned int vlen, unsigned flags,
struct __kernel_timespec __user *timeout);
+asmlinkage long sys_recvmmsg_time32(int fd, struct mmsghdr __user *msg,
+ unsigned int vlen, unsigned flags,
+ struct old_timespec32 __user *timeout);
asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
int options, struct rusage __user *ru);
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 61904a6c098f..118b9977080c 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -96,31 +96,6 @@ static inline int timespec_compare(const struct timespec *lhs, const struct time
return lhs->tv_nsec - rhs->tv_nsec;
}
-extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec);
-
-static inline struct timespec timespec_add(struct timespec lhs,
- struct timespec rhs)
-{
- struct timespec ts_delta;
-
- set_normalized_timespec(&ts_delta, lhs.tv_sec + rhs.tv_sec,
- lhs.tv_nsec + rhs.tv_nsec);
- return ts_delta;
-}
-
-/*
- * sub = lhs - rhs, in normalized form
- */
-static inline struct timespec timespec_sub(struct timespec lhs,
- struct timespec rhs)
-{
- struct timespec ts_delta;
-
- set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec,
- lhs.tv_nsec - rhs.tv_nsec);
- return ts_delta;
-}
-
/*
* Returns true if the timespec is norm, false if denorm:
*/
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 29975e93fcb8..a8ab0f143ac4 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -262,18 +262,4 @@ void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock,
struct timespec64 *boot_offset);
extern int update_persistent_clock64(struct timespec64 now);
-/*
- * deprecated aliases, don't use in new code
- */
-#define getnstimeofday64(ts) ktime_get_real_ts64(ts)
-
-static inline struct timespec64 current_kernel_time64(void)
-{
- struct timespec64 ts;
-
- ktime_get_coarse_real_ts64(&ts);
-
- return ts;
-}
-
#endif
diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h
index a502616f7e1c..cc59cc9e0e84 100644
--- a/include/linux/timekeeping32.h
+++ b/include/linux/timekeeping32.h
@@ -6,15 +6,6 @@
* over time so we can remove the file here.
*/
-static inline void do_gettimeofday(struct timeval *tv)
-{
- struct timespec64 now;
-
- ktime_get_real_ts64(&now);
- tv->tv_sec = now.tv_sec;
- tv->tv_usec = now.tv_nsec/1000;
-}
-
static inline unsigned long get_seconds(void)
{
return ktime_get_real_seconds();
@@ -52,10 +43,4 @@ static inline void getboottime(struct timespec *ts)
*ts = timespec64_to_timespec(ts64);
}
-/*
- * Persistent clock related interfaces
- */
-extern void read_persistent_clock(struct timespec *ts);
-extern int update_persistent_clock(struct timespec now);
-
#endif
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 55ce99ddb912..ecf584f6b82d 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/thread_info.h>
+#include <crypto/hash.h>
#include <uapi/linux/uio.h>
struct page;
@@ -266,9 +267,11 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
{
i->count = count;
}
-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
+size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i);
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
+size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
+ struct iov_iter *i);
int import_iovec(int type, const struct iovec __user * uvector,
unsigned nr_segs, unsigned fast_segs,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fdfd04e348f6..738a0c24874f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,7 +246,8 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
*
* @bio is a part of the writeback in progress controlled by @wbc. Perform
* writeback specific initialization. This is used to apply the cgroup
- * writeback context.
+ * writeback context. Must be called after the bio has been associated with
+ * a device.
*/
static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
{
@@ -257,7 +258,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
* regular writeback instead of writing things out itself.
*/
if (wbc->wb)
- bio_associate_blkcg(bio, wbc->wb->blkcg_css);
+ bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
}
#else /* CONFIG_CGROUP_WRITEBACK */
diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h
index f62b842e6596..f8982e4e9702 100644
--- a/include/rdma/ib_fmr_pool.h
+++ b/include/rdma/ib_fmr_pool.h
@@ -88,6 +88,6 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
int list_len,
u64 io_virtual_address);
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
+void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
#endif /* IB_FMR_POOL_H */
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index f6ba366051c7..fdef558e3a2d 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -277,6 +277,7 @@ enum ib_port_capability_mask_bits {
IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+ IB_PORT_CAP_MASK2_SUP = 1 << 15,
IB_PORT_CM_SUP = 1 << 16,
IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
IB_PORT_REINIT_SUP = 1 << 18,
@@ -295,6 +296,15 @@ enum ib_port_capability_mask_bits {
IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
};
+enum ib_port_capability_mask2_bits {
+ IB_PORT_SET_NODE_DESC_SUP = 1 << 0,
+ IB_PORT_EX_PORT_INFO_EX_SUP = 1 << 1,
+ IB_PORT_VIRT_SUP = 1 << 2,
+ IB_PORT_SWITCH_PORT_STATE_TABLE_SUP = 1 << 3,
+ IB_PORT_LINK_WIDTH_2X_SUP = 1 << 4,
+ IB_PORT_LINK_SPEED_HDR_SUP = 1 << 5,
+};
+
#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
struct opa_class_port_info {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9c0c2132a2d6..a3ceed3a040a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -41,14 +41,11 @@
#include <linux/types.h>
#include <linux/device.h>
-#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rwsem.h>
-#include <linux/scatterlist.h>
#include <linux/workqueue.h>
-#include <linux/socket.h>
#include <linux/irq_poll.h>
#include <uapi/linux/if_ether.h>
#include <net/ipv6.h>
@@ -56,7 +53,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
-
+#include <linux/refcount.h>
#include <linux/if_link.h>
#include <linux/atomic.h>
#include <linux/mmu_notifier.h>
@@ -437,6 +434,7 @@ enum ib_port_state {
enum ib_port_width {
IB_WIDTH_1X = 1,
+ IB_WIDTH_2X = 16,
IB_WIDTH_4X = 2,
IB_WIDTH_8X = 4,
IB_WIDTH_12X = 8
@@ -446,6 +444,7 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
{
switch (width) {
case IB_WIDTH_1X: return 1;
+ case IB_WIDTH_2X: return 2;
case IB_WIDTH_4X: return 4;
case IB_WIDTH_8X: return 8;
case IB_WIDTH_12X: return 12;
@@ -595,6 +594,7 @@ struct ib_port_attr {
u8 active_width;
u8 active_speed;
u8 phys_state;
+ u16 port_cap_flags2;
};
enum ib_device_modify_flags {
@@ -732,7 +732,11 @@ enum ib_rate {
IB_RATE_25_GBPS = 15,
IB_RATE_100_GBPS = 16,
IB_RATE_200_GBPS = 17,
- IB_RATE_300_GBPS = 18
+ IB_RATE_300_GBPS = 18,
+ IB_RATE_28_GBPS = 19,
+ IB_RATE_50_GBPS = 20,
+ IB_RATE_400_GBPS = 21,
+ IB_RATE_600_GBPS = 22,
};
/**
@@ -1508,6 +1512,10 @@ struct ib_ucontext {
#endif
struct ib_rdmacg_object cg_obj;
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
struct ib_uobject {
@@ -2256,82 +2264,86 @@ struct ib_counters_read_attr {
struct uverbs_attr_bundle;
-struct ib_device {
- /* Do not access @dma_device directly from ULP nor from HW drivers. */
- struct device *dma_device;
-
- char name[IB_DEVICE_NAME_MAX];
-
- struct list_head event_handler_list;
- spinlock_t event_handler_lock;
-
- rwlock_t client_data_lock;
- struct list_head core_list;
- /* Access to the client_data_list is protected by the client_data_lock
- * rwlock and the lists_rwsem read-write semaphore
- */
- struct list_head client_data_list;
-
- struct ib_cache cache;
- /**
- * port_immutable is indexed by port number
- */
- struct ib_port_immutable *port_immutable;
-
- int num_comp_vectors;
-
- struct ib_port_pkey_list *port_pkey_list;
-
- struct iw_cm_verbs *iwcm;
-
+/**
+ * struct ib_device_ops - InfiniBand device operations
+ * This structure defines all the InfiniBand device operations, providers will
+ * need to define the supported operations, otherwise they will be set to null.
+ */
+struct ib_device_ops {
+ int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr);
+ int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+ void (*drain_rq)(struct ib_qp *qp);
+ void (*drain_sq)(struct ib_qp *qp);
+ int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+ int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
+ int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+ int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
+ int (*post_srq_recv)(struct ib_srq *srq,
+ const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+ int (*process_mad)(struct ib_device *device, int process_mad_flags,
+ u8 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad_hdr *in_mad, size_t in_mad_size,
+ struct ib_mad_hdr *out_mad, size_t *out_mad_size,
+ u16 *out_mad_pkey_index);
+ int (*query_device)(struct ib_device *device,
+ struct ib_device_attr *device_attr,
+ struct ib_udata *udata);
+ int (*modify_device)(struct ib_device *device, int device_modify_mask,
+ struct ib_device_modify *device_modify);
+ void (*get_dev_fw_str)(struct ib_device *device, char *str);
+ const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
+ int comp_vector);
+ int (*query_port)(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *port_attr);
+ int (*modify_port)(struct ib_device *device, u8 port_num,
+ int port_modify_mask,
+ struct ib_port_modify *port_modify);
/**
- * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
- * driver initialized data. The struct is kfree()'ed by the sysfs
- * core when the device is removed. A lifespan of -1 in the return
- * struct tells the core to set a default lifespan.
+ * The following mandatory functions are used only at device
+ * registration. Keep functions such as these at the end of this
+ * structure to avoid cache line misses when accessing struct ib_device
+ * in fast paths.
*/
- struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
- u8 port_num);
+ int (*get_port_immutable)(struct ib_device *device, u8 port_num,
+ struct ib_port_immutable *immutable);
+ enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
+ u8 port_num);
/**
- * get_hw_stats - Fill in the counter value(s) in the stats struct.
- * @index - The index in the value array we wish to have updated, or
- * num_counters if we want all stats updated
- * Return codes -
- * < 0 - Error, no counters updated
- * index - Updated the single counter pointed to by index
- * num_counters - Updated all counters (will reset the timestamp
- * and prevent further calls for lifespan milliseconds)
- * Drivers are allowed to update all counters in leiu of just the
- * one given in index at their option
- */
- int (*get_hw_stats)(struct ib_device *device,
- struct rdma_hw_stats *stats,
- u8 port, int index);
- int (*query_device)(struct ib_device *device,
- struct ib_device_attr *device_attr,
- struct ib_udata *udata);
- int (*query_port)(struct ib_device *device,
- u8 port_num,
- struct ib_port_attr *port_attr);
- enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
- u8 port_num);
- /* When calling get_netdev, the HW vendor's driver should return the
+ * When calling get_netdev, the HW vendor's driver should return the
* net device of device @device at port @port_num or NULL if such
* a net device doesn't exist. The vendor driver should call dev_hold
* on this net device. The HW vendor's device driver must guarantee
* that this function returns NULL before the net device has finished
* NETDEV_UNREGISTER state.
*/
- struct net_device *(*get_netdev)(struct ib_device *device,
- u8 port_num);
- /* query_gid should be return GID value for @device, when @port_num
+ struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
+ /**
+ * rdma netdev operation
+ *
+ * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
+ * must return -EOPNOTSUPP if it doesn't support the specified type.
+ */
+ struct net_device *(*alloc_rdma_netdev)(
+ struct ib_device *device, u8 port_num, enum rdma_netdev_t type,
+ const char *name, unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+
+ int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params);
+ /**
+ * query_gid should be return GID value for @device, when @port_num
* link layer is either IB or iWarp. It is no-op if @port_num port
* is RoCE link layer.
*/
- int (*query_gid)(struct ib_device *device,
- u8 port_num, int index,
- union ib_gid *gid);
- /* When calling add_gid, the HW vendor's driver should add the gid
+ int (*query_gid)(struct ib_device *device, u8 port_num, int index,
+ union ib_gid *gid);
+ /**
+ * When calling add_gid, the HW vendor's driver should add the gid
* of device of port at gid index available at @attr. Meta-info of
* that gid (for example, the network device related to this gid) is
* available at @attr. @context allows the HW vendor driver to store
@@ -2343,213 +2355,186 @@ struct ib_device {
* concurrently for different ports. This function is only called when
* roce_gid_table is used.
*/
- int (*add_gid)(const struct ib_gid_attr *attr,
- void **context);
- /* When calling del_gid, the HW vendor's driver should delete the
+ int (*add_gid)(const struct ib_gid_attr *attr, void **context);
+ /**
+ * When calling del_gid, the HW vendor's driver should delete the
* gid of device @device at gid index gid_index of port port_num
* available in @attr.
* Upon the deletion of a GID entry, the HW vendor must free any
* allocated memory. The caller will clear @context afterwards.
* This function is only called when roce_gid_table is used.
*/
- int (*del_gid)(const struct ib_gid_attr *attr,
- void **context);
- int (*query_pkey)(struct ib_device *device,
- u8 port_num, u16 index, u16 *pkey);
- int (*modify_device)(struct ib_device *device,
- int device_modify_mask,
- struct ib_device_modify *device_modify);
- int (*modify_port)(struct ib_device *device,
- u8 port_num, int port_modify_mask,
- struct ib_port_modify *port_modify);
- struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
- struct ib_udata *udata);
- int (*dealloc_ucontext)(struct ib_ucontext *context);
- int (*mmap)(struct ib_ucontext *context,
- struct vm_area_struct *vma);
- struct ib_pd * (*alloc_pd)(struct ib_device *device,
- struct ib_ucontext *context,
- struct ib_udata *udata);
- int (*dealloc_pd)(struct ib_pd *pd);
- struct ib_ah * (*create_ah)(struct ib_pd *pd,
- struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
- int (*modify_ah)(struct ib_ah *ah,
- struct rdma_ah_attr *ah_attr);
- int (*query_ah)(struct ib_ah *ah,
- struct rdma_ah_attr *ah_attr);
- int (*destroy_ah)(struct ib_ah *ah);
- struct ib_srq * (*create_srq)(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata);
- int (*modify_srq)(struct ib_srq *srq,
- struct ib_srq_attr *srq_attr,
- enum ib_srq_attr_mask srq_attr_mask,
- struct ib_udata *udata);
- int (*query_srq)(struct ib_srq *srq,
- struct ib_srq_attr *srq_attr);
- int (*destroy_srq)(struct ib_srq *srq);
- int (*post_srq_recv)(struct ib_srq *srq,
- const struct ib_recv_wr *recv_wr,
- const struct ib_recv_wr **bad_recv_wr);
- struct ib_qp * (*create_qp)(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata);
- int (*modify_qp)(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_udata *udata);
- int (*query_qp)(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr);
- int (*destroy_qp)(struct ib_qp *qp);
- int (*post_send)(struct ib_qp *qp,
- const struct ib_send_wr *send_wr,
- const struct ib_send_wr **bad_send_wr);
- int (*post_recv)(struct ib_qp *qp,
- const struct ib_recv_wr *recv_wr,
- const struct ib_recv_wr **bad_recv_wr);
- struct ib_cq * (*create_cq)(struct ib_device *device,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
- int (*modify_cq)(struct ib_cq *cq, u16 cq_count,
- u16 cq_period);
- int (*destroy_cq)(struct ib_cq *cq);
- int (*resize_cq)(struct ib_cq *cq, int cqe,
- struct ib_udata *udata);
- int (*poll_cq)(struct ib_cq *cq, int num_entries,
- struct ib_wc *wc);
- int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
- int (*req_notify_cq)(struct ib_cq *cq,
- enum ib_cq_notify_flags flags);
- int (*req_ncomp_notif)(struct ib_cq *cq,
- int wc_cnt);
- struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
- int mr_access_flags);
- struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
- u64 start, u64 length,
- u64 virt_addr,
- int mr_access_flags,
- struct ib_udata *udata);
- int (*rereg_user_mr)(struct ib_mr *mr,
- int flags,
- u64 start, u64 length,
- u64 virt_addr,
- int mr_access_flags,
- struct ib_pd *pd,
- struct ib_udata *udata);
- int (*dereg_mr)(struct ib_mr *mr);
- struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg);
- int (*map_mr_sg)(struct ib_mr *mr,
- struct scatterlist *sg,
- int sg_nents,
- unsigned int *sg_offset);
- struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
- enum ib_mw_type type,
- struct ib_udata *udata);
- int (*dealloc_mw)(struct ib_mw *mw);
- struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
- int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
- int (*map_phys_fmr)(struct ib_fmr *fmr,
- u64 *page_list, int list_len,
- u64 iova);
- int (*unmap_fmr)(struct list_head *fmr_list);
- int (*dealloc_fmr)(struct ib_fmr *fmr);
- int (*attach_mcast)(struct ib_qp *qp,
- union ib_gid *gid,
- u16 lid);
- int (*detach_mcast)(struct ib_qp *qp,
- union ib_gid *gid,
- u16 lid);
- int (*process_mad)(struct ib_device *device,
- int process_mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad,
- size_t in_mad_size,
- struct ib_mad_hdr *out_mad,
- size_t *out_mad_size,
- u16 *out_mad_pkey_index);
- struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
- struct ib_ucontext *ucontext,
- struct ib_udata *udata);
- int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
- struct ib_flow * (*create_flow)(struct ib_qp *qp,
- struct ib_flow_attr
- *flow_attr,
- int domain,
- struct ib_udata *udata);
- int (*destroy_flow)(struct ib_flow *flow_id);
- int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
- struct ib_mr_status *mr_status);
- void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
- void (*drain_rq)(struct ib_qp *qp);
- void (*drain_sq)(struct ib_qp *qp);
- int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
- int state);
- int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
- struct ifla_vf_info *ivf);
- int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
- struct ifla_vf_stats *stats);
- int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
- int type);
- struct ib_wq * (*create_wq)(struct ib_pd *pd,
- struct ib_wq_init_attr *init_attr,
- struct ib_udata *udata);
- int (*destroy_wq)(struct ib_wq *wq);
- int (*modify_wq)(struct ib_wq *wq,
- struct ib_wq_attr *attr,
- u32 wq_attr_mask,
- struct ib_udata *udata);
- struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
- int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
- struct ib_flow_action * (*create_flow_action_esp)(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
- int (*destroy_flow_action)(struct ib_flow_action *action);
- int (*modify_flow_action_esp)(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
- struct ib_dm * (*alloc_dm)(struct ib_device *device,
- struct ib_ucontext *context,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs);
- int (*dealloc_dm)(struct ib_dm *dm);
- struct ib_mr * (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
- struct ib_dm_mr_attr *attr,
- struct uverbs_attr_bundle *attrs);
- struct ib_counters * (*create_counters)(struct ib_device *device,
- struct uverbs_attr_bundle *attrs);
- int (*destroy_counters)(struct ib_counters *counters);
- int (*read_counters)(struct ib_counters *counters,
- struct ib_counters_read_attr *counters_read_attr,
- struct uverbs_attr_bundle *attrs);
+ int (*del_gid)(const struct ib_gid_attr *attr, void **context);
+ int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
+ u16 *pkey);
+ struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device,
+ struct ib_udata *udata);
+ int (*dealloc_ucontext)(struct ib_ucontext *context);
+ int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+ void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
+ struct ib_pd *(*alloc_pd)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+ int (*dealloc_pd)(struct ib_pd *pd);
+ struct ib_ah *(*create_ah)(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr, u32 flags,
+ struct ib_udata *udata);
+ int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+ int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+ int (*destroy_ah)(struct ib_ah *ah, u32 flags);
+ struct ib_srq *(*create_srq)(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+ int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+ int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+ int (*destroy_srq)(struct ib_srq *srq);
+ struct ib_qp *(*create_qp)(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+ int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata);
+ int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+ int (*destroy_qp)(struct ib_qp *qp);
+ struct ib_cq *(*create_cq)(struct ib_device *device,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+ int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+ int (*destroy_cq)(struct ib_cq *cq);
+ int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+ struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
+ struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+ int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_pd *pd, struct ib_udata *udata);
+ int (*dereg_mr)(struct ib_mr *mr);
+ struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+ int (*advise_mr)(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice, u32 flags,
+ struct ib_sge *sg_list, u32 num_sge,
+ struct uverbs_attr_bundle *attrs);
+ int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+ int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status);
+ struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
+ struct ib_udata *udata);
+ int (*dealloc_mw)(struct ib_mw *mw);
+ struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+ int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len,
+ u64 iova);
+ int (*unmap_fmr)(struct list_head *fmr_list);
+ int (*dealloc_fmr)(struct ib_fmr *fmr);
+ int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+ int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+ struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
+ struct ib_ucontext *ucontext,
+ struct ib_udata *udata);
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+ struct ib_flow *(*create_flow)(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain, struct ib_udata *udata);
+ int (*destroy_flow)(struct ib_flow *flow_id);
+ struct ib_flow_action *(*create_flow_action_esp)(
+ struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_flow_action)(struct ib_flow_action *action);
+ int (*modify_flow_action_esp)(
+ struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+ int state);
+ int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_info *ivf);
+ int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_stats *stats);
+ int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+ int type);
+ struct ib_wq *(*create_wq)(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+ int (*destroy_wq)(struct ib_wq *wq);
+ int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+ struct ib_rwq_ind_table *(*create_rwq_ind_table)(
+ struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+ int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+ struct ib_dm *(*alloc_dm)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*dealloc_dm)(struct ib_dm *dm);
+ struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ struct ib_counters *(*create_counters)(
+ struct ib_device *device, struct uverbs_attr_bundle *attrs);
+ int (*destroy_counters)(struct ib_counters *counters);
+ int (*read_counters)(struct ib_counters *counters,
+ struct ib_counters_read_attr *counters_read_attr,
+ struct uverbs_attr_bundle *attrs);
+ /**
+ * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
+ * driver initialized data. The struct is kfree()'ed by the sysfs
+ * core when the device is removed. A lifespan of -1 in the return
+ * struct tells the core to set a default lifespan.
+ */
+ struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
+ u8 port_num);
+ /**
+ * get_hw_stats - Fill in the counter value(s) in the stats struct.
+ * @index - The index in the value array we wish to have updated, or
+ * num_counters if we want all stats updated
+ * Return codes -
+ * < 0 - Error, no counters updated
+ * index - Updated the single counter pointed to by index
+ * num_counters - Updated all counters (will reset the timestamp
+ * and prevent further calls for lifespan milliseconds)
+ * Drivers are allowed to update all counters in leiu of just the
+ * one given in index at their option
+ */
+ int (*get_hw_stats)(struct ib_device *device,
+ struct rdma_hw_stats *stats, u8 port, int index);
+};
+
+struct ib_device {
+ /* Do not access @dma_device directly from ULP nor from HW drivers. */
+ struct device *dma_device;
+ struct ib_device_ops ops;
+ char name[IB_DEVICE_NAME_MAX];
+
+ struct list_head event_handler_list;
+ spinlock_t event_handler_lock;
+
+ rwlock_t client_data_lock;
+ struct list_head core_list;
+ /* Access to the client_data_list is protected by the client_data_lock
+ * rwlock and the lists_rwsem read-write semaphore
+ */
+ struct list_head client_data_list;
+ struct ib_cache cache;
/**
- * rdma netdev operation
- *
- * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
- * must return -EOPNOTSUPP if it doesn't support the specified type.
+ * port_immutable is indexed by port number
*/
- struct net_device *(*alloc_rdma_netdev)(
- struct ib_device *device,
- u8 port_num,
- enum rdma_netdev_t type,
- const char *name,
- unsigned char name_assign_type,
- void (*setup)(struct net_device *));
+ struct ib_port_immutable *port_immutable;
- int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
- enum rdma_netdev_t type,
- struct rdma_netdev_alloc_params *params);
+ int num_comp_vectors;
+
+ struct ib_port_pkey_list *port_pkey_list;
+
+ struct iw_cm_verbs *iwcm;
struct module *owner;
struct device dev;
@@ -2592,19 +2577,14 @@ struct ib_device {
*/
struct rdma_restrack_root res;
- /**
- * The following mandatory functions are used only at device
- * registration. Keep functions such as these at the end of this
- * structure to avoid cache line misses when accessing struct ib_device
- * in fast paths.
- */
- int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
- void (*get_dev_fw_str)(struct ib_device *, char *str);
- const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
- int comp_vector);
-
- const struct uverbs_object_tree_def *const *driver_specs;
+ const struct uapi_definition *driver_def;
enum rdma_driver_id driver_id;
+ /*
+ * Provides synchronization between device unregistration and netlink
+ * commands on a device. To be used only by core.
+ */
+ refcount_t refcount;
+ struct completion unreg_completion;
};
struct ib_client {
@@ -2653,6 +2633,8 @@ void ib_unregister_client(struct ib_client *client);
void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
+void ib_set_device_ops(struct ib_device *device,
+ const struct ib_device_ops *ops);
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
@@ -3109,7 +3091,7 @@ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
u8 port_num)
{
return rdma_protocol_roce(device, port_num) &&
- device->add_gid && device->del_gid;
+ device->ops.add_gid && device->ops.del_gid;
}
/*
@@ -3169,15 +3151,22 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
void ib_dealloc_pd(struct ib_pd *pd);
+enum rdma_create_ah_flags {
+ /* In a sleepable context */
+ RDMA_CREATE_AH_SLEEPABLE = BIT(0),
+};
+
/**
* rdma_create_ah - Creates an address handle for the given address vector.
* @pd: The protection domain associated with the address handle.
* @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
*
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
-struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr);
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+ u32 flags);
/**
* rdma_create_user_ah - Creates an address handle for the given address vector.
@@ -3267,11 +3256,17 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
*/
int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+enum rdma_destroy_ah_flags {
+ /* In a sleepable context */
+ RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
+};
+
/**
* rdma_destroy_ah - Destroys an address handle.
* @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
*/
-int rdma_destroy_ah(struct ib_ah *ah);
+int rdma_destroy_ah(struct ib_ah *ah, u32 flags);
/**
* ib_create_srq - Creates a SRQ associated with the specified protection
@@ -3333,7 +3328,8 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
{
const struct ib_recv_wr *dummy;
- return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
+ return srq->device->ops.post_srq_recv(srq, recv_wr,
+ bad_recv_wr ? : &dummy);
}
/**
@@ -3436,7 +3432,7 @@ static inline int ib_post_send(struct ib_qp *qp,
{
const struct ib_send_wr *dummy;
- return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
+ return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy);
}
/**
@@ -3453,7 +3449,7 @@ static inline int ib_post_recv(struct ib_qp *qp,
{
const struct ib_recv_wr *dummy;
- return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
+ return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
}
struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
@@ -3526,7 +3522,7 @@ int ib_destroy_cq(struct ib_cq *cq);
static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
struct ib_wc *wc)
{
- return cq->device->poll_cq(cq, num_entries, wc);
+ return cq->device->ops.poll_cq(cq, num_entries, wc);
}
/**
@@ -3559,7 +3555,7 @@ static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
static inline int ib_req_notify_cq(struct ib_cq *cq,
enum ib_cq_notify_flags flags)
{
- return cq->device->req_notify_cq(cq, flags);
+ return cq->device->ops.req_notify_cq(cq, flags);
}
/**
@@ -3571,8 +3567,8 @@ static inline int ib_req_notify_cq(struct ib_cq *cq,
*/
static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
{
- return cq->device->req_ncomp_notif ?
- cq->device->req_ncomp_notif(cq, wc_cnt) :
+ return cq->device->ops.req_ncomp_notif ?
+ cq->device->ops.req_ncomp_notif(cq, wc_cnt) :
-ENOSYS;
}
@@ -3836,7 +3832,7 @@ static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
u64 *page_list, int list_len,
u64 iova)
{
- return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
+ return fmr->device->ops.map_phys_fmr(fmr, page_list, list_len, iova);
}
/**
@@ -4189,10 +4185,10 @@ static inline const struct cpumask *
ib_get_vector_affinity(struct ib_device *device, int comp_vector)
{
if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
- !device->get_vector_affinity)
+ !device->ops.get_vector_affinity)
return NULL;
- return device->get_vector_affinity(device, comp_vector);
+ return device->ops.get_vector_affinity(device, comp_vector);
}
@@ -4204,10 +4200,10 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
*/
void rdma_roce_rescan_device(struct ib_device *ibdev);
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
+
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs);
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type, const char *name,
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 3584d0816fcd..dd0ed8048bb4 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -269,6 +269,13 @@ struct rvt_driver_provided {
void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
/*
+ * Init a struture allocated with qp_priv_alloc(). This should be
+ * called after all qp fields have been initialized in rdmavt.
+ */
+ int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr);
+
+ /*
* Free the driver's private qp structure.
*/
void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 2638fa7cd702..8f179be9d9a9 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -39,6 +39,10 @@ enum rdma_restrack_type {
*/
RDMA_RESTRACK_MR,
/**
+ * @RDMA_RESTRACK_CTX: Verbs contexts (CTX)
+ */
+ RDMA_RESTRACK_CTX,
+ /**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
RDMA_RESTRACK_MAX
@@ -112,6 +116,10 @@ struct rdma_restrack_entry {
* @type: various objects in restrack database
*/
enum rdma_restrack_type type;
+ /**
+ * @user: user resource
+ */
+ bool user;
};
/**
@@ -136,11 +144,8 @@ int rdma_restrack_count(struct rdma_restrack_root *res,
enum rdma_restrack_type type,
struct pid_namespace *ns);
-/**
- * rdma_restrack_add() - add object to the reource tracking database
- * @res: resource entry
- */
-void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_kadd(struct rdma_restrack_entry *res);
+void rdma_restrack_uadd(struct rdma_restrack_entry *res);
/**
* rdma_restrack_del() - delete object from the reource tracking database
@@ -155,7 +160,7 @@ void rdma_restrack_del(struct rdma_restrack_entry *res);
*/
static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
{
- return !res->task;
+ return !res->user;
}
/**
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 84d3d15f1f38..27da906beea7 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -79,6 +79,8 @@ struct uverbs_attr_spec {
*/
u8 alloc_and_copy:1;
u8 mandatory:1;
+ /* True if this is from UVERBS_ATTR_UHW */
+ u8 is_udata:1;
union {
struct {
@@ -140,6 +142,13 @@ struct uverbs_attr_spec {
*
* The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
* the object slot, and OBJ_ID,METH_ID,0 and returns the method slot.
+ *
+ * This also encodes the tables for the write() and write() extended commands
+ * using the coding
+ * OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command #
+ * OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex #
+ * ie the WRITE path is treated as a special method type in the ioctl
+ * framework.
*/
enum uapi_radix_data {
UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
@@ -147,12 +156,16 @@ enum uapi_radix_data {
UVERBS_API_ATTR_KEY_BITS = 6,
UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
+ UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS,
UVERBS_API_METHOD_KEY_BITS = 5,
UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
- UVERBS_API_METHOD_KEY_NUM_CORE = 24,
- UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) -
- UVERBS_API_METHOD_KEY_NUM_CORE,
+ UVERBS_API_METHOD_KEY_NUM_CORE = 22,
+ UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT,
+ UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT,
+ UVERBS_API_METHOD_KEY_NUM_DRIVER =
+ (UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) -
+ UVERBS_API_METHOD_KEY_NUM_CORE,
UVERBS_API_METHOD_KEY_MASK = GENMASK(
UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
UVERBS_API_METHOD_KEY_SHIFT),
@@ -205,7 +218,22 @@ static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id)
return id << UVERBS_API_METHOD_KEY_SHIFT;
}
-static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
+static inline __attribute_const__ u32 uapi_key_write_method(u32 id)
+{
+ if (id >= UVERBS_API_WRITE_KEY_NUM)
+ return UVERBS_API_KEY_ERR;
+ return UVERBS_API_METHOD_IS_WRITE | id;
+}
+
+static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id)
+{
+ if (id >= UVERBS_API_WRITE_KEY_NUM)
+ return UVERBS_API_KEY_ERR;
+ return UVERBS_API_METHOD_IS_WRITE_EX | id;
+}
+
+static inline __attribute_const__ u32
+uapi_key_attr_to_ioctl_method(u32 attr_key)
{
return attr_key &
(UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
@@ -213,10 +241,23 @@ static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
{
- return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+ unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+ return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
(key & UVERBS_API_ATTR_KEY_MASK) == 0;
}
+static inline __attribute_const__ bool uapi_key_is_write_method(u32 key)
+{
+ return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE;
+}
+
+static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key)
+{
+ return (key & UVERBS_API_METHOD_KEY_MASK) ==
+ UVERBS_API_METHOD_IS_WRITE_EX;
+}
+
static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
{
/* 0 is the method slot itself */
@@ -246,9 +287,12 @@ static inline __attribute_const__ u32 uapi_key_attr(u32 id)
return id;
}
+/* Only true for ioctl methods */
static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
{
- return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+ unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+ return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
(key & UVERBS_API_ATTR_KEY_MASK) != 0;
}
@@ -285,8 +329,7 @@ struct uverbs_method_def {
u32 flags;
size_t num_attrs;
const struct uverbs_attr_def * const (*attrs)[];
- int (*handler)(struct ib_uverbs_file *ufile,
- struct uverbs_attr_bundle *ctx);
+ int (*handler)(struct uverbs_attr_bundle *attrs);
};
struct uverbs_object_def {
@@ -296,11 +339,132 @@ struct uverbs_object_def {
const struct uverbs_method_def * const (*methods)[];
};
-struct uverbs_object_tree_def {
- size_t num_objects;
- const struct uverbs_object_def * const (*objects)[];
+enum uapi_definition_kind {
+ UAPI_DEF_END = 0,
+ UAPI_DEF_OBJECT_START,
+ UAPI_DEF_WRITE,
+ UAPI_DEF_CHAIN_OBJ_TREE,
+ UAPI_DEF_CHAIN,
+ UAPI_DEF_IS_SUPPORTED_FUNC,
+ UAPI_DEF_IS_SUPPORTED_DEV_FN,
+};
+
+enum uapi_definition_scope {
+ UAPI_SCOPE_OBJECT = 1,
+ UAPI_SCOPE_METHOD = 2,
};
+struct uapi_definition {
+ u8 kind;
+ u8 scope;
+ union {
+ struct {
+ u16 object_id;
+ } object_start;
+ struct {
+ u16 command_num;
+ u8 is_ex:1;
+ u8 has_udata:1;
+ u8 has_resp:1;
+ u8 req_size;
+ u8 resp_size;
+ } write;
+ };
+
+ union {
+ bool (*func_is_supported)(struct ib_device *device);
+ int (*func_write)(struct uverbs_attr_bundle *attrs);
+ const struct uapi_definition *chain;
+ const struct uverbs_object_def *chain_obj_tree;
+ size_t needs_fn_offset;
+ };
+};
+
+/* Define things connected to object_id */
+#define DECLARE_UVERBS_OBJECT(_object_id, ...) \
+ { \
+ .kind = UAPI_DEF_OBJECT_START, \
+ .object_start = { .object_id = _object_id }, \
+ }, \
+ ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...) \
+ { \
+ .kind = UAPI_DEF_WRITE, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .write = { .is_ex = 0, .command_num = _command_num }, \
+ .func_write = _func, \
+ _cmd_desc, \
+ }, \
+ ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...) \
+ { \
+ .kind = UAPI_DEF_WRITE, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .write = { .is_ex = 1, .command_num = _command_num }, \
+ .func_write = _func, \
+ _cmd_desc, \
+ }, \
+ ##__VA_ARGS__
+
+/*
+ * Object is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .needs_fn_offset = \
+ offsetof(struct ib_device_ops, ibdev_fn) + \
+ BUILD_BUG_ON_ZERO( \
+ sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \
+ sizeof(void *)), \
+ }
+
+/*
+ * Method is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \
+ .scope = UAPI_SCOPE_METHOD, \
+ .needs_fn_offset = \
+ offsetof(struct ib_device_ops, ibdev_fn) + \
+ BUILD_BUG_ON_ZERO( \
+ sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \
+ sizeof(void *)), \
+ }
+
+/* Call a function to determine if the entire object is supported or not */
+#define UAPI_DEF_IS_OBJ_SUPPORTED(_func) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_FUNC, \
+ .scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func, \
+ }
+
+/* Include another struct uapi_definition in this one */
+#define UAPI_DEF_CHAIN(_def_var) \
+ { \
+ .kind = UAPI_DEF_CHAIN, .chain = _def_var, \
+ }
+
+/* Temporary until the tree base description is replaced */
+#define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr, ...) \
+ { \
+ .kind = UAPI_DEF_CHAIN_OBJ_TREE, \
+ .object_start = { .object_id = _object_enum }, \
+ .chain_obj_tree = _object_ptr, \
+ }, \
+ ##__VA_ARGS__
+#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) \
+ UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum), \
+ ##__VA_ARGS__)
+
/*
* =======================================
* Attribute Specifications
@@ -361,6 +525,12 @@ struct uverbs_object_tree_def {
.u2.objs_arr.max_len = _max_len, \
__VA_ARGS__ } })
+/*
+ * Only for use with UVERBS_ATTR_IDR, allows any uobject type to be accepted,
+ * the user must validate the type of the uobject instead.
+ */
+#define UVERBS_IDR_ANY_OBJECT 0xFFFF
+
#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \
(&(const struct uverbs_attr_def){ \
.id = _attr_id, \
@@ -433,25 +603,12 @@ struct uverbs_object_tree_def {
#define UVERBS_ATTR_UHW() \
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \
UVERBS_ATTR_MIN_SIZE(0), \
- UA_OPTIONAL), \
+ UA_OPTIONAL, \
+ .is_udata = 1), \
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \
UVERBS_ATTR_MIN_SIZE(0), \
- UA_OPTIONAL)
-
-/*
- * =======================================
- * Declaration helpers
- * =======================================
- */
-
-#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \
- static const struct uverbs_object_def *const _name##_ptr[] = { \
- __VA_ARGS__, \
- }; \
- static const struct uverbs_object_tree_def _name = { \
- .num_objects = ARRAY_SIZE(_name##_ptr), \
- .objects = &_name##_ptr, \
- }
+ UA_OPTIONAL, \
+ .is_udata = 1)
/* =================================================
* Parsing infrastructure
@@ -492,6 +649,8 @@ struct uverbs_attr {
};
struct uverbs_attr_bundle {
+ struct ib_udata driver_udata;
+ struct ib_udata ucore;
struct ib_uverbs_file *ufile;
DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
struct uverbs_attr attrs[];
@@ -560,6 +719,28 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
return attr->ptr_attr.len;
}
+/*
+ * uverbs_attr_ptr_get_array_size() - Get array size pointer by a ptr
+ * attribute.
+ * @attrs: The attribute bundle
+ * @idx: The ID of the attribute
+ * @elem_size: The size of the element in the array
+ */
+static inline int
+uverbs_attr_ptr_get_array_size(struct uverbs_attr_bundle *attrs, u16 idx,
+ size_t elem_size)
+{
+ int size = uverbs_attr_get_len(attrs, idx);
+
+ if (size < 0)
+ return size;
+
+ if (size % elem_size)
+ return -EINVAL;
+
+ return size / elem_size;
+}
+
/**
* uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for
* UVERBS_ATTR_TYPE_IDRS_ARRAY.
@@ -660,6 +841,12 @@ static inline int _uverbs_copy_from_or_zero(void *to,
#define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \
_uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
+static inline struct ib_ucontext *
+ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs)
+{
+ return ib_uverbs_get_ucontext_file(attrs->ufile);
+}
+
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, u64 allowed_bits);
@@ -684,6 +871,8 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, s64 lower_bound, u64 upper_bound,
s64 *def_val);
+int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size);
#else
static inline int
uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -719,6 +908,12 @@ _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
{
return -EINVAL;
}
+static inline int
+uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size)
+{
+ return -EINVAL;
+}
#endif
#define uverbs_get_const(_to, _attrs_bundle, _idx) \
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index b3b21733cc55..3447bfe356d6 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -43,7 +43,7 @@
#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y)
#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
-#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id)
+#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id)
/* These are static so they do not need to be qualified */
#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
@@ -102,18 +102,11 @@
#define ADD_UVERBS_METHODS(_name, _object_id, ...) \
static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
_object_id)[] = { __VA_ARGS__ }; \
- static const struct uverbs_object_def _name##_struct = { \
+ static const struct uverbs_object_def _name = { \
.id = _object_id, \
.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
.methods = &UVERBS_OBJECT_METHODS(_object_id) \
- }; \
- static const struct uverbs_object_def *const _name##_ptrs[] = { \
- &_name##_struct, \
- }; \
- static const struct uverbs_object_tree_def _name = { \
- .num_objects = 1, \
- .objects = &_name##_ptrs, \
- }
+ };
/* Used by drivers to declare a complete parsing tree for a single method that
* differs only in having additional driver specific attributes.
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 3db2802fbc68..883abcf6d36e 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -37,15 +37,6 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/ib_user_ioctl_verbs.h>
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
-#else
-static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
- return NULL;
-}
-#endif
-
/* Returns _id, or causes a compile error if _id is not a u32.
*
* The uobj APIs should only be used with the write based uAPI to access
@@ -54,15 +45,15 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
*/
#define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
-#define uobj_get_type(_ufile, _object) \
- uapi_get_object((_ufile)->device->uapi, _object)
+#define uobj_get_type(_attrs, _object) \
+ uapi_get_object((_attrs)->ufile->device->uapi, _object)
-#define uobj_get_read(_type, _id, _ufile) \
- rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
+#define uobj_get_read(_type, _id, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
_uobj_check_id(_id), UVERBS_LOOKUP_READ)
-#define ufd_get_read(_type, _fdnum, _ufile) \
- rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
+#define ufd_get_read(_type, _fdnum, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
(_fdnum)*typecheck(s32, _fdnum), \
UVERBS_LOOKUP_READ)
@@ -72,26 +63,27 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
return NULL;
return uobj->object;
}
-#define uobj_get_obj_read(_object, _type, _id, _ufile) \
+#define uobj_get_obj_read(_object, _type, _id, _attrs) \
((struct ib_##_object *)_uobj_get_obj_read( \
- uobj_get_read(_type, _id, _ufile)))
+ uobj_get_read(_type, _id, _attrs)))
-#define uobj_get_write(_type, _id, _ufile) \
- rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
+#define uobj_get_write(_type, _id, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
_uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
- struct ib_uverbs_file *ufile, int success_res);
-#define uobj_perform_destroy(_type, _id, _ufile, _success_res) \
- __uobj_perform_destroy(uobj_get_type(_ufile, _type), \
- _uobj_check_id(_id), _ufile, _success_res)
+ const struct uverbs_attr_bundle *attrs);
+#define uobj_perform_destroy(_type, _id, _attrs) \
+ __uobj_perform_destroy(uobj_get_type(_attrs, _type), \
+ _uobj_check_id(_id), _attrs)
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
- u32 id, struct ib_uverbs_file *ufile);
+ u32 id,
+ const struct uverbs_attr_bundle *attrs);
-#define uobj_get_destroy(_type, _id, _ufile) \
- __uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id), \
- _ufile)
+#define uobj_get_destroy(_type, _id, _attrs) \
+ __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \
+ _attrs)
static inline void uobj_put_destroy(struct ib_uobject *uobj)
{
@@ -111,14 +103,13 @@ static inline void uobj_put_write(struct ib_uobject *uobj)
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
}
-static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
- int success_res)
+static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj)
{
int ret = rdma_alloc_commit_uobject(uobj);
if (ret)
return ret;
- return success_res;
+ return 0;
}
static inline void uobj_alloc_abort(struct ib_uobject *uobj)
@@ -127,18 +118,18 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
}
static inline struct ib_uobject *
-__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
- struct ib_device **ib_dev)
+__uobj_alloc(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev)
{
- struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile);
+ struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs->ufile);
if (!IS_ERR(uobj))
*ib_dev = uobj->context->device;
return uobj;
}
-#define uobj_alloc(_type, _ufile, _ib_dev) \
- __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
+#define uobj_alloc(_type, _attrs, _ib_dev) \
+ __uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev)
static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
struct ib_uobject *uobj,
@@ -191,5 +182,17 @@ static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
uflow->resources = uflow_res;
}
+struct uverbs_api_object {
+ const struct uverbs_obj_type *type_attrs;
+ const struct uverbs_obj_type_class *type_class;
+ u8 disabled:1;
+ u32 id;
+};
+
+static inline u32 uobj_get_object_id(struct ib_uobject *uobj)
+{
+ return uobj->uapi_object->id;
+}
+
#endif
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index c891ada3c5c2..d85e6befa26b 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -61,6 +61,9 @@ struct scsi_pointer {
/* flags preserved across unprep / reprep */
#define SCMD_PRESERVED_FLAGS (SCMD_UNCHECKED_ISA_DMA | SCMD_INITIALIZED)
+/* for scmd->state */
+#define SCMD_STATE_COMPLETE 0
+
struct scsi_cmnd {
struct scsi_request req;
struct scsi_device *device;
@@ -145,6 +148,7 @@ struct scsi_cmnd {
int result; /* Status code from lower level driver */
int flags; /* Command flags */
+ unsigned long state; /* Command completion state */
unsigned char tag; /* SCSI-II queued command tag */
};
@@ -171,7 +175,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
size_t *offset, size_t *len);
extern void scsi_kunmap_atomic_sg(void *virt);
-extern int scsi_init_io(struct scsi_cmnd *cmd);
+extern blk_status_t scsi_init_io(struct scsi_cmnd *cmd);
#ifdef CONFIG_SCSI_DMA
extern int scsi_dma_map(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h
index c7bba2b24849..a862dc23c68d 100644
--- a/include/scsi/scsi_dh.h
+++ b/include/scsi/scsi_dh.h
@@ -69,7 +69,7 @@ struct scsi_device_handler {
int (*attach)(struct scsi_device *);
void (*detach)(struct scsi_device *);
int (*activate)(struct scsi_device *, activate_complete, void *);
- int (*prep_fn)(struct scsi_device *, struct request *);
+ blk_status_t (*prep_fn)(struct scsi_device *, struct request *);
int (*set_params)(struct scsi_device *, const char *);
void (*rescan)(struct scsi_device *);
};
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index fae8b465233e..6dffa8555a39 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -2,6 +2,7 @@
#ifndef _SCSI_SCSI_DRIVER_H
#define _SCSI_SCSI_DRIVER_H
+#include <linux/blk_types.h>
#include <linux/device.h>
struct module;
@@ -13,7 +14,7 @@ struct scsi_driver {
struct device_driver gendrv;
void (*rescan)(struct device *);
- int (*init_command)(struct scsi_cmnd *);
+ blk_status_t (*init_command)(struct scsi_cmnd *);
void (*uninit_command)(struct scsi_cmnd *);
int (*done)(struct scsi_cmnd *);
int (*eh_action)(struct scsi_cmnd *, int);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 5ea06d310a25..6ca954e9f752 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -11,7 +11,6 @@
#include <linux/blk-mq.h>
#include <scsi/scsi.h>
-struct request_queue;
struct block_device;
struct completion;
struct module;
@@ -22,7 +21,6 @@ struct scsi_target;
struct Scsi_Host;
struct scsi_host_cmd_pool;
struct scsi_transport_template;
-struct blk_queue_tags;
/*
@@ -44,9 +42,6 @@ struct blk_queue_tags;
#define MODE_INITIATOR 0x01
#define MODE_TARGET 0x02
-#define DISABLE_CLUSTERING 0
-#define ENABLE_CLUSTERING 1
-
struct scsi_host_template {
struct module *module;
const char *name;
@@ -366,6 +361,11 @@ struct scsi_host_template {
unsigned int max_sectors;
/*
+ * Maximum size in bytes of a single segment.
+ */
+ unsigned int max_segment_size;
+
+ /*
* DMA scatter gather segment boundary limit. A segment crossing this
* boundary will be split in two.
*/
@@ -415,16 +415,6 @@ struct scsi_host_template {
unsigned unchecked_isa_dma:1;
/*
- * True if this host adapter can make good use of clustering.
- * I originally thought that if the tablesize was large that it
- * was a waste of CPU cycles to prepare a cluster list, but
- * it works out that the Buslogic is faster if you use a smaller
- * number of segments (i.e. use clustering). I guess it is
- * inefficient.
- */
- unsigned use_clustering:1;
-
- /*
* True for emulated SCSI host adapters (e.g. ATAPI).
*/
unsigned emulated:1;
@@ -547,14 +537,8 @@ struct Scsi_Host {
struct scsi_host_template *hostt;
struct scsi_transport_template *transportt;
- /*
- * Area to keep a shared tag map (if needed, will be
- * NULL if not).
- */
- union {
- struct blk_queue_tag *bqt;
- struct blk_mq_tag_set tag_set;
- };
+ /* Area to keep a shared tag map */
+ struct blk_mq_tag_set tag_set;
atomic_t host_busy; /* commands actually active on low-level */
atomic_t host_blocked;
@@ -604,6 +588,7 @@ struct Scsi_Host {
short unsigned int sg_tablesize;
short unsigned int sg_prot_tablesize;
unsigned int max_sectors;
+ unsigned int max_segment_size;
unsigned long dma_boundary;
/*
* In scsi-mq mode, the number of hardware queues supported by the LLD.
@@ -621,7 +606,6 @@ struct Scsi_Host {
unsigned active_mode:2;
unsigned unchecked_isa_dma:1;
- unsigned use_clustering:1;
/*
* Host has requested that no further requests come through for the
@@ -648,7 +632,6 @@ struct Scsi_Host {
/* The controller does not support WRITE SAME */
unsigned no_write_same:1;
- unsigned use_blk_mq:1;
unsigned use_cmd_list:1;
/* Host responded with short (<36 bytes) INQUIRY result */
@@ -742,11 +725,6 @@ static inline int scsi_host_in_recovery(struct Scsi_Host *shost)
shost->tmf_in_progress;
}
-static inline bool shost_use_blk_mq(struct Scsi_Host *shost)
-{
- return shost->use_blk_mq;
-}
-
extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
extern void scsi_flush_work(struct Scsi_Host *);
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index e192a0caa850..6053d46e794e 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -23,19 +23,15 @@ static inline struct scsi_cmnd *scsi_host_find_tag(struct Scsi_Host *shost,
int tag)
{
struct request *req = NULL;
+ u16 hwq;
if (tag == SCSI_NO_TAG)
return NULL;
- if (shost_use_blk_mq(shost)) {
- u16 hwq = blk_mq_unique_tag_to_hwq(tag);
-
- if (hwq < shost->tag_set.nr_hw_queues) {
- req = blk_mq_tag_to_rq(shost->tag_set.tags[hwq],
- blk_mq_unique_tag_to_tag(tag));
- }
- } else {
- req = blk_map_queue_find_tag(shost->bqt, tag);
+ hwq = blk_mq_unique_tag_to_hwq(tag);
+ if (hwq < shost->tag_set.nr_hw_queues) {
+ req = blk_mq_tag_to_rq(shost->tag_set.tags[hwq],
+ blk_mq_unique_tag_to_tag(tag));
}
if (!req)
diff --git a/include/scsi/srp.h b/include/scsi/srp.h
index c16a3c9a4d9b..9220758d5087 100644
--- a/include/scsi/srp.h
+++ b/include/scsi/srp.h
@@ -67,7 +67,8 @@ enum {
enum {
SRP_NO_DATA_DESC = 0,
SRP_DATA_DESC_DIRECT = 1,
- SRP_DATA_DESC_INDIRECT = 2
+ SRP_DATA_DESC_INDIRECT = 2,
+ SRP_DATA_DESC_IMM = 3, /* new in SRP2 */
};
enum {
@@ -111,9 +112,16 @@ struct srp_indirect_buf {
struct srp_direct_buf desc_list[0];
} __attribute__((packed));
+/* Immediate data buffer descriptor as defined in SRP2. */
+struct srp_imm_buf {
+ __be32 len;
+};
+
+/* srp_login_req.flags */
enum {
SRP_MULTICHAN_SINGLE = 0,
- SRP_MULTICHAN_MULTI = 1
+ SRP_MULTICHAN_MULTI = 1,
+ SRP_IMMED_REQUESTED = 0x80, /* new in SRP2 */
};
struct srp_login_req {
@@ -124,7 +132,9 @@ struct srp_login_req {
u8 reserved2[4];
__be16 req_buf_fmt;
u8 req_flags;
- u8 reserved3[5];
+ u8 reserved3[1];
+ __be16 imm_data_offset; /* new in SRP2 */
+ u8 reserved4[2];
u8 initiator_port_id[16];
u8 target_port_id[16];
};
@@ -144,6 +154,16 @@ struct srp_login_req_rdma {
__be32 req_it_iu_len;
u8 initiator_port_id[16];
u8 target_port_id[16];
+ __be16 imm_data_offset;
+ u8 reserved[6];
+};
+
+/* srp_login_rsp.rsp_flags */
+enum {
+ SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0,
+ SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1,
+ SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
+ SRP_LOGIN_RSP_IMMED_SUPP = 0x80, /* new in SRP2 */
};
/*
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index e3bdb0550a59..69b7b955902c 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -46,6 +46,10 @@
/* Used by transport_get_inquiry_vpd_device_ident() */
#define INQUIRY_VPD_DEVICE_IDENTIFIER_LEN 254
+#define INQUIRY_VENDOR_LEN 8
+#define INQUIRY_MODEL_LEN 16
+#define INQUIRY_REVISION_LEN 4
+
/* Attempts before moving from SHORT to LONG */
#define PYX_TRANSPORT_WINDOW_CLOSED_THRESHOLD 3
#define PYX_TRANSPORT_WINDOW_CLOSED_WAIT_SHORT 3 /* In milliseconds */
@@ -87,6 +91,8 @@
#define DA_EMULATE_3PC 1
/* No Emulation for PSCSI by default */
#define DA_EMULATE_ALUA 0
+/* Emulate SCSI2 RESERVE/RELEASE and Persistent Reservations by default */
+#define DA_EMULATE_PR 1
/* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */
#define DA_ENFORCE_PR_ISIDS 1
/* Force SPC-3 PR Activate Persistence across Target Power Loss */
@@ -134,7 +140,6 @@ enum se_cmd_flags_table {
SCF_SENT_CHECK_CONDITION = 0x00000800,
SCF_OVERFLOW_BIT = 0x00001000,
SCF_UNDERFLOW_BIT = 0x00002000,
- SCF_SEND_DELAYED_TAS = 0x00004000,
SCF_ALUA_NON_OPTIMIZED = 0x00008000,
SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000,
SCF_COMPARE_AND_WRITE = 0x00080000,
@@ -314,9 +319,13 @@ struct t10_vpd {
};
struct t10_wwn {
- char vendor[8];
- char model[16];
- char revision[4];
+ /*
+ * SCSI left aligned strings may not be null terminated. +1 to ensure a
+ * null terminator is always present.
+ */
+ char vendor[INQUIRY_VENDOR_LEN + 1];
+ char model[INQUIRY_MODEL_LEN + 1];
+ char revision[INQUIRY_REVISION_LEN + 1];
char unit_serial[INQUIRY_VPD_SERIAL_LEN];
spinlock_t t10_vpd_lock;
struct se_device *t10_dev;
@@ -474,7 +483,8 @@ struct se_cmd {
struct se_session *se_sess;
struct se_tmr_req *se_tmr_req;
struct list_head se_cmd_list;
- struct completion *compl;
+ struct completion *free_compl;
+ struct completion *abrt_compl;
const struct target_core_fabric_ops *se_tfo;
sense_reason_t (*execute_cmd)(struct se_cmd *);
sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool, int *);
@@ -601,6 +611,7 @@ struct se_session {
struct se_node_acl *se_node_acl;
struct se_portal_group *se_tpg;
void *fabric_sess_ptr;
+ struct percpu_ref cmd_count;
struct list_head sess_list;
struct list_head sess_acl_list;
struct list_head sess_cmd_list;
@@ -664,7 +675,7 @@ struct se_dev_attrib {
int emulate_tpws;
int emulate_caw;
int emulate_3pc;
- int pi_prot_format;
+ int emulate_pr;
enum target_prot_type pi_prot_type;
enum target_prot_type hw_pi_prot_type;
int pi_prot_verify;
@@ -731,7 +742,6 @@ struct se_lun {
struct scsi_port_stats lun_stats;
struct config_group lun_group;
struct se_port_stat_grps port_stat_grps;
- struct completion lun_ref_comp;
struct completion lun_shutdown_comp;
struct percpu_ref lun_ref;
struct list_head lun_dev_link;
@@ -794,7 +804,6 @@ struct se_device {
struct t10_pr_registration *dev_pr_res_holder;
struct list_head dev_sep_list;
struct list_head dev_tmr_list;
- struct workqueue_struct *tmr_wq;
struct work_struct qf_work_queue;
struct list_head delayed_cmd_list;
struct list_head state_list;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index f4147b398431..ee5ddd81cd8d 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -8,7 +8,18 @@
struct target_core_fabric_ops {
struct module *module;
- const char *name;
+ /*
+ * XXX: Special case for iscsi/iSCSI...
+ * If non-null, fabric_alias is used for matching target/$fabric
+ * ConfigFS paths. If null, fabric_name is used for this (see below).
+ */
+ const char *fabric_alias;
+ /*
+ * fabric_name is used for matching target/$fabric ConfigFS paths
+ * without a fabric_alias (see above). It's also used for the ALUA state
+ * path and is stored on disk with PR state.
+ */
+ const char *fabric_name;
size_t node_acl_size;
/*
* Limits number of scatterlist entries per SCF_SCSI_DATA_CDB payload.
@@ -23,7 +34,6 @@ struct target_core_fabric_ops {
* XXX: Currently assumes single PAGE_SIZE per scatterlist entry
*/
u32 max_data_sg_nents;
- char *(*get_fabric_name)(void);
char *(*tpg_get_wwn)(struct se_portal_group *);
u16 (*tpg_get_tag)(struct se_portal_group *);
u32 (*tpg_get_default_depth)(struct se_portal_group *);
@@ -101,6 +111,13 @@ struct target_core_fabric_ops {
struct configfs_attribute **tfc_tpg_nacl_attrib_attrs;
struct configfs_attribute **tfc_tpg_nacl_auth_attrs;
struct configfs_attribute **tfc_tpg_nacl_param_attrs;
+
+ /*
+ * Set this member variable to true if the SCSI transport protocol
+ * (e.g. iSCSI) requires that the Data-Out buffer is transferred in
+ * its entirety before a command is aborted.
+ */
+ bool write_pending_must_be_called;
};
int target_register_template(const struct target_core_fabric_ops *fo);
@@ -116,7 +133,7 @@ struct se_session *target_setup_session(struct se_portal_group *,
struct se_session *, void *));
void target_remove_session(struct se_session *);
-void transport_init_session(struct se_session *);
+int transport_init_session(struct se_session *se_sess);
struct se_session *transport_alloc_session(enum target_prot_op);
int transport_alloc_session_tags(struct se_session *, unsigned int,
unsigned int);
@@ -149,12 +166,12 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
int transport_handle_cdb_direct(struct se_cmd *);
sense_reason_t transport_generic_new_cmd(struct se_cmd *);
+void target_put_cmd_and_wait(struct se_cmd *cmd);
void target_execute_cmd(struct se_cmd *cmd);
int transport_generic_free_cmd(struct se_cmd *, int);
bool transport_wait_for_tasks(struct se_cmd *);
-int transport_check_aborted_status(struct se_cmd *, int);
int transport_send_check_condition_and_sense(struct se_cmd *,
sense_reason_t, int);
int target_get_sess_cmd(struct se_cmd *, bool);
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 2cbd6e42ad83..e4526f85c19d 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -221,9 +221,30 @@ DEFINE_EVENT(cache_set, bcache_journal_entry_full,
TP_ARGS(c)
);
-DEFINE_EVENT(bcache_bio, bcache_journal_write,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
+TRACE_EVENT(bcache_journal_write,
+ TP_PROTO(struct bio *bio, u32 keys),
+ TP_ARGS(bio, keys),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(sector_t, sector )
+ __field(unsigned int, nr_sector )
+ __array(char, rwbs, 6 )
+ __field(u32, nr_keys )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio_dev(bio);
+ __entry->sector = bio->bi_iter.bi_sector;
+ __entry->nr_sector = bio->bi_iter.bi_size >> 9;
+ __entry->nr_keys = keys;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u keys %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __entry->nr_keys)
);
/* Btree */
diff --git a/include/trace/events/iscsi.h b/include/trace/events/iscsi.h
new file mode 100644
index 000000000000..87408faf6e4e
--- /dev/null
+++ b/include/trace/events/iscsi.h
@@ -0,0 +1,107 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM iscsi
+
+#if !defined(_TRACE_ISCSI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ISCSI_H
+
+#include <linux/tracepoint.h>
+
+/* max debug message length */
+#define ISCSI_MSG_MAX 256
+
+/*
+ * Declare tracepoint helper function.
+ */
+void iscsi_dbg_trace(void (*trace)(struct device *dev, struct va_format *),
+ struct device *dev, const char *fmt, ...);
+
+/*
+ * Declare event class for iscsi debug messages.
+ */
+DECLARE_EVENT_CLASS(iscsi_log_msg,
+
+ TP_PROTO(struct device *dev, struct va_format *vaf),
+
+ TP_ARGS(dev, vaf),
+
+ TP_STRUCT__entry(
+ __string(dname, dev_name(dev) )
+ __dynamic_array(char, msg, ISCSI_MSG_MAX )
+ ),
+
+ TP_fast_assign(
+ __assign_str(dname, dev_name(dev));
+ vsnprintf(__get_str(msg), ISCSI_MSG_MAX, vaf->fmt, *vaf->va);
+ ),
+
+ TP_printk("%s: %s",__get_str(dname), __get_str(msg)
+ )
+);
+
+/*
+ * Define event to capture iscsi connection debug messages.
+ */
+DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_conn,
+ TP_PROTO(struct device *dev, struct va_format *vaf),
+
+ TP_ARGS(dev, vaf)
+);
+
+/*
+ * Define event to capture iscsi session debug messages.
+ */
+DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_session,
+ TP_PROTO(struct device *dev, struct va_format *vaf),
+
+ TP_ARGS(dev, vaf)
+);
+
+/*
+ * Define event to capture iscsi error handling debug messages.
+ */
+DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_eh,
+ TP_PROTO(struct device *dev, struct va_format *vaf),
+
+ TP_ARGS(dev, vaf)
+);
+
+/*
+ * Define event to capture iscsi tcp debug messages.
+ */
+DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_tcp,
+ TP_PROTO(struct device *dev, struct va_format *vaf),
+
+ TP_ARGS(dev, vaf)
+);
+
+/*
+ * Define event to capture iscsi sw tcp debug messages.
+ */
+DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_sw_tcp,
+ TP_PROTO(struct device *dev, struct va_format *vaf),
+
+ TP_ARGS(dev, vaf)
+);
+
+/*
+ * Define event to capture iscsi transport session debug messages.
+ */
+DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_trans_session,
+ TP_PROTO(struct device *dev, struct va_format *vaf),
+
+ TP_ARGS(dev, vaf)
+);
+
+/*
+ * Define event to capture iscsi transport connection debug messages.
+ */
+DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_trans_conn,
+ TP_PROTO(struct device *dev, struct va_format *vaf),
+
+ TP_ARGS(dev, vaf)
+);
+
+#endif /* _TRACE_ISCSI_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index ce43d340f010..8387e0af0f76 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -50,6 +50,8 @@ enum {
*
* IOCB_FLAG_RESFD - Set if the "aio_resfd" member of the "struct iocb"
* is valid.
+ * IOCB_FLAG_IOPRIO - Set if the "aio_reqprio" member of the "struct iocb"
+ * is valid.
*/
#define IOCB_FLAG_RESFD (1 << 0)
#define IOCB_FLAG_IOPRIO (1 << 1)
diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h
index 45f369dc0a42..00c08120f3ba 100644
--- a/include/uapi/linux/mmc/ioctl.h
+++ b/include/uapi/linux/mmc/ioctl.h
@@ -5,7 +5,10 @@
#include <linux/types.h>
struct mmc_ioc_cmd {
- /* Implies direction of data. true = write, false = read */
+ /*
+ * Direction of data: nonzero = write, zero = read.
+ * Bit 31 selects 'Reliable Write' for RPMB.
+ */
int write_flag;
/* Application-specific command. true = precede with CMD55 */
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index c6a984c0c881..01ac5853d9ac 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -6,7 +6,7 @@
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -95,7 +95,7 @@
#define HFI1_CAP_SDMA_AHG (1UL << 2) /* Enable SDMA AHG support */
#define HFI1_CAP_EXTENDED_PSN (1UL << 3) /* Enable Extended PSN support */
#define HFI1_CAP_HDRSUPP (1UL << 4) /* Enable Header Suppression */
-/* 1UL << 5 unused */
+#define HFI1_CAP_TID_RDMA (1UL << 5) /* Enable TID RDMA operations */
#define HFI1_CAP_USE_SDMA_HEAD (1UL << 6) /* DMA Hdr Q tail vs. use CSR */
#define HFI1_CAP_MULTI_PKT_EGR (1UL << 7) /* Enable multi-packet Egr buffs*/
#define HFI1_CAP_NODROP_RHQ_FULL (1UL << 8) /* Don't drop on Hdr Q full */
@@ -106,7 +106,7 @@
#define HFI1_CAP_NO_INTEGRITY (1UL << 13) /* Enable ctxt integrity checks */
#define HFI1_CAP_PKEY_CHECK (1UL << 14) /* Enable ctxt PKey checking */
#define HFI1_CAP_STATIC_RATE_CTRL (1UL << 15) /* Allow PBC.StaticRateControl */
-/* 1UL << 16 unused */
+#define HFI1_CAP_OPFN (1UL << 16) /* Enable the OPFN protocol */
#define HFI1_CAP_SDMA_HEAD_CHECK (1UL << 17) /* SDMA head checking */
#define HFI1_CAP_EARLY_CREDIT_RETURN (1UL << 18) /* early credit return */
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index c1f87735514f..ef3c7ec793a7 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -46,6 +46,12 @@ struct hns_roce_ib_create_cq_resp {
__aligned_u64 cap_flags;
};
+struct hns_roce_ib_create_srq {
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
+ __aligned_u64 que_addr;
+};
+
struct hns_roce_ib_create_qp {
__aligned_u64 buf_addr;
__aligned_u64 db_addr;
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 2c881aaf05c2..64f0e3aacd3f 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -63,6 +63,23 @@ enum {
UVERBS_ATTR_UHW_OUT,
};
+enum uverbs_methods_device {
+ UVERBS_METHOD_INVOKE_WRITE,
+ UVERBS_METHOD_INFO_HANDLES,
+ UVERBS_METHOD_QUERY_PORT,
+};
+
+enum uverbs_attrs_invoke_write_cmd_attr_ids {
+ UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_CORE_OUT,
+ UVERBS_ATTR_WRITE_CMD,
+};
+
+enum uverbs_attrs_query_port_cmd_attr_ids {
+ UVERBS_ATTR_QUERY_PORT_PORT_NUM,
+ UVERBS_ATTR_QUERY_PORT_RESP,
+};
+
enum uverbs_attrs_create_cq_cmd_attr_ids {
UVERBS_ATTR_CREATE_CQ_HANDLE,
UVERBS_ATTR_CREATE_CQ_CQE,
@@ -135,6 +152,19 @@ enum uverbs_attrs_reg_dm_mr_cmd_attr_ids {
enum uverbs_methods_mr {
UVERBS_METHOD_DM_MR_REG,
+ UVERBS_METHOD_MR_DESTROY,
+ UVERBS_METHOD_ADVISE_MR,
+};
+
+enum uverbs_attrs_mr_destroy_ids {
+ UVERBS_ATTR_DESTROY_MR_HANDLE,
+};
+
+enum uverbs_attrs_advise_mr_cmd_attr_ids {
+ UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
+ UVERBS_ATTR_ADVISE_MR_ADVICE,
+ UVERBS_ATTR_ADVISE_MR_FLAGS,
+ UVERBS_ATTR_ADVISE_MR_SGE_LIST,
};
enum uverbs_attrs_create_counters_cmd_attr_ids {
@@ -157,4 +187,58 @@ enum uverbs_methods_actions_counters_ops {
UVERBS_METHOD_COUNTERS_READ,
};
+enum uverbs_attrs_info_handles_id {
+ UVERBS_ATTR_INFO_OBJECT_ID,
+ UVERBS_ATTR_INFO_TOTAL_HANDLES,
+ UVERBS_ATTR_INFO_HANDLES_LIST,
+};
+
+enum uverbs_methods_pd {
+ UVERBS_METHOD_PD_DESTROY,
+};
+
+enum uverbs_attrs_pd_destroy_ids {
+ UVERBS_ATTR_DESTROY_PD_HANDLE,
+};
+
+enum uverbs_methods_mw {
+ UVERBS_METHOD_MW_DESTROY,
+};
+
+enum uverbs_attrs_mw_destroy_ids {
+ UVERBS_ATTR_DESTROY_MW_HANDLE,
+};
+
+enum uverbs_methods_xrcd {
+ UVERBS_METHOD_XRCD_DESTROY,
+};
+
+enum uverbs_attrs_xrcd_destroy_ids {
+ UVERBS_ATTR_DESTROY_XRCD_HANDLE,
+};
+
+enum uverbs_methods_ah {
+ UVERBS_METHOD_AH_DESTROY,
+};
+
+enum uverbs_attrs_ah_destroy_ids {
+ UVERBS_ATTR_DESTROY_AH_HANDLE,
+};
+
+enum uverbs_methods_rwq_ind_tbl {
+ UVERBS_METHOD_RWQ_IND_TBL_DESTROY,
+};
+
+enum uverbs_attrs_rwq_ind_tbl_destroy_ids {
+ UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE,
+};
+
+enum uverbs_methods_flow {
+ UVERBS_METHOD_FLOW_DESTROY,
+};
+
+enum uverbs_attrs_flow_destroy_ids {
+ UVERBS_ATTR_DESTROY_FLOW_HANDLE,
+};
+
#endif
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 6cdf192070a2..72c7fc75f960 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -35,6 +35,7 @@
#define IB_USER_IOCTL_VERBS_H
#include <linux/types.h>
+#include <rdma/ib_user_verbs.h>
#ifndef RDMA_UAPI_PTR
#define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name
@@ -157,4 +158,19 @@ enum ib_uverbs_read_counters_flags {
IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0,
};
+enum ib_uverbs_advise_mr_advice {
+ IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH,
+ IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE,
+};
+
+enum ib_uverbs_advise_mr_flag {
+ IB_UVERBS_ADVISE_MR_FLAG_FLUSH = 1 << 0,
+};
+
+struct ib_uverbs_query_port_resp_ex {
+ struct ib_uverbs_query_port_resp legacy_resp;
+ __u16 port_cap_flags2;
+ __u8 reserved[6];
+};
+
#endif
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 1254b51a551a..480d9a60b68e 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -46,7 +46,7 @@
#define IB_USER_VERBS_ABI_VERSION 6
#define IB_USER_VERBS_CMD_THRESHOLD 50
-enum {
+enum ib_uverbs_write_cmds {
IB_USER_VERBS_CMD_GET_CONTEXT,
IB_USER_VERBS_CMD_QUERY_DEVICE,
IB_USER_VERBS_CMD_QUERY_PORT,
@@ -164,6 +164,7 @@ struct ib_uverbs_get_context {
struct ib_uverbs_get_context_resp {
__u32 async_fd;
__u32 num_comp_vectors;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_device {
@@ -310,6 +311,7 @@ struct ib_uverbs_alloc_pd {
struct ib_uverbs_alloc_pd_resp {
__u32 pd_handle;
+ __u32 driver_data[0];
};
struct ib_uverbs_dealloc_pd {
@@ -325,6 +327,7 @@ struct ib_uverbs_open_xrcd {
struct ib_uverbs_open_xrcd_resp {
__u32 xrcd_handle;
+ __u32 driver_data[0];
};
struct ib_uverbs_close_xrcd {
@@ -345,6 +348,7 @@ struct ib_uverbs_reg_mr_resp {
__u32 mr_handle;
__u32 lkey;
__u32 rkey;
+ __u32 driver_data[0];
};
struct ib_uverbs_rereg_mr {
@@ -356,11 +360,13 @@ struct ib_uverbs_rereg_mr {
__aligned_u64 hca_va;
__u32 pd_handle;
__u32 access_flags;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_rereg_mr_resp {
__u32 lkey;
__u32 rkey;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_dereg_mr {
@@ -372,11 +378,13 @@ struct ib_uverbs_alloc_mw {
__u32 pd_handle;
__u8 mw_type;
__u8 reserved[3];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_alloc_mw_resp {
__u32 mw_handle;
__u32 rkey;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_dealloc_mw {
@@ -419,6 +427,7 @@ struct ib_uverbs_ex_create_cq {
struct ib_uverbs_create_cq_resp {
__u32 cq_handle;
__u32 cqe;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_ex_create_cq_resp {
@@ -629,6 +638,7 @@ struct ib_uverbs_create_qp_resp {
__u32 max_recv_sge;
__u32 max_inline_data;
__u32 reserved;
+ __u32 driver_data[0];
};
struct ib_uverbs_ex_create_qp_resp {
@@ -733,9 +743,6 @@ struct ib_uverbs_ex_modify_qp {
__u32 reserved;
};
-struct ib_uverbs_modify_qp_resp {
-};
-
struct ib_uverbs_ex_modify_qp_resp {
__u32 comp_mask;
__u32 response_length;
@@ -863,10 +870,12 @@ struct ib_uverbs_create_ah {
__u32 pd_handle;
__u32 reserved;
struct ib_uverbs_ah_attr attr;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_create_ah_resp {
__u32 ah_handle;
+ __u32 driver_data[0];
};
struct ib_uverbs_destroy_ah {
@@ -1175,6 +1184,7 @@ struct ib_uverbs_create_srq_resp {
__u32 max_wr;
__u32 max_sge;
__u32 srqn;
+ __u32 driver_data[0];
};
struct ib_uverbs_modify_srq {
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 8fa9f90e2bb1..87b3198f4b5d 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -48,6 +48,7 @@ enum {
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6,
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7,
MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8,
+ MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE = 1 << 9,
};
enum {
@@ -236,6 +237,7 @@ enum mlx5_ib_query_dev_resp_flags {
/* Support 128B CQE compression */
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0,
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1,
+ MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2,
};
enum mlx5_ib_tunnel_offloads {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 408e220034de..b8d121d457f1 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -158,6 +158,7 @@ enum mlx5_ib_create_flow_attrs {
MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
};
enum mlx5_ib_destoy_flow_attrs {
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index f9c41bf59efc..2e18b77a817f 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -283,6 +283,9 @@ enum rdma_nldev_attr {
/*
* Device and port capabilities
+ *
+ * When used for port info, first 32-bits are CapabilityMask followed by
+ * 16-bit CapabilityMask2.
*/
RDMA_NLDEV_ATTR_CAP_FLAGS, /* u64 */
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index d1a5d885ce13..73e02ea5d5d1 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -53,9 +53,6 @@ static void __init handle_initrd(void)
ksys_mkdir("/old", 0700);
ksys_chdir("/old");
- /* try loading default modules from initrd */
- load_default_modules();
-
/*
* In case that a resume from disk is carried out by linuxrc or one of
* its children, we need to tell the freezer not to wait for us.
diff --git a/init/initramfs.c b/init/initramfs.c
index f6f4a1e4cd54..fca899622937 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -646,12 +646,6 @@ static int __init populate_rootfs(void)
#endif
}
flush_delayed_fput();
- /*
- * Try loading default modules from initramfs. This gives
- * us a chance to load before device_initcalls.
- */
- load_default_modules();
-
return 0;
}
rootfs_initcall(populate_rootfs);
diff --git a/init/main.c b/init/main.c
index f6e901ec6b78..86d894852bef 100644
--- a/init/main.c
+++ b/init/main.c
@@ -992,17 +992,6 @@ static void __init do_pre_smp_initcalls(void)
do_one_initcall(initcall_from_entry(fn));
}
-/*
- * This function requests modules which should be loaded by default and is
- * called twice right after initrd is mounted and right before init is
- * exec'd. If such modules are on either initrd or rootfs, they will be
- * loaded before control is passed to userland.
- */
-void __init load_default_modules(void)
-{
- load_default_elevator_module();
-}
-
static int run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
@@ -1176,5 +1165,4 @@ static noinline void __init kernel_init_freeable(void)
*/
integrity_load_keys();
- load_default_modules();
}
diff --git a/kernel/Makefile b/kernel/Makefile
index 9dc7f519129d..cde93d54c571 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -49,9 +49,6 @@ obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
obj-$(CONFIG_FUTEX) += futex.o
-ifeq ($(CONFIG_COMPAT),y)
-obj-$(CONFIG_FUTEX) += futex_compat.o
-endif
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += smp.o
ifneq ($(CONFIG_SMP),y)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 7a8429f8e280..39eb36ba36ad 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
}
/**
- * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
+ * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest (%NULL returns @cgrp->self)
*
@@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
* enabled. If @ss is associated with the hierarchy @cgrp is on, this
* function is guaranteed to return non-NULL css.
*/
-static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
- struct cgroup_subsys *ss)
+static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
+ struct cgroup_subsys *ss)
{
lockdep_assert_held(&cgroup_mutex);
@@ -524,6 +524,35 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
}
/**
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest
+ *
+ * Find and get the effective css of @cgrp for @ss. The effective css is
+ * defined as the matching css of the nearest ancestor including self which
+ * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
+ * the root css is returned, so this function always returns a valid css.
+ *
+ * The returned css is not guaranteed to be online, and therefore it is the
+ * callers responsiblity to tryget a reference for it.
+ */
+struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+ struct cgroup_subsys *ss)
+{
+ struct cgroup_subsys_state *css;
+
+ do {
+ css = cgroup_css(cgrp, ss);
+
+ if (css)
+ return css;
+ cgrp = cgroup_parent(cgrp);
+ } while (cgrp);
+
+ return init_css_set.subsys[ss->id];
+}
+
+/**
* cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest
@@ -605,10 +634,11 @@ EXPORT_SYMBOL_GPL(of_css);
*
* Should be called under cgroup_[tree_]mutex.
*/
-#define for_each_e_css(css, ssid, cgrp) \
- for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
- if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
- ; \
+#define for_each_e_css(css, ssid, cgrp) \
+ for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
+ if (!((css) = cgroup_e_css_by_mask(cgrp, \
+ cgroup_subsys[(ssid)]))) \
+ ; \
else
/**
@@ -1007,7 +1037,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
* @ss is in this hierarchy, so we want the
* effective css from @cgrp.
*/
- template[i] = cgroup_e_css(cgrp, ss);
+ template[i] = cgroup_e_css_by_mask(cgrp, ss);
} else {
/*
* @ss is not in this hierarchy, so we don't want
@@ -3024,7 +3054,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
return ret;
/*
- * At this point, cgroup_e_css() results reflect the new csses
+ * At this point, cgroup_e_css_by_mask() results reflect the new csses
* making the following cgroup_update_dfl_csses() properly update
* css associations of all tasks in the subtree.
*/
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 645c7a2ecde8..ca88b867e7fe 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -35,13 +35,8 @@ config ARCH_HAS_DMA_COHERENT_TO_PFN
config ARCH_HAS_DMA_MMAP_PGPROT
bool
-config DMA_DIRECT_OPS
- bool
- depends on HAS_DMA
-
config DMA_NONCOHERENT_CACHE_SYNC
bool
- depends on DMA_DIRECT_OPS
config DMA_VIRT_OPS
bool
@@ -49,5 +44,12 @@ config DMA_VIRT_OPS
config SWIOTLB
bool
- select DMA_DIRECT_OPS
select NEED_DMA_MAP_STATE
+
+config DMA_REMAP
+ depends on MMU
+ bool
+
+config DMA_DIRECT_REMAP
+ bool
+ select DMA_REMAP
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index 7d581e4eea4a..72ff6e46aa86 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -1,10 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_HAS_DMA) += mapping.o
+obj-$(CONFIG_HAS_DMA) += mapping.o direct.o dummy.o
obj-$(CONFIG_DMA_CMA) += contiguous.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o
-obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o
obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
obj-$(CONFIG_DMA_API_DEBUG) += debug.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
-
+obj-$(CONFIG_DMA_REMAP) += remap.o
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 231ca4628062..164706da2a73 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -17,6 +17,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "DMA-API: " fmt
+
#include <linux/sched/task_stack.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
@@ -41,10 +43,9 @@
#define HASH_FN_SHIFT 13
#define HASH_FN_MASK (HASH_SIZE - 1)
-/* allow architectures to override this if absolutely required */
-#ifndef PREALLOC_DMA_DEBUG_ENTRIES
#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-#endif
+/* If the pool runs out, add this many new entries at once */
+#define DMA_DEBUG_DYNAMIC_ENTRIES (PAGE_SIZE / sizeof(struct dma_debug_entry))
enum {
dma_debug_single,
@@ -142,6 +143,7 @@ static struct dentry *show_all_errors_dent __read_mostly;
static struct dentry *show_num_errors_dent __read_mostly;
static struct dentry *num_free_entries_dent __read_mostly;
static struct dentry *min_free_entries_dent __read_mostly;
+static struct dentry *nr_total_entries_dent __read_mostly;
static struct dentry *filter_dent __read_mostly;
/* per-driver filter related state */
@@ -234,7 +236,7 @@ static bool driver_filter(struct device *dev)
error_count += 1; \
if (driver_filter(dev) && \
(show_all_errors || show_num_errors > 0)) { \
- WARN(1, "%s %s: " format, \
+ WARN(1, pr_fmt("%s %s: ") format, \
dev ? dev_driver_string(dev) : "NULL", \
dev ? dev_name(dev) : "NULL", ## arg); \
dump_entry_trace(entry); \
@@ -519,7 +521,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln)
* prematurely.
*/
WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
- "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
+ pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"),
ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
}
@@ -614,7 +616,7 @@ void debug_dma_assert_idle(struct page *page)
cln = to_cacheline_number(entry);
err_printk(entry->dev, entry,
- "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
+ "cpu touching an active dma mapped cacheline [cln=%pa]\n",
&cln);
}
@@ -634,7 +636,7 @@ static void add_dma_entry(struct dma_debug_entry *entry)
rc = active_cacheline_insert(entry);
if (rc == -ENOMEM) {
- pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
+ pr_err("cacheline tracking ENOMEM, dma-debug disabled\n");
global_disable = true;
}
@@ -643,6 +645,24 @@ static void add_dma_entry(struct dma_debug_entry *entry)
*/
}
+static int dma_debug_create_entries(gfp_t gfp)
+{
+ struct dma_debug_entry *entry;
+ int i;
+
+ entry = (void *)get_zeroed_page(gfp);
+ if (!entry)
+ return -ENOMEM;
+
+ for (i = 0; i < DMA_DEBUG_DYNAMIC_ENTRIES; i++)
+ list_add_tail(&entry[i].list, &free_entries);
+
+ num_free_entries += DMA_DEBUG_DYNAMIC_ENTRIES;
+ nr_total_entries += DMA_DEBUG_DYNAMIC_ENTRIES;
+
+ return 0;
+}
+
static struct dma_debug_entry *__dma_entry_alloc(void)
{
struct dma_debug_entry *entry;
@@ -658,6 +678,18 @@ static struct dma_debug_entry *__dma_entry_alloc(void)
return entry;
}
+void __dma_entry_alloc_check_leak(void)
+{
+ u32 tmp = nr_total_entries % nr_prealloc_entries;
+
+ /* Shout each time we tick over some multiple of the initial pool */
+ if (tmp < DMA_DEBUG_DYNAMIC_ENTRIES) {
+ pr_info("dma_debug_entry pool grown to %u (%u00%%)\n",
+ nr_total_entries,
+ (nr_total_entries / nr_prealloc_entries));
+ }
+}
+
/* struct dma_entry allocator
*
* The next two functions implement the allocator for
@@ -669,12 +701,14 @@ static struct dma_debug_entry *dma_entry_alloc(void)
unsigned long flags;
spin_lock_irqsave(&free_entries_lock, flags);
-
- if (list_empty(&free_entries)) {
- global_disable = true;
- spin_unlock_irqrestore(&free_entries_lock, flags);
- pr_err("DMA-API: debugging out of memory - disabling\n");
- return NULL;
+ if (num_free_entries == 0) {
+ if (dma_debug_create_entries(GFP_ATOMIC)) {
+ global_disable = true;
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+ pr_err("debugging out of memory - disabling\n");
+ return NULL;
+ }
+ __dma_entry_alloc_check_leak();
}
entry = __dma_entry_alloc();
@@ -707,52 +741,6 @@ static void dma_entry_free(struct dma_debug_entry *entry)
spin_unlock_irqrestore(&free_entries_lock, flags);
}
-int dma_debug_resize_entries(u32 num_entries)
-{
- int i, delta, ret = 0;
- unsigned long flags;
- struct dma_debug_entry *entry;
- LIST_HEAD(tmp);
-
- spin_lock_irqsave(&free_entries_lock, flags);
-
- if (nr_total_entries < num_entries) {
- delta = num_entries - nr_total_entries;
-
- spin_unlock_irqrestore(&free_entries_lock, flags);
-
- for (i = 0; i < delta; i++) {
- entry = kzalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- break;
-
- list_add_tail(&entry->list, &tmp);
- }
-
- spin_lock_irqsave(&free_entries_lock, flags);
-
- list_splice(&tmp, &free_entries);
- nr_total_entries += i;
- num_free_entries += i;
- } else {
- delta = nr_total_entries - num_entries;
-
- for (i = 0; i < delta && !list_empty(&free_entries); i++) {
- entry = __dma_entry_alloc();
- kfree(entry);
- }
-
- nr_total_entries -= i;
- }
-
- if (nr_total_entries != num_entries)
- ret = 1;
-
- spin_unlock_irqrestore(&free_entries_lock, flags);
-
- return ret;
-}
-
/*
* DMA-API debugging init code
*
@@ -761,36 +749,6 @@ int dma_debug_resize_entries(u32 num_entries)
* 2. Preallocate a given number of dma_debug_entry structs
*/
-static int prealloc_memory(u32 num_entries)
-{
- struct dma_debug_entry *entry, *next_entry;
- int i;
-
- for (i = 0; i < num_entries; ++i) {
- entry = kzalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- goto out_err;
-
- list_add_tail(&entry->list, &free_entries);
- }
-
- num_free_entries = num_entries;
- min_free_entries = num_entries;
-
- pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
-
- return 0;
-
-out_err:
-
- list_for_each_entry_safe(entry, next_entry, &free_entries, list) {
- list_del(&entry->list);
- kfree(entry);
- }
-
- return -ENOMEM;
-}
-
static ssize_t filter_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
@@ -850,7 +808,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
* switched off.
*/
if (current_driver_name[0])
- pr_info("DMA-API: switching off dma-debug driver filter\n");
+ pr_info("switching off dma-debug driver filter\n");
current_driver_name[0] = 0;
current_driver = NULL;
goto out_unlock;
@@ -868,7 +826,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
current_driver_name[i] = 0;
current_driver = NULL;
- pr_info("DMA-API: enable driver filter for driver [%s]\n",
+ pr_info("enable driver filter for driver [%s]\n",
current_driver_name);
out_unlock:
@@ -887,7 +845,7 @@ static int dma_debug_fs_init(void)
{
dma_debug_dent = debugfs_create_dir("dma-api", NULL);
if (!dma_debug_dent) {
- pr_err("DMA-API: can not create debugfs directory\n");
+ pr_err("can not create debugfs directory\n");
return -ENOMEM;
}
@@ -926,6 +884,12 @@ static int dma_debug_fs_init(void)
if (!min_free_entries_dent)
goto out_err;
+ nr_total_entries_dent = debugfs_create_u32("nr_total_entries", 0444,
+ dma_debug_dent,
+ &nr_total_entries);
+ if (!nr_total_entries_dent)
+ goto out_err;
+
filter_dent = debugfs_create_file("driver_filter", 0644,
dma_debug_dent, NULL, &filter_fops);
if (!filter_dent)
@@ -973,7 +937,7 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti
count = device_dma_allocations(dev, &entry);
if (count == 0)
break;
- err_printk(dev, entry, "DMA-API: device driver has pending "
+ err_printk(dev, entry, "device driver has pending "
"DMA allocations while released from device "
"[count=%d]\n"
"One of leaked entries details: "
@@ -1009,7 +973,7 @@ void dma_debug_add_bus(struct bus_type *bus)
static int dma_debug_init(void)
{
- int i;
+ int i, nr_pages;
/* Do not use dma_debug_initialized here, since we really want to be
* called to set dma_debug_initialized
@@ -1023,24 +987,31 @@ static int dma_debug_init(void)
}
if (dma_debug_fs_init() != 0) {
- pr_err("DMA-API: error creating debugfs entries - disabling\n");
+ pr_err("error creating debugfs entries - disabling\n");
global_disable = true;
return 0;
}
- if (prealloc_memory(nr_prealloc_entries) != 0) {
- pr_err("DMA-API: debugging out of memory error - disabled\n");
+ nr_pages = DIV_ROUND_UP(nr_prealloc_entries, DMA_DEBUG_DYNAMIC_ENTRIES);
+ for (i = 0; i < nr_pages; ++i)
+ dma_debug_create_entries(GFP_KERNEL);
+ if (num_free_entries >= nr_prealloc_entries) {
+ pr_info("preallocated %d debug entries\n", nr_total_entries);
+ } else if (num_free_entries > 0) {
+ pr_warn("%d debug entries requested but only %d allocated\n",
+ nr_prealloc_entries, nr_total_entries);
+ } else {
+ pr_err("debugging out of memory error - disabled\n");
global_disable = true;
return 0;
}
-
- nr_total_entries = num_free_entries;
+ min_free_entries = num_free_entries;
dma_debug_initialized = true;
- pr_info("DMA-API: debugging enabled by kernel config\n");
+ pr_info("debugging enabled by kernel config\n");
return 0;
}
core_initcall(dma_debug_init);
@@ -1051,7 +1022,7 @@ static __init int dma_debug_cmdline(char *str)
return -EINVAL;
if (strncmp(str, "off", 3) == 0) {
- pr_info("DMA-API: debugging disabled on kernel command line\n");
+ pr_info("debugging disabled on kernel command line\n");
global_disable = true;
}
@@ -1085,11 +1056,11 @@ static void check_unmap(struct dma_debug_entry *ref)
if (dma_mapping_error(ref->dev, ref->dev_addr)) {
err_printk(ref->dev, NULL,
- "DMA-API: device driver tries to free an "
+ "device driver tries to free an "
"invalid DMA memory address\n");
} else {
err_printk(ref->dev, NULL,
- "DMA-API: device driver tries to free DMA "
+ "device driver tries to free DMA "
"memory it has not allocated [device "
"address=0x%016llx] [size=%llu bytes]\n",
ref->dev_addr, ref->size);
@@ -1098,7 +1069,7 @@ static void check_unmap(struct dma_debug_entry *ref)
}
if (ref->size != entry->size) {
- err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ err_printk(ref->dev, entry, "device driver frees "
"DMA memory with different size "
"[device address=0x%016llx] [map size=%llu bytes] "
"[unmap size=%llu bytes]\n",
@@ -1106,7 +1077,7 @@ static void check_unmap(struct dma_debug_entry *ref)
}
if (ref->type != entry->type) {
- err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ err_printk(ref->dev, entry, "device driver frees "
"DMA memory with wrong function "
"[device address=0x%016llx] [size=%llu bytes] "
"[mapped as %s] [unmapped as %s]\n",
@@ -1114,7 +1085,7 @@ static void check_unmap(struct dma_debug_entry *ref)
type2name[entry->type], type2name[ref->type]);
} else if ((entry->type == dma_debug_coherent) &&
(phys_addr(ref) != phys_addr(entry))) {
- err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ err_printk(ref->dev, entry, "device driver frees "
"DMA memory with different CPU address "
"[device address=0x%016llx] [size=%llu bytes] "
"[cpu alloc address=0x%016llx] "
@@ -1126,7 +1097,7 @@ static void check_unmap(struct dma_debug_entry *ref)
if (ref->sg_call_ents && ref->type == dma_debug_sg &&
ref->sg_call_ents != entry->sg_call_ents) {
- err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ err_printk(ref->dev, entry, "device driver frees "
"DMA sg list with different entry count "
"[map count=%d] [unmap count=%d]\n",
entry->sg_call_ents, ref->sg_call_ents);
@@ -1137,7 +1108,7 @@ static void check_unmap(struct dma_debug_entry *ref)
* DMA API don't handle this properly, so check for it here
*/
if (ref->direction != entry->direction) {
- err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ err_printk(ref->dev, entry, "device driver frees "
"DMA memory with different direction "
"[device address=0x%016llx] [size=%llu bytes] "
"[mapped with %s] [unmapped with %s]\n",
@@ -1153,7 +1124,7 @@ static void check_unmap(struct dma_debug_entry *ref)
*/
if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
err_printk(ref->dev, entry,
- "DMA-API: device driver failed to check map error"
+ "device driver failed to check map error"
"[device address=0x%016llx] [size=%llu bytes] "
"[mapped as %s]",
ref->dev_addr, ref->size,
@@ -1178,7 +1149,7 @@ static void check_for_stack(struct device *dev,
return;
addr = page_address(page) + offset;
if (object_is_on_stack(addr))
- err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
+ err_printk(dev, NULL, "device driver maps memory from stack [addr=%p]\n", addr);
} else {
/* Stack is vmalloced. */
int i;
@@ -1188,7 +1159,7 @@ static void check_for_stack(struct device *dev,
continue;
addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
- err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
+ err_printk(dev, NULL, "device driver maps memory from stack [probable addr=%p]\n", addr);
break;
}
}
@@ -1208,7 +1179,7 @@ static void check_for_illegal_area(struct device *dev, void *addr, unsigned long
{
if (overlap(addr, len, _stext, _etext) ||
overlap(addr, len, __start_rodata, __end_rodata))
- err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
+ err_printk(dev, NULL, "device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
}
static void check_sync(struct device *dev,
@@ -1224,7 +1195,7 @@ static void check_sync(struct device *dev,
entry = bucket_find_contain(&bucket, ref, &flags);
if (!entry) {
- err_printk(dev, NULL, "DMA-API: device driver tries "
+ err_printk(dev, NULL, "device driver tries "
"to sync DMA memory it has not allocated "
"[device address=0x%016llx] [size=%llu bytes]\n",
(unsigned long long)ref->dev_addr, ref->size);
@@ -1232,7 +1203,7 @@ static void check_sync(struct device *dev,
}
if (ref->size > entry->size) {
- err_printk(dev, entry, "DMA-API: device driver syncs"
+ err_printk(dev, entry, "device driver syncs"
" DMA memory outside allocated range "
"[device address=0x%016llx] "
"[allocation size=%llu bytes] "
@@ -1245,7 +1216,7 @@ static void check_sync(struct device *dev,
goto out;
if (ref->direction != entry->direction) {
- err_printk(dev, entry, "DMA-API: device driver syncs "
+ err_printk(dev, entry, "device driver syncs "
"DMA memory with different direction "
"[device address=0x%016llx] [size=%llu bytes] "
"[mapped with %s] [synced with %s]\n",
@@ -1256,7 +1227,7 @@ static void check_sync(struct device *dev,
if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
!(ref->direction == DMA_TO_DEVICE))
- err_printk(dev, entry, "DMA-API: device driver syncs "
+ err_printk(dev, entry, "device driver syncs "
"device read-only DMA memory for cpu "
"[device address=0x%016llx] [size=%llu bytes] "
"[mapped with %s] [synced with %s]\n",
@@ -1266,7 +1237,7 @@ static void check_sync(struct device *dev,
if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
!(ref->direction == DMA_FROM_DEVICE))
- err_printk(dev, entry, "DMA-API: device driver syncs "
+ err_printk(dev, entry, "device driver syncs "
"device write-only DMA memory to device "
"[device address=0x%016llx] [size=%llu bytes] "
"[mapped with %s] [synced with %s]\n",
@@ -1276,7 +1247,7 @@ static void check_sync(struct device *dev,
if (ref->sg_call_ents && ref->type == dma_debug_sg &&
ref->sg_call_ents != entry->sg_call_ents) {
- err_printk(ref->dev, entry, "DMA-API: device driver syncs "
+ err_printk(ref->dev, entry, "device driver syncs "
"DMA sg list with different entry count "
"[map count=%d] [sync count=%d]\n",
entry->sg_call_ents, ref->sg_call_ents);
@@ -1297,7 +1268,7 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg)
* whoever generated the list forgot to check them.
*/
if (sg->length > max_seg)
- err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
+ err_printk(dev, NULL, "mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
sg->length, max_seg);
/*
* In some cases this could potentially be the DMA API
@@ -1307,7 +1278,7 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg)
start = sg_dma_address(sg);
end = start + sg_dma_len(sg) - 1;
if ((start ^ end) & ~boundary)
- err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
+ err_printk(dev, NULL, "mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
start, end, boundary);
#endif
}
@@ -1319,11 +1290,11 @@ void debug_dma_map_single(struct device *dev, const void *addr,
return;
if (!virt_addr_valid(addr))
- err_printk(dev, NULL, "DMA-API: device driver maps memory from invalid area [addr=%p] [len=%lu]\n",
+ err_printk(dev, NULL, "device driver maps memory from invalid area [addr=%p] [len=%lu]\n",
addr, len);
if (is_vmalloc_addr(addr))
- err_printk(dev, NULL, "DMA-API: device driver maps memory from vmalloc area [addr=%p] [len=%lu]\n",
+ err_printk(dev, NULL, "device driver maps memory from vmalloc area [addr=%p] [len=%lu]\n",
addr, len);
}
EXPORT_SYMBOL(debug_dma_map_single);
@@ -1662,48 +1633,6 @@ void debug_dma_sync_single_for_device(struct device *dev,
}
EXPORT_SYMBOL(debug_dma_sync_single_for_device);
-void debug_dma_sync_single_range_for_cpu(struct device *dev,
- dma_addr_t dma_handle,
- unsigned long offset, size_t size,
- int direction)
-{
- struct dma_debug_entry ref;
-
- if (unlikely(dma_debug_disabled()))
- return;
-
- ref.type = dma_debug_single;
- ref.dev = dev;
- ref.dev_addr = dma_handle;
- ref.size = offset + size;
- ref.direction = direction;
- ref.sg_call_ents = 0;
-
- check_sync(dev, &ref, true);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
-
-void debug_dma_sync_single_range_for_device(struct device *dev,
- dma_addr_t dma_handle,
- unsigned long offset,
- size_t size, int direction)
-{
- struct dma_debug_entry ref;
-
- if (unlikely(dma_debug_disabled()))
- return;
-
- ref.type = dma_debug_single;
- ref.dev = dev;
- ref.dev_addr = dma_handle;
- ref.size = offset + size;
- ref.direction = direction;
- ref.sg_call_ents = 0;
-
- check_sync(dev, &ref, false);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
-
void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
{
@@ -1780,7 +1709,7 @@ static int __init dma_debug_driver_setup(char *str)
}
if (current_driver_name[0])
- pr_info("DMA-API: enable driver filter for driver [%s]\n",
+ pr_info("enable driver filter for driver [%s]\n",
current_driver_name);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 375c77e8d52f..355d16acee6d 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -13,6 +13,7 @@
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/set_memory.h>
+#include <linux/swiotlb.h>
/*
* Most architectures use ZONE_DMA for the first 16 Megabytes, but
@@ -30,27 +31,16 @@ static inline bool force_dma_unencrypted(void)
return sev_active();
}
-static bool
-check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
- const char *caller)
+static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
- if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
- if (!dev->dma_mask) {
- dev_err(dev,
- "%s: call on device without dma_mask\n",
- caller);
- return false;
- }
-
- if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
- dev_err(dev,
- "%s: overflow %pad+%zu of device mask %llx bus mask %llx\n",
- caller, &dma_addr, size,
- *dev->dma_mask, dev->bus_dma_mask);
- }
- return false;
+ if (!dev->dma_mask) {
+ dev_err_once(dev, "DMA map on device without dma_mask\n");
+ } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
+ dev_err_once(dev,
+ "overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
+ &dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
}
- return true;
+ WARN_ON_ONCE(1);
}
static inline dma_addr_t phys_to_dma_direct(struct device *dev,
@@ -103,14 +93,13 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
}
-void *dma_direct_alloc_pages(struct device *dev, size_t size,
+struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
int page_order = get_order(size);
struct page *page = NULL;
u64 phys_mask;
- void *ret;
if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;
@@ -150,11 +139,34 @@ again:
}
}
+ return page;
+}
+
+void *dma_direct_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+{
+ struct page *page;
+ void *ret;
+
+ page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
if (!page)
return NULL;
+
+ if (PageHighMem(page)) {
+ /*
+ * Depending on the cma= arguments and per-arch setup
+ * dma_alloc_from_contiguous could return highmem pages.
+ * Without remapping there is no way to return them here,
+ * so log an error and fail.
+ */
+ dev_info(dev, "Rejecting highmem page from CMA.\n");
+ __dma_direct_free_pages(dev, size, page);
+ return NULL;
+ }
+
ret = page_address(page);
if (force_dma_unencrypted()) {
- set_memory_decrypted((unsigned long)ret, 1 << page_order);
+ set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
*dma_handle = __phys_to_dma(dev, page_to_phys(page));
} else {
*dma_handle = phys_to_dma(dev, page_to_phys(page));
@@ -163,20 +175,22 @@ again:
return ret;
}
-/*
- * NOTE: this function must never look at the dma_addr argument, because we want
- * to be able to use it as a helper for iommu implementations as well.
- */
+void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
+{
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (!dma_release_from_contiguous(dev, page, count))
+ __free_pages(page, get_order(size));
+}
+
void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned int page_order = get_order(size);
if (force_dma_unencrypted())
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
- if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
- free_pages((unsigned long)cpu_addr, page_order);
+ __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
}
void *dma_direct_alloc(struct device *dev, size_t size,
@@ -196,67 +210,111 @@ void dma_direct_free(struct device *dev, size_t size,
dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}
-static void dma_direct_sync_single_for_device(struct device *dev,
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
+ defined(CONFIG_SWIOTLB)
+void dma_direct_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
- if (dev_is_dma_coherent(dev))
- return;
- arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
+ phys_addr_t paddr = dma_to_phys(dev, addr);
+
+ if (unlikely(is_swiotlb_buffer(paddr)))
+ swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
+
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_for_device(dev, paddr, size, dir);
}
+EXPORT_SYMBOL(dma_direct_sync_single_for_device);
-static void dma_direct_sync_sg_for_device(struct device *dev,
+void dma_direct_sync_sg_for_device(struct device *dev,
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
- if (dev_is_dma_coherent(dev))
- return;
+ for_each_sg(sgl, sg, nents, i) {
+ if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
+ swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
+ dir, SYNC_FOR_DEVICE);
- for_each_sg(sgl, sg, nents, i)
- arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_for_device(dev, sg_phys(sg), sg->length,
+ dir);
+ }
}
+EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
+#endif
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
- defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
-static void dma_direct_sync_single_for_cpu(struct device *dev,
+ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
+ defined(CONFIG_SWIOTLB)
+void dma_direct_sync_single_for_cpu(struct device *dev,
dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
- if (dev_is_dma_coherent(dev))
- return;
- arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
- arch_sync_dma_for_cpu_all(dev);
+ phys_addr_t paddr = dma_to_phys(dev, addr);
+
+ if (!dev_is_dma_coherent(dev)) {
+ arch_sync_dma_for_cpu(dev, paddr, size, dir);
+ arch_sync_dma_for_cpu_all(dev);
+ }
+
+ if (unlikely(is_swiotlb_buffer(paddr)))
+ swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}
+EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);
-static void dma_direct_sync_sg_for_cpu(struct device *dev,
+void dma_direct_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
- if (dev_is_dma_coherent(dev))
- return;
+ for_each_sg(sgl, sg, nents, i) {
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+
+ if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
+ swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, dir,
+ SYNC_FOR_CPU);
+ }
- for_each_sg(sgl, sg, nents, i)
- arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
- arch_sync_dma_for_cpu_all(dev);
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_for_cpu_all(dev);
}
+EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);
-static void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
+void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
+ phys_addr_t phys = dma_to_phys(dev, addr);
+
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+
+ if (unlikely(is_swiotlb_buffer(phys)))
+ swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
}
+EXPORT_SYMBOL(dma_direct_unmap_page);
-static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
+void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir, unsigned long attrs)
{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir);
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i)
+ dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
+ attrs);
}
+EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif
+static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
+ size_t size)
+{
+ return swiotlb_force != SWIOTLB_FORCE &&
+ (!dev || dma_capable(dev, dma_addr, size));
+}
+
dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
@@ -264,13 +322,17 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t dma_addr = phys_to_dma(dev, phys);
- if (!check_addr(dev, dma_addr, size, __func__))
- return DIRECT_MAPPING_ERROR;
+ if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
+ !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
+ report_addr(dev, dma_addr, size);
+ return DMA_MAPPING_ERROR;
+ }
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_direct_sync_single_for_device(dev, dma_addr, size, dir);
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(dev, phys, size, dir);
return dma_addr;
}
+EXPORT_SYMBOL(dma_direct_map_page);
int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs)
@@ -279,18 +341,20 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
struct scatterlist *sg;
for_each_sg(sgl, sg, nents, i) {
- BUG_ON(!sg_page(sg));
-
- sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg));
- if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__))
- return 0;
+ sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
+ sg->offset, sg->length, dir, attrs);
+ if (sg->dma_address == DMA_MAPPING_ERROR)
+ goto out_unmap;
sg_dma_len(sg) = sg->length;
}
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_direct_sync_sg_for_device(dev, sgl, nents, dir);
return nents;
+
+out_unmap:
+ dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ return 0;
}
+EXPORT_SYMBOL(dma_direct_map_sg);
/*
* Because 32-bit DMA masks are so common we expect every architecture to be
@@ -316,31 +380,3 @@ int dma_direct_supported(struct device *dev, u64 mask)
*/
return mask >= __phys_to_dma(dev, min_mask);
}
-
-int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == DIRECT_MAPPING_ERROR;
-}
-
-const struct dma_map_ops dma_direct_ops = {
- .alloc = dma_direct_alloc,
- .free = dma_direct_free,
- .map_page = dma_direct_map_page,
- .map_sg = dma_direct_map_sg,
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE)
- .sync_single_for_device = dma_direct_sync_single_for_device,
- .sync_sg_for_device = dma_direct_sync_sg_for_device,
-#endif
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
- defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
- .sync_single_for_cpu = dma_direct_sync_single_for_cpu,
- .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
- .unmap_page = dma_direct_unmap_page,
- .unmap_sg = dma_direct_unmap_sg,
-#endif
- .get_required_mask = dma_direct_get_required_mask,
- .dma_supported = dma_direct_supported,
- .mapping_error = dma_direct_mapping_error,
- .cache_sync = arch_dma_cache_sync,
-};
-EXPORT_SYMBOL(dma_direct_ops);
diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c
new file mode 100644
index 000000000000..05607642c888
--- /dev/null
+++ b/kernel/dma/dummy.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Dummy DMA ops that always fail.
+ */
+#include <linux/dma-mapping.h>
+
+static int dma_dummy_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+ return -ENXIO;
+}
+
+static dma_addr_t dma_dummy_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ return DMA_MAPPING_ERROR;
+}
+
+static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl,
+ int nelems, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ return 0;
+}
+
+static int dma_dummy_supported(struct device *hwdev, u64 mask)
+{
+ return 0;
+}
+
+const struct dma_map_ops dma_dummy_ops = {
+ .mmap = dma_dummy_mmap,
+ .map_page = dma_dummy_map_page,
+ .map_sg = dma_dummy_map_sg,
+ .dma_supported = dma_dummy_supported,
+};
+EXPORT_SYMBOL(dma_dummy_ops);
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58dec7a92b7b..d7c34d2d1ba5 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -5,8 +5,9 @@
* Copyright (c) 2006 SUSE Linux Products GmbH
* Copyright (c) 2006 Tejun Heo <teheo@suse.de>
*/
-
+#include <linux/memblock.h> /* for max_pfn */
#include <linux/acpi.h>
+#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <linux/export.h>
#include <linux/gfp.h>
@@ -223,7 +224,20 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
return ret;
}
-EXPORT_SYMBOL(dma_common_get_sgtable);
+
+int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (!dma_is_direct(ops) && ops->get_sgtable)
+ return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
+ attrs);
+ return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
+ attrs);
+}
+EXPORT_SYMBOL(dma_get_sgtable_attrs);
/*
* Create userspace mapping for the DMA-coherent memory.
@@ -261,88 +275,179 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
return -ENXIO;
#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
}
-EXPORT_SYMBOL(dma_common_mmap);
-#ifdef CONFIG_MMU
-static struct vm_struct *__dma_common_pages_remap(struct page **pages,
- size_t size, unsigned long vm_flags, pgprot_t prot,
- const void *caller)
+/**
+ * dma_mmap_attrs - map a coherent DMA allocation into user space
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @vma: vm_area_struct describing requested user mapping
+ * @cpu_addr: kernel CPU-view address returned from dma_alloc_attrs
+ * @dma_addr: device-view address returned from dma_alloc_attrs
+ * @size: size of memory originally requested in dma_alloc_attrs
+ * @attrs: attributes of mapping properties requested in dma_alloc_attrs
+ *
+ * Map a coherent DMA buffer previously allocated by dma_alloc_attrs into user
+ * space. The coherent DMA buffer must not be freed by the driver until the
+ * user space mapping has been released.
+ */
+int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
{
- struct vm_struct *area;
+ const struct dma_map_ops *ops = get_dma_ops(dev);
- area = get_vm_area_caller(size, vm_flags, caller);
- if (!area)
- return NULL;
+ if (!dma_is_direct(ops) && ops->mmap)
+ return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+ return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+EXPORT_SYMBOL(dma_mmap_attrs);
- if (map_vm_area(area, prot, pages)) {
- vunmap(area->addr);
- return NULL;
+#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK
+static u64 dma_default_get_required_mask(struct device *dev)
+{
+ u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
+ u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
+ u64 mask;
+
+ if (!high_totalram) {
+ /* convert to mask just covering totalram */
+ low_totalram = (1 << (fls(low_totalram) - 1));
+ low_totalram += low_totalram - 1;
+ mask = low_totalram;
+ } else {
+ high_totalram = (1 << (fls(high_totalram) - 1));
+ high_totalram += high_totalram - 1;
+ mask = (((u64)high_totalram) << 32) + 0xffffffff;
}
+ return mask;
+}
- return area;
+u64 dma_get_required_mask(struct device *dev)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (dma_is_direct(ops))
+ return dma_direct_get_required_mask(dev);
+ if (ops->get_required_mask)
+ return ops->get_required_mask(dev);
+ return dma_default_get_required_mask(dev);
}
+EXPORT_SYMBOL_GPL(dma_get_required_mask);
+#endif
-/*
- * remaps an array of PAGE_SIZE pages into another vm_area
- * Cannot be used in non-sleeping contexts
- */
-void *dma_common_pages_remap(struct page **pages, size_t size,
- unsigned long vm_flags, pgprot_t prot,
- const void *caller)
+#ifndef arch_dma_alloc_attrs
+#define arch_dma_alloc_attrs(dev) (true)
+#endif
+
+void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t flag, unsigned long attrs)
{
- struct vm_struct *area;
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ void *cpu_addr;
+
+ WARN_ON_ONCE(dev && !dev->coherent_dma_mask);
- area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
- if (!area)
+ if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
+ return cpu_addr;
+
+ /* let the implementation decide on the zone to allocate from: */
+ flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
+
+ if (!arch_dma_alloc_attrs(&dev))
return NULL;
- area->pages = pages;
+ if (dma_is_direct(ops))
+ cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
+ else if (ops->alloc)
+ cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
+ else
+ return NULL;
- return area->addr;
+ debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+ return cpu_addr;
}
+EXPORT_SYMBOL(dma_alloc_attrs);
-/*
- * remaps an allocated contiguous region into another vm_area.
- * Cannot be used in non-sleeping contexts
- */
-
-void *dma_common_contiguous_remap(struct page *page, size_t size,
- unsigned long vm_flags,
- pgprot_t prot, const void *caller)
+void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle, unsigned long attrs)
{
- int i;
- struct page **pages;
- struct vm_struct *area;
+ const struct dma_map_ops *ops = get_dma_ops(dev);
- pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
- if (!pages)
- return NULL;
+ if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
+ return;
+ /*
+ * On non-coherent platforms which implement DMA-coherent buffers via
+ * non-cacheable remaps, ops->free() may call vunmap(). Thus getting
+ * this far in IRQ context is a) at risk of a BUG_ON() or trying to
+ * sleep on some machines, and b) an indication that the driver is
+ * probably misusing the coherent API anyway.
+ */
+ WARN_ON(irqs_disabled());
+
+ if (!cpu_addr)
+ return;
- for (i = 0; i < (size >> PAGE_SHIFT); i++)
- pages[i] = nth_page(page, i);
+ debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+ if (dma_is_direct(ops))
+ dma_direct_free(dev, size, cpu_addr, dma_handle, attrs);
+ else if (ops->free)
+ ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
+EXPORT_SYMBOL(dma_free_attrs);
- area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+static inline void dma_check_mask(struct device *dev, u64 mask)
+{
+ if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1)))
+ dev_warn(dev, "SME is active, device will require DMA bounce buffers\n");
+}
- kfree(pages);
+int dma_supported(struct device *dev, u64 mask)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
- if (!area)
- return NULL;
- return area->addr;
+ if (dma_is_direct(ops))
+ return dma_direct_supported(dev, mask);
+ if (!ops->dma_supported)
+ return 1;
+ return ops->dma_supported(dev, mask);
}
+EXPORT_SYMBOL(dma_supported);
-/*
- * unmaps a range previously mapped by dma_common_*_remap
- */
-void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
+#ifndef HAVE_ARCH_DMA_SET_MASK
+int dma_set_mask(struct device *dev, u64 mask)
{
- struct vm_struct *area = find_vm_area(cpu_addr);
+ if (!dev->dma_mask || !dma_supported(dev, mask))
+ return -EIO;
- if (!area || (area->flags & vm_flags) != vm_flags) {
- WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
- return;
- }
+ dma_check_mask(dev, mask);
+ *dev->dma_mask = mask;
+ return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+#endif
+
+#ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
+int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+ if (!dma_supported(dev, mask))
+ return -EIO;
- unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
- vunmap(cpu_addr);
+ dma_check_mask(dev, mask);
+ dev->coherent_dma_mask = mask;
+ return 0;
}
+EXPORT_SYMBOL(dma_set_coherent_mask);
#endif
+
+void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!valid_dma_direction(dir));
+
+ if (dma_is_direct(ops))
+ arch_dma_cache_sync(dev, vaddr, size, dir);
+ else if (ops->cache_sync)
+ ops->cache_sync(dev, vaddr, size, dir);
+}
+EXPORT_SYMBOL(dma_cache_sync);
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
new file mode 100644
index 000000000000..18cc09fc27b9
--- /dev/null
+++ b/kernel/dma/remap.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (c) 2014 The Linux Foundation
+ */
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/dma-contiguous.h>
+#include <linux/init.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+static struct vm_struct *__dma_common_pages_remap(struct page **pages,
+ size_t size, unsigned long vm_flags, pgprot_t prot,
+ const void *caller)
+{
+ struct vm_struct *area;
+
+ area = get_vm_area_caller(size, vm_flags, caller);
+ if (!area)
+ return NULL;
+
+ if (map_vm_area(area, prot, pages)) {
+ vunmap(area->addr);
+ return NULL;
+ }
+
+ return area;
+}
+
+/*
+ * Remaps an array of PAGE_SIZE pages into another vm_area.
+ * Cannot be used in non-sleeping contexts
+ */
+void *dma_common_pages_remap(struct page **pages, size_t size,
+ unsigned long vm_flags, pgprot_t prot,
+ const void *caller)
+{
+ struct vm_struct *area;
+
+ area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+ if (!area)
+ return NULL;
+
+ area->pages = pages;
+
+ return area->addr;
+}
+
+/*
+ * Remaps an allocated contiguous region into another vm_area.
+ * Cannot be used in non-sleeping contexts
+ */
+void *dma_common_contiguous_remap(struct page *page, size_t size,
+ unsigned long vm_flags,
+ pgprot_t prot, const void *caller)
+{
+ int i;
+ struct page **pages;
+ struct vm_struct *area;
+
+ pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
+ if (!pages)
+ return NULL;
+
+ for (i = 0; i < (size >> PAGE_SHIFT); i++)
+ pages[i] = nth_page(page, i);
+
+ area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+
+ kfree(pages);
+
+ if (!area)
+ return NULL;
+ return area->addr;
+}
+
+/*
+ * Unmaps a range previously mapped by dma_common_*_remap
+ */
+void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
+{
+ struct vm_struct *area = find_vm_area(cpu_addr);
+
+ if (!area || (area->flags & vm_flags) != vm_flags) {
+ WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+ return;
+ }
+
+ unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
+ vunmap(cpu_addr);
+}
+
+#ifdef CONFIG_DMA_DIRECT_REMAP
+static struct gen_pool *atomic_pool __ro_after_init;
+
+#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
+static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
+
+static int __init early_coherent_pool(char *p)
+{
+ atomic_pool_size = memparse(p, &p);
+ return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
+
+int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
+{
+ unsigned int pool_size_order = get_order(atomic_pool_size);
+ unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
+ struct page *page;
+ void *addr;
+ int ret;
+
+ if (dev_get_cma_area(NULL))
+ page = dma_alloc_from_contiguous(NULL, nr_pages,
+ pool_size_order, false);
+ else
+ page = alloc_pages(gfp, pool_size_order);
+ if (!page)
+ goto out;
+
+ arch_dma_prep_coherent(page, atomic_pool_size);
+
+ atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
+ if (!atomic_pool)
+ goto free_page;
+
+ addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
+ prot, __builtin_return_address(0));
+ if (!addr)
+ goto destroy_genpool;
+
+ ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
+ page_to_phys(page), atomic_pool_size, -1);
+ if (ret)
+ goto remove_mapping;
+ gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
+
+ pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
+ atomic_pool_size / 1024);
+ return 0;
+
+remove_mapping:
+ dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
+destroy_genpool:
+ gen_pool_destroy(atomic_pool);
+ atomic_pool = NULL;
+free_page:
+ if (!dma_release_from_contiguous(NULL, page, nr_pages))
+ __free_pages(page, pool_size_order);
+out:
+ pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
+ atomic_pool_size / 1024);
+ return -ENOMEM;
+}
+
+bool dma_in_atomic_pool(void *start, size_t size)
+{
+ return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+}
+
+void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
+{
+ unsigned long val;
+ void *ptr = NULL;
+
+ if (!atomic_pool) {
+ WARN(1, "coherent pool not initialised!\n");
+ return NULL;
+ }
+
+ val = gen_pool_alloc(atomic_pool, size);
+ if (val) {
+ phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
+
+ *ret_page = pfn_to_page(__phys_to_pfn(phys));
+ ptr = (void *)val;
+ memset(ptr, 0, size);
+ }
+
+ return ptr;
+}
+
+bool dma_free_from_pool(void *start, size_t size)
+{
+ if (!dma_in_atomic_pool(start, size))
+ return false;
+ gen_pool_free(atomic_pool, (unsigned long)start, size);
+ return true;
+}
+
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t flags, unsigned long attrs)
+{
+ struct page *page = NULL;
+ void *ret;
+
+ size = PAGE_ALIGN(size);
+
+ if (!gfpflags_allow_blocking(flags) &&
+ !(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) {
+ ret = dma_alloc_from_pool(size, &page, flags);
+ if (!ret)
+ return NULL;
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ return ret;
+ }
+
+ page = __dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
+ if (!page)
+ return NULL;
+
+ /* remove any dirty cache lines on the kernel alias */
+ arch_dma_prep_coherent(page, size);
+
+ if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
+ return page; /* opaque cookie */
+
+ /* create a coherent mapping */
+ ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
+ arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
+ __builtin_return_address(0));
+ if (!ret) {
+ __dma_direct_free_pages(dev, size, page);
+ return ret;
+ }
+
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ memset(ret, 0, size);
+
+ return ret;
+}
+
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, unsigned long attrs)
+{
+ if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+ /* vaddr is a struct page cookie, not a kernel address */
+ __dma_direct_free_pages(dev, size, vaddr);
+ } else if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
+ phys_addr_t phys = dma_to_phys(dev, dma_handle);
+ struct page *page = pfn_to_page(__phys_to_pfn(phys));
+
+ vunmap(vaddr);
+ __dma_direct_free_pages(dev, size, page);
+ }
+}
+
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+ dma_addr_t dma_addr)
+{
+ return __phys_to_pfn(dma_to_phys(dev, dma_addr));
+}
+#endif /* CONFIG_DMA_DIRECT_REMAP */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 045930e32c0e..d6361776dc5c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -21,7 +21,6 @@
#include <linux/cache.h>
#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/spinlock.h>
@@ -65,7 +64,7 @@ enum swiotlb_force swiotlb_force;
* swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
* API.
*/
-static phys_addr_t io_tlb_start, io_tlb_end;
+phys_addr_t io_tlb_start, io_tlb_end;
/*
* The number of IO TLB blocks (in groups of 64) between io_tlb_start and
@@ -383,11 +382,6 @@ void __init swiotlb_exit(void)
max_segment = 0;
}
-static int is_swiotlb_buffer(phys_addr_t paddr)
-{
- return paddr >= io_tlb_start && paddr < io_tlb_end;
-}
-
/*
* Bounce: copy the swiotlb buffer back to the original dma location
*/
@@ -526,7 +520,7 @@ not_found:
spin_unlock_irqrestore(&io_tlb_lock, flags);
if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
- return SWIOTLB_MAP_ERROR;
+ return DMA_MAPPING_ERROR;
found:
spin_unlock_irqrestore(&io_tlb_lock, flags);
@@ -623,237 +617,36 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
}
}
-static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys,
+/*
+ * Create a swiotlb mapping for the buffer at @phys, and in case of DMAing
+ * to the device copy the data into it as well.
+ */
+bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- dma_addr_t dma_addr;
+ trace_swiotlb_bounced(dev, *dma_addr, size, swiotlb_force);
if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) {
dev_warn_ratelimited(dev,
"Cannot do DMA to address %pa\n", phys);
- return DIRECT_MAPPING_ERROR;
+ return false;
}
/* Oh well, have to allocate and map a bounce buffer. */
*phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
*phys, size, dir, attrs);
- if (*phys == SWIOTLB_MAP_ERROR)
- return DIRECT_MAPPING_ERROR;
+ if (*phys == DMA_MAPPING_ERROR)
+ return false;
/* Ensure that the address returned is DMA'ble */
- dma_addr = __phys_to_dma(dev, *phys);
- if (unlikely(!dma_capable(dev, dma_addr, size))) {
+ *dma_addr = __phys_to_dma(dev, *phys);
+ if (unlikely(!dma_capable(dev, *dma_addr, size))) {
swiotlb_tbl_unmap_single(dev, *phys, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
- return DIRECT_MAPPING_ERROR;
- }
-
- return dma_addr;
-}
-
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode. The
- * physical address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
- */
-dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- phys_addr_t phys = page_to_phys(page) + offset;
- dma_addr_t dev_addr = phys_to_dma(dev, phys);
-
- BUG_ON(dir == DMA_NONE);
- /*
- * If the address happens to be in the device's DMA window,
- * we can safely return the device addr and not worry about bounce
- * buffering it.
- */
- if (!dma_capable(dev, dev_addr, size) ||
- swiotlb_force == SWIOTLB_FORCE) {
- trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
- dev_addr = swiotlb_bounce_page(dev, &phys, size, dir, attrs);
- }
-
- if (!dev_is_dma_coherent(dev) &&
- (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0 &&
- dev_addr != DIRECT_MAPPING_ERROR)
- arch_sync_dma_for_device(dev, phys, size, dir);
-
- return dev_addr;
-}
-
-/*
- * Unmap a single streaming mode DMA translation. The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_page call. All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
-
- BUG_ON(dir == DMA_NONE);
-
- if (!dev_is_dma_coherent(hwdev) &&
- (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
-
- if (is_swiotlb_buffer(paddr)) {
- swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
- return;
+ return false;
}
- if (dir != DMA_FROM_DEVICE)
- return;
-
- /*
- * phys_to_virt doesn't work with hihgmem page but we could
- * call dma_mark_clean() with hihgmem page here. However, we
- * are fine since dma_mark_clean() is null on POWERPC. We can
- * make dma_mark_clean() take a physical address if necessary.
- */
- dma_mark_clean(phys_to_virt(paddr), size);
-}
-
-/*
- * Make physical memory consistent for a single streaming mode DMA translation
- * after a transfer.
- *
- * If you perform a swiotlb_map_page() but wish to interrogate the buffer
- * using the cpu, yet do not wish to teardown the dma mapping, you must
- * call this function before doing so. At the next point you give the dma
- * address back to the card, you must first perform a
- * swiotlb_dma_sync_for_device, and then the device again owns the buffer
- */
-static void
-swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- enum dma_sync_target target)
-{
- phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
-
- BUG_ON(dir == DMA_NONE);
-
- if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU)
- arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
-
- if (is_swiotlb_buffer(paddr))
- swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
-
- if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE)
- arch_sync_dma_for_device(hwdev, paddr, size, dir);
-
- if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE)
- dma_mark_clean(phys_to_virt(paddr), size);
-}
-
-void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir)
-{
- swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir)
-{
- swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
-}
-
-/*
- * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_page
- * interface. Here the scatter gather list elements are each tagged with the
- * appropriate dma address and length. They are obtained via
- * sg_dma_{address,length}(SG).
- *
- * Device ownership issues as mentioned above for swiotlb_map_page are the
- * same here.
- */
-int
-swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir, unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nelems, i) {
- sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset,
- sg->length, dir, attrs);
- if (sg->dma_address == DIRECT_MAPPING_ERROR)
- goto out_error;
- sg_dma_len(sg) = sg->length;
- }
-
- return nelems;
-
-out_error:
- swiotlb_unmap_sg_attrs(dev, sgl, i, dir,
- attrs | DMA_ATTR_SKIP_CPU_SYNC);
- sg_dma_len(sgl) = 0;
- return 0;
-}
-
-/*
- * Unmap a set of streaming mode DMA translations. Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_page() above.
- */
-void
-swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
- int nelems, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- BUG_ON(dir == DMA_NONE);
-
- for_each_sg(sgl, sg, nelems, i)
- swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), dir,
- attrs);
-}
-
-/*
- * Make physical memory consistent for a set of streaming mode DMA translations
- * after a transfer.
- *
- * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
- * and usage.
- */
-static void
-swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
- int nelems, enum dma_data_direction dir,
- enum dma_sync_target target)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nelems, i)
- swiotlb_sync_single(hwdev, sg->dma_address,
- sg_dma_len(sg), dir, target);
-}
-
-void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir)
-{
- swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir)
-{
- swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
+ return true;
}
/*
@@ -867,19 +660,3 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
-
-const struct dma_map_ops swiotlb_dma_ops = {
- .mapping_error = dma_direct_mapping_error,
- .alloc = dma_direct_alloc,
- .free = dma_direct_free,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .dma_supported = dma_direct_supported,
-};
-EXPORT_SYMBOL(swiotlb_dma_ops);
diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c
index 631ddec4b60a..ebe128833af7 100644
--- a/kernel/dma/virt.c
+++ b/kernel/dma/virt.c
@@ -13,7 +13,7 @@ static void *dma_virt_alloc(struct device *dev, size_t size,
{
void *ret;
- ret = (void *)__get_free_pages(gfp, get_order(size));
+ ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size));
if (ret)
*dma_handle = (uintptr_t)ret;
return ret;
diff --git a/kernel/futex.c b/kernel/futex.c
index 5cc8083a4c89..054105854e0e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -44,6 +44,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
@@ -173,8 +174,10 @@
* double_lock_hb() and double_unlock_hb(), respectively.
*/
-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
-int __read_mostly futex_cmpxchg_enabled;
+#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
+#define futex_cmpxchg_enabled 1
+#else
+static int __read_mostly futex_cmpxchg_enabled;
#endif
/*
@@ -3417,7 +3420,7 @@ err_unlock:
* Process a futex-list entry, check whether it's owned by the
* dying task, and do notification if so:
*/
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
+static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
u32 uval, uninitialized_var(nval), mval;
@@ -3612,10 +3615,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
- struct timespec __user *, utime, u32 __user *, uaddr2,
+ struct __kernel_timespec __user *, utime, u32 __user *, uaddr2,
u32, val3)
{
- struct timespec ts;
+ struct timespec64 ts;
ktime_t t, *tp = NULL;
u32 val2 = 0;
int cmd = op & FUTEX_CMD_MASK;
@@ -3625,12 +3628,12 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
cmd == FUTEX_WAIT_REQUEUE_PI)) {
if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
return -EFAULT;
- if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
+ if (get_timespec64(&ts, utime))
return -EFAULT;
- if (!timespec_valid(&ts))
+ if (!timespec64_valid(&ts))
return -EINVAL;
- t = timespec_to_ktime(ts);
+ t = timespec64_to_ktime(ts);
if (cmd == FUTEX_WAIT)
t = ktime_add_safe(ktime_get(), t);
tp = &t;
@@ -3646,6 +3649,194 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
+#ifdef CONFIG_COMPAT
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+ compat_uptr_t __user *head, unsigned int *pi)
+{
+ if (get_user(*uentry, head))
+ return -EFAULT;
+
+ *entry = compat_ptr((*uentry) & ~1);
+ *pi = (unsigned int)(*uentry) & 1;
+
+ return 0;
+}
+
+static void __user *futex_uaddr(struct robust_list __user *entry,
+ compat_long_t futex_offset)
+{
+ compat_uptr_t base = ptr_to_compat(entry);
+ void __user *uaddr = compat_ptr(base + futex_offset);
+
+ return uaddr;
+}
+
+/*
+ * Walk curr->robust_list (very carefully, it's a userspace list!)
+ * and mark any locks found there dead, and notify any waiters.
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+void compat_exit_robust_list(struct task_struct *curr)
+{
+ struct compat_robust_list_head __user *head = curr->compat_robust_list;
+ struct robust_list __user *entry, *next_entry, *pending;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+ unsigned int uninitialized_var(next_pi);
+ compat_uptr_t uentry, next_uentry, upending;
+ compat_long_t futex_offset;
+ int rc;
+
+ if (!futex_cmpxchg_enabled)
+ return;
+
+ /*
+ * Fetch the list head (which was registered earlier, via
+ * sys_set_robust_list()):
+ */
+ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
+ return;
+ /*
+ * Fetch the relative futex offset:
+ */
+ if (get_user(futex_offset, &head->futex_offset))
+ return;
+ /*
+ * Fetch any possibly pending lock-add first, and handle it
+ * if it exists:
+ */
+ if (compat_fetch_robust_entry(&upending, &pending,
+ &head->list_op_pending, &pip))
+ return;
+
+ next_entry = NULL; /* avoid warning with gcc */
+ while (entry != (struct robust_list __user *) &head->list) {
+ /*
+ * Fetch the next entry in the list before calling
+ * handle_futex_death:
+ */
+ rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
+ (compat_uptr_t __user *)&entry->next, &next_pi);
+ /*
+ * A pending lock might already be on the list, so
+ * dont process it twice:
+ */
+ if (entry != pending) {
+ void __user *uaddr = futex_uaddr(entry, futex_offset);
+
+ if (handle_futex_death(uaddr, curr, pi))
+ return;
+ }
+ if (rc)
+ return;
+ uentry = next_uentry;
+ entry = next_entry;
+ pi = next_pi;
+ /*
+ * Avoid excessively long or circular lists:
+ */
+ if (!--limit)
+ break;
+
+ cond_resched();
+ }
+ if (pending) {
+ void __user *uaddr = futex_uaddr(pending, futex_offset);
+
+ handle_futex_death(uaddr, curr, pip);
+ }
+}
+
+COMPAT_SYSCALL_DEFINE2(set_robust_list,
+ struct compat_robust_list_head __user *, head,
+ compat_size_t, len)
+{
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
+ if (unlikely(len != sizeof(*head)))
+ return -EINVAL;
+
+ current->compat_robust_list = head;
+
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+ compat_uptr_t __user *, head_ptr,
+ compat_size_t __user *, len_ptr)
+{
+ struct compat_robust_list_head __user *head;
+ unsigned long ret;
+ struct task_struct *p;
+
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
+ rcu_read_lock();
+
+ ret = -ESRCH;
+ if (!pid)
+ p = current;
+ else {
+ p = find_task_by_vpid(pid);
+ if (!p)
+ goto err_unlock;
+ }
+
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+ goto err_unlock;
+
+ head = p->compat_robust_list;
+ rcu_read_unlock();
+
+ if (put_user(sizeof(*head), len_ptr))
+ return -EFAULT;
+ return put_user(ptr_to_compat(head), head_ptr);
+
+err_unlock:
+ rcu_read_unlock();
+
+ return ret;
+}
+#endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+ struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
+ u32, val3)
+{
+ struct timespec64 ts;
+ ktime_t t, *tp = NULL;
+ int val2 = 0;
+ int cmd = op & FUTEX_CMD_MASK;
+
+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+ cmd == FUTEX_WAIT_BITSET ||
+ cmd == FUTEX_WAIT_REQUEUE_PI)) {
+ if (get_old_timespec32(&ts, utime))
+ return -EFAULT;
+ if (!timespec64_valid(&ts))
+ return -EINVAL;
+
+ t = timespec64_to_ktime(ts);
+ if (cmd == FUTEX_WAIT)
+ t = ktime_add_safe(ktime_get(), t);
+ tp = &t;
+ }
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
+ val2 = (int) (unsigned long) utime;
+
+ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+}
+#endif /* CONFIG_COMPAT_32BIT_TIME */
+
static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
deleted file mode 100644
index 410a77a8f6e2..000000000000
--- a/kernel/futex_compat.c
+++ /dev/null
@@ -1,202 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/kernel/futex_compat.c
- *
- * Futex compatibililty routines.
- *
- * Copyright 2006, Red Hat, Inc., Ingo Molnar
- */
-
-#include <linux/linkage.h>
-#include <linux/compat.h>
-#include <linux/nsproxy.h>
-#include <linux/futex.h>
-#include <linux/ptrace.h>
-#include <linux/syscalls.h>
-
-#include <linux/uaccess.h>
-
-
-/*
- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
- */
-static inline int
-fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
- compat_uptr_t __user *head, unsigned int *pi)
-{
- if (get_user(*uentry, head))
- return -EFAULT;
-
- *entry = compat_ptr((*uentry) & ~1);
- *pi = (unsigned int)(*uentry) & 1;
-
- return 0;
-}
-
-static void __user *futex_uaddr(struct robust_list __user *entry,
- compat_long_t futex_offset)
-{
- compat_uptr_t base = ptr_to_compat(entry);
- void __user *uaddr = compat_ptr(base + futex_offset);
-
- return uaddr;
-}
-
-/*
- * Walk curr->robust_list (very carefully, it's a userspace list!)
- * and mark any locks found there dead, and notify any waiters.
- *
- * We silently return on any sign of list-walking problem.
- */
-void compat_exit_robust_list(struct task_struct *curr)
-{
- struct compat_robust_list_head __user *head = curr->compat_robust_list;
- struct robust_list __user *entry, *next_entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
- unsigned int uninitialized_var(next_pi);
- compat_uptr_t uentry, next_uentry, upending;
- compat_long_t futex_offset;
- int rc;
-
- if (!futex_cmpxchg_enabled)
- return;
-
- /*
- * Fetch the list head (which was registered earlier, via
- * sys_set_robust_list()):
- */
- if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
- return;
- /*
- * Fetch the relative futex offset:
- */
- if (get_user(futex_offset, &head->futex_offset))
- return;
- /*
- * Fetch any possibly pending lock-add first, and handle it
- * if it exists:
- */
- if (fetch_robust_entry(&upending, &pending,
- &head->list_op_pending, &pip))
- return;
-
- next_entry = NULL; /* avoid warning with gcc */
- while (entry != (struct robust_list __user *) &head->list) {
- /*
- * Fetch the next entry in the list before calling
- * handle_futex_death:
- */
- rc = fetch_robust_entry(&next_uentry, &next_entry,
- (compat_uptr_t __user *)&entry->next, &next_pi);
- /*
- * A pending lock might already be on the list, so
- * dont process it twice:
- */
- if (entry != pending) {
- void __user *uaddr = futex_uaddr(entry, futex_offset);
-
- if (handle_futex_death(uaddr, curr, pi))
- return;
- }
- if (rc)
- return;
- uentry = next_uentry;
- entry = next_entry;
- pi = next_pi;
- /*
- * Avoid excessively long or circular lists:
- */
- if (!--limit)
- break;
-
- cond_resched();
- }
- if (pending) {
- void __user *uaddr = futex_uaddr(pending, futex_offset);
-
- handle_futex_death(uaddr, curr, pip);
- }
-}
-
-COMPAT_SYSCALL_DEFINE2(set_robust_list,
- struct compat_robust_list_head __user *, head,
- compat_size_t, len)
-{
- if (!futex_cmpxchg_enabled)
- return -ENOSYS;
-
- if (unlikely(len != sizeof(*head)))
- return -EINVAL;
-
- current->compat_robust_list = head;
-
- return 0;
-}
-
-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
- compat_uptr_t __user *, head_ptr,
- compat_size_t __user *, len_ptr)
-{
- struct compat_robust_list_head __user *head;
- unsigned long ret;
- struct task_struct *p;
-
- if (!futex_cmpxchg_enabled)
- return -ENOSYS;
-
- rcu_read_lock();
-
- ret = -ESRCH;
- if (!pid)
- p = current;
- else {
- p = find_task_by_vpid(pid);
- if (!p)
- goto err_unlock;
- }
-
- ret = -EPERM;
- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
- goto err_unlock;
-
- head = p->compat_robust_list;
- rcu_read_unlock();
-
- if (put_user(sizeof(*head), len_ptr))
- return -EFAULT;
- return put_user(ptr_to_compat(head), head_ptr);
-
-err_unlock:
- rcu_read_unlock();
-
- return ret;
-}
-
-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
- struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
- u32, val3)
-{
- struct timespec ts;
- ktime_t t, *tp = NULL;
- int val2 = 0;
- int cmd = op & FUTEX_CMD_MASK;
-
- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
- cmd == FUTEX_WAIT_BITSET ||
- cmd == FUTEX_WAIT_REQUEUE_PI)) {
- if (compat_get_timespec(&ts, utime))
- return -EFAULT;
- if (!timespec_valid(&ts))
- return -EINVAL;
-
- t = timespec_to_ktime(ts);
- if (cmd == FUTEX_WAIT)
- t = ktime_add_safe(ktime_get(), t);
- tp = &t;
- }
- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
- cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
- val2 = (int) (unsigned long) utime;
-
- return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
-}
diff --git a/kernel/pid.c b/kernel/pid.c
index b2f6c506035d..20881598bdfa 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -233,8 +233,10 @@ out_unlock:
out_free:
spin_lock_irq(&pidmap_lock);
- while (++i <= ns->level)
- idr_remove(&ns->idr, (pid->numbers + i)->nr);
+ while (++i <= ns->level) {
+ upid = pid->numbers + i;
+ idr_remove(&upid->ns->idr, upid->nr);
+ }
/* On failure to allocate the first pid, reset the state */
if (ns->pid_allocated == PIDNS_ADDING)
diff --git a/kernel/signal.c b/kernel/signal.c
index 9a32bc2088c9..53e07d97ffe0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2735,6 +2735,84 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
}
EXPORT_SYMBOL(sigprocmask);
+/*
+ * The api helps set app-provided sigmasks.
+ *
+ * This is useful for syscalls such as ppoll, pselect, io_pgetevents and
+ * epoll_pwait where a new sigmask is passed from userland for the syscalls.
+ */
+int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set,
+ sigset_t *oldset, size_t sigsetsize)
+{
+ if (!usigmask)
+ return 0;
+
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+ if (copy_from_user(set, usigmask, sizeof(sigset_t)))
+ return -EFAULT;
+
+ *oldset = current->blocked;
+ set_current_blocked(set);
+
+ return 0;
+}
+EXPORT_SYMBOL(set_user_sigmask);
+
+#ifdef CONFIG_COMPAT
+int set_compat_user_sigmask(const compat_sigset_t __user *usigmask,
+ sigset_t *set, sigset_t *oldset,
+ size_t sigsetsize)
+{
+ if (!usigmask)
+ return 0;
+
+ if (sigsetsize != sizeof(compat_sigset_t))
+ return -EINVAL;
+ if (get_compat_sigset(set, usigmask))
+ return -EFAULT;
+
+ *oldset = current->blocked;
+ set_current_blocked(set);
+
+ return 0;
+}
+EXPORT_SYMBOL(set_compat_user_sigmask);
+#endif
+
+/*
+ * restore_user_sigmask:
+ * usigmask: sigmask passed in from userland.
+ * sigsaved: saved sigmask when the syscall started and changed the sigmask to
+ * usigmask.
+ *
+ * This is useful for syscalls such as ppoll, pselect, io_pgetevents and
+ * epoll_pwait where a new sigmask is passed in from userland for the syscalls.
+ */
+void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
+{
+
+ if (!usigmask)
+ return;
+ /*
+ * When signals are pending, do not restore them here.
+ * Restoring sigmask here can lead to delivering signals that the above
+ * syscalls are intended to block because of the sigmask passed in.
+ */
+ if (signal_pending(current)) {
+ current->saved_sigmask = *sigsaved;
+ set_restore_sigmask();
+ return;
+ }
+
+ /*
+ * This is needed because the fast syscall return path does not restore
+ * saved_sigmask when signals are not pending.
+ */
+ set_current_blocked(sigsaved);
+}
+EXPORT_SYMBOL(restore_user_sigmask);
+
/**
* sys_rt_sigprocmask - change the list of currently blocked signals
* @how: whether to add, remove, or set signals
@@ -3254,7 +3332,71 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
return ret;
}
+#ifdef CONFIG_COMPAT_32BIT_TIME
+SYSCALL_DEFINE4(rt_sigtimedwait_time32, const sigset_t __user *, uthese,
+ siginfo_t __user *, uinfo,
+ const struct old_timespec32 __user *, uts,
+ size_t, sigsetsize)
+{
+ sigset_t these;
+ struct timespec64 ts;
+ kernel_siginfo_t info;
+ int ret;
+
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (copy_from_user(&these, uthese, sizeof(these)))
+ return -EFAULT;
+
+ if (uts) {
+ if (get_old_timespec32(&ts, uts))
+ return -EFAULT;
+ }
+
+ ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL);
+
+ if (ret > 0 && uinfo) {
+ if (copy_siginfo_to_user(uinfo, &info))
+ ret = -EFAULT;
+ }
+
+ return ret;
+}
+#endif
+
#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time64, compat_sigset_t __user *, uthese,
+ struct compat_siginfo __user *, uinfo,
+ struct __kernel_timespec __user *, uts, compat_size_t, sigsetsize)
+{
+ sigset_t s;
+ struct timespec64 t;
+ kernel_siginfo_t info;
+ long ret;
+
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (get_compat_sigset(&s, uthese))
+ return -EFAULT;
+
+ if (uts) {
+ if (get_timespec64(&t, uts))
+ return -EFAULT;
+ }
+
+ ret = do_sigtimedwait(&s, &info, uts ? &t : NULL);
+
+ if (ret > 0 && uinfo) {
+ if (copy_siginfo_to_user32(uinfo, &info))
+ ret = -EFAULT;
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
struct compat_siginfo __user *, uinfo,
struct old_timespec32 __user *, uts, compat_size_t, sigsetsize)
@@ -3285,6 +3427,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
return ret;
}
#endif
+#endif
/**
* sys_kill - send a signal to a process
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index df556175be50..ab9d0e3c6d50 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -284,7 +284,9 @@ COND_SYSCALL_COMPAT(move_pages);
COND_SYSCALL(perf_event_open);
COND_SYSCALL(accept4);
COND_SYSCALL(recvmmsg);
+COND_SYSCALL(recvmmsg_time32);
COND_SYSCALL_COMPAT(recvmmsg);
+COND_SYSCALL_COMPAT(recvmmsg_time64);
/*
* Architecture specific syscalls: see further below
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index bc3a3c37ec9c..36a2bef00125 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -554,17 +554,9 @@ static void sync_rtc_clock(void)
}
#ifdef CONFIG_GENERIC_CMOS_UPDATE
-int __weak update_persistent_clock(struct timespec now)
-{
- return -ENODEV;
-}
-
int __weak update_persistent_clock64(struct timespec64 now64)
{
- struct timespec now;
-
- now = timespec64_to_timespec(now64);
- return update_persistent_clock(now);
+ return -ENODEV;
}
#endif
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 5aa0a156e331..2edb5088a70b 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -384,42 +384,6 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0,
EXPORT_SYMBOL(mktime64);
/**
- * set_normalized_timespec - set timespec sec and nsec parts and normalize
- *
- * @ts: pointer to timespec variable to be set
- * @sec: seconds to set
- * @nsec: nanoseconds to set
- *
- * Set seconds and nanoseconds field of a timespec variable and
- * normalize to the timespec storage format
- *
- * Note: The tv_nsec part is always in the range of
- * 0 <= tv_nsec < NSEC_PER_SEC
- * For negative values only the tv_sec field is negative !
- */
-void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
-{
- while (nsec >= NSEC_PER_SEC) {
- /*
- * The following asm() prevents the compiler from
- * optimising this loop into a modulo operation. See
- * also __iter_div_u64_rem() in include/linux/time.h
- */
- asm("" : "+rm"(nsec));
- nsec -= NSEC_PER_SEC;
- ++sec;
- }
- while (nsec < 0) {
- asm("" : "+rm"(nsec));
- nsec += NSEC_PER_SEC;
- --sec;
- }
- ts->tv_sec = sec;
- ts->tv_nsec = nsec;
-}
-EXPORT_SYMBOL(set_normalized_timespec);
-
-/**
* ns_to_timespec - Convert nanoseconds to timespec
* @nsec: the nanoseconds value to be converted
*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index c801e25875a3..ac5dbf2cd4a2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1464,7 +1464,7 @@ u64 timekeeping_max_deferment(void)
}
/**
- * read_persistent_clock - Return time from the persistent clock.
+ * read_persistent_clock64 - Return time from the persistent clock.
*
* Weak dummy function for arches that do not yet support it.
* Reads the time from the battery backed persistent clock.
@@ -1472,20 +1472,12 @@ u64 timekeeping_max_deferment(void)
*
* XXX - Do be sure to remove it once all arches implement it.
*/
-void __weak read_persistent_clock(struct timespec *ts)
+void __weak read_persistent_clock64(struct timespec64 *ts)
{
ts->tv_sec = 0;
ts->tv_nsec = 0;
}
-void __weak read_persistent_clock64(struct timespec64 *ts64)
-{
- struct timespec ts;
-
- read_persistent_clock(&ts);
- *ts64 = timespec_to_timespec64(ts);
-}
-
/**
* read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
* from the boot.
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2868d85f1fb1..fac0ddf8a8e2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
return NULL;
- if (!bio->bi_css)
+ if (!bio->bi_blkg)
return NULL;
- return cgroup_get_kernfs_id(bio->bi_css->cgroup);
+ return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
}
#else
static union kernfs_node_id *
diff --git a/lib/Kconfig b/lib/Kconfig
index 7dbbcfe9cd90..79bc2eef9c14 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -577,7 +577,7 @@ config SG_POOL
# sg chaining option
#
-config ARCH_HAS_SG_CHAIN
+config ARCH_NO_SG_CHAIN
def_bool n
config ARCH_HAS_PMEM_API
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 54c248526b55..1928009f506e 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -6,6 +6,7 @@
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>
+#include <linux/scatterlist.h>
#define PIPE_PARANOIA /* for now */
@@ -1464,10 +1465,11 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);
-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
+size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
struct iov_iter *i)
{
const char *from = addr;
+ __wsum *csum = csump;
__wsum sum, next;
size_t off = 0;
@@ -1510,6 +1512,21 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
}
EXPORT_SYMBOL(csum_and_copy_to_iter);
+size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
+ struct iov_iter *i)
+{
+ struct ahash_request *hash = hashp;
+ struct scatterlist sg;
+ size_t copied;
+
+ copied = copy_to_iter(addr, bytes, i);
+ sg_init_one(&sg, addr, copied);
+ ahash_request_set_crypt(hash, &sg, NULL, copied);
+ crypto_ahash_update(hash);
+ return copied;
+}
+EXPORT_SYMBOL(hash_and_copy_to_iter);
+
int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
size_t size = i->count;
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index fdd1b8aa8ac6..65c2d06250a6 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -20,6 +20,47 @@
#include <linux/sbitmap.h>
#include <linux/seq_file.h>
+/*
+ * See if we have deferred clears that we can batch move
+ */
+static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
+{
+ unsigned long mask, val;
+ unsigned long __maybe_unused flags;
+ bool ret = false;
+
+ /* Silence bogus lockdep warning */
+#if defined(CONFIG_LOCKDEP)
+ local_irq_save(flags);
+#endif
+ spin_lock(&sb->map[index].swap_lock);
+
+ if (!sb->map[index].cleared)
+ goto out_unlock;
+
+ /*
+ * First get a stable cleared mask, setting the old mask to 0.
+ */
+ do {
+ mask = sb->map[index].cleared;
+ } while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask);
+
+ /*
+ * Now clear the masked bits in our free word
+ */
+ do {
+ val = sb->map[index].word;
+ } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
+
+ ret = true;
+out_unlock:
+ spin_unlock(&sb->map[index].swap_lock);
+#if defined(CONFIG_LOCKDEP)
+ local_irq_restore(flags);
+#endif
+ return ret;
+}
+
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
gfp_t flags, int node)
{
@@ -59,6 +100,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
for (i = 0; i < sb->map_nr; i++) {
sb->map[i].depth = min(depth, bits_per_word);
depth -= sb->map[i].depth;
+ spin_lock_init(&sb->map[i].swap_lock);
}
return 0;
}
@@ -69,6 +111,9 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
unsigned int bits_per_word = 1U << sb->shift;
unsigned int i;
+ for (i = 0; i < sb->map_nr; i++)
+ sbitmap_deferred_clear(sb, i);
+
sb->depth = depth;
sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
@@ -111,6 +156,24 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
return nr;
}
+static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
+ unsigned int alloc_hint, bool round_robin)
+{
+ int nr;
+
+ do {
+ nr = __sbitmap_get_word(&sb->map[index].word,
+ sb->map[index].depth, alloc_hint,
+ !round_robin);
+ if (nr != -1)
+ break;
+ if (!sbitmap_deferred_clear(sb, index))
+ break;
+ } while (1);
+
+ return nr;
+}
+
int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
{
unsigned int i, index;
@@ -118,24 +181,28 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
index = SB_NR_TO_INDEX(sb, alloc_hint);
+ /*
+ * Unless we're doing round robin tag allocation, just use the
+ * alloc_hint to find the right word index. No point in looping
+ * twice in find_next_zero_bit() for that case.
+ */
+ if (round_robin)
+ alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
+ else
+ alloc_hint = 0;
+
for (i = 0; i < sb->map_nr; i++) {
- nr = __sbitmap_get_word(&sb->map[index].word,
- sb->map[index].depth,
- SB_NR_TO_BIT(sb, alloc_hint),
- !round_robin);
+ nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
+ round_robin);
if (nr != -1) {
nr += index << sb->shift;
break;
}
/* Jump to next index. */
- index++;
- alloc_hint = index << sb->shift;
-
- if (index >= sb->map_nr) {
+ alloc_hint = 0;
+ if (++index >= sb->map_nr)
index = 0;
- alloc_hint = 0;
- }
}
return nr;
@@ -151,6 +218,7 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
index = SB_NR_TO_INDEX(sb, alloc_hint);
for (i = 0; i < sb->map_nr; i++) {
+again:
nr = __sbitmap_get_word(&sb->map[index].word,
min(sb->map[index].depth, shallow_depth),
SB_NR_TO_BIT(sb, alloc_hint), true);
@@ -159,6 +227,9 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
break;
}
+ if (sbitmap_deferred_clear(sb, index))
+ goto again;
+
/* Jump to next index. */
index++;
alloc_hint = index << sb->shift;
@@ -178,7 +249,7 @@ bool sbitmap_any_bit_set(const struct sbitmap *sb)
unsigned int i;
for (i = 0; i < sb->map_nr; i++) {
- if (sb->map[i].word)
+ if (sb->map[i].word & ~sb->map[i].cleared)
return true;
}
return false;
@@ -191,9 +262,10 @@ bool sbitmap_any_bit_clear(const struct sbitmap *sb)
for (i = 0; i < sb->map_nr; i++) {
const struct sbitmap_word *word = &sb->map[i];
+ unsigned long mask = word->word & ~word->cleared;
unsigned long ret;
- ret = find_first_zero_bit(&word->word, word->depth);
+ ret = find_first_zero_bit(&mask, word->depth);
if (ret < word->depth)
return true;
}
@@ -201,23 +273,36 @@ bool sbitmap_any_bit_clear(const struct sbitmap *sb)
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
-unsigned int sbitmap_weight(const struct sbitmap *sb)
+static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
{
unsigned int i, weight = 0;
for (i = 0; i < sb->map_nr; i++) {
const struct sbitmap_word *word = &sb->map[i];
- weight += bitmap_weight(&word->word, word->depth);
+ if (set)
+ weight += bitmap_weight(&word->word, word->depth);
+ else
+ weight += bitmap_weight(&word->cleared, word->depth);
}
return weight;
}
-EXPORT_SYMBOL_GPL(sbitmap_weight);
+
+static unsigned int sbitmap_weight(const struct sbitmap *sb)
+{
+ return __sbitmap_weight(sb, true);
+}
+
+static unsigned int sbitmap_cleared(const struct sbitmap *sb)
+{
+ return __sbitmap_weight(sb, false);
+}
void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
{
seq_printf(m, "depth=%u\n", sb->depth);
- seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
+ seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
+ seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
seq_printf(m, "map_nr=%u\n", sb->map_nr);
}
@@ -325,6 +410,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
sbq->min_shallow_depth = UINT_MAX;
sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
atomic_set(&sbq->wake_index, 0);
+ atomic_set(&sbq->ws_active, 0);
sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
if (!sbq->ws) {
@@ -440,6 +526,9 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
int i, wake_index;
+ if (!atomic_read(&sbq->ws_active))
+ return NULL;
+
wake_index = atomic_read(&sbq->wake_index);
for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
struct sbq_wait_state *ws = &sbq->ws[wake_index];
@@ -509,7 +598,8 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
unsigned int cpu)
{
- sbitmap_clear_bit_unlock(&sbq->sb, nr);
+ sbitmap_deferred_clear_bit(&sbq->sb, nr);
+
/*
* Pairs with the memory barrier in set_current_state() to ensure the
* proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
@@ -564,6 +654,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
+ seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active));
seq_puts(m, "ws={\n");
for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
@@ -579,3 +670,48 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_show);
+
+void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
+ struct sbq_wait_state *ws,
+ struct sbq_wait *sbq_wait)
+{
+ if (!sbq_wait->sbq) {
+ sbq_wait->sbq = sbq;
+ atomic_inc(&sbq->ws_active);
+ }
+ add_wait_queue(&ws->wait, &sbq_wait->wait);
+}
+EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);
+
+void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait)
+{
+ list_del_init(&sbq_wait->wait.entry);
+ if (sbq_wait->sbq) {
+ atomic_dec(&sbq_wait->sbq->ws_active);
+ sbq_wait->sbq = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue);
+
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+ struct sbq_wait_state *ws,
+ struct sbq_wait *sbq_wait, int state)
+{
+ if (!sbq_wait->sbq) {
+ atomic_inc(&sbq->ws_active);
+ sbq_wait->sbq = sbq;
+ }
+ prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state);
+}
+EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);
+
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+ struct sbq_wait *sbq_wait)
+{
+ finish_wait(&ws->wait, &sbq_wait->wait);
+ if (sbq_wait->sbq) {
+ atomic_dec(&sbq->ws_active);
+ sbq_wait->sbq = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(sbitmap_finish_wait);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 7c6096a71704..9ba349e775ef 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -271,7 +271,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
if (nents == 0)
return -EINVAL;
-#ifndef CONFIG_ARCH_HAS_SG_CHAIN
+#ifdef CONFIG_ARCH_NO_SG_CHAIN
if (WARN_ON_ONCE(nents > max_ents))
return -EINVAL;
#endif
diff --git a/mm/page_io.c b/mm/page_io.c
index d4d1c89bcddd..3475733b1926 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -140,7 +140,7 @@ out:
unlock_page(page);
WRITE_ONCE(bio->bi_private, NULL);
bio_put(bio);
- wake_up_process(waiter);
+ blk_wake_io_task(waiter);
put_task_struct(waiter);
}
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
goto out;
}
bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
- bio_associate_blkcg_from_page(bio, page);
+ bio_associate_blkg_from_page(bio, page);
count_swpout_vm_event(page);
set_page_writeback(page);
unlock_page(page);
@@ -405,11 +405,12 @@ int swap_readpage(struct page *page, bool synchronous)
bio_get(bio);
qc = submit_bio(bio);
while (synchronous) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+
if (!READ_ONCE(bio->bi_private))
break;
- if (!blk_poll(disk->queue, qc))
+ if (!blk_poll(disk->queue, qc, true))
break;
}
__set_current_state(TASK_RUNNING);
diff --git a/net/compat.c b/net/compat.c
index 47a614b370cd..f7084780a8f8 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -810,34 +810,23 @@ COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len
return __compat_sys_recvfrom(fd, buf, len, flags, addr, addrlen);
}
-static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
- unsigned int vlen, unsigned int flags,
- struct old_timespec32 __user *timeout)
+COMPAT_SYSCALL_DEFINE5(recvmmsg_time64, int, fd, struct compat_mmsghdr __user *, mmsg,
+ unsigned int, vlen, unsigned int, flags,
+ struct __kernel_timespec __user *, timeout)
{
- int datagrams;
- struct timespec64 ktspec;
-
- if (timeout == NULL)
- return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
- flags | MSG_CMSG_COMPAT, NULL);
-
- if (compat_get_timespec64(&ktspec, timeout))
- return -EFAULT;
-
- datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
- flags | MSG_CMSG_COMPAT, &ktspec);
- if (datagrams > 0 && compat_put_timespec64(&ktspec, timeout))
- datagrams = -EFAULT;
-
- return datagrams;
+ return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
+ flags | MSG_CMSG_COMPAT, timeout, NULL);
}
+#ifdef CONFIG_COMPAT_32BIT_TIME
COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
unsigned int, vlen, unsigned int, flags,
struct old_timespec32 __user *, timeout)
{
- return __compat_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
+ return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
+ flags | MSG_CMSG_COMPAT, NULL, timeout);
}
+#endif
COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args)
{
@@ -925,8 +914,9 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args)
ret = __compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
break;
case SYS_RECVMMSG:
- ret = __compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
- compat_ptr(a[4]));
+ ret = __sys_recvmmsg(a0, compat_ptr(a1), a[2],
+ a[3] | MSG_CMSG_COMPAT, NULL,
+ compat_ptr(a[4]));
break;
case SYS_ACCEPT4:
ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 4bf62b1afa3b..b2651bb6d2a3 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -408,27 +408,20 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
}
EXPORT_SYMBOL(skb_kill_datagram);
-/**
- * skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
- * @skb: buffer to copy
- * @offset: offset in the buffer to start copying from
- * @to: iovec iterator to copy to
- * @len: amount of data to copy from buffer to iovec
- */
-int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
- struct iov_iter *to, int len)
+int __skb_datagram_iter(const struct sk_buff *skb, int offset,
+ struct iov_iter *to, int len, bool fault_short,
+ size_t (*cb)(const void *, size_t, void *, struct iov_iter *),
+ void *data)
{
int start = skb_headlen(skb);
int i, copy = start - offset, start_off = offset, n;
struct sk_buff *frag_iter;
- trace_skb_copy_datagram_iovec(skb, len);
-
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
- n = copy_to_iter(skb->data + offset, copy, to);
+ n = cb(skb->data + offset, copy, data, to);
offset += n;
if (n != copy)
goto short_copy;
@@ -445,11 +438,14 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
+ struct page *page = skb_frag_page(frag);
+ u8 *vaddr = kmap(page);
+
if (copy > len)
copy = len;
- n = copy_page_to_iter(skb_frag_page(frag),
- frag->page_offset + offset -
- start, copy, to);
+ n = cb(vaddr + frag->page_offset +
+ offset - start, copy, data, to);
+ kunmap(page);
offset += n;
if (n != copy)
goto short_copy;
@@ -468,8 +464,8 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
- if (skb_copy_datagram_iter(frag_iter, offset - start,
- to, copy))
+ if (__skb_datagram_iter(frag_iter, offset - start,
+ to, copy, fault_short, cb, data))
goto fault;
if ((len -= copy) == 0)
return 0;
@@ -490,11 +486,50 @@ fault:
return -EFAULT;
short_copy:
- if (iov_iter_count(to))
+ if (fault_short || iov_iter_count(to))
goto fault;
return 0;
}
+
+/**
+ * skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
+ * and update a hash.
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: iovec iterator to copy to
+ * @len: amount of data to copy from buffer to iovec
+ * @hash: hash request to update
+ */
+int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
+ struct iov_iter *to, int len,
+ struct ahash_request *hash)
+{
+ return __skb_datagram_iter(skb, offset, to, len, true,
+ hash_and_copy_to_iter, hash);
+}
+EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);
+
+static size_t simple_copy_to_iter(const void *addr, size_t bytes,
+ void *data __always_unused, struct iov_iter *i)
+{
+ return copy_to_iter(addr, bytes, i);
+}
+
+/**
+ * skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: iovec iterator to copy to
+ * @len: amount of data to copy from buffer to iovec
+ */
+int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
+ struct iov_iter *to, int len)
+{
+ trace_skb_copy_datagram_iovec(skb, len);
+ return __skb_datagram_iter(skb, offset, to, len, false,
+ simple_copy_to_iter, NULL);
+}
EXPORT_SYMBOL(skb_copy_datagram_iter);
/**
@@ -645,87 +680,21 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
+/**
+ * skb_copy_and_csum_datagram_iter - Copy datagram to an iovec iterator
+ * and update a checksum.
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: iovec iterator to copy to
+ * @len: amount of data to copy from buffer to iovec
+ * @csump: checksum pointer
+ */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len,
__wsum *csump)
{
- int start = skb_headlen(skb);
- int i, copy = start - offset, start_off = offset;
- struct sk_buff *frag_iter;
- int pos = 0;
- int n;
-
- /* Copy header. */
- if (copy > 0) {
- if (copy > len)
- copy = len;
- n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
- offset += n;
- if (n != copy)
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- pos = copy;
- }
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- int end;
- const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
- WARN_ON(start > offset + len);
-
- end = start + skb_frag_size(frag);
- if ((copy = end - offset) > 0) {
- __wsum csum2 = 0;
- struct page *page = skb_frag_page(frag);
- u8 *vaddr = kmap(page);
-
- if (copy > len)
- copy = len;
- n = csum_and_copy_to_iter(vaddr + frag->page_offset +
- offset - start, copy,
- &csum2, to);
- kunmap(page);
- offset += n;
- if (n != copy)
- goto fault;
- *csump = csum_block_add(*csump, csum2, pos);
- if (!(len -= copy))
- return 0;
- pos += copy;
- }
- start = end;
- }
-
- skb_walk_frags(skb, frag_iter) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + frag_iter->len;
- if ((copy = end - offset) > 0) {
- __wsum csum2 = 0;
- if (copy > len)
- copy = len;
- if (skb_copy_and_csum_datagram(frag_iter,
- offset - start,
- to, copy,
- &csum2))
- goto fault;
- *csump = csum_block_add(*csump, csum2, pos);
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- pos += copy;
- }
- start = end;
- }
- if (!len)
- return 0;
-
-fault:
- iov_iter_revert(to, offset - start_off);
- return -EFAULT;
+ return __skb_datagram_iter(skb, offset, to, len, true,
+ csum_and_copy_to_iter, csump);
}
/**
diff --git a/net/rds/ib.c b/net/rds/ib.c
index eba75c1ba359..9d7b7586f240 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -148,8 +148,8 @@ static void rds_ib_add_one(struct ib_device *device)
has_fr = (device->attrs.device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
- has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
- device->map_phys_fmr && device->unmap_fmr);
+ has_fmr = (device->ops.alloc_fmr && device->ops.dealloc_fmr &&
+ device->ops.map_phys_fmr && device->ops.unmap_fmr);
rds_ibdev->use_fastreg = (has_fr && !has_fmr);
rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
diff --git a/net/socket.c b/net/socket.c
index 334fcc617ef2..e89884e2197b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2341,8 +2341,9 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
* Linux recvmmsg interface
*/
-int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
- unsigned int flags, struct timespec64 *timeout)
+static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags,
+ struct timespec64 *timeout)
{
int fput_needed, err, datagrams;
struct socket *sock;
@@ -2451,25 +2452,32 @@ out_put:
return datagrams;
}
-static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
- unsigned int vlen, unsigned int flags,
- struct __kernel_timespec __user *timeout)
+int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags,
+ struct __kernel_timespec __user *timeout,
+ struct old_timespec32 __user *timeout32)
{
int datagrams;
struct timespec64 timeout_sys;
- if (flags & MSG_CMSG_COMPAT)
- return -EINVAL;
-
- if (!timeout)
- return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
+ if (timeout && get_timespec64(&timeout_sys, timeout))
+ return -EFAULT;
- if (get_timespec64(&timeout_sys, timeout))
+ if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
return -EFAULT;
- datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
+ if (!timeout && !timeout32)
+ return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
+
+ datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
- if (datagrams > 0 && put_timespec64(&timeout_sys, timeout))
+ if (datagrams <= 0)
+ return datagrams;
+
+ if (timeout && put_timespec64(&timeout_sys, timeout))
+ datagrams = -EFAULT;
+
+ if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
datagrams = -EFAULT;
return datagrams;
@@ -2479,8 +2487,23 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
unsigned int, vlen, unsigned int, flags,
struct __kernel_timespec __user *, timeout)
{
- return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+
+ return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
+}
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
+ unsigned int, vlen, unsigned int, flags,
+ struct old_timespec32 __user *, timeout)
+{
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+
+ return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
+#endif
#ifdef __ARCH_WANT_SYS_SOCKETCALL
/* Argument list sizes for sys_socketcall */
@@ -2600,8 +2623,15 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
a[2], true);
break;
case SYS_RECVMMSG:
- err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
- a[3], (struct __kernel_timespec __user *)a[4]);
+ if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
+ err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
+ a[2], a[3],
+ (struct __kernel_timespec __user *)a[4],
+ NULL);
+ else
+ err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
+ a[2], a[3], NULL,
+ (struct old_timespec32 __user *)a[4]);
break;
case SYS_ACCEPT4:
err = __sys_accept4(a0, (struct sockaddr __user *)a1,
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 7f5632cd5a48..fd8fea59fe92 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -41,7 +41,7 @@ enum {
bool
fmr_is_supported(struct rpcrdma_ia *ia)
{
- if (!ia->ri_device->alloc_fmr) {
+ if (!ia->ri_device->ops.alloc_fmr) {
pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n",
ia->ri_device->name);
return false;
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index a3891ae9fa0f..389a298274d3 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -45,6 +45,7 @@ static const char hmac_alg[] = "hmac(sha256)";
static const char blkcipher_alg[] = "cbc(aes)";
static const char key_format_default[] = "default";
static const char key_format_ecryptfs[] = "ecryptfs";
+static const char key_format_enc32[] = "enc32";
static unsigned int ivsize;
static int blksize;
@@ -54,6 +55,7 @@ static int blksize;
#define HASH_SIZE SHA256_DIGEST_SIZE
#define MAX_DATA_SIZE 4096
#define MIN_DATA_SIZE 20
+#define KEY_ENC32_PAYLOAD_LEN 32
static struct crypto_shash *hash_tfm;
@@ -62,12 +64,13 @@ enum {
};
enum {
- Opt_error = -1, Opt_default, Opt_ecryptfs
+ Opt_error = -1, Opt_default, Opt_ecryptfs, Opt_enc32
};
static const match_table_t key_format_tokens = {
{Opt_default, "default"},
{Opt_ecryptfs, "ecryptfs"},
+ {Opt_enc32, "enc32"},
{Opt_error, NULL}
};
@@ -195,6 +198,7 @@ static int datablob_parse(char *datablob, const char **format,
key_format = match_token(p, key_format_tokens, args);
switch (key_format) {
case Opt_ecryptfs:
+ case Opt_enc32:
case Opt_default:
*format = p;
*master_desc = strsep(&datablob, " \t");
@@ -625,15 +629,22 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
format_len = (!format) ? strlen(key_format_default) : strlen(format);
decrypted_datalen = dlen;
payload_datalen = decrypted_datalen;
- if (format && !strcmp(format, key_format_ecryptfs)) {
- if (dlen != ECRYPTFS_MAX_KEY_BYTES) {
- pr_err("encrypted_key: keylen for the ecryptfs format "
- "must be equal to %d bytes\n",
- ECRYPTFS_MAX_KEY_BYTES);
- return ERR_PTR(-EINVAL);
+ if (format) {
+ if (!strcmp(format, key_format_ecryptfs)) {
+ if (dlen != ECRYPTFS_MAX_KEY_BYTES) {
+ pr_err("encrypted_key: keylen for the ecryptfs format must be equal to %d bytes\n",
+ ECRYPTFS_MAX_KEY_BYTES);
+ return ERR_PTR(-EINVAL);
+ }
+ decrypted_datalen = ECRYPTFS_MAX_KEY_BYTES;
+ payload_datalen = sizeof(struct ecryptfs_auth_tok);
+ } else if (!strcmp(format, key_format_enc32)) {
+ if (decrypted_datalen != KEY_ENC32_PAYLOAD_LEN) {
+ pr_err("encrypted_key: enc32 key payload incorrect length: %d\n",
+ decrypted_datalen);
+ return ERR_PTR(-EINVAL);
+ }
}
- decrypted_datalen = ECRYPTFS_MAX_KEY_BYTES;
- payload_datalen = sizeof(struct ecryptfs_auth_tok);
}
encrypted_datalen = roundup(decrypted_datalen, blksize);
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 74cb0ff42fed..479909b858c7 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -158,8 +158,6 @@ extern struct key *request_key_and_link(struct key_type *type,
extern bool lookup_user_key_possessed(const struct key *key,
const struct key_match_data *match_data);
-extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
- key_perm_t perm);
#define KEY_LOOKUP_CREATE 0x01
#define KEY_LOOKUP_PARTIAL 0x02
#define KEY_LOOKUP_FOR_UNLINK 0x04
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 8b8994920620..02c77e928f68 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -754,6 +754,7 @@ reget_creds:
put_cred(ctx.cred);
goto try_again;
}
+EXPORT_SYMBOL(lookup_user_key);
/*
* Join the named keyring as the session keyring if possible else attempt to
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 778ceb651000..10ddf223055b 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -37,6 +37,7 @@ obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
nfit-y := $(ACPI_SRC)/core.o
+nfit-y += $(ACPI_SRC)/intel.o
nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
nfit-y += acpi_nfit_test.o
nfit-y += config_check.o
@@ -79,6 +80,8 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
+libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o
+libnvdimm-y += dimm_devs.o
libnvdimm-y += libnvdimm_test.o
libnvdimm-y += config_check.o
diff --git a/tools/testing/nvdimm/dimm_devs.c b/tools/testing/nvdimm/dimm_devs.c
new file mode 100644
index 000000000000..e75238404555
--- /dev/null
+++ b/tools/testing/nvdimm/dimm_devs.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Intel Corp. 2018 */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/nd.h>
+#include "pmem.h"
+#include "pfn.h"
+#include "nd.h"
+#include "nd-core.h"
+
+ssize_t security_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+
+ /*
+ * For the test version we need to poll the "hardware" in order
+ * to get the updated status for unlock testing.
+ */
+ nvdimm->sec.state = nvdimm_security_state(nvdimm, false);
+ nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, true);
+
+ switch (nvdimm->sec.state) {
+ case NVDIMM_SECURITY_DISABLED:
+ return sprintf(buf, "disabled\n");
+ case NVDIMM_SECURITY_UNLOCKED:
+ return sprintf(buf, "unlocked\n");
+ case NVDIMM_SECURITY_LOCKED:
+ return sprintf(buf, "locked\n");
+ case NVDIMM_SECURITY_FROZEN:
+ return sprintf(buf, "frozen\n");
+ case NVDIMM_SECURITY_OVERWRITE:
+ return sprintf(buf, "overwrite\n");
+ default:
+ return -ENOTTY;
+ }
+
+ return -ENOTTY;
+}
+
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 6c16ac36d482..b579f962451d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -143,6 +143,13 @@ static u32 handle[] = {
static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)];
static int dimm_fail_cmd_code[ARRAY_SIZE(handle)];
+struct nfit_test_sec {
+ u8 state;
+ u8 ext_state;
+ u8 passphrase[32];
+ u8 master_passphrase[32];
+ u64 overwrite_end_time;
+} dimm_sec_info[NUM_DCR];
static const struct nd_intel_smart smart_def = {
.flags = ND_INTEL_SMART_HEALTH_VALID
@@ -936,6 +943,242 @@ static int override_return_code(int dimm, unsigned int func, int rc)
return rc;
}
+static int nd_intel_test_cmd_security_status(struct nfit_test *t,
+ struct nd_intel_get_security_state *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ nd_cmd->status = 0;
+ nd_cmd->state = sec->state;
+ nd_cmd->extended_state = sec->ext_state;
+ dev_dbg(dev, "security state (%#x) returned\n", nd_cmd->state);
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_unlock_unit(struct nfit_test *t,
+ struct nd_intel_unlock_unit *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ if (!(sec->state & ND_INTEL_SEC_STATE_LOCKED) ||
+ (sec->state & ND_INTEL_SEC_STATE_FROZEN)) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE;
+ dev_dbg(dev, "unlock unit: invalid state: %#x\n",
+ sec->state);
+ } else if (memcmp(nd_cmd->passphrase, sec->passphrase,
+ ND_INTEL_PASSPHRASE_SIZE) != 0) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS;
+ dev_dbg(dev, "unlock unit: invalid passphrase\n");
+ } else {
+ nd_cmd->status = 0;
+ sec->state = ND_INTEL_SEC_STATE_ENABLED;
+ dev_dbg(dev, "Unit unlocked\n");
+ }
+
+ dev_dbg(dev, "unlocking status returned: %#x\n", nd_cmd->status);
+ return 0;
+}
+
+static int nd_intel_test_cmd_set_pass(struct nfit_test *t,
+ struct nd_intel_set_passphrase *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ if (sec->state & ND_INTEL_SEC_STATE_FROZEN) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE;
+ dev_dbg(dev, "set passphrase: wrong security state\n");
+ } else if (memcmp(nd_cmd->old_pass, sec->passphrase,
+ ND_INTEL_PASSPHRASE_SIZE) != 0) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS;
+ dev_dbg(dev, "set passphrase: wrong passphrase\n");
+ } else {
+ memcpy(sec->passphrase, nd_cmd->new_pass,
+ ND_INTEL_PASSPHRASE_SIZE);
+ sec->state |= ND_INTEL_SEC_STATE_ENABLED;
+ nd_cmd->status = 0;
+ dev_dbg(dev, "passphrase updated\n");
+ }
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_freeze_lock(struct nfit_test *t,
+ struct nd_intel_freeze_lock *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED)) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE;
+ dev_dbg(dev, "freeze lock: wrong security state\n");
+ } else {
+ sec->state |= ND_INTEL_SEC_STATE_FROZEN;
+ nd_cmd->status = 0;
+ dev_dbg(dev, "security frozen\n");
+ }
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_disable_pass(struct nfit_test *t,
+ struct nd_intel_disable_passphrase *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) ||
+ (sec->state & ND_INTEL_SEC_STATE_FROZEN)) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE;
+ dev_dbg(dev, "disable passphrase: wrong security state\n");
+ } else if (memcmp(nd_cmd->passphrase, sec->passphrase,
+ ND_INTEL_PASSPHRASE_SIZE) != 0) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS;
+ dev_dbg(dev, "disable passphrase: wrong passphrase\n");
+ } else {
+ memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
+ sec->state = 0;
+ dev_dbg(dev, "disable passphrase: done\n");
+ }
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_secure_erase(struct nfit_test *t,
+ struct nd_intel_secure_erase *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) ||
+ (sec->state & ND_INTEL_SEC_STATE_FROZEN)) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE;
+ dev_dbg(dev, "secure erase: wrong security state\n");
+ } else if (memcmp(nd_cmd->passphrase, sec->passphrase,
+ ND_INTEL_PASSPHRASE_SIZE) != 0) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS;
+ dev_dbg(dev, "secure erase: wrong passphrase\n");
+ } else {
+ memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
+ memset(sec->master_passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
+ sec->state = 0;
+ sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED;
+ dev_dbg(dev, "secure erase: done\n");
+ }
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_overwrite(struct nfit_test *t,
+ struct nd_intel_overwrite *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ if ((sec->state & ND_INTEL_SEC_STATE_ENABLED) &&
+ memcmp(nd_cmd->passphrase, sec->passphrase,
+ ND_INTEL_PASSPHRASE_SIZE) != 0) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS;
+ dev_dbg(dev, "overwrite: wrong passphrase\n");
+ return 0;
+ }
+
+ memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
+ sec->state = ND_INTEL_SEC_STATE_OVERWRITE;
+ dev_dbg(dev, "overwrite progressing.\n");
+ sec->overwrite_end_time = get_jiffies_64() + 5 * HZ;
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_query_overwrite(struct nfit_test *t,
+ struct nd_intel_query_overwrite *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ if (!(sec->state & ND_INTEL_SEC_STATE_OVERWRITE)) {
+ nd_cmd->status = ND_INTEL_STATUS_OQUERY_SEQUENCE_ERR;
+ return 0;
+ }
+
+ if (time_is_before_jiffies64(sec->overwrite_end_time)) {
+ sec->overwrite_end_time = 0;
+ sec->state = 0;
+ sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED;
+ dev_dbg(dev, "overwrite is complete\n");
+ } else
+ nd_cmd->status = ND_INTEL_STATUS_OQUERY_INPROGRESS;
+ return 0;
+}
+
+static int nd_intel_test_cmd_master_set_pass(struct nfit_test *t,
+ struct nd_intel_set_master_passphrase *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ if (!(sec->ext_state & ND_INTEL_SEC_ESTATE_ENABLED)) {
+ nd_cmd->status = ND_INTEL_STATUS_NOT_SUPPORTED;
+ dev_dbg(dev, "master set passphrase: in wrong state\n");
+ } else if (sec->ext_state & ND_INTEL_SEC_ESTATE_PLIMIT) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE;
+ dev_dbg(dev, "master set passphrase: in wrong security state\n");
+ } else if (memcmp(nd_cmd->old_pass, sec->master_passphrase,
+ ND_INTEL_PASSPHRASE_SIZE) != 0) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS;
+ dev_dbg(dev, "master set passphrase: wrong passphrase\n");
+ } else {
+ memcpy(sec->master_passphrase, nd_cmd->new_pass,
+ ND_INTEL_PASSPHRASE_SIZE);
+ sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED;
+ dev_dbg(dev, "master passphrase: updated\n");
+ }
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_master_secure_erase(struct nfit_test *t,
+ struct nd_intel_master_secure_erase *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ if (!(sec->ext_state & ND_INTEL_SEC_ESTATE_ENABLED)) {
+ nd_cmd->status = ND_INTEL_STATUS_NOT_SUPPORTED;
+ dev_dbg(dev, "master secure erase: in wrong state\n");
+ } else if (sec->ext_state & ND_INTEL_SEC_ESTATE_PLIMIT) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE;
+ dev_dbg(dev, "master secure erase: in wrong security state\n");
+ } else if (memcmp(nd_cmd->passphrase, sec->master_passphrase,
+ ND_INTEL_PASSPHRASE_SIZE) != 0) {
+ nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS;
+ dev_dbg(dev, "master secure erase: wrong passphrase\n");
+ } else {
+ /* we do not erase master state passphrase ever */
+ sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED;
+ memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
+ sec->state = 0;
+ dev_dbg(dev, "master secure erase: done\n");
+ }
+
+ return 0;
+}
+
+
static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
{
int i;
@@ -983,6 +1226,46 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
return i;
switch (func) {
+ case NVDIMM_INTEL_GET_SECURITY_STATE:
+ rc = nd_intel_test_cmd_security_status(t,
+ buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_UNLOCK_UNIT:
+ rc = nd_intel_test_cmd_unlock_unit(t,
+ buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_SET_PASSPHRASE:
+ rc = nd_intel_test_cmd_set_pass(t,
+ buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_DISABLE_PASSPHRASE:
+ rc = nd_intel_test_cmd_disable_pass(t,
+ buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_FREEZE_LOCK:
+ rc = nd_intel_test_cmd_freeze_lock(t,
+ buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_SECURE_ERASE:
+ rc = nd_intel_test_cmd_secure_erase(t,
+ buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_OVERWRITE:
+ rc = nd_intel_test_cmd_overwrite(t,
+ buf, buf_len, i - t->dcr_idx);
+ break;
+ case NVDIMM_INTEL_QUERY_OVERWRITE:
+ rc = nd_intel_test_cmd_query_overwrite(t,
+ buf, buf_len, i - t->dcr_idx);
+ break;
+ case NVDIMM_INTEL_SET_MASTER_PASSPHRASE:
+ rc = nd_intel_test_cmd_master_set_pass(t,
+ buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_MASTER_SECURE_ERASE:
+ rc = nd_intel_test_cmd_master_secure_erase(t,
+ buf, buf_len, i);
+ break;
case ND_INTEL_ENABLE_LSS_STATUS:
rc = nd_intel_test_cmd_set_lss_status(t,
buf, buf_len);
@@ -1328,10 +1611,22 @@ static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *
}
static DEVICE_ATTR_RW(fail_cmd_code);
+static ssize_t lock_dimm_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ int dimm = dimm_name_to_id(dev);
+ struct nfit_test_sec *sec = &dimm_sec_info[dimm];
+
+ sec->state = ND_INTEL_SEC_STATE_ENABLED | ND_INTEL_SEC_STATE_LOCKED;
+ return size;
+}
+static DEVICE_ATTR_WO(lock_dimm);
+
static struct attribute *nfit_test_dimm_attributes[] = {
&dev_attr_fail_cmd.attr,
&dev_attr_fail_cmd_code.attr,
&dev_attr_handle.attr,
+ &dev_attr_lock_dimm.attr,
NULL,
};
@@ -1361,6 +1656,17 @@ static int nfit_test_dimm_init(struct nfit_test *t)
return 0;
}
+static void security_init(struct nfit_test *t)
+{
+ int i;
+
+ for (i = 0; i < t->num_dcr; i++) {
+ struct nfit_test_sec *sec = &dimm_sec_info[i];
+
+ sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED;
+ }
+}
+
static void smart_init(struct nfit_test *t)
{
int i;
@@ -1439,6 +1745,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
if (nfit_test_dimm_init(t))
return -ENOMEM;
smart_init(t);
+ security_init(t);
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
@@ -2210,6 +2517,20 @@ static void nfit_test0_setup(struct nfit_test *t)
set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_GET_SECURITY_STATE,
+ &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_SET_PASSPHRASE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_DISABLE_PASSPHRASE,
+ &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_UNLOCK_UNIT, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_FREEZE_LOCK, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_SECURE_ERASE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_OVERWRITE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_QUERY_OVERWRITE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_SET_MASTER_PASSPHRASE,
+ &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_MASTER_SECURE_ERASE,
+ &acpi_desc->dimm_cmd_force_en);
}
static void nfit_test1_setup(struct nfit_test *t)