-rw-r--r--  Documentation/devicetree/bindings/arc/archs-idu-intc.txt | 46
-rw-r--r--  Documentation/devicetree/bindings/arc/archs-intc.txt | 22
-rw-r--r--  Documentation/devicetree/bindings/arc/axs101.txt | 7
-rw-r--r--  Documentation/devicetree/bindings/arc/axs103.txt | 8
-rw-r--r--  Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt | 2
-rw-r--r--  Documentation/filesystems/xfs.txt | 12
-rw-r--r--  Kbuild | 1
-rw-r--r--  MAINTAINERS | 7
-rw-r--r--  arch/arc/Kconfig | 159
-rw-r--r--  arch/arc/Makefile | 17
-rw-r--r--  arch/arc/boot/dts/Makefile | 2
-rw-r--r--  arch/arc/boot/dts/axc001.dtsi | 100
-rw-r--r--  arch/arc/boot/dts/axc003.dtsi | 102
-rw-r--r--  arch/arc/boot/dts/axc003_idu.dtsi | 126
-rw-r--r--  arch/arc/boot/dts/axs101.dts | 21
-rw-r--r--  arch/arc/boot/dts/axs103.dts | 24
-rw-r--r--  arch/arc/boot/dts/axs103_idu.dts | 24
-rw-r--r--  arch/arc/boot/dts/axs10x_mb.dtsi | 224
-rw-r--r--  arch/arc/boot/dts/nsim_700.dts (renamed from arch/arc/boot/dts/angel4.dts) | 2
-rw-r--r--  arch/arc/boot/dts/nsim_hs.dts | 53
-rw-r--r--  arch/arc/boot/dts/nsim_hs_idu.dts | 72
-rw-r--r--  arch/arc/boot/dts/nsimosci_hs.dts | 80
-rw-r--r--  arch/arc/boot/dts/nsimosci_hs_idu.dts | 101
-rw-r--r--  arch/arc/boot/dts/vdk_axc003.dtsi | 61
-rw-r--r--  arch/arc/boot/dts/vdk_axc003_idu.dtsi | 76
-rw-r--r--  arch/arc/boot/dts/vdk_axs10x_mb.dtsi | 93
-rw-r--r--  arch/arc/boot/dts/vdk_hs38.dts | 21
-rw-r--r--  arch/arc/boot/dts/vdk_hs38_smp.dts | 21
-rw-r--r--  arch/arc/configs/axs101_defconfig | 111
-rw-r--r--  arch/arc/configs/axs103_defconfig | 117
-rw-r--r--  arch/arc/configs/axs103_smp_defconfig | 118
-rw-r--r--  arch/arc/configs/nsim_700_defconfig | 7
-rw-r--r--  arch/arc/configs/nsim_hs_defconfig | 64
-rw-r--r--  arch/arc/configs/nsim_hs_smp_defconfig | 63
-rw-r--r--  arch/arc/configs/nsimosci_defconfig | 5
-rw-r--r--  arch/arc/configs/nsimosci_hs_defconfig | 73
-rw-r--r--  arch/arc/configs/nsimosci_hs_smp_defconfig | 93
-rw-r--r--  arch/arc/configs/tb10x_defconfig | 3
-rw-r--r--  arch/arc/configs/vdk_hs38_defconfig | 102
-rw-r--r--  arch/arc/configs/vdk_hs38_smp_defconfig | 104
-rw-r--r--  arch/arc/include/asm/Kbuild | 1
-rw-r--r--  arch/arc/include/asm/arcregs.h | 66
-rw-r--r--  arch/arc/include/asm/atomic.h | 35
-rw-r--r--  arch/arc/include/asm/barrier.h | 48
-rw-r--r--  arch/arc/include/asm/bitops.h | 529
-rw-r--r--  arch/arc/include/asm/cache.h | 18
-rw-r--r--  arch/arc/include/asm/cacheflush.h | 4
-rw-r--r--  arch/arc/include/asm/cmpxchg.h | 26
-rw-r--r--  arch/arc/include/asm/delay.h | 9
-rw-r--r--  arch/arc/include/asm/dma-mapping.h | 31
-rw-r--r--  arch/arc/include/asm/elf.h | 5
-rw-r--r--  arch/arc/include/asm/entry-arcv2.h | 190
-rw-r--r--  arch/arc/include/asm/entry-compact.h | 307
-rw-r--r--  arch/arc/include/asm/entry.h | 378
-rw-r--r--  arch/arc/include/asm/io.h | 42
-rw-r--r--  arch/arc/include/asm/irq.h | 6
-rw-r--r--  arch/arc/include/asm/irqflags-arcv2.h | 124
-rw-r--r--  arch/arc/include/asm/irqflags-compact.h | 183
-rw-r--r--  arch/arc/include/asm/irqflags.h | 168
-rw-r--r--  arch/arc/include/asm/mcip.h | 94
-rw-r--r--  arch/arc/include/asm/mmu.h | 24
-rw-r--r--  arch/arc/include/asm/pgtable.h | 10
-rw-r--r--  arch/arc/include/asm/processor.h | 37
-rw-r--r--  arch/arc/include/asm/ptrace.h | 43
-rw-r--r--  arch/arc/include/asm/spinlock.h | 32
-rw-r--r--  arch/arc/include/asm/thread_info.h | 1
-rw-r--r--  arch/arc/include/asm/uaccess.h | 17
-rw-r--r--  arch/arc/include/uapi/asm/page.h | 2
-rw-r--r--  arch/arc/kernel/Makefile | 6
-rw-r--r--  arch/arc/kernel/asm-offsets.c | 5
-rw-r--r--  arch/arc/kernel/devtree.c | 2
-rw-r--r--  arch/arc/kernel/entry-arcv2.S | 239
-rw-r--r--  arch/arc/kernel/entry-compact.S | 393
-rw-r--r--  arch/arc/kernel/entry.S | 527
-rw-r--r--  arch/arc/kernel/head.S | 4
-rw-r--r--  arch/arc/kernel/intc-arcv2.c | 143
-rw-r--r--  arch/arc/kernel/intc-compact.c | 226
-rw-r--r--  arch/arc/kernel/irq.c | 210
-rw-r--r--  arch/arc/kernel/mcip.c | 341
-rw-r--r--  arch/arc/kernel/perf_event.c | 7
-rw-r--r--  arch/arc/kernel/process.c | 14
-rw-r--r--  arch/arc/kernel/ptrace.c | 92
-rw-r--r--  arch/arc/kernel/setup.c | 63
-rw-r--r--  arch/arc/kernel/signal.c | 62
-rw-r--r--  arch/arc/kernel/smp.c | 24
-rw-r--r--  arch/arc/kernel/stacktrace.c | 18
-rw-r--r--  arch/arc/kernel/time.c | 88
-rw-r--r--  arch/arc/kernel/troubleshoot.c | 33
-rw-r--r--  arch/arc/lib/Makefile | 6
-rw-r--r--  arch/arc/lib/memcmp.S | 30
-rw-r--r--  arch/arc/lib/memcpy-archs.S | 236
-rw-r--r--  arch/arc/lib/memset-archs.S | 93
-rw-r--r--  arch/arc/lib/strcmp-archs.S | 78
-rw-r--r--  arch/arc/mm/Makefile | 2
-rw-r--r--  arch/arc/mm/cache.c (renamed from arch/arc/mm/cache_arc700.c) | 552
-rw-r--r--  arch/arc/mm/dma.c | 24
-rw-r--r--  arch/arc/mm/tlb.c | 60
-rw-r--r--  arch/arc/mm/tlbex.S | 44
-rw-r--r--  arch/arc/plat-arcfpga/Kconfig | 33
-rw-r--r--  arch/arc/plat-arcfpga/include/plat/smp.h | 118
-rw-r--r--  arch/arc/plat-arcfpga/smp.c | 186
-rw-r--r--  arch/arc/plat-axs10x/Kconfig | 46
-rw-r--r--  arch/arc/plat-axs10x/Makefile | 9
-rw-r--r--  arch/arc/plat-axs10x/axs10x.c | 484
-rw-r--r--  arch/arc/plat-sim/Kconfig | 14
-rw-r--r--  arch/arc/plat-sim/Makefile (renamed from arch/arc/plat-arcfpga/Makefile) | 3
-rw-r--r--  arch/arc/plat-sim/platform.c (renamed from arch/arc/plat-arcfpga/platform.c) | 24
-rw-r--r--  arch/arm/boot/dts/armada-370-xp.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/armada-370.dtsi | 8
-rw-r--r--  arch/arm/boot/dts/armada-xp-mv78260.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/armada-xp-mv78460.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/armada-xp.dtsi | 10
-rw-r--r--  arch/arm/include/asm/xen/hypervisor.h | 8
-rw-r--r--  arch/arm/include/asm/xen/page.h | 1
-rw-r--r--  arch/arm/kernel/setup.c | 2
-rw-r--r--  arch/arm/mach-lpc32xx/irq.c | 8
-rw-r--r--  arch/arm/xen/enlighten.c | 62
-rw-r--r--  arch/arm/xen/mm.c | 2
-rw-r--r--  arch/arm/xen/p2m.c | 2
-rw-r--r--  arch/arm64/kernel/setup.c | 2
-rw-r--r--  arch/avr32/mach-at32ap/extint.c | 3
-rw-r--r--  arch/m68k/mac/psc.c | 12
-rw-r--r--  arch/mips/ath25/ar2315.c | 4
-rw-r--r--  arch/mips/ath25/ar5312.c | 4
-rw-r--r--  arch/mips/pci/pci-ar2315.c | 4
-rw-r--r--  arch/mips/ralink/irq.c | 3
-rw-r--r--  arch/mn10300/kernel/irq.c | 6
-rw-r--r--  arch/s390/configs/default_defconfig | 15
-rw-r--r--  arch/s390/configs/gcov_defconfig | 13
-rw-r--r--  arch/s390/configs/performance_defconfig | 16
-rw-r--r--  arch/s390/defconfig | 12
-rw-r--r--  arch/s390/include/asm/cpu.h | 2
-rw-r--r--  arch/s390/include/asm/ipl.h | 1
-rw-r--r--  arch/s390/include/asm/sclp.h | 18
-rw-r--r--  arch/s390/include/asm/smp.h | 2
-rw-r--r--  arch/s390/kernel/base.S | 21
-rw-r--r--  arch/s390/kernel/crash_dump.c | 27
-rw-r--r--  arch/s390/kernel/perf_cpum_sf.c | 2
-rw-r--r--  arch/s390/kernel/setup.c | 5
-rw-r--r--  arch/s390/kernel/smp.c | 150
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 2
-rw-r--r--  arch/tile/Kconfig | 7
-rw-r--r--  arch/tile/include/asm/irq.h | 5
-rw-r--r--  arch/tile/include/asm/processor.h | 2
-rw-r--r--  arch/tile/include/asm/spinlock_32.h | 6
-rw-r--r--  arch/tile/include/asm/spinlock_64.h | 5
-rw-r--r--  arch/tile/include/asm/stack.h | 13
-rw-r--r--  arch/tile/include/asm/thread_info.h | 1
-rw-r--r--  arch/tile/include/asm/traps.h | 8
-rw-r--r--  arch/tile/include/asm/uaccess.h | 66
-rw-r--r--  arch/tile/include/asm/word-at-a-time.h | 36
-rw-r--r--  arch/tile/include/hv/hypervisor.h | 60
-rw-r--r--  arch/tile/kernel/entry.S | 7
-rw-r--r--  arch/tile/kernel/hvglue.S | 3
-rw-r--r--  arch/tile/kernel/hvglue_trace.c | 4
-rw-r--r--  arch/tile/kernel/intvec_64.S | 6
-rw-r--r--  arch/tile/kernel/process.c | 138
-rw-r--r--  arch/tile/kernel/setup.c | 2
-rw-r--r--  arch/tile/kernel/stack.c | 125
-rw-r--r--  arch/tile/kernel/traps.c | 15
-rw-r--r--  arch/tile/kernel/vdso/vgettimeofday.c | 10
-rw-r--r--  arch/tile/lib/exports.c | 3
-rw-r--r--  arch/tile/lib/spinlock_32.c | 11
-rw-r--r--  arch/tile/lib/spinlock_64.c | 11
-rw-r--r--  arch/tile/lib/usercopy_32.S | 46
-rw-r--r--  arch/tile/lib/usercopy_64.S | 46
-rw-r--r--  arch/tile/mm/fault.c | 17
-rw-r--r--  arch/um/drivers/hostaudio_kern.c | 20
-rw-r--r--  arch/x86/configs/xen.config | 28
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 2
-rw-r--r--  arch/x86/kvm/mmu_audit.c | 2
-rw-r--r--  arch/x86/platform/uv/uv_nmi.c | 2
-rw-r--r--  block/scsi_ioctl.c | 4
-rw-r--r--  drivers/acpi/resource.c | 4
-rw-r--r--  drivers/base/property.c | 26
-rw-r--r--  drivers/block/null_blk.c | 4
-rw-r--r--  drivers/block/nvme-core.c | 138
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 23
-rw-r--r--  drivers/block/xen-blkback/common.h | 6
-rw-r--r--  drivers/block/xen-blkback/xenbus.c | 167
-rw-r--r--  drivers/block/xen-blkfront.c | 157
-rw-r--r--  drivers/char/ipmi/ipmi_watchdog.c | 6
-rw-r--r--  drivers/clocksource/exynos_mct.c | 43
-rw-r--r--  drivers/cpuidle/cpuidle-powernv.c | 15
-rw-r--r--  drivers/crypto/qat/qat_common/adf_accel_engine.c | 5
-rw-r--r--  drivers/crypto/qat/qat_common/adf_transport.c | 2
-rw-r--r--  drivers/dma/dmatest.c | 4
-rw-r--r--  drivers/gpio/gpio-bcm-kona.c | 5
-rw-r--r--  drivers/gpio/gpio-dwapb.c | 4
-rw-r--r--  drivers/gpio/gpio-msic.c | 3
-rw-r--r--  drivers/gpio/gpiolib.c | 4
-rw-r--r--  drivers/ide/ide.c | 2
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 4
-rw-r--r--  drivers/input/misc/ati_remote2.c | 4
-rw-r--r--  drivers/input/mouse/psmouse-base.c | 2
-rw-r--r--  drivers/iommu/amd_iommu.c | 26
-rw-r--r--  drivers/iommu/arm-smmu-v3.c | 3
-rw-r--r--  drivers/iommu/arm-smmu.c | 2
-rw-r--r--  drivers/iommu/iommu.c | 13
-rw-r--r--  drivers/leds/leds-gpio.c | 2
-rw-r--r--  drivers/mfd/asic3.c | 3
-rw-r--r--  drivers/misc/lis3lv02d/lis3lv02d.c | 2
-rw-r--r--  drivers/mtd/ubi/block.c | 2
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 2
-rw-r--r--  drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 12
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 32
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 12
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 58
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 43
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 60
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/genet/bcmgenet.h | 1
-rw-r--r--  drivers/net/ethernet/broadcom/genet/bcmmii.c | 54
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 5
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_device.c | 11
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_droq.c | 4
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/request_manager.c | 3
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic_main.c | 4
-rw-r--r--  drivers/net/ethernet/cisco/enic/vnic_rq.h | 91
-rw-r--r--  drivers/net/ethernet/freescale/Kconfig | 4
-rw-r--r--  drivers/net/ethernet/freescale/fec.h | 2
-rw-r--r--  drivers/net/ethernet/freescale/fec_main.c | 30
-rw-r--r--  drivers/net/ethernet/icplus/ipg.c | 2
-rw-r--r--  drivers/net/ethernet/icplus/ipg.h | 2
-rw-r--r--  drivers/net/ethernet/intel/e1000e/ich8lan.c | 126
-rw-r--r--  drivers/net/ethernet/intel/e1000e/netdev.c | 38
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 4
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf.h | 1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 6
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_main.c | 110
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_82575.c | 12
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_defines.h | 3
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c | 2
-rw-r--r--  drivers/net/ethernet/marvell/mvneta.c | 27
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_rx.c | 17
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_tx.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/intf.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2
-rw-r--r--  drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 6
-rw-r--r--  drivers/net/ethernet/renesas/ravb_ptp.c | 4
-rw-r--r--  drivers/net/ethernet/sis/sis900.h | 4
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/mmc_core.c | 4
-rw-r--r--  drivers/net/ethernet/via/Kconfig | 2
-rw-r--r--  drivers/net/phy/bcm7xxx.c | 7
-rw-r--r--  drivers/net/phy/mdio-bcm-unimac.c | 43
-rw-r--r--  drivers/net/phy/phy_device.c | 25
-rw-r--r--  drivers/net/phy/vitesse.c | 14
-rw-r--r--  drivers/net/vmxnet3/vmxnet3_int.h | 4
-rw-r--r--  drivers/net/wireless/ath/wil6210/main.c | 4
-rw-r--r--  drivers/net/wireless/libertas_tf/if_usb.c | 6
-rw-r--r--  drivers/net/xen-netback/netback.c | 2
-rw-r--r--  drivers/net/xen-netfront.c | 8
-rw-r--r--  drivers/pci/host/pci-keystone.c | 7
-rw-r--r--  drivers/pci/xen-pcifront.c | 8
-rw-r--r--  drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 3
-rw-r--r--  drivers/pinctrl/pinctrl-adi2.c | 4
-rw-r--r--  drivers/pinctrl/pinctrl-st.c | 4
-rw-r--r--  drivers/pinctrl/samsung/pinctrl-exynos.c | 4
-rw-r--r--  drivers/pinctrl/samsung/pinctrl-s3c24xx.c | 3
-rw-r--r--  drivers/pinctrl/samsung/pinctrl-s3c64xx.c | 8
-rw-r--r--  drivers/pinctrl/sunxi/pinctrl-sunxi.c | 6
-rw-r--r--  drivers/power/test_power.c | 16
-rw-r--r--  drivers/s390/char/sclp_cmd.c | 23
-rw-r--r--  drivers/s390/char/sclp_early.c | 10
-rw-r--r--  drivers/s390/char/zcore.c | 2
-rw-r--r--  drivers/s390/crypto/ap_bus.c | 424
-rw-r--r--  drivers/s390/crypto/ap_bus.h | 13
-rw-r--r--  drivers/s390/crypto/zcrypt_cex4.c | 2
-rw-r--r--  drivers/sh/intc/core.c | 5
-rw-r--r--  drivers/sh/intc/virq.c | 3
-rw-r--r--  drivers/thermal/intel_powerclamp.c | 4
-rw-r--r--  drivers/tty/hvc/hvc_iucv.c | 2
-rw-r--r--  drivers/tty/hvc/hvc_tile.c | 3
-rw-r--r--  drivers/tty/hvc/hvc_xen.c | 2
-rw-r--r--  drivers/tty/sysrq.c | 2
-rw-r--r--  drivers/usb/atm/ueagle-atm.c | 4
-rw-r--r--  drivers/video/fbdev/uvesafb.c | 2
-rw-r--r--  drivers/video/fbdev/vt8623fb.c | 4
-rw-r--r--  drivers/virtio/virtio_mmio.c | 2
-rw-r--r--  drivers/xen/events/events_base.c | 2
-rw-r--r--  drivers/xen/events/events_fifo.c | 2
-rw-r--r--  drivers/xen/gntdev.c | 2
-rw-r--r--  drivers/xen/grant-table.c | 1
-rw-r--r--  drivers/xen/manage.c | 2
-rw-r--r--  drivers/xen/tmem.c | 4
-rw-r--r--  drivers/xen/xenbus/xenbus_client.c | 8
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c | 2
-rw-r--r--  fs/block_dev.c | 2
-rw-r--r--  fs/btrfs/async-thread.c | 1
-rw-r--r--  fs/btrfs/async-thread.h | 2
-rw-r--r--  fs/btrfs/backref.c | 59
-rw-r--r--  fs/btrfs/ctree.c | 16
-rw-r--r--  fs/btrfs/ctree.h | 28
-rw-r--r--  fs/btrfs/delayed-ref.c | 372
-rw-r--r--  fs/btrfs/delayed-ref.h | 29
-rw-r--r--  fs/btrfs/dev-replace.c | 7
-rw-r--r--  fs/btrfs/disk-io.c | 56
-rw-r--r--  fs/btrfs/extent-tree.c | 308
-rw-r--r--  fs/btrfs/extent-tree.h | 0
-rw-r--r--  fs/btrfs/extent_io.c | 9
-rw-r--r--  fs/btrfs/file.c | 9
-rw-r--r--  fs/btrfs/free-space-cache.c | 14
-rw-r--r--  fs/btrfs/inode.c | 26
-rw-r--r--  fs/btrfs/ioctl.c | 50
-rw-r--r--  fs/btrfs/ordered-data.c | 37
-rw-r--r--  fs/btrfs/ordered-data.h | 6
-rw-r--r--  fs/btrfs/qgroup.c | 1052
-rw-r--r--  fs/btrfs/qgroup.h | 61
-rw-r--r--  fs/btrfs/relocation.c | 19
-rw-r--r--  fs/btrfs/scrub.c | 26
-rw-r--r--  fs/btrfs/send.c | 147
-rw-r--r--  fs/btrfs/super.c | 397
-rw-r--r--  fs/btrfs/sysfs.c | 148
-rw-r--r--  fs/btrfs/sysfs.h | 8
-rw-r--r--  fs/btrfs/tests/qgroup-tests.c | 109
-rw-r--r--  fs/btrfs/transaction.c | 79
-rw-r--r--  fs/btrfs/transaction.h | 24
-rw-r--r--  fs/btrfs/tree-defrag.c | 3
-rw-r--r--  fs/btrfs/tree-log.c | 6
-rw-r--r--  fs/btrfs/ulist.c | 47
-rw-r--r--  fs/btrfs/ulist.h | 1
-rw-r--r--  fs/btrfs/volumes.c | 186
-rw-r--r--  fs/btrfs/volumes.h | 9
-rw-r--r--  fs/cifs/Kconfig | 9
-rw-r--r--  fs/cifs/cifsglob.h | 13
-rw-r--r--  fs/cifs/cifspdu.h | 12
-rw-r--r--  fs/cifs/cifssmb.c | 5
-rw-r--r--  fs/cifs/connect.c | 13
-rw-r--r--  fs/cifs/ioctl.c | 27
-rw-r--r--  fs/cifs/smb2ops.c | 180
-rw-r--r--  fs/cifs/smb2pdu.c | 67
-rw-r--r--  fs/cifs/smb2pdu.h | 81
-rw-r--r--  fs/cifs/smbfsctl.h | 3
-rw-r--r--  fs/dax.c | 34
-rw-r--r--  fs/ext2/file.c | 4
-rw-r--r--  fs/ext4/file.c | 16
-rw-r--r--  fs/ext4/inode.c | 21
-rw-r--r--  fs/nfs/super.c | 2
-rw-r--r--  fs/seq_file.c | 1
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.c | 281
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.h | 10
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c | 25
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c | 29
-rw-r--r--  fs/xfs/libxfs/xfs_format.h | 65
-rw-r--r--  fs/xfs/libxfs/xfs_fs.h | 1
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c | 542
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.h | 15
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc_btree.c | 93
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc_btree.h | 10
-rw-r--r--  fs/xfs/libxfs/xfs_inode_buf.c | 8
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c | 34
-rw-r--r--  fs/xfs/libxfs/xfs_shared.h | 6
-rw-r--r--  fs/xfs/libxfs/xfs_trans_resv.h | 4
-rw-r--r--  fs/xfs/libxfs/xfs_trans_space.h | 2
-rw-r--r--  fs/xfs/xfs_aops.c | 158
-rw-r--r--  fs/xfs/xfs_aops.h | 7
-rw-r--r--  fs/xfs/xfs_attr_inactive.c | 16
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 89
-rw-r--r--  fs/xfs/xfs_buf.c | 6
-rw-r--r--  fs/xfs/xfs_buf.h | 2
-rw-r--r--  fs/xfs/xfs_dquot.c | 8
-rw-r--r--  fs/xfs/xfs_error.c | 4
-rw-r--r--  fs/xfs/xfs_error.h | 4
-rw-r--r--  fs/xfs/xfs_extfree_item.c | 2
-rw-r--r--  fs/xfs/xfs_file.c | 166
-rw-r--r--  fs/xfs/xfs_filestream.c | 3
-rw-r--r--  fs/xfs/xfs_fsops.c | 10
-rw-r--r--  fs/xfs/xfs_inode.c | 204
-rw-r--r--  fs/xfs/xfs_ioctl.c | 14
-rw-r--r--  fs/xfs/xfs_iomap.c | 18
-rw-r--r--  fs/xfs/xfs_iops.c | 48
-rw-r--r--  fs/xfs/xfs_itable.c | 13
-rw-r--r--  fs/xfs/xfs_linux.h | 14
-rw-r--r--  fs/xfs/xfs_log.c | 51
-rw-r--r--  fs/xfs/xfs_log.h | 13
-rw-r--r--  fs/xfs/xfs_log_cil.c | 12
-rw-r--r--  fs/xfs/xfs_log_priv.h | 2
-rw-r--r--  fs/xfs/xfs_log_recover.c | 97
-rw-r--r--  fs/xfs/xfs_mount.c | 16
-rw-r--r--  fs/xfs/xfs_mount.h | 4
-rw-r--r--  fs/xfs/xfs_pnfs.c | 4
-rw-r--r--  fs/xfs/xfs_qm.c | 7
-rw-r--r--  fs/xfs/xfs_qm_syscalls.c | 20
-rw-r--r--  fs/xfs/xfs_quota.h | 1
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 16
-rw-r--r--  fs/xfs/xfs_super.c | 25
-rw-r--r--  fs/xfs/xfs_symlink.c | 19
-rw-r--r--  fs/xfs/xfs_trace.h | 47
-rw-r--r--  fs/xfs/xfs_trans.c | 91
-rw-r--r--  fs/xfs/xfs_trans.h | 7
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 6
-rw-r--r--  fs/xfs/xfs_trans_dquot.c | 32
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 2
-rw-r--r--  include/acpi/acpi_bus.h | 2
-rw-r--r--  include/acpi/video.h | 2
-rw-r--r--  include/asm-generic/barrier.h | 28
-rw-r--r--  include/linux/acpi.h | 4
-rw-r--r--  include/linux/compiler.h | 15
-rw-r--r--  include/linux/fs.h | 9
-rw-r--r--  include/linux/irq.h | 1
-rw-r--r--  include/linux/irqdesc.h | 49
-rw-r--r--  include/linux/irqnr.h | 6
-rw-r--r--  include/linux/kernel.h | 18
-rw-r--r--  include/linux/module.h | 46
-rw-r--r--  include/linux/moduleparam.h | 99
-rw-r--r--  include/linux/of.h | 4
-rw-r--r--  include/linux/rbtree.h | 16
-rw-r--r--  include/linux/rbtree_augmented.h | 21
-rw-r--r--  include/linux/rbtree_latch.h | 212
-rw-r--r--  include/linux/rcupdate.h | 15
-rw-r--r--  include/linux/seqlock.h | 81
-rw-r--r--  include/net/ax25.h | 16
-rw-r--r--  include/net/sock.h | 2
-rw-r--r--  include/trace/events/btrfs.h | 55
-rw-r--r--  include/uapi/linux/in.h | 16
-rw-r--r--  include/uapi/linux/libc-compat.h | 22
-rw-r--r--  init/Kconfig | 29
-rw-r--r--  kernel/configs/xen.config | 48
-rw-r--r--  kernel/jump_label.c | 10
-rw-r--r--  kernel/module.c | 309
-rw-r--r--  kernel/params.c | 116
-rw-r--r--  kernel/power/Kconfig | 2
-rw-r--r--  kernel/power/hibernate.c | 4
-rw-r--r--  kernel/time/Makefile | 2
-rw-r--r--  kernel/time/timekeeping.c | 29
-rw-r--r--  kernel/time/timer.c | 3
-rw-r--r--  kernel/workqueue.c | 7
-rw-r--r--  lib/bug.c | 7
-rw-r--r--  lib/kobject.c | 1
-rw-r--r--  lib/rbtree.c | 76
-rw-r--r--  mm/slab_common.c | 2
-rw-r--r--  net/ax25/af_ax25.c | 30
-rw-r--r--  net/ax25/ax25_in.c | 2
-rw-r--r--  net/core/flow_dissector.c | 2
-rw-r--r--  net/core/sock.c | 3
-rw-r--r--  net/dsa/slave.c | 2
-rw-r--r--  net/ipv4/fib_semantics.c | 4
-rw-r--r--  net/mac80211/rate.c | 4
-rw-r--r--  net/sched/cls_flower.c | 4
-rw-r--r--  net/sctp/output.c | 4
-rw-r--r--  net/sctp/socket.c | 6
-rw-r--r--  net/sunrpc/auth.c | 2
-rw-r--r--  net/sunrpc/xprtsock.c | 6
-rw-r--r--  net/tipc/bcast.c | 5
-rw-r--r--  net/tipc/link.c | 2
-rw-r--r--  net/tipc/link.h | 1
-rw-r--r--  scripts/kconfig/Makefile | 7
-rw-r--r--  scripts/sortextable.c | 5
-rw-r--r--  security/apparmor/lsm.c | 6
-rw-r--r--  security/integrity/ima/ima_crypto.c | 2
-rw-r--r--  sound/core/ctljack.c | 5
-rw-r--r--  sound/core/init.c | 3
-rw-r--r--  sound/pci/hda/hda_intel.c | 6
-rw-r--r--  sound/pci/hda/patch_hdmi.c | 2
-rw-r--r--  sound/pci/hda/patch_realtek.c | 44
-rw-r--r--  sound/pci/hda/patch_via.c | 8
458 files changed, 13810 insertions, 6643 deletions
diff --git a/Documentation/devicetree/bindings/arc/archs-idu-intc.txt b/Documentation/devicetree/bindings/arc/archs-idu-intc.txt
new file mode 100644
index 000000000000..0dcb7c7d3e40
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/archs-idu-intc.txt
@@ -0,0 +1,46 @@
+* ARC-HS Interrupt Distribution Unit
+
+ This optional 2nd-level interrupt controller can be used in SMP configurations
+ for dynamic IRQ routing and load balancing of common/external IRQs towards the
+ core intc.
+
+Properties:
+
+- compatible: "snps,archs-idu-intc"
+- interrupt-controller: This is an interrupt controller.
+- interrupt-parent: <reference to parent core intc>
+- #interrupt-cells: Must be <2>.
+- interrupts: <...> specifies the upstream core irqs
+
+ First cell specifies the "common" IRQ from peripheral to IDU
+ Second cell specifies the irq distribution mode to cores
+ 0=Round Robin; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+
+ The intc is accessed via the special ARC AUX register interface, hence the "reg"
+ property is not specified.
+
+Example:
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&core_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ /* upstream core irqs: downstream these are "COMMON" irq 0,1.. */
+ interrupts = <24 25 26 27 28 29 30 31>;
+ };
+
+ some_device: serial@c0fc1000 {
+ interrupt-parent = <&idu_intc>;
+ interrupts = <0 0>; /* upstream idu IRQ #24, Round Robin */
+ };
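
Not part of the patch — a hypothetical sketch based on the binding above: using
the 1/2/4/8 mask in the second cell, a device can also pin its IRQ to a single
core instead of Round Robin distribution (node name and unit address here are
made up; only the specifier encoding follows the binding):

	other_device: spi@c0fc2000 {
		interrupt-parent = <&idu_intc>;
		interrupts = <1 2>;	/* upstream idu IRQ #25, fixed to cpu1 */
	};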
diff --git a/Documentation/devicetree/bindings/arc/archs-intc.txt b/Documentation/devicetree/bindings/arc/archs-intc.txt
new file mode 100644
index 000000000000..69f326d6a5ad
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/archs-intc.txt
@@ -0,0 +1,22 @@
+* ARC-HS in-core Interrupt Controller (provided by cores implementing the ARCv2 ISA)
+
+Properties:
+
+- compatible: "snps,archs-intc"
+- interrupt-controller: This is an interrupt controller.
+- #interrupt-cells: Must be <1>.
+
+ The single-cell "interrupts" property of a device specifies the IRQ number,
+ between 16 and 256
+
+ The intc is accessed via the special ARC AUX register interface, hence the "reg"
+ property is not specified.
+
+Example:
+
+ intc: interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupts = <16 17 18 19 20 21 22 23 24 25>;
+ };
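
Not part of the patch — a hypothetical consumer sketch: a peripheral references
this intc with a single-cell specifier, e.g. the ARC UART used elsewhere in this
series:

	uart0: serial@c0fc1000 {
		compatible = "snps,arc-uart";
		interrupt-parent = <&intc>;
		interrupts = <24>;
	};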
diff --git a/Documentation/devicetree/bindings/arc/axs101.txt b/Documentation/devicetree/bindings/arc/axs101.txt
new file mode 100644
index 000000000000..48290d5178b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/axs101.txt
@@ -0,0 +1,7 @@
+Synopsys DesignWare ARC Software Development Platforms Device Tree Bindings
+---------------------------------------------------------------------------
+
+SDP Main Board with an AXC001 CPU Card hosting an ARC700 core in silicon
+
+Required root node properties:
+ - compatible = "snps,axs101", "snps,arc-sdp";
diff --git a/Documentation/devicetree/bindings/arc/axs103.txt b/Documentation/devicetree/bindings/arc/axs103.txt
new file mode 100644
index 000000000000..6eea862e72b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/axs103.txt
@@ -0,0 +1,8 @@
+Synopsys DesignWare ARC Software Development Platforms Device Tree Bindings
+---------------------------------------------------------------------------
+
+SDP Main Board with an AXC003 FPGA Card which can contain various flavours of
+HS38x cores.
+
+Required root node properties:
+ - compatible = "snps,axs103", "snps,arc-sdp";
diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
index 750d577e8083..f5a8ca29aff0 100644
--- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
+++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
@@ -1,7 +1,7 @@
* Marvell Armada 370 / Armada XP Ethernet Controller (NETA)
Required properties:
-- compatible: should be "marvell,armada-370-neta".
+- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta".
- reg: address and length of the register set for the device.
- interrupts: interrupt for the device
- phy: See ethernet.txt file in the same directory.
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 5a5a05582b58..8146e9fd5ffc 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -236,10 +236,10 @@ Removed Mount Options
Name Removed
---- -------
- delaylog/nodelaylog v3.20
- ihashsize v3.20
- irixsgid v3.20
- osyncisdsync/osyncisosync v3.20
+ delaylog/nodelaylog v4.0
+ ihashsize v4.0
+ irixsgid v4.0
+ osyncisdsync/osyncisosync v4.0
sysctls
@@ -346,5 +346,5 @@ Removed Sysctls
Name Removed
---- -------
- fs.xfs.xfsbufd_centisec v3.20
- fs.xfs.age_buffer_centisecs v3.20
+ fs.xfs.xfsbufd_centisec v4.0
+ fs.xfs.age_buffer_centisecs v4.0
diff --git a/Kbuild b/Kbuild
index df99a5f53beb..f55cefd9bf29 100644
--- a/Kbuild
+++ b/Kbuild
@@ -52,7 +52,6 @@ $(obj)/$(bounds-file): kernel/bounds.s FORCE
timeconst-file := include/generated/timeconst.h
-#always += $(timeconst-file)
targets += $(timeconst-file)
quiet_cmd_gentimeconst = GEN $@
diff --git a/MAINTAINERS b/MAINTAINERS
index af61ea8d2162..058b0fbc52ff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9828,6 +9828,13 @@ F: arch/arc/
F: Documentation/devicetree/bindings/arc/
F: drivers/tty/serial/arc_uart.c
+SYNOPSYS ARC SDP platform support
+M: Alexey Brodkin <abrodkin@synopsys.com>
+S: Supported
+F: arch/arc/plat-axs10x
+F: arch/arc/boot/dts/ax*
+F: Documentation/devicetree/bindings/arc/axs10*
+
SYSTEM CONFIGURATION (SYSCON)
M: Lee Jones <lee.jones@linaro.org>
M: Arnd Bergmann <arnd@arndb.de>
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index df94ac1f75b6..e7cee0a5c56d 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -81,17 +81,37 @@ menu "ARC Architecture Configuration"
menu "ARC Platform/SoC/Board"
-source "arch/arc/plat-arcfpga/Kconfig"
+source "arch/arc/plat-sim/Kconfig"
source "arch/arc/plat-tb10x/Kconfig"
+source "arch/arc/plat-axs10x/Kconfig"
#New platform adds here
endmenu
+choice
+ prompt "ARC Instruction Set"
+ default ISA_ARCOMPACT
+
+config ISA_ARCOMPACT
+ bool "ARCompact ISA"
+ help
+ The original ARC ISA of ARC600/700 cores
+
+config ISA_ARCV2
+ bool "ARC ISA v2"
+ help
+ ISA for the Next Generation ARC-HS cores
+
+endchoice
+
menu "ARC CPU Configuration"
choice
prompt "ARC Core"
- default ARC_CPU_770
+ default ARC_CPU_770 if ISA_ARCOMPACT
+ default ARC_CPU_HS if ISA_ARCV2
+
+if ISA_ARCOMPACT
config ARC_CPU_750D
bool "ARC750D"
@@ -100,7 +120,7 @@ config ARC_CPU_750D
config ARC_CPU_770
bool "ARC770"
- select ARC_CPU_REL_4_10
+ select ARC_HAS_SWAPE
help
Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
This core has a bunch of cool new features:
@@ -109,6 +129,27 @@ config ARC_CPU_770
-Caches: New Prog Model, Region Flush
-Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
+endif #ISA_ARCOMPACT
+
+config ARC_CPU_HS
+ bool "ARC-HS"
+ depends on ISA_ARCV2
+ help
+ Support for ARC HS38x Cores based on ARCv2 ISA
+ The notable features are:
+	  - SMP configurations of up to 4 cores with coherency
+	  - Optional L2 Cache and IO-Coherency
+	  - Revised Interrupt Architecture (multiple priorities, reg banks,
+ auto stack switch, auto regfile save/restore)
+ - MMUv4 (PIPT dcache, Huge Pages)
+ - Instructions for
+ * 64bit load/store: LDD, STD
+ * Hardware assisted divide/remainder: DIV, REM
+ * Function prologue/epilogue: ENTER_S, LEAVE_S
+ * IRQ enable/disable: CLRI, SETI
+ * pop count: FFS, FLS
+ * SETcc, BMSKN, XBFU...
+
endchoice
config CPU_BIG_ENDIAN
@@ -117,17 +158,13 @@ config CPU_BIG_ENDIAN
help
Build kernel for Big Endian Mode of ARC CPU
-# If a platform can't work with 0x8000_0000 based dma_addr_t
-config ARC_PLAT_NEEDS_CPU_TO_DMA
- bool
-
config SMP
- bool "Symmetric Multi-Processing (Incomplete)"
+ bool "Symmetric Multi-Processing"
default n
+ select ARC_HAS_COH_CACHES if ISA_ARCV2
+ select ARC_MCIP if ISA_ARCV2
help
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, say N. If you have a system with more
- than one CPU, say Y.
+ This enables support for systems with more than one CPU.
if SMP
@@ -137,13 +174,20 @@ config ARC_HAS_COH_CACHES
config ARC_HAS_REENTRANT_IRQ_LV2
def_bool n
-endif
+config ARC_MCIP
+	bool "ARConnect Multicore IP (MCIP) Support"
+ depends on ISA_ARCV2
+ help
+ This IP block enables SMP in ARC-HS38 cores.
+	  It provides for cross-core interrupts, multi-core debug,
+	  hardware semaphores, shared memory, ...
config NR_CPUS
int "Maximum number of CPUs (2-4096)"
range 2 4096
- depends on SMP
- default "2"
+ default "4"
+
+endif #SMP
menuconfig ARC_CACHE
bool "Enable Cache Support"
@@ -185,7 +229,7 @@ config ARC_CACHE_PAGES
config ARC_CACHE_VIPT_ALIASING
bool "Support VIPT Aliasing D$"
- depends on ARC_HAS_DCACHE
+ depends on ARC_HAS_DCACHE && ISA_ARCOMPACT
default n
endif #ARC_CACHE
@@ -226,9 +270,10 @@ config ARC_HAS_HW_MPY
	  Multiplier. Otherwise a software multiply library is used
choice
- prompt "ARC700 MMU Version"
+ prompt "MMU Version"
default ARC_MMU_V3 if ARC_CPU_770
default ARC_MMU_V2 if ARC_CPU_750D
+ default ARC_MMU_V4 if ARC_CPU_HS
config ARC_MMU_V1
bool "MMU v1"
@@ -249,6 +294,10 @@ config ARC_MMU_V3
Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
Shared Address Spaces (SASID)
+config ARC_MMU_V4
+ bool "MMU v4"
+ depends on ISA_ARCV2
+
endchoice
@@ -271,6 +320,8 @@ config ARC_PAGE_SIZE_4K
endchoice
+if ISA_ARCOMPACT
+
config ARC_COMPACT_IRQ_LEVELS
bool "ARCompact IRQ Priorities: High(2)/Low(1)"
default n
@@ -290,7 +341,7 @@ config ARC_IRQ5_LV2
config ARC_IRQ6_LV2
bool
-endif
+endif #ARC_COMPACT_IRQ_LEVELS
config ARC_FPU_SAVE_RESTORE
bool "Enable FPU state persistence across context switch"
@@ -303,32 +354,53 @@ config ARC_FPU_SAVE_RESTORE
	  based on actual usage of the FPU by a task. Thus our implementation does
	  this for all tasks in the system.
+endif #ISA_ARCOMPACT
+
config ARC_CANT_LLSC
def_bool n
-menuconfig ARC_CPU_REL_4_10
- bool "Enable support for Rel 4.10 features"
- default n
- help
- -ARC770 (and dependent features) enabled
- -ARC750 also shares some of the new features with 770
-
config ARC_HAS_LLSC
bool "Insn: LLOCK/SCOND (efficient atomic ops)"
default y
- depends on ARC_CPU_770 && !ARC_CANT_LLSC
+ depends on !ARC_CPU_750D && !ARC_CANT_LLSC
config ARC_HAS_SWAPE
bool "Insn: SWAPE (endian-swap)"
default y
- depends on ARC_CPU_REL_4_10
-config ARC_HAS_RTSC
- bool "Insn: RTSC (64-bit r/o cycle counter)"
+if ISA_ARCV2
+
+config ARC_HAS_LL64
+ bool "Insn: 64bit LDD/STD"
+ help
+	  Enable gcc to generate 64-bit load/store instructions.
+	  The ISA mandates even/odd registers to allow encoding of two
+	  dest operands with 2 possible source operands.
default y
- depends on ARC_CPU_REL_4_10
+
+config ARC_HAS_RTC
+ bool "Local 64-bit r/o cycle counter"
+ default n
depends on !SMP
+config ARC_HAS_GRTC
+ bool "SMP synchronized 64-bit cycle counter"
+ default y
+ depends on SMP
+
+config ARC_NUMBER_OF_INTERRUPTS
+ int "Number of interrupts"
+ range 8 240
+ default 32
+ help
+	  This defines the number of interrupts on the ARCv2 HS core.
+	  It affects the size of the vector table.
+	  The initial 8 IRQs are fixed (Timer, ICI etc.) and, although configurable
+	  in hardware, it keeps things simple for Linux to assume they are always
+	  present.
+
+endif # ISA_ARCV2
+
endmenu # "ARC CPU Configuration"
config LINUX_LINK_BASE
@@ -354,8 +426,10 @@ config ARC_CURR_IN_REG
config ARC_EMUL_UNALIGNED
bool "Emulate unaligned memory access (userspace only)"
+	default n
select SYSCTL_ARCH_UNALIGN_NO_WARN
select SYSCTL_ARCH_UNALIGN_ALLOW
+ depends on ISA_ARCOMPACT
help
This enables misaligned 16 & 32 bit memory access from user space.
Use ONLY-IF-ABS-NECESSARY as it will be very slow and also can hide
@@ -378,9 +452,10 @@ menuconfig ARC_DBG
bool "ARC debugging"
default y
+if ARC_DBG
+
config ARC_DW2_UNWIND
bool "Enable DWARF specific kernel stack unwind"
- depends on ARC_DBG
default y
select KALLSYMS
help
@@ -394,18 +469,38 @@ config ARC_DW2_UNWIND
config ARC_DBG_TLB_PARANOIA
bool "Paranoia Checks in Low Level TLB Handlers"
- depends on ARC_DBG
default n
config ARC_DBG_TLB_MISS_COUNT
bool "Profile TLB Misses"
default n
select DEBUG_FS
- depends on ARC_DBG
help
Counts number of I and D TLB Misses and exports them via Debugfs
The counters can be cleared via Debugfs as well
+if SMP
+
+config ARC_IPI_DBG
+ bool "Debug Inter Core interrupts"
+ default n
+
+endif
+
+endif
+
+config ARC_UBOOT_SUPPORT
+	bool "Support uboot arg handling"
+	default n
+	help
+	  By default, ARC Linux checks for uboot-provided args as pointers to an
+	  external cmdline or DTB. This however breaks in the absence of uboot,
+	  e.g. when booting from the Metaware debugger directly, as the registers
+	  are not zeroed out on reset by mdb and/or ARCv2-based cores. The bogus
+	  register contents then look like uboot args to the kernel, which chokes.
+	  So only enable uboot arg checking/processing if you are sure that uboot
+	  is in play.
+
config ARC_BUILTIN_DTB_NAME
string "Built in DTB"
help
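
Not part of the patch — for orientation, the new symbols above compose into a
defconfig fragment roughly like this for an SMP HS38 build (values are
illustrative; CONFIG_ARC_MCIP is selected automatically by CONFIG_SMP on
ISA_ARCV2):

CONFIG_ISA_ARCV2=y
CONFIG_ARC_CPU_HS=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_ARC_HAS_GRTC=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"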
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index db72fec0e160..6107062c0111 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -9,12 +9,14 @@
UTS_MACHINE := arc
ifeq ($(CROSS_COMPILE),)
-CROSS_COMPILE := arc-linux-uclibc-
+CROSS_COMPILE := arc-linux-
endif
KBUILD_DEFCONFIG := nsim_700_defconfig
-cflags-y += -mA7 -fno-common -pipe -fno-builtin -D__linux__
+cflags-y += -fno-common -pipe -fno-builtin -D__linux__
+cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7
+cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs
ifdef CONFIG_ARC_CURR_IN_REG
# For a global register definition, make sure it gets passed to every file
@@ -33,7 +35,11 @@ cflags-$(atleast_gcc44) += -fsection-anchors
cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock
cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape
-cflags-$(CONFIG_ARC_HAS_RTSC) += -mrtsc
+
+ifndef CONFIG_ARC_HAS_LL64
+cflags-$(CONFIG_ISA_ARCV2) += -mno-ll64
+endif
+
cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables
# By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
@@ -81,8 +87,9 @@ core-y += arch/arc/
# w/o this dtb won't embed into kernel binary
core-y += arch/arc/boot/dts/
-core-$(CONFIG_ARC_PLAT_FPGA_LEGACY) += arch/arc/plat-arcfpga/
-core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/
+core-$(CONFIG_ARC_PLAT_SIM) += arch/arc/plat-sim/
+core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/
+core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/
drivers-$(CONFIG_OPROFILE) += arch/arc/oprofile/
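
Not part of the patch — with the updated toolchain prefix and the new
defconfigs, a build is invoked roughly as follows (assuming an arc-linux-
toolchain is on PATH):

    make ARCH=arc CROSS_COMPILE=arc-linux- axs103_smp_defconfig
    make ARCH=arc CROSS_COMPILE=arc-linux- -j8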
diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile
index faf240e29ec2..b0e3f19bbd07 100644
--- a/arch/arc/boot/dts/Makefile
+++ b/arch/arc/boot/dts/Makefile
@@ -1,5 +1,5 @@
# Built-in dtb
-builtindtb-y := angel4
+builtindtb-y := nsim_700
ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),"")
builtindtb-y := $(patsubst "%",%,$(CONFIG_ARC_BUILTIN_DTB_NAME))
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
new file mode 100644
index 000000000000..a5e2726a067e
--- /dev/null
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC001 770D/EM6/AS221 CPU card
+ * Note that this file only supports the 770D CPU
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <750000000>; /* 750 MHZ */
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: arc700-intc@cpu {
+ compatible = "snps,arc700-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ /*
+ * this GPIO block ORs all interrupts on CPU card (creg,..)
+ * to uplink only 1 IRQ to ARC core intc
+ */
+ dw-apb-gpio@0x2000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = < 0x2000 0x80 >;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ictl_intc: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <30>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <15>;
+ };
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <33333000>;
+ interrupt-parent = <&ictl_intc>;
+ interrupts = <19 4>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ arcpmu0: pmu {
+ compatible = "snps,arc700-pct";
+ };
+ };
+
+ /*
+ * This INTC is actually connected to DW APB GPIO
+ * which acts as a wire between MB INTC and CPU INTC.
+ * GPIO INTC is configured in platform init code
+ * and here we mimic direct connection from MB INTC to
+ * CPU INTC, thus we set "interrupts = <7>" instead of
+ * "interrupts = <12>"
+ *
+ * This intc actually resides on MB, but we move it here to
+ * avoid duplicating the MB dtsi file given that IRQ from
+ * this intc to cpu intc are different for axs101 and axs103
+ */
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = < 7 >;
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
new file mode 100644
index 000000000000..15c8d6226c9d
--- /dev/null
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card: HS38x UP configuration
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <75000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: archs-intc@cpu {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ /*
+ * this GPIO block ORs all interrupts on CPU card (creg,..)
+ * to uplink only 1 IRQ to ARC core intc
+ */
+ dw-apb-gpio@0x2000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = < 0x2000 0x80 >;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ictl_intc: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <30>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <25>;
+ };
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <33333000>;
+ interrupt-parent = <&ictl_intc>;
+ interrupts = <2 4>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <20>;
+ };
+ };
+
+ /*
+ * This INTC is actually connected to DW APB GPIO
+ * which acts as a wire between MB INTC and CPU INTC.
+ * GPIO INTC is configured in platform init code
+ * and here we mimic direct connection from MB INTC to
+ * CPU INTC, thus we set "interrupts = <7>" instead of
+ * "interrupts = <12>"
+ *
+ * This intc actually resides on MB, but we move it here to
+ * avoid duplicating the MB dtsi file given that IRQ from
+ * this intc to cpu intc are different for axs101 and axs103
+ */
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = < 24 >;
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
new file mode 100644
index 000000000000..199d42820eca
--- /dev/null
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card: HS38x2 (Dual Core) with IDU intc
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <75000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: archs-intc@cpu {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ /*
+ * upstream irqs to core intc - downstream these are
+ * "COMMON" irq 0,1..
+ */
+ interrupts = <24 25>;
+ };
+
+ /*
+ * this GPIO block ORs all interrupts on CPU card (creg,..)
+ * to uplink only 1 IRQ to ARC core intc
+ */
+ dw-apb-gpio@0x2000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = < 0x2000 0x80 >;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ictl_intc: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <30>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&idu_intc>;
+
+ /*
+ * cmn irq 1 -> cpu irq 25
+ * Distribute to cpu0 only
+ */
+ interrupts = <1 1>;
+ };
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <33333000>;
+ interrupt-parent = <&ictl_intc>;
+ interrupts = <2 4>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <20>;
+ };
+ };
+
+ /*
+ * This INTC is actually connected to DW APB GPIO
+ * which acts as a wire between MB INTC and CPU INTC.
+ * GPIO INTC is configured in platform init code
+ * and here we mimic direct connection from MB INTC to
+ * CPU INTC, thus we set "interrupts = <0 1>" instead of
+ * "interrupts = <12>"
+ *
+ * This intc actually resides on MB, but we move it here to
+ * avoid duplicating the MB dtsi file given that IRQ from
+ * this intc to cpu intc are different for axs101 and axs103
+ */
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <0 1>; /* cmn irq 0 -> cpu irq 24
+ distribute to cpu0 only */
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts
new file mode 100644
index 000000000000..3f9b0582e734
--- /dev/null
+++ b/arch/arc/boot/dts/axs101.dts
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * ARC AXS101 S/W development platform
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "axc001.dtsi"
+/include/ "axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs101", "snps,arc-sdp";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+ };
+};
diff --git a/arch/arc/boot/dts/axs103.dts b/arch/arc/boot/dts/axs103.dts
new file mode 100644
index 000000000000..e6d0e31ea299
--- /dev/null
+++ b/arch/arc/boot/dts/axs103.dts
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device Tree for AXS103 SDP with AXS10X Main Board and
+ * AXC003 FPGA Card (with UP bitfile)
+ */
+/dts-v1/;
+
+/include/ "axc003.dtsi"
+/include/ "axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs103", "snps,arc-sdp";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=ttyS3,115200n8 debug print-fatal-signals=1";
+ };
+};
diff --git a/arch/arc/boot/dts/axs103_idu.dts b/arch/arc/boot/dts/axs103_idu.dts
new file mode 100644
index 000000000000..f999fef5a60a
--- /dev/null
+++ b/arch/arc/boot/dts/axs103_idu.dts
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device Tree for AXS103 SDP with AXS10X Main Board and
+ * AXC003 FPGA Card (with SMP bitfile)
+ */
+/dts-v1/;
+
+/include/ "axc003_idu.dtsi"
+/include/ "axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs103", "snps,arc-sdp";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=ttyS3,115200n8 debug print-fatal-signals=1";
+ };
+};
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
new file mode 100644
index 000000000000..f3db32154973
--- /dev/null
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -0,0 +1,224 @@
+/*
+ * Support for peripherals on the AXS10x mainboard
+ *
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+ axs10x_mb {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0xe0000000 0x10000000>;
+ interrupt-parent = <&mb_intc>;
+
+ clocks {
+ i2cclk: i2cclk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ #clock-cells = <0>;
+ };
+
+ apbclk: apbclk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ #clock-cells = <0>;
+ };
+
+ mmcclk: mmcclk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ #clock-cells = <0>;
+ };
+ };
+
+ ethernet@0x18000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dwmac";
+ reg = < 0x18000 0x2000 >;
+ interrupts = < 4 >;
+ interrupt-names = "macirq";
+ phy-mode = "rgmii";
+ snps,pbl = < 32 >;
+ clocks = <&apbclk>;
+ clock-names = "stmmaceth";
+ };
+
+ ehci@0x40000 {
+ compatible = "generic-ehci";
+ reg = < 0x40000 0x100 >;
+ interrupts = < 8 >;
+ };
+
+ ohci@0x60000 {
+ compatible = "generic-ohci";
+ reg = < 0x60000 0x100 >;
+ interrupts = < 8 >;
+ };
+
+ /*
+	 * According to the DW Mobile Storage databook it is required
+	 * to use the "Hold Register" if the card is enumerated in SDR12
+	 * or SDR25 modes.
+ *
+ * Utilization of "Hold Register" is already implemented via
+ * dw_mci_pltfm_prepare_command() which in its turn gets
+ * used through dw_mci_drv_data->prepare_command call-back.
+ * This call-back is used in Altera Socfpga platform and so
+ * we may reuse it saying that we're compatible with their
+ * "altr,socfpga-dw-mshc".
+ *
+	 * Most probably "Hold Register" utilization is a platform-
+	 * independent requirement, which means that a single unified
+	 * "snps,dw-mshc" should be enough for all users of DW MMC once
+ * dw_mci_pltfm_prepare_command() is used in generic platform
+ * code.
+ */
+ mmc@0x15000 {
+ compatible = "altr,socfpga-dw-mshc";
+ reg = < 0x15000 0x400 >;
+ num-slots = < 1 >;
+ fifo-depth = < 16 >;
+ card-detect-delay = < 200 >;
+ clocks = <&apbclk>, <&mmcclk>;
+ clock-names = "biu", "ciu";
+ interrupts = < 7 >;
+ bus-width = < 4 >;
+ };
+
+ uart@0x20000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x20000 0x100>;
+ clock-frequency = <33333333>;
+ interrupts = <17>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ uart@0x21000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x21000 0x100>;
+ clock-frequency = <33333333>;
+ interrupts = <18>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ /* UART muxed with USB data port (ttyS3) */
+ uart@0x22000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x22000 0x100>;
+ clock-frequency = <33333333>;
+ interrupts = <19>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ i2c@0x1d000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x1d000 0x100>;
+ clock-frequency = <400000>;
+ clocks = <&i2cclk>;
+ interrupts = <14>;
+ };
+
+ i2c@0x1e000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x1e000 0x100>;
+ clock-frequency = <400000>;
+ clocks = <&i2cclk>;
+ interrupts = <15>;
+ };
+
+ i2c@0x1f000 {
+ compatible = "snps,designware-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x1f000 0x100>;
+ clock-frequency = <400000>;
+ clocks = <&i2cclk>;
+ interrupts = <16>;
+
+ eeprom@0x54{
+ compatible = "24c01";
+ reg = <0x54>;
+ pagesize = <0x8>;
+ };
+
+ eeprom@0x57{
+ compatible = "24c04";
+ reg = <0x57>;
+ pagesize = <0x8>;
+ };
+ };
+
+ gpio0:gpio@13000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0x13000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio0_banka: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <32>;
+ reg = <0>;
+ };
+
+ gpio0_bankb: gpio-controller@1 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <8>;
+ reg = <1>;
+ };
+
+ gpio0_bankc: gpio-controller@2 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <8>;
+ reg = <2>;
+ };
+ };
+
+ gpio1:gpio@14000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0x14000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio1_banka: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <30>;
+ reg = <0>;
+ };
+
+ gpio1_bankb: gpio-controller@1 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <10>;
+ reg = <1>;
+ };
+
+ gpio1_bankc: gpio-controller@2 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <8>;
+ reg = <2>;
+ };
+ };
+ };
+};
diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/nsim_700.dts
index 3b076fbd8366..105a0017023f 100644
--- a/arch/arc/boot/dts/angel4.dts
+++ b/arch/arc/boot/dts/nsim_700.dts
@@ -10,7 +10,7 @@
/include/ "skeleton.dtsi"
/ {
- compatible = "snps,arc-angel4";
+ compatible = "snps,nsim";
clock-frequency = <80000000>; /* 80 MHZ */
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
new file mode 100644
index 000000000000..911f069e0540
--- /dev/null
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+ compatible = "snps,nsim_hs";
+ interrupt-parent = <&core_intc>;
+
+ chosen {
+ bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+ };
+
+ aliases {
+ serial0 = &arcuart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ arcuart0: serial@c0fc1000 {
+ compatible = "snps,arc-uart";
+ reg = <0xc0fc1000 0x100>;
+ interrupts = <24>;
+ clock-frequency = <80000000>;
+ current-speed = <115200>;
+ status = "okay";
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts
new file mode 100644
index 000000000000..46ab31975612
--- /dev/null
+++ b/arch/arc/boot/dts/nsim_hs_idu.dts
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+ compatible = "snps,nsim_hs";
+ interrupt-parent = <&core_intc>;
+
+ chosen {
+ bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+ };
+
+ aliases {
+ serial0 = &arcuart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&core_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ /*
+ * upstream irqs to core intc - downstream these are
+ * "COMMON" irq 0,1..
+ */
+ interrupts = <24 25 26 27 28 29 30 31>;
+ };
+
+ arcuart0: serial@c0fc1000 {
+ compatible = "snps,arc-uart";
+ reg = <0xc0fc1000 0x100>;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <0 0>;
+ clock-frequency = <80000000>;
+ current-speed = <115200>;
+ status = "okay";
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts
new file mode 100644
index 000000000000..d64a96f8515a
--- /dev/null
+++ b/arch/arc/boot/dts/nsimosci_hs.dts
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+ compatible = "snps,nsimosci_hs";
+ clock-frequency = <20000000>; /* 20 MHZ */
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&core_intc>;
+
+ chosen {
+ /* this is for console on PGU */
+ /* bootargs = "console=tty0 consoleblank=0"; */
+ /* this is for console on serial */
+ bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+ };
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ uart0: serial@f0000000 {
+ compatible = "ns8250";
+ reg = <0xf0000000 0x2000>;
+ interrupts = <24>;
+ clock-frequency = <3686400>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ no-loopback-test = <1>;
+ };
+
+ pgu0: pgu@f9000000 {
+ compatible = "snps,arcpgufb";
+ reg = <0xf9000000 0x400>;
+ };
+
+ ps2: ps2@f9001000 {
+ compatible = "snps,arc_ps2";
+ reg = <0xf9000400 0x14>;
+ interrupts = <27>;
+ interrupt-names = "arc_ps2_irq";
+ };
+
+ eth0: ethernet@f0003000 {
+ compatible = "snps,oscilan";
+ reg = <0xf0003000 0x44>;
+ interrupts = <25>, <26>;
+ interrupt-names = "rx", "tx";
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts
new file mode 100644
index 000000000000..f6bf0ca95a57
--- /dev/null
+++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+ compatible = "snps,nsimosci_hs";
+ clock-frequency = <5000000>; /* 5 MHZ */
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&core_intc>;
+
+ chosen {
+ /* this is for console on serial */
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+ };
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+/* interrupts = <16 17 18 19 20 21 22 23 24 25>; */
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&core_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ /*
+ * upstream irqs to core intc - downstream these are
+ * "COMMON" irq 0,1..
+ */
+ interrupts = <24 25 26 27 28 29 30 31>;
+ };
+
+ uart0: serial@f0000000 {
+ compatible = "ns8250";
+ reg = <0xf0000000 0x2000>;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <0 0>; /* cmn irq 0 -> cpu irq 24
+ RR distribute to all cpus */
+ clock-frequency = <3686400>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ no-loopback-test = <1>;
+ };
+
+ pgu0: pgu@f9000000 {
+ compatible = "snps,arcpgufb";
+ reg = <0xf9000000 0x400>;
+ };
+
+ ps2: ps2@f9000400 {
+ compatible = "snps,arc_ps2";
+ reg = <0xf9000400 0x14>;
+ interrupts = <3 0>;
+ interrupt-parent = <&idu_intc>;
+ interrupt-names = "arc_ps2_irq";
+ };
+
+ eth0: ethernet@f0003000 {
+ compatible = "snps,oscilan";
+ reg = <0xf0003000 0x44>;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <1 2>, <2 2>;
+ interrupt-names = "rx", "tx";
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi
new file mode 100644
index 000000000000..9393fd902f0d
--- /dev/null
+++ b/arch/arc/boot/dts/vdk_axc003.dtsi
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013, 2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card: HS38x UP configuration (VDK version)
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <50000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: archs-intc@cpu {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <2403200>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <19>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ };
+
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = < 18 >;
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
new file mode 100644
index 000000000000..9bee8ed09eb0
--- /dev/null
+++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card:
+ * HS38x2 (Dual Core) with IDU intc (VDK version)
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <50000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: archs-intc@cpu {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ interrupts = <24 25 26 27>;
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <2403200>;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <2 0>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ };
+
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&idu_intc>;
+ interrupts = < 0 0 >;
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
new file mode 100644
index 000000000000..45cd665fca23
--- /dev/null
+++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
@@ -0,0 +1,93 @@
+/*
+ * Support for peripherals on the AXS10x mainboard (VDK version)
+ *
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+ axs10x_mb_vdk {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0xe0000000 0x10000000>;
+ interrupt-parent = <&mb_intc>;
+
+ clocks {
+ apbclk: apbclk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ #clock-cells = <0>;
+ };
+
+ };
+
+ ethernet@0x18000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dwmac";
+ reg = < 0x18000 0x2000 >;
+ interrupts = < 4 >;
+ interrupt-names = "macirq";
+ phy-mode = "rgmii";
+ snps,phy-addr = < 0 >; // VDK model phy address is 0
+ snps,pbl = < 32 >;
+ clocks = <&apbclk>;
+ clock-names = "stmmaceth";
+ };
+
+ ehci@0x40000 {
+ compatible = "generic-ehci";
+ reg = < 0x40000 0x100 >;
+ interrupts = < 8 >;
+ };
+
+ uart@0x20000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x20000 0x100>;
+ clock-frequency = <2403200>;
+ interrupts = <17>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ uart@0x21000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x21000 0x100>;
+ clock-frequency = <2403200>;
+ interrupts = <18>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ uart@0x22000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x22000 0x100>;
+ clock-frequency = <2403200>;
+ interrupts = <19>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+/* PGU output directly sent to virtual LCD screen; hdmi controller not modelled */
+ pgu@0x17000 {
+ compatible = "snps,arcpgufb";
+ reg = <0x17000 0x400>;
+ clock-frequency = <51000000>; /* PGU clock is initiated in the init function */
+ /* interrupts = <5>; PGU interrupts not used, this vector is used for ps2 below */
+ };
+
+/* VDK has additional ps2 keyboard/mouse interface integrated in LCD screen model */
+ ps2: ps2@e0017400 {
+ compatible = "snps,arc_ps2";
+ reg = <0x17400 0x14>;
+ interrupts = <5>;
+ interrupt-names = "arc_ps2_irq";
+ };
+ };
+};
diff --git a/arch/arc/boot/dts/vdk_hs38.dts b/arch/arc/boot/dts/vdk_hs38.dts
new file mode 100644
index 000000000000..5d803dd2de59
--- /dev/null
+++ b/arch/arc/boot/dts/vdk_hs38.dts
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * ARC HS38 Virtual Development Kit (VDK)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "vdk_axc003.dtsi"
+/include/ "vdk_axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs103";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+ };
+};
diff --git a/arch/arc/boot/dts/vdk_hs38_smp.dts b/arch/arc/boot/dts/vdk_hs38_smp.dts
new file mode 100644
index 000000000000..031a5bc79b3e
--- /dev/null
+++ b/arch/arc/boot/dts/vdk_hs38_smp.dts
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * ARC HS38 Virtual Development Kit, SMP version (VDK)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "vdk_axc003_idu.dtsi"
+/include/ "vdk_axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs103";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+ };
+};
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
new file mode 100644
index 000000000000..562dac6a7f78
--- /dev/null
+++ b/arch/arc/configs/axs101_defconfig
@@ -0,0 +1,111 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS101=y
+CONFIG_ARC_CACHE_LINE_SHIFT=5
+CONFIG_ARC_BUILTIN_DTB_NAME="axs101"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+# CONFIG_USB_NET_DRIVERS is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
new file mode 100644
index 000000000000..83a6d8d5cc58
--- /dev/null
+++ b/arch/arc/configs/axs103_defconfig
@@ -0,0 +1,117 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="axs103"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_AXS=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+# CONFIG_USB_NET_DRIVERS is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
new file mode 100644
index 000000000000..f1e1c84e0dda
--- /dev/null
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -0,0 +1,118 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_BUILTIN_DTB_NAME="axs103_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_AXS=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+# CONFIG_USB_NET_DRIVERS is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index ef4d3bc7b6c0..138f9d887957 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
@@ -22,9 +22,8 @@ CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
CONFIG_PREEMPT=y
# CONFIG_COMPACTION is not set
# CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
new file mode 100644
index 000000000000..f761a7c70761
--- /dev/null
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -0,0 +1,64 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_ARC=y
+CONFIG_SERIAL_ARC_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_DEBUG_PREEMPT is not set
+CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
new file mode 100644
index 000000000000..dc6f74f41283
--- /dev/null
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -0,0 +1,63 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ARC_BOARD_ML509=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_ARC=y
+CONFIG_SERIAL_ARC_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index d2ac4e56ba1d..31e1d95764ff 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
@@ -23,8 +23,7 @@ CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
+CONFIG_ARC_PLAT_SIM=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
# CONFIG_COMPACTION is not set
CONFIG_NET=y
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
new file mode 100644
index 000000000000..3fef0a210c56
--- /dev/null
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -0,0 +1,73 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs"
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+CONFIG_NET_OSCI_LAN=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_MOUSE_PS2_ALPS is not set
+# CONFIG_MOUSE_PS2_LOGIPS2PP is not set
+# CONFIG_MOUSE_PS2_SYNAPTICS is not set
+# CONFIG_MOUSE_PS2_TRACKPOINT is not set
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
new file mode 100644
index 000000000000..51784837daae
--- /dev/null
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -0,0 +1,93 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ARC_BOARD_ML509=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_HAS_LL64=y
+# CONFIG_ARC_HAS_RTSC is not set
+CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NET_OSCI_LAN=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SERIO_ARC_PS2=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_ARCPGU_RGB888=y
+CONFIG_ARCPGU_DISPTYPE=0
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FTRACE=y
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index 6be6492442d6..3b4dc9cebcf1 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="tb10x"
CONFIG_SYSVIPC=y
@@ -26,7 +26,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLOCK is not set
CONFIG_ARC_PLAT_TB10X=y
CONFIG_ARC_CACHE_LINE_SHIFT=5
-# CONFIG_ARC_HAS_RTSC is not set
CONFIG_ARC_STACK_NONEXEC=y
CONFIG_HZ=250
CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig
new file mode 100644
index 000000000000..ef35ef3923dd
--- /dev/null
+++ b/arch/arc/configs/vdk_hs38_defconfig
@@ -0,0 +1,102 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_UBOOT_SUPPORT=y
+CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38"
+CONFIG_PREEMPT=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_SLRAM=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_ARCPGU_RGB888=y
+CONFIG_ARCPGU_DISPTYPE=0
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_SERIAL=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
new file mode 100644
index 000000000000..634509e5e572
--- /dev/null
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -0,0 +1,104 @@
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+# CONFIG_ARC_HAS_GRTC is not set
+CONFIG_ARC_UBOOT_SUPPORT=y
+CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp"
+CONFIG_PREEMPT=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_SLRAM=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_ARCPGU_RGB888=y
+CONFIG_ARCPGU_DISPTYPE=0
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_SERIAL=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 769b312c1abb..1a80cc91a03b 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,5 +1,4 @@
generic-y += auxvec.h
-generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += clkdev.h
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index e2b1b1211b0d..070f58827a5c 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -16,6 +16,8 @@
#define ARC_REG_PERIBASE_BCR 0x69
#define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */
#define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */
+#define ARC_REG_FP_V2_BCR 0xc8 /* ARCv2 FPU */
+#define ARC_REG_SLC_BCR 0xce
#define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */
#define ARC_REG_TIMERS_BCR 0x75
#define ARC_REG_AP_BCR 0x76
@@ -31,6 +33,7 @@
#define ARC_REG_BPU_BCR 0xc0
#define ARC_REG_ISA_CFG_BCR 0xc1
#define ARC_REG_RTT_BCR 0xF2
+#define ARC_REG_IRQ_BCR 0xF3
#define ARC_REG_SMART_BCR 0xFF
/* status32 Bits Positions */
@@ -51,6 +54,7 @@
* [15: 8] = Exception Cause Code
* [ 7: 0] = Exception Parameters (for certain types only)
*/
+#ifdef CONFIG_ISA_ARCOMPACT
#define ECR_V_MEM_ERR 0x01
#define ECR_V_INSN_ERR 0x02
#define ECR_V_MACH_CHK 0x20
@@ -58,6 +62,15 @@
#define ECR_V_DTLB_MISS 0x22
#define ECR_V_PROTV 0x23
#define ECR_V_TRAP 0x25
+#else
+#define ECR_V_MEM_ERR 0x01
+#define ECR_V_INSN_ERR 0x02
+#define ECR_V_MACH_CHK 0x03
+#define ECR_V_ITLB_MISS 0x04
+#define ECR_V_DTLB_MISS 0x05
+#define ECR_V_PROTV 0x06
+#define ECR_V_TRAP 0x09
+#endif
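
The vector numbers above combine with the cause/param bytes into the full ECR word: [23:16] vector, [15:8] cause code, [7:0] parameter, per the layout comment above. A minimal user-space decode sketch (the helper macros and the sample value are illustrative, not part of this patch):

#include <stdio.h>

#define ECR_VEC(ecr)	(((ecr) >> 16) & 0xff)	/* exception vector */
#define ECR_CODE(ecr)	(((ecr) >> 8) & 0xff)	/* exception cause code */
#define ECR_PARAM(ecr)	((ecr) & 0xff)		/* exception parameters */

int main(void)
{
	unsigned int ecr = 0x00230800;	/* hypothetical: ProtV (0x23, ARCompact) */

	printf("vector=0x%02x code=0x%02x param=0x%02x\n",
	       ECR_VEC(ecr), ECR_CODE(ecr), ECR_PARAM(ecr));
	return 0;
}
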
/* DTLB Miss and Protection Violation Cause Codes */
@@ -76,9 +89,6 @@
#define ECR_C_BIT_DTLB_LD_MISS 8
#define ECR_C_BIT_DTLB_ST_MISS 9
-/* Dummy ECR values for Interrupts */
-#define event_IRQ1 0x0031abcd
-#define event_IRQ2 0x0032abcd
/* Auxiliary registers */
#define AUX_IDENTITY 4
@@ -204,9 +214,11 @@ struct bcr_identity {
struct bcr_isa {
#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad1:23, atomic1:1, ver:8;
+ unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1,
+ pad1:11, atomic1:1, ver:8;
#else
- unsigned int ver:8, atomic1:1, pad1:23;
+ unsigned int ver:8, atomic1:1, pad1:11, be:1, atomic:1, unalign:1,
+ ldd:1, pad2:4, div_rem:4;
#endif
};
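
A sketch of how such BCR layouts get consumed in the port -- READ_BCR() is the existing helper in this header, while the probe function and message are hypothetical:

static void probe_isa_sketch(void)
{
	struct bcr_isa isa;

	READ_BCR(ARC_REG_ISA_CFG_BCR, isa);

	if (isa.atomic || isa.atomic1)		/* LLOCK/SCOND present */
		pr_info("ARC: ldd=%d unalign=%d div_rem=%d\n",
			isa.ldd, isa.unalign, isa.div_rem);
}
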
@@ -269,11 +281,19 @@ struct bcr_fp_arcompact {
#endif
};
+struct bcr_fp_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad2:15, dp:1, pad1:7, sp:1, ver:8;
+#else
+ unsigned int ver:8, sp:1, pad1:7, dp:1, pad2:15;
+#endif
+};
+
struct bcr_timer {
#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8;
+ unsigned int pad2:15, rtsc:1, pad1:5, rtc:1, t1:1, t0:1, ver:8;
#else
- unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15;
+ unsigned int ver:8, t0:1, t1:1, rtc:1, pad1:5, rtsc:1, pad2:15;
#endif
};
@@ -285,6 +305,14 @@ struct bcr_bpu_arcompact {
#endif
};
+struct bcr_bpu_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:6, fbe:2, tqe:2, ts:4, ft:1, rse:2, pte:3, bce:3, ver:8;
+#else
+ unsigned int ver:8, bce:3, pte:3, rse:2, ft:1, ts:4, tqe:2, fbe:2, pad:6;
+#endif
+};
+
struct bcr_generic {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:24, ver:8;
@@ -299,11 +327,12 @@ struct bcr_generic {
*/
struct cpuinfo_arc_mmu {
- unsigned int ver, pg_sz, sets, ways, u_dtlb, u_itlb, num_tlb;
+ unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6;
+ unsigned int num_tlb:16, sets:12, ways:4;
};
struct cpuinfo_arc_cache {
- unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6;
+ unsigned int sz_k:14, line_len:8, assoc:4, ver:4, alias:1, vipt:1;
};
struct cpuinfo_arc_bpu {
@@ -315,14 +344,13 @@ struct cpuinfo_arc_ccm {
};
struct cpuinfo_arc {
- struct cpuinfo_arc_cache icache, dcache;
+ struct cpuinfo_arc_cache icache, dcache, slc;
struct cpuinfo_arc_mmu mmu;
struct cpuinfo_arc_bpu bpu;
struct bcr_identity core;
struct bcr_isa isa;
struct bcr_timer timers;
unsigned int vec_base;
- unsigned int uncached_base;
struct cpuinfo_arc_ccm iccm, dccm;
struct {
unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3,
@@ -336,6 +364,22 @@ struct cpuinfo_arc {
extern struct cpuinfo_arc cpuinfo_arc700[];
+static inline int is_isa_arcv2(void)
+{
+ return IS_ENABLED(CONFIG_ISA_ARCV2);
+}
+
+static inline int is_isa_arcompact(void)
+{
+ return IS_ENABLED(CONFIG_ISA_ARCOMPACT);
+}
+
+#if defined(CONFIG_ISA_ARCOMPACT) && !defined(_CPU_DEFAULT_A7)
+#error "Toolchain not configured for ARCompact builds"
+#elif defined(CONFIG_ISA_ARCV2) && !defined(_CPU_DEFAULT_HS)
+#error "Toolchain not configured for ARCv2 builds"
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_ARC_ARCREGS_H */
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 9917a45fc430..03484cb4d16d 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -23,13 +23,21 @@
#define atomic_set(v, i) (((v)->counter) = (i))
+#ifdef CONFIG_ISA_ARCV2
+#define PREFETCHW " prefetchw [%1] \n"
+#else
+#define PREFETCHW
+#endif
+
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
unsigned int temp; \
\
__asm__ __volatile__( \
- "1: llock %0, [%1] \n" \
+ "1: \n" \
+ PREFETCHW \
+ " llock %0, [%1] \n" \
" " #asm_op " %0, %0, %2 \n" \
" scond %0, [%1] \n" \
" bnz 1b \n" \
@@ -43,8 +51,16 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned int temp; \
\
+ /* \
+ * Explicit full memory barrier needed before/after as \
+ * LLOCK/SCOND themselves don't provide any such semantics \
+ */ \
+ smp_mb(); \
+ \
__asm__ __volatile__( \
- "1: llock %0, [%1] \n" \
+ "1: \n" \
+ PREFETCHW \
+ " llock %0, [%1] \n" \
" " #asm_op " %0, %0, %2 \n" \
" scond %0, [%1] \n" \
" bnz 1b \n" \
@@ -52,6 +68,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
: "r"(&v->counter), "ir"(i) \
: "cc"); \
\
+ smp_mb(); \
+ \
return temp; \
}
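
For readers less used to LL/SC, a hedged C11 model of what the generated atomic_*_return() routines do: a weak compare-exchange stands in for SCOND failing and looping back to the LLOCK, and the fences are the explicit smp_mb() calls above (PREFETCHW on ARCv2 merely pulls the line in for writing ahead of the LLOCK):

#include <stdatomic.h>

static inline int atomic_add_return_sketch(int i, _Atomic int *v)
{
	int old, new;

	atomic_thread_fence(memory_order_seq_cst);		/* smp_mb() before */
	old = atomic_load_explicit(v, memory_order_relaxed);	/* llock */
	do {
		new = old + i;					/* the "asm_op" */
	} while (!atomic_compare_exchange_weak_explicit(v, &old, new,
			memory_order_relaxed, memory_order_relaxed));
								/* scond + bnz 1b */
	atomic_thread_fence(memory_order_seq_cst);		/* smp_mb() after */

	return new;
}
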
@@ -105,6 +123,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
unsigned long flags; \
unsigned long temp; \
\
+ /* \
+ * spin lock/unlock provides the needed smp_mb() before/after \
+ */ \
atomic_ops_lock(flags); \
temp = v->counter; \
temp c_op i; \
@@ -142,9 +163,19 @@ ATOMIC_OP(and, &=, and)
#define __atomic_add_unless(v, a, u) \
({ \
int c, old; \
+ \
+ /* \
+ * Explicit full memory barrier needed before/after as \
+ * LLOCK/SCOND themselves don't provide any such semantics \
+ */ \
+ smp_mb(); \
+ \
c = atomic_read(v); \
while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
c = old; \
+ \
+ smp_mb(); \
+ \
c; \
})
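
A portable reference (sketch, hypothetical name) for the __atomic_add_unless() semantics above; in the kernel this primitive is what atomic_inc_not_zero() and friends build on:

#include <stdatomic.h>

static int add_unless_sketch(_Atomic int *v, int a, int u)
{
	int c = atomic_load(v);

	/* a failed compare-exchange refreshes c with the current value */
	while (c != u && !atomic_compare_exchange_weak(v, &c, c + a))
		;

	return c;	/* old value: the add happened iff c != u */
}
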
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
new file mode 100644
index 000000000000..a7209983ee64
--- /dev/null
+++ b/arch/arc/include/asm/barrier.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#ifdef CONFIG_ISA_ARCV2
+
+/*
+ * ARCv2 based HS38 cores are in-order issue, but still weakly ordered
+ * due to micro-arch buffering/queuing of load/store, cache hit vs. miss ...
+ *
+ * Explicit barrier provided by DMB instruction
+ * - Operand supports fine grained load/store/load+store semantics
+ * - Ensures that selected memory operation issued before it will complete
+ * before any subsequent memory operation of same type
+ * - DMB guarantees SMP as well as local barrier semantics
+ * (asm-generic/barrier.h ensures sane smp_*mb if not defined here, i.e.
+ * UP: barrier(), SMP: smp_*mb == *mb)
+ * - DSYNC provides DMB+completion_of_cache_bpu_maintenance_ops hence not needed
+ * in the general case. Plus it only provides a full barrier.
+ */
+
+#define mb() asm volatile("dmb 3\n" : : : "memory")
+#define rmb() asm volatile("dmb 1\n" : : : "memory")
+#define wmb() asm volatile("dmb 2\n" : : : "memory")
+
+#endif
+
+#ifdef CONFIG_ISA_ARCOMPACT
+
+/*
+ * ARCompact based cores (ARC700) only have the SYNC instruction, which is
+ * super heavyweight as it also flushes the pipeline.
+ * There are no real SMP implementations of such cores.
+ */
+
+#define mb() asm volatile("sync\n" : : : "memory")
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif
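
The canonical pairing these barriers exist for, as an illustrative fragment (production kernel code would use READ_ONCE()/WRITE_ONCE() and the smp_* variants; the variables are hypothetical):

static volatile int payload, ready;

static void producer(void)
{
	payload = 42;
	wmb();		/* dmb 2: payload store completes before flag store */
	ready = 1;
}

static int consumer(void)
{
	while (!ready)
		;	/* spin until producer flags the data */
	rmb();		/* dmb 1: flag load completes before payload load */
	return payload;
}
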
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 4051e9525939..99fe118d3730 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -18,83 +18,50 @@
#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>
+#ifndef CONFIG_ARC_HAS_LLSC
+#include <asm/smp.h>
+#endif
-/*
- * Hardware assisted read-modify-write using ARC700 LLOCK/SCOND insns.
- * The Kconfig glue ensures that in SMP, this is only set if the container
- * SoC/platform has cross-core coherent LLOCK/SCOND
- */
#if defined(CONFIG_ARC_HAS_LLSC)
-static inline void set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int temp;
-
- m += nr >> 5;
-
- /*
- * ARC ISA micro-optimization:
- *
- * Instructions dealing with bitpos only consider lower 5 bits (0-31)
- * e.g (x << 33) is handled like (x << 1) by ASL instruction
- * (mem pointer still needs adjustment to point to next word)
- *
- * Hence the masking to clamp @nr arg can be elided in general.
- *
- * However if @nr is a constant (above assumed it in a register),
- * and greater than 31, gcc can optimize away (x << 33) to 0,
- * as overflow, given the 32-bit ISA. Thus masking needs to be done
- * for constant @nr, but no code is generated due to const prop.
- */
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%1] \n"
- " bset %0, %0, %2 \n"
- " scond %0, [%1] \n"
- " bnz 1b \n"
- : "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-}
-
-static inline void clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%1] \n"
- " bclr %0, %0, %2 \n"
- " scond %0, [%1] \n"
- " bnz 1b \n"
- : "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-}
-
-static inline void change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
+/*
+ * Hardware assisted Atomic-R-M-W
+ */
- __asm__ __volatile__(
- "1: llock %0, [%1] \n"
- " bxor %0, %0, %2 \n"
- " scond %0, [%1] \n"
- " bnz 1b \n"
- : "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
+#define BIT_OP(op, c_op, asm_op) \
+static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned int temp; \
+ \
+ m += nr >> 5; \
+ \
+ /* \
+ * ARC ISA micro-optimization: \
+ * \
+ * Instructions dealing with bitpos only consider lower 5 bits \
+ * e.g (x << 33) is handled like (x << 1) by ASL instruction \
+ * (mem pointer still needs adjustment to point to next word) \
+ * \
+ * Hence the masking to clamp @nr arg can be elided in general. \
+ * \
+ * However, if @nr is a constant (assumed above to be in a register), \
+ * and greater than 31, gcc can optimize away (x << 33) to 0, \
+ * as overflow, given the 32-bit ISA. Thus masking needs to be \
+ * done for const @nr, but no code is generated due to gcc \
+ * const prop. \
+ */ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %0, [%1] \n" \
+ " " #asm_op " %0, %0, %2 \n" \
+ " scond %0, [%1] \n" \
+ " bnz 1b \n" \
+ : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \
+ : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \
+ "ir"(nr) \
+ : "cc"); \
}
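
The constant-masking subtlety from the comment above, restated as a standalone sketch (hypothetical helper; the variable-shift case leans on the documented 5-bit masking by ASL):

static inline unsigned long bit_mask(unsigned long nr)
{
	if (__builtin_constant_p(nr))
		nr &= 0x1f;	/* stop gcc folding an overflowing const shift to 0 */

	return 1UL << nr;	/* variable count: hardware masks to 0-31 */
}
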
/*
@@ -108,75 +75,38 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *m)
* Since ARC lacks an equivalent h/w primitive, the bit is set unconditionally
* and the old value of the bit is returned
*/
-static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old, temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%2] \n"
- " bset %1, %0, %3 \n"
- " scond %1, [%2] \n"
- " bnz 1b \n"
- : "=&r"(old), "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-
- return (old & (1 << nr)) != 0;
-}
-
-static inline int
-test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int old, temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%2] \n"
- " bclr %1, %0, %3 \n"
- " scond %1, [%2] \n"
- " bnz 1b \n"
- : "=&r"(old), "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-
- return (old & (1 << nr)) != 0;
-}
-
-static inline int
-test_and_change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int old, temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%2] \n"
- " bxor %1, %0, %3 \n"
- " scond %1, [%2] \n"
- " bnz 1b \n"
- : "=&r"(old), "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-
- return (old & (1 << nr)) != 0;
+#define TEST_N_BIT_OP(op, c_op, asm_op) \
+static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned long old, temp; \
+ \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ /* \
+ * Explicit full memory barrier needed before/after as \
+ * LLOCK/SCOND themselves don't provide any such semantics \
+ */ \
+ smp_mb(); \
+ \
+ __asm__ __volatile__( \
+ "1: llock %0, [%2] \n" \
+ " " #asm_op " %1, %0, %3 \n" \
+ " scond %1, [%2] \n" \
+ " bnz 1b \n" \
+ : "=&r"(old), "=&r"(temp) \
+ : "r"(m), "ir"(nr) \
+ : "cc"); \
+ \
+ smp_mb(); \
+ \
+ return (old & (1 << nr)) != 0; \
}
#else /* !CONFIG_ARC_HAS_LLSC */
-#include <asm/smp.h>
-
/*
* Non hardware assisted Atomic-R-M-W
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
@@ -193,108 +123,43 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
* at compile time)
*/
-static inline void set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- temp = *m;
- *m = temp | (1UL << nr);
-
- bitops_unlock(flags);
-}
-
-static inline void clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- temp = *m;
- *m = temp & ~(1UL << nr);
-
- bitops_unlock(flags);
-}
-
-static inline void change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- temp = *m;
- *m = temp ^ (1UL << nr);
-
- bitops_unlock(flags);
-}
-
-static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- old = *m;
- *m = old | (1 << nr);
-
- bitops_unlock(flags);
-
- return (old & (1 << nr)) != 0;
-}
-
-static inline int
-test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- old = *m;
- *m = old & ~(1 << nr);
-
- bitops_unlock(flags);
-
- return (old & (1 << nr)) != 0;
+#define BIT_OP(op, c_op, asm_op) \
+static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned long temp, flags; \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ /* \
+ * spin lock/unlock provide the needed smp_mb() before/after \
+ */ \
+ bitops_lock(flags); \
+ \
+ temp = *m; \
+ *m = temp c_op (1UL << nr); \
+ \
+ bitops_unlock(flags); \
}
-static inline int
-test_and_change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- old = *m;
- *m = old ^ (1 << nr);
-
- bitops_unlock(flags);
-
- return (old & (1 << nr)) != 0;
+#define TEST_N_BIT_OP(op, c_op, asm_op) \
+static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned long old, flags; \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ bitops_lock(flags); \
+ \
+ old = *m; \
+ *m = old c_op (1 << nr); \
+ \
+ bitops_unlock(flags); \
+ \
+ return (old & (1 << nr)) != 0; \
}
#endif /* CONFIG_ARC_HAS_LLSC */
@@ -303,86 +168,51 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m)
* Non atomic variants
**************************************/
-static inline void __set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- temp = *m;
- *m = temp | (1UL << nr);
-}
-
-static inline void __clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- temp = *m;
- *m = temp & ~(1UL << nr);
-}
-
-static inline void __change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- temp = *m;
- *m = temp ^ (1UL << nr);
-}
-
-static inline int
-__test_and_set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- old = *m;
- *m = old | (1 << nr);
-
- return (old & (1 << nr)) != 0;
+#define __BIT_OP(op, c_op, asm_op) \
+static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \
+{ \
+ unsigned long temp; \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ temp = *m; \
+ *m = temp c_op (1UL << nr); \
}
-static inline int
-__test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- old = *m;
- *m = old & ~(1 << nr);
-
- return (old & (1 << nr)) != 0;
+#define __TEST_N_BIT_OP(op, c_op, asm_op) \
+static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned long old; \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ old = *m; \
+ *m = old c_op (1 << nr); \
+ \
+ return (old & (1 << nr)) != 0; \
}
-static inline int
-__test_and_change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- old = *m;
- *m = old ^ (1 << nr);
-
- return (old & (1 << nr)) != 0;
-}
+#define BIT_OPS(op, c_op, asm_op) \
+ \
+ /* set_bit(), clear_bit(), change_bit() */ \
+ BIT_OP(op, c_op, asm_op) \
+ \
+ /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
+ TEST_N_BIT_OP(op, c_op, asm_op) \
+ \
+ /* __set_bit(), __clear_bit(), __change_bit() */ \
+ __BIT_OP(op, c_op, asm_op) \
+ \
+ /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
+ __TEST_N_BIT_OP(op, c_op, asm_op)
+
+BIT_OPS(set, |, bset)
+BIT_OPS(clear, & ~, bclr)
+BIT_OPS(change, ^, bxor)
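
For reference, the API each BIT_OPS() line above generates -- the names follow mechanically from the four macros, listed here for readability:

/*
 * BIT_OPS(set, |, bset) expands to:
 *
 *	void set_bit(unsigned long nr, volatile unsigned long *m);
 *	int  test_and_set_bit(unsigned long nr, volatile unsigned long *m);
 *	void __set_bit(unsigned long nr, volatile unsigned long *m);
 *	int  __test_and_set_bit(unsigned long nr, volatile unsigned long *m);
 *
 * and likewise the clear_bit() and change_bit() families from the
 * bclr/bxor invocations.
 */
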
/*
* This routine doesn't need to be atomic.
@@ -402,6 +232,8 @@ test_bit(unsigned int nr, const volatile unsigned long *addr)
return ((mask & *addr) != 0);
}
+#ifdef CONFIG_ISA_ARCOMPACT
+
/*
* Count the number of zeros, starting from MSB
* Helper for fls( ) friends
@@ -494,6 +326,75 @@ static inline __attribute__ ((const)) int __ffs(unsigned long word)
return ffs(word) - 1;
}
+#else /* CONFIG_ISA_ARCV2 */
+
+/*
+ * fls = Find Last Set in word
+ * @result: [1-32]
+ * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
+ */
+static inline __attribute__ ((const)) int fls(unsigned long x)
+{
+ int n;
+
+ asm volatile(
+ " fls.f %0, %1 \n" /* 0:31; 0(Z) if src 0 */
+ " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */
+ : "=r"(n) /* Early clobber not needed */
+ : "r"(x)
+ : "cc");
+
+ return n;
+}
+
+/*
+ * __fls: Similar to fls, but zero based (0-31). Also 0 if no bit set
+ */
+static inline __attribute__ ((const)) int __fls(unsigned long x)
+{
+ /* FLS insn has exactly the same semantics as the API */
+ return __builtin_arc_fls(x);
+}
+
+/*
+ * ffs = Find First Set in word (LSB to MSB)
+ * @result: [1-32], 0 if all 0's
+ */
+static inline __attribute__ ((const)) int ffs(unsigned long x)
+{
+ int n;
+
+ asm volatile(
+ " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */
+ " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */
+ " mov.z %0, 0 \n" /* 31(Z)-> 0 */
+ : "=r"(n) /* Early clobber not needed */
+ : "r"(x)
+ : "cc");
+
+ return n;
+}
+
+/*
+ * __ffs: Similar to ffs, but zero based (0-31)
+ */
+static inline __attribute__ ((const)) int __ffs(unsigned long x)
+{
+ int n;
+
+ asm volatile(
+ " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */
+ " mov.z %0, 0 \n" /* 31(Z)-> 0 */
+ : "=r"(n)
+ : "r"(x)
+ : "cc");
+
+ return n;
+
+}
+
+#endif /* CONFIG_ISA_ARCOMPACT */
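
A portable reference (sketch, hypothetical names) for the conventions the ARCv2 routines above implement: fls(1) == 1, fls(0x80000000) == 32, fls(0) == 0 and ffs(0) == 0:

static inline int fls_ref(unsigned long x)
{
	int n = 0;

	while (x) {		/* count up to the last set bit, 1-based */
		n++;
		x >>= 1;
	}
	return n;
}

static inline int ffs_ref(unsigned long x)
{
	if (!x)
		return 0;
	return fls_ref(x & -x);	/* isolate the lowest set bit, then count */
}
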
+
/*
* ffz = Find First Zero in word.
* @return:[0-31], 32 if all 1's
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 7861255da32d..d67345d3e2d4 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -60,7 +60,7 @@ extern void read_decode_cache_bcr(void);
#define ARC_REG_IC_IVIC 0x10
#define ARC_REG_IC_CTRL 0x11
#define ARC_REG_IC_IVIL 0x19
-#if defined(CONFIG_ARC_MMU_V3)
+#if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
#define ARC_REG_IC_PTAG 0x1E
#endif
@@ -74,12 +74,24 @@ extern void read_decode_cache_bcr(void);
#define ARC_REG_DC_IVDL 0x4A
#define ARC_REG_DC_FLSH 0x4B
#define ARC_REG_DC_FLDL 0x4C
-#if defined(CONFIG_ARC_MMU_V3)
#define ARC_REG_DC_PTAG 0x5C
-#endif
/* Bit val in DC_CTRL */
#define DC_CTRL_INV_MODE_FLUSH 0x40
#define DC_CTRL_FLUSH_STATUS 0x100
+/* System-level cache (L2 cache) related Auxiliary registers */
+#define ARC_REG_SLC_CFG 0x901
+#define ARC_REG_SLC_CTRL 0x903
+#define ARC_REG_SLC_FLUSH 0x904
+#define ARC_REG_SLC_INVALIDATE 0x905
+#define ARC_REG_SLC_RGN_START 0x914
+#define ARC_REG_SLC_RGN_END 0x916
+
+/* Bit val in SLC_CONTROL */
+#define SLC_CTRL_IM 0x040
+#define SLC_CTRL_DISABLE 0x001
+#define SLC_CTRL_BUSY 0x100
+#define SLC_CTRL_RGN_OP_INV 0x200
+
#endif /* _ASM_CACHE_H */
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index 6abc4972bc93..0992d3dbcc65 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -34,9 +34,7 @@ void flush_cache_all(void);
void flush_icache_range(unsigned long start, unsigned long end);
void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len);
void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
-void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr);
-#define __flush_dcache_page(p, v) \
- ___flush_dcache_page((unsigned long)p, (unsigned long)v)
+void __flush_dcache_page(unsigned long paddr, unsigned long vaddr);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index 03cd6894855d..44fd531f4d7b 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -10,6 +10,8 @@
#define __ASM_ARC_CMPXCHG_H
#include <linux/types.h>
+
+#include <asm/barrier.h>
#include <asm/smp.h>
#ifdef CONFIG_ARC_HAS_LLSC
@@ -19,16 +21,25 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
{
unsigned long prev;
+ /*
+ * Explicit full memory barrier needed before/after as
+	 * LLOCK/SCOND themselves don't provide any such semantics
+ */
+ smp_mb();
+
__asm__ __volatile__(
"1: llock %0, [%1] \n"
" brne %0, %2, 2f \n"
" scond %3, [%1] \n"
" bnz 1b \n"
"2: \n"
- : "=&r"(prev)
- : "r"(ptr), "ir"(expected),
- "r"(new) /* can't be "ir". scond can't take limm for "b" */
- : "cc");
+ : "=&r"(prev) /* Early clobber, to prevent reg reuse */
+ : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */
+ "ir"(expected),
+ "r"(new) /* can't be "ir". scond can't take LIMM for "b" */
+ : "cc", "memory"); /* so that gcc knows memory is being written here */
+
+ smp_mb();
return prev;
}
@@ -42,6 +53,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
int prev;
volatile unsigned long *p = ptr;
+ /*
+ * spin lock/unlock provide the needed smp_mb() before/after
+ */
atomic_ops_lock(flags);
prev = *p;
if (prev == expected)
@@ -77,12 +91,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
switch (size) {
case 4:
+ smp_mb();
+
__asm__ __volatile__(
" ex %0, [%1] \n"
: "+r"(val)
: "r"(ptr)
: "memory");
+ smp_mb();
+
return val;
}
return __xchg_bad_pointer();
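The smp_mb() pairs added above give cmpxchg()/xchg() full barrier semantics whether or not LLSC is available. A hedged caller sketch, assuming __cmpxchg() as defined in this header:

/* Bounded increment on top of __cmpxchg(); the barriers inside it mean the
 * caller needs no extra smp_mb() around the loop. */
static inline int inc_below(volatile unsigned long *p, unsigned long limit)
{
	unsigned long old;

	do {
		old = *p;
		if (old >= limit)
			return 0;
	} while (__cmpxchg(p, old, old + 1) != old);

	return 1;
}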
diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h
index 43de30256981..08e7e2a16ac1 100644
--- a/arch/arc/include/asm/delay.h
+++ b/arch/arc/include/asm/delay.h
@@ -22,11 +22,10 @@
static inline void __delay(unsigned long loops)
{
__asm__ __volatile__(
- "1: sub.f %0, %0, 1 \n"
- " jpnz 1b \n"
- : "+r"(loops)
- :
- : "cc");
+ " lp 1f \n"
+ " nop \n"
+ "1: \n"
+ : "+l"(loops));
}
extern void __bad_udelay(void);
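The rewritten __delay() leans on the ARC Zero Overhead Loop: the "l" constraint binds loops to LP_COUNT, so the lp instruction iterates with no per-pass sub/branch. A trivial C equivalent, for reference only:

/* What the ZOL version of __delay() computes, minus the hardware loop */
static inline void ref_delay(unsigned long loops)
{
	while (loops--)
		;	/* one LP_COUNT iteration per pass on ARC */
}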
diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h
index f787894613ad..2d28ba939d8e 100644
--- a/arch/arc/include/asm/dma-mapping.h
+++ b/arch/arc/include/asm/dma-mapping.h
@@ -14,23 +14,6 @@
#include <asm-generic/dma-coherent.h>
#include <asm/cacheflush.h>
-#ifndef CONFIG_ARC_PLAT_NEEDS_CPU_TO_DMA
-/*
- * dma_map_* API take cpu addresses, which is kernel logical address in the
- * untranslated address space (0x8000_0000) based. The dma address (bus addr)
- * ideally needs to be 0x0000_0000 based hence these glue routines.
- * However given that intermediate bus bridges can ignore the high bit, we can
- * do with these routines being no-ops.
- * If a platform/device comes up which sriclty requires 0 based bus addr
- * (e.g. AHB-PCI bridge on Angel4 board), then it can provide it's own versions
- */
-#define plat_dma_addr_to_kernel(dev, addr) ((unsigned long)(addr))
-#define plat_kernel_addr_to_dma(dev, ptr) ((dma_addr_t)(ptr))
-
-#else
-#include <plat/dma_addr.h>
-#endif
-
void *dma_alloc_noncoherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp);
@@ -94,7 +77,7 @@ dma_map_single(struct device *dev, void *cpu_addr, size_t size,
enum dma_data_direction dir)
{
_dma_cache_sync((unsigned long)cpu_addr, size, dir);
- return plat_kernel_addr_to_dma(dev, cpu_addr);
+ return (dma_addr_t)cpu_addr;
}
static inline void
@@ -147,16 +130,14 @@ static inline void
dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir)
{
- _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size,
- DMA_FROM_DEVICE);
+ _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE);
}
static inline void
dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir)
{
- _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size,
- DMA_TO_DEVICE);
+ _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE);
}
static inline void
@@ -164,8 +145,7 @@ dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
unsigned long offset, size_t size,
enum dma_data_direction direction)
{
- _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset,
- size, DMA_FROM_DEVICE);
+ _dma_cache_sync(dma_handle + offset, size, DMA_FROM_DEVICE);
}
static inline void
@@ -173,8 +153,7 @@ dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
unsigned long offset, size_t size,
enum dma_data_direction direction)
{
- _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset,
- size, DMA_TO_DEVICE);
+ _dma_cache_sync(dma_handle + offset, size, DMA_TO_DEVICE);
}
static inline void
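With the plat_* glue removed, the DMA handle is simply the CPU address. A hypothetical driver helper showing what the identity mapping means in practice (queue_tx() is illustrative, not a kernel API):

static inline dma_addr_t queue_tx(struct device *dev, void *buf, size_t len)
{
	/* cache maintenance happens in _dma_cache_sync(); the returned
	 * handle equals (dma_addr_t)buf under the identity scheme above */
	return dma_map_single(dev, buf, len, DMA_TO_DEVICE);
}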
diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h
index a26282857683..51a99e25fe33 100644
--- a/arch/arc/include/asm/elf.h
+++ b/arch/arc/include/asm/elf.h
@@ -15,6 +15,11 @@
/* These ELF defines belong to uapi but libc elf.h already defines them */
#define EM_ARCOMPACT 93
+#define EM_ARCV2 195 /* ARCv2 Cores */
+
+#define EM_ARC_INUSE (IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \
+ EM_ARCOMPACT : EM_ARCV2)
+
/* ARC Relocations (kernel Modules only) */
#define R_ARC_32 0x4
#define R_ARC_32_ME 0x1B
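EM_ARC_INUSE resolves to the machine type matching the configured ISA, so generic ELF checks need no ifdefery. A simplified sketch of the idea (the kernel's real elf_check_arch() also validates e_flags):

/* Sketch only: match the e_machine for the ISA being built */
#define elf_check_arch(x)	((x)->e_machine == EM_ARC_INUSE)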
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
new file mode 100644
index 000000000000..b5ff87e6f4b7
--- /dev/null
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -0,0 +1,190 @@
+
+#ifndef __ASM_ARC_ENTRY_ARCV2_H
+#define __ASM_ARC_ENTRY_ARCV2_H
+
+#include <asm/asm-offsets.h>
+#include <asm/irqflags-arcv2.h>
+#include <asm/thread_info.h> /* For THREAD_SIZE */
+
+/*------------------------------------------------------------------------*/
+.macro INTERRUPT_PROLOGUE called_from
+
+	; Before jumping to Interrupt Vector, hardware micro-ops did the following:
+ ; 1. SP auto-switched to kernel mode stack
+ ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1, K:0)
+ ; 3. Auto saved: r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI, PC, STAT32
+ ;
+ ; Now manually save: r12, sp, fp, gp, r25
+
+ PUSH r12
+
+ ; Saving pt_regs->sp correctly requires some extra work due to the way
+ ; Auto stack switch works
+ ; - U mode: retrieve it from AUX_USER_SP
+ ; - K mode: add the offset from current SP where H/w starts auto push
+ ;
+ ; Utilize the fact that Z bit is set if Intr taken in U mode
+ mov.nz r9, sp
+ add.nz r9, r9, SZ_PT_REGS - PT_sp - 4
+ bnz 1f
+
+ lr r9, [AUX_USER_SP]
+1:
+ PUSH r9 ; SP
+
+ PUSH fp
+ PUSH gp
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ PUSH r25 ; user_r25
+ GET_CURR_TASK_ON_CPU r25
+#else
+ sub sp, sp, 4
+#endif
+
+.ifnc \called_from, exception
+ sub sp, sp, 12 ; BTA/ECR/orig_r0 placeholder per pt_regs
+.endif
+
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro INTERRUPT_EPILOGUE called_from
+
+.ifnc \called_from, exception
+	add sp, sp, 12	; skip BTA/ECR/orig_r0 placeholders
+.endif
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ POP r25
+#else
+ add sp, sp, 4
+#endif
+
+ POP gp
+ POP fp
+
+ ; Don't touch AUX_USER_SP if returning to K mode (Z bit set)
+ ; (Z bit set on K mode is inverse of INTERRUPT_PROLOGUE)
+ add.z sp, sp, 4
+ bz 1f
+
+ POPAX AUX_USER_SP
+1:
+ POP r12
+
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+	; Before jumping to Exception Vector, hardware micro-ops did the following:
+ ; 1. SP auto-switched to kernel mode stack
+ ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1,K:0)
+ ;
+ ; Now manually save the complete reg file
+
+ PUSH r9 ; freeup a register: slot of erstatus
+
+ PUSHAX eret
+ sub sp, sp, 12 ; skip JLI, LDI, EI
+ PUSH lp_count
+ PUSHAX lp_start
+ PUSHAX lp_end
+ PUSH blink
+
+ PUSH r11
+ PUSH r10
+
+ ld.as r9, [sp, 10] ; load stashed r9 (status32 stack slot)
+ lr r10, [erstatus]
+	st.as r10, [sp, 10]	; save status32 at its right stack slot
+
+ PUSH r9
+ PUSH r8
+ PUSH r7
+ PUSH r6
+ PUSH r5
+ PUSH r4
+ PUSH r3
+ PUSH r2
+ PUSH r1
+ PUSH r0
+
+ ; -- for interrupts, regs above are auto-saved by h/w in that order --
+ ; Now do what ISR prologue does (manually save r12, sp, fp, gp, r25)
+ ;
+ ; Set Z flag if this was from U mode (expected by INTERRUPT_PROLOGUE)
+ ; Although H/w exception micro-ops do set Z flag for U mode (just like
+ ; for interrupts), it could get clobbered in case we soft land here from
+ ; a TLB Miss exception handler (tlbex.S)
+
+ and r10, r10, STATUS_U_MASK
+ xor.f 0, r10, STATUS_U_MASK
+
+ INTERRUPT_PROLOGUE exception
+
+ PUSHAX erbta
+ PUSHAX ecr ; r9 contains ECR, expected by EV_Trap
+
+ PUSH r0 ; orig_r0
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro EXCEPTION_EPILOGUE
+
+ ; Assumes r0 has PT_status32
+ btst r0, STATUS_U_BIT ; Z flag set if K, used in INTERRUPT_EPILOGUE
+
+ add sp, sp, 8 ; orig_r0/ECR don't need restoring
+ POPAX erbta
+
+ INTERRUPT_EPILOGUE exception
+
+ POP r0
+ POP r1
+ POP r2
+ POP r3
+ POP r4
+ POP r5
+ POP r6
+ POP r7
+ POP r8
+ POP r9
+ POP r10
+ POP r11
+
+ POP blink
+ POPAX lp_end
+ POPAX lp_start
+
+ POP r9
+ mov lp_count, r9
+
+ add sp, sp, 12 ; skip JLI, LDI, EI
+ POPAX eret
+ POPAX erstatus
+
+ ld.as r9, [sp, -12] ; reload r9 which got clobbered
+.endm
+
+.macro FAKE_RET_FROM_EXCPN
+ lr r9, [status32]
+ bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK)
+ or r9, r9, (STATUS_L_MASK|STATUS_IE_MASK)
+ kflag r9
+.endm
+
+/* Get thread_info of "current" tsk */
+.macro GET_CURR_THR_INFO_FROM_SP reg
+ bmskn \reg, sp, THREAD_SHIFT - 1
+.endm
+
+/* Get CPU-ID of this core */
+.macro GET_CPU_ID reg
+ lr \reg, [identity]
+ xbfu \reg, \reg, 0xE8 /* 00111 01000 */
+ /* M = 8-1 N = 8 */
+.endm
+
+#endif
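The xbfu in GET_CPU_ID above extracts an 8-bit field starting at bit 8 of the IDENTITY aux register (operand 0xE8 encodes N=8, M=width-1=7). Its C equivalent:

/* C rendering of the xbfu-based GET_CPU_ID (IDENTITY bits [15:8]) */
static inline unsigned int arc_core_id(unsigned int identity)
{
	return (identity >> 8) & 0xff;
}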
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
new file mode 100644
index 000000000000..415443c2a8c4
--- /dev/null
+++ b/arch/arc/include/asm/entry-compact.h
@@ -0,0 +1,307 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
+ * Stack switching code can no longer reliably rely on the fact that
+ * if we are NOT in user mode, stack is switched to kernel mode.
+ * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed
+ *	its prologue including stack switching from user mode
+ *
+ * Vineetg: Aug 28th 2008: Bug #94984
+ * -Zero Overhead Loop Context should be cleared when entering IRQ/Excp/Trap
+ * Normally CPU does this automatically, however when doing FAKE rtie,
+ * we also need to explicitly do this. The problem in macros
+ * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
+ * was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context
+ *
+ * Vineetg: May 5th 2008
+ * -Modified CALLEE_REG save/restore macros to handle the fact that
+ * r25 contains the kernel current task ptr
+ * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs
+ * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the
+ * address Write back load ld.ab instead of seperate ld/add instn
+ *
+ * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
+ */
+
+#ifndef __ASM_ARC_ENTRY_COMPACT_H
+#define __ASM_ARC_ENTRY_COMPACT_H
+
+#include <asm/asm-offsets.h>
+#include <asm/irqflags-compact.h>
+#include <asm/thread_info.h> /* For THREAD_SIZE */
+
+/*--------------------------------------------------------------
+ * Switch to Kernel Mode stack if SP points to User Mode stack
+ *
+ * Entry : r9 contains pre-IRQ/exception/trap status32
+ * Exit : SP set to K mode stack
+ * SP at the time of entry (K/U) saved @ pt_regs->sp
+ * Clobbers: r9
+ *-------------------------------------------------------------*/
+
+.macro SWITCH_TO_KERNEL_STK
+
+	/* User Mode when this happened? Yes: proceed to switch stack */
+ bbit1 r9, STATUS_U_BIT, 88f
+
+ /* OK we were already in kernel mode when this event happened, thus can
+ * assume SP is kernel mode SP. _NO_ need to do any stack switching
+ */
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+ /* However....
+	 * If Level 2 Interrupts are enabled, we may end up with a corner case:
+ * 1. User Task executing
+ * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode)
+ * 3. But before it could switch SP from USER to KERNEL stack
+ * a L2 IRQ "Interrupts" L1
+	 * That way, although L2 IRQ happened in Kernel mode, stack is still
+ * not switched.
+ * To handle this, we may need to switch stack even if in kernel mode
+ * provided SP has values in range of USER mode stack ( < 0x7000_0000 )
+ */
+ brlo sp, VMALLOC_START, 88f
+
+ /* TODO: vineetg:
+ * We need to be a bit more cautious here. What if a kernel bug in
+	 * L1 ISR, caused SP to go whacko (some small value which looks like
+	 * USER stk) and then we take L2 ISR.
+	 * Above brlo alone would treat it as a valid L1-L2 scenario
+	 * instead of shouting aloud.
+	 * The only feasible way is to make sure this L2 happened in
+	 * L1 prologue ONLY i.e. ilink2 is less than a pre-set marker in
+ * L1 ISR before it switches stack
+ */
+
+#endif
+
+	/*------Intr/Excp happened in kernel mode, SP already setup ------ */
+ /* save it nevertheless @ pt_regs->sp for uniformity */
+
+ b.d 66f
+ st sp, [sp, PT_sp - SZ_PT_REGS]
+
+88: /*------Intr/Excp happened in user mode, "switch" stack ------ */
+
+ GET_CURR_TASK_ON_CPU r9
+
+	/* With current tsk in r9, get its kernel mode stack base */
+ GET_TSK_STACK_BASE r9, r9
+
+ /* save U mode SP @ pt_regs->sp */
+ st sp, [r9, PT_sp - SZ_PT_REGS]
+
+ /* final SP switch */
+ mov sp, r9
+66:
+.endm
+
+/*------------------------------------------------------------
+ * "FAKE" a rtie to return from CPU Exception context
+ * This is to re-enable Exceptions within exception
+ * Look at EV_ProtV to see how this is actually used
+ *-------------------------------------------------------------*/
+
+.macro FAKE_RET_FROM_EXCPN
+
+ ld r9, [sp, PT_status32]
+ bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK)
+ bset r9, r9, STATUS_L_BIT
+ sr r9, [erstatus]
+ mov r9, 55f
+ sr r9, [eret]
+
+ rtie
+55:
+.endm
+
+/*--------------------------------------------------------------
+ * For early Exception/ISR Prologue, a core reg is temporarily needed to
+ * code the rest of the prologue (stack switching). This is done by stashing
+ * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
+ *
+ * Before saving the full regfile - this reg is restored back, only
+ * to be saved again on kernel mode stack, as part of pt_regs.
+ *-------------------------------------------------------------*/
+.macro PROLOG_FREEUP_REG reg, mem
+#ifdef CONFIG_SMP
+ sr \reg, [ARC_REG_SCRATCH_DATA0]
+#else
+ st \reg, [\mem]
+#endif
+.endm
+
+.macro PROLOG_RESTORE_REG reg, mem
+#ifdef CONFIG_SMP
+ lr \reg, [ARC_REG_SCRATCH_DATA0]
+#else
+ ld \reg, [\mem]
+#endif
+.endm
+
+/*--------------------------------------------------------------
+ * Exception Entry prologue
+ * -Switches stack to K mode (if not already)
+ * -Saves the register file
+ *
+ * After this it is safe to call the "C" handlers
+ *-------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+ /* Need at least 1 reg to code the early exception prologue */
+ PROLOG_FREEUP_REG r9, @ex_saved_reg1
+
+ /* U/K mode at time of exception (stack not switched if already K) */
+ lr r9, [erstatus]
+
+ /* ARC700 doesn't provide auto-stack switching */
+ SWITCH_TO_KERNEL_STK
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ /* Treat r25 as scratch reg (save on stack) and load with "current" */
+ PUSH r25
+ GET_CURR_TASK_ON_CPU r25
+#else
+ sub sp, sp, 4
+#endif
+
+ st.a r0, [sp, -8] /* orig_r0 needed for syscall (skip ECR slot) */
+ sub sp, sp, 4 /* skip pt_regs->sp, already saved above */
+
+ /* Restore r9 used to code the early prologue */
+ PROLOG_RESTORE_REG r9, @ex_saved_reg1
+
+ /* now we are ready to save the regfile */
+ SAVE_R0_TO_R12
+ PUSH gp
+ PUSH fp
+ PUSH blink
+ PUSHAX eret
+ PUSHAX erstatus
+ PUSH lp_count
+ PUSHAX lp_end
+ PUSHAX lp_start
+ PUSHAX erbta
+
+ lr r9, [ecr]
+ st r9, [sp, PT_event] /* EV_Trap expects r9 to have ECR */
+.endm
+
+/*--------------------------------------------------------------
+ * Restore all registers used by system call or Exceptions
+ * SP should always be pointing to the next free stack element
+ * when entering this macro.
+ *
+ * NOTE:
+ *
+ * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
+ * for memory load operations. If used in that way interrupts are deferred
+ * by hardware and that is not good.
+ *-------------------------------------------------------------*/
+.macro EXCEPTION_EPILOGUE
+ POPAX erbta
+ POPAX lp_start
+ POPAX lp_end
+
+ POP r9
+ mov lp_count, r9 ;LD to lp_count is not allowed
+
+ POPAX erstatus
+ POPAX eret
+ POP blink
+ POP fp
+ POP gp
+ RESTORE_R12_TO_R0
+
+ ld sp, [sp] /* restore original sp */
+ /* orig_r0, ECR, user_r25 skipped automatically */
+.endm
+
+/* Dummy ECR values for Interrupts */
+#define event_IRQ1 0x0031abcd
+#define event_IRQ2 0x0032abcd
+
+.macro INTERRUPT_PROLOGUE LVL
+
+ /* free up r9 as scratchpad */
+ PROLOG_FREEUP_REG r9, @int\LVL\()_saved_reg
+
+	/* Which mode (user/kernel) was the system in when intr occurred */
+ lr r9, [status32_l\LVL\()]
+
+ SWITCH_TO_KERNEL_STK
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ /* Treat r25 as scratch reg (save on stack) and load with "current" */
+ PUSH r25
+ GET_CURR_TASK_ON_CPU r25
+#else
+ sub sp, sp, 4
+#endif
+
+ PUSH 0x003\LVL\()abcd /* Dummy ECR */
+ sub sp, sp, 8 /* skip orig_r0 (not needed)
+ skip pt_regs->sp, already saved above */
+
+ /* Restore r9 used to code the early prologue */
+ PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg
+
+ SAVE_R0_TO_R12
+ PUSH gp
+ PUSH fp
+ PUSH blink
+ PUSH ilink\LVL\()
+ PUSHAX status32_l\LVL\()
+ PUSH lp_count
+ PUSHAX lp_end
+ PUSHAX lp_start
+ PUSHAX bta_l\LVL\()
+.endm
+
+/*--------------------------------------------------------------
+ * Restore all registers used by interrupt handlers.
+ *
+ * NOTE:
+ *
+ * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
+ * for memory load operations. If used in that way interrupts are deferred
+ * by hardware and that is not good.
+ *-------------------------------------------------------------*/
+.macro INTERRUPT_EPILOGUE LVL
+ POPAX bta_l\LVL\()
+ POPAX lp_start
+ POPAX lp_end
+
+ POP r9
+ mov lp_count, r9 ;LD to lp_count is not allowed
+
+ POPAX status32_l\LVL\()
+ POP ilink\LVL\()
+ POP blink
+ POP fp
+ POP gp
+ RESTORE_R12_TO_R0
+
+ ld sp, [sp] /* restore original sp */
+ /* orig_r0, ECR, user_r25 skipped automatically */
+.endm
+
+/* Get thread_info of "current" tsk */
+.macro GET_CURR_THR_INFO_FROM_SP reg
+ bic \reg, sp, (THREAD_SIZE - 1)
+.endm
+
+/* Get CPU-ID of this core */
+.macro GET_CPU_ID reg
+ lr \reg, [identity]
+ lsr \reg, \reg, 8
+ bmsk \reg, \reg, 7
+.endm
+
+#endif /* __ASM_ARC_ENTRY_COMPACT_H */
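The stack-switch decision encoded by SWITCH_TO_KERNEL_STK above can be summarized in C; this is illustrative pseudo-code only (kstack_base stands in for GET_TSK_STACK_BASE on "current"):

static unsigned long pick_stack(unsigned long status32, unsigned long sp,
				unsigned long kstack_base)
{
	if (status32 & STATUS_U_MASK)
		return kstack_base;	/* event taken in user mode: switch */

	if (IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) && sp < VMALLOC_START)
		return kstack_base;	/* L2 intr hit mid L1-prologue */

	return sp;			/* already on a kernel mode stack */
}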
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 884081099f80..ad7860c5ce15 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -1,45 +1,27 @@
/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
- * Stack switching code can no longer reliably rely on the fact that
- * if we are NOT in user mode, stack is switched to kernel mode.
- * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed
- * it's prologue including stack switching from user mode
- *
- * Vineetg: Aug 28th 2008: Bug #94984
- * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
- * Normally CPU does this automatically, however when doing FAKE rtie,
- * we also need to explicitly do this. The problem in macros
- * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
- * was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context
- *
- * Vineetg: May 5th 2008
- * -Modified CALLEE_REG save/restore macros to handle the fact that
- * r25 contains the kernel current task ptr
- * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs
- * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the
- * address Write back load ld.ab instead of seperate ld/add instn
- *
- * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
*/
#ifndef __ASM_ARC_ENTRY_H
#define __ASM_ARC_ENTRY_H
-#ifdef __ASSEMBLY__
#include <asm/unistd.h> /* For NR_syscalls defination */
-#include <asm/asm-offsets.h>
#include <asm/arcregs.h>
#include <asm/ptrace.h>
#include <asm/processor.h> /* For VMALLOC_START */
-#include <asm/thread_info.h> /* For THREAD_SIZE */
#include <asm/mmu.h>
+#ifdef CONFIG_ISA_ARCOMPACT
+#include <asm/entry-compact.h> /* ISA specific bits */
+#else
+#include <asm/entry-arcv2.h>
+#endif
+
/* Note on the LD/ST addr modes with addr reg wback
*
* LD.a same as LD.aw
@@ -143,8 +125,6 @@
POP r13
.endm
-#define OFF_USER_R25_FROM_R24 (SZ_CALLEE_REGS + SZ_PT_REGS - 8)/4
-
/*--------------------------------------------------------------
* Collect User Mode callee regs as struct callee_regs - needed by
* fork/do_signal/unaligned-access-emulation.
@@ -157,12 +137,13 @@
*-------------------------------------------------------------*/
.macro SAVE_CALLEE_SAVED_USER
+ mov r12, sp ; save SP as ref to pt_regs
SAVE_R13_TO_R24
#ifdef CONFIG_ARC_CURR_IN_REG
- ; Retrieve orig r25 and save it on stack
- ld.as r12, [sp, OFF_USER_R25_FROM_R24]
- st.a r12, [sp, -4]
+ ; Retrieve orig r25 and save it with rest of callee_regs
+ ld.as r12, [r12, PT_user_r25]
+ PUSH r12
#else
PUSH r25
#endif
@@ -209,12 +190,16 @@
.macro RESTORE_CALLEE_SAVED_USER
#ifdef CONFIG_ARC_CURR_IN_REG
- ld.ab r12, [sp, 4]
- st.as r12, [sp, OFF_USER_R25_FROM_R24]
+ POP r12
#else
POP r25
#endif
RESTORE_R24_TO_R13
+
+ ; SP is back to start of pt_regs
+#ifdef CONFIG_ARC_CURR_IN_REG
+ st.as r12, [sp, PT_user_r25]
+#endif
.endm
/*--------------------------------------------------------------
@@ -240,117 +225,6 @@
.endm
-/*--------------------------------------------------------------
- * Switch to Kernel Mode stack if SP points to User Mode stack
- *
- * Entry : r9 contains pre-IRQ/exception/trap status32
- * Exit : SP is set to kernel mode stack pointer
- * If CURR_IN_REG, r25 set to "current" task pointer
- * Clobbers: r9
- *-------------------------------------------------------------*/
-
-.macro SWITCH_TO_KERNEL_STK
-
- /* User Mode when this happened ? Yes: Proceed to switch stack */
- bbit1 r9, STATUS_U_BIT, 88f
-
- /* OK we were already in kernel mode when this event happened, thus can
- * assume SP is kernel mode SP. _NO_ need to do any stack switching
- */
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
- /* However....
- * If Level 2 Interrupts enabled, we may end up with a corner case:
- * 1. User Task executing
- * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode)
- * 3. But before it could switch SP from USER to KERNEL stack
- * a L2 IRQ "Interrupts" L1
- * Thay way although L2 IRQ happened in Kernel mode, stack is still
- * not switched.
- * To handle this, we may need to switch stack even if in kernel mode
- * provided SP has values in range of USER mode stack ( < 0x7000_0000 )
- */
- brlo sp, VMALLOC_START, 88f
-
- /* TODO: vineetg:
- * We need to be a bit more cautious here. What if a kernel bug in
- * L1 ISR, caused SP to go whaco (some small value which looks like
- * USER stk) and then we take L2 ISR.
- * Above brlo alone would treat it as a valid L1-L2 sceanrio
- * instead of shouting alound
- * The only feasible way is to make sure this L2 happened in
- * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in
- * L1 ISR before it switches stack
- */
-
-#endif
-
- /* Save Pre Intr/Exception KERNEL MODE SP on kernel stack
- * safe-keeping not really needed, but it keeps the epilogue code
- * (SP restore) simpler/uniform.
- */
- b.d 66f
- mov r9, sp
-
-88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */
-
- GET_CURR_TASK_ON_CPU r9
-
- /* With current tsk in r9, get it's kernel mode stack base */
- GET_TSK_STACK_BASE r9, r9
-
-66:
-#ifdef CONFIG_ARC_CURR_IN_REG
- /*
- * Treat r25 as scratch reg, save it on stack first
- * Load it with current task pointer
- */
- st r25, [r9, -4]
- GET_CURR_TASK_ON_CPU r25
-#endif
-
- /* Save Pre Intr/Exception User SP on kernel stack */
- st.a sp, [r9, -16] ; Make room for orig_r0, ECR, user_r25
-
- /* CAUTION:
- * SP should be set at the very end when we are done with everything
- * In case of 2 levels of interrupt we depend on value of SP to assume
- * that everything else is done (loading r25 etc)
- */
-
- /* set SP to point to kernel mode stack */
- mov sp, r9
-
- /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */
-
-.endm
-
-/*------------------------------------------------------------
- * "FAKE" a rtie to return from CPU Exception context
- * This is to re-enable Exceptions within exception
- * Look at EV_ProtV to see how this is actually used
- *-------------------------------------------------------------*/
-
-.macro FAKE_RET_FROM_EXCPN reg
-
- ld \reg, [sp, PT_status32]
- bic \reg, \reg, (STATUS_U_MASK|STATUS_DE_MASK)
- bset \reg, \reg, STATUS_L_BIT
- sr \reg, [erstatus]
- mov \reg, 55f
- sr \reg, [eret]
-
- rtie
-55:
-.endm
-
-/*
- * @reg [OUT] &thread_info of "current"
- */
-.macro GET_CURR_THR_INFO_FROM_SP reg
- bic \reg, sp, (THREAD_SIZE - 1)
-.endm
-
/*
* @reg [OUT] thread_info->flags of "current"
*/
@@ -359,222 +233,6 @@
ld \reg, [\reg, THREAD_INFO_FLAGS]
.endm
-/*--------------------------------------------------------------
- * For early Exception Prologue, a core reg is temporarily needed to
- * code the rest of prolog (stack switching). This is done by stashing
- * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
- *
- * Before saving the full regfile - this reg is restored back, only
- * to be saved again on kernel mode stack, as part of pt_regs.
- *-------------------------------------------------------------*/
-.macro EXCPN_PROLOG_FREEUP_REG reg
-#ifdef CONFIG_SMP
- sr \reg, [ARC_REG_SCRATCH_DATA0]
-#else
- st \reg, [@ex_saved_reg1]
-#endif
-.endm
-
-.macro EXCPN_PROLOG_RESTORE_REG reg
-#ifdef CONFIG_SMP
- lr \reg, [ARC_REG_SCRATCH_DATA0]
-#else
- ld \reg, [@ex_saved_reg1]
-#endif
-.endm
-
-/*--------------------------------------------------------------
- * Exception Entry prologue
- * -Switches stack to K mode (if not already)
- * -Saves the register file
- *
- * After this it is safe to call the "C" handlers
- *-------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
-
- /* Need at least 1 reg to code the early exception prologue */
- EXCPN_PROLOG_FREEUP_REG r9
-
- /* U/K mode at time of exception (stack not switched if already K) */
- lr r9, [erstatus]
-
- /* ARC700 doesn't provide auto-stack switching */
- SWITCH_TO_KERNEL_STK
-
- /* save the regfile */
- SAVE_ALL_SYS
-.endm
-
-/*--------------------------------------------------------------
- * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc)
- * Requires SP to be already switched to kernel mode Stack
- * sp points to the next free element on the stack at exit of this macro.
- * Registers are pushed / popped in the order defined in struct ptregs
- * in asm/ptrace.h
- * Note that syscalls are implemented via TRAP which is also a exception
- * from CPU's point of view
- *-------------------------------------------------------------*/
-.macro SAVE_ALL_SYS
-
- lr r9, [ecr]
- st r9, [sp, 8] /* ECR */
- st r0, [sp, 4] /* orig_r0, needed only for sys calls */
-
- /* Restore r9 used to code the early prologue */
- EXCPN_PROLOG_RESTORE_REG r9
-
- SAVE_R0_TO_R12
- PUSH gp
- PUSH fp
- PUSH blink
- PUSHAX eret
- PUSHAX erstatus
- PUSH lp_count
- PUSHAX lp_end
- PUSHAX lp_start
- PUSHAX erbta
-.endm
-
-/*--------------------------------------------------------------
- * Restore all registers used by system call or Exceptions
- * SP should always be pointing to the next free stack element
- * when entering this macro.
- *
- * NOTE:
- *
- * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deffered
- * by hardware and that is not good.
- *-------------------------------------------------------------*/
-.macro RESTORE_ALL_SYS
- POPAX erbta
- POPAX lp_start
- POPAX lp_end
-
- POP r9
- mov lp_count, r9 ;LD to lp_count is not allowed
-
- POPAX erstatus
- POPAX eret
- POP blink
- POP fp
- POP gp
- RESTORE_R12_TO_R0
-
- ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-
-/*--------------------------------------------------------------
- * Save all registers used by interrupt handlers.
- *-------------------------------------------------------------*/
-.macro SAVE_ALL_INT1
-
- /* restore original r9 to be saved as part of reg-file */
-#ifdef CONFIG_SMP
- lr r9, [ARC_REG_SCRATCH_DATA0]
-#else
- ld r9, [@int1_saved_reg]
-#endif
-
- /* now we are ready to save the remaining context :) */
- st event_IRQ1, [sp, 8] /* Dummy ECR */
- st 0, [sp, 4] /* orig_r0 , N/A for IRQ */
-
- SAVE_R0_TO_R12
- PUSH gp
- PUSH fp
- PUSH blink
- PUSH ilink1
- PUSHAX status32_l1
- PUSH lp_count
- PUSHAX lp_end
- PUSHAX lp_start
- PUSHAX bta_l1
-.endm
-
-.macro SAVE_ALL_INT2
-
- /* TODO-vineetg: SMP we can't use global nor can we use
- * SCRATCH0 as we do for int1 because while int1 is using
- * it, int2 can come
- */
- /* retsore original r9 , saved in sys_saved_r9 */
- ld r9, [@int2_saved_reg]
-
- /* now we are ready to save the remaining context :) */
- st event_IRQ2, [sp, 8] /* Dummy ECR */
- st 0, [sp, 4] /* orig_r0 , N/A for IRQ */
-
- SAVE_R0_TO_R12
- PUSH gp
- PUSH fp
- PUSH blink
- PUSH ilink2
- PUSHAX status32_l2
- PUSH lp_count
- PUSHAX lp_end
- PUSHAX lp_start
- PUSHAX bta_l2
-.endm
-
-/*--------------------------------------------------------------
- * Restore all registers used by interrupt handlers.
- *
- * NOTE:
- *
- * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deffered
- * by hardware and that is not good.
- *-------------------------------------------------------------*/
-
-.macro RESTORE_ALL_INT1
- POPAX bta_l1
- POPAX lp_start
- POPAX lp_end
-
- POP r9
- mov lp_count, r9 ;LD to lp_count is not allowed
-
- POPAX status32_l1
- POP ilink1
- POP blink
- POP fp
- POP gp
- RESTORE_R12_TO_R0
-
- ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-.macro RESTORE_ALL_INT2
- POPAX bta_l2
- POPAX lp_start
- POPAX lp_end
-
- POP r9
- mov lp_count, r9 ;LD to lp_count is not allowed
-
- POPAX status32_l2
- POP ilink2
- POP blink
- POP fp
- POP gp
- RESTORE_R12_TO_R0
-
- ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-
-/* Get CPU-ID of this core */
-.macro GET_CPU_ID reg
- lr \reg, [identity]
- lsr \reg, \reg, 8
- bmsk \reg, \reg, 7
-.endm
-
#ifdef CONFIG_SMP
/*-------------------------------------------------
@@ -643,6 +301,4 @@
#endif /* CONFIG_ARC_CURR_IN_REG */
-#endif /* __ASSEMBLY__ */
-
#endif /* __ASM_ARC_ENTRY_H */
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 7cc4ced5dbf4..694ece8a0243 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -99,9 +99,45 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
}
-#define readb_relaxed readb
-#define readw_relaxed readw
-#define readl_relaxed readl
+#ifdef CONFIG_ISA_ARCV2
+#include <asm/barrier.h>
+#define __iormb() rmb()
+#define __iowmb() wmb()
+#else
+#define __iormb() do { } while (0)
+#define __iowmb() do { } while (0)
+#endif
+
+/*
+ * MMIO can also get buffered/optimized in micro-arch, so barriers needed
+ * Based on ARM model for the typical use case
+ *
+ * <ST [DMA buffer]>
+ * <writel MMIO "go" reg>
+ * or:
+ * <readl MMIO "status" reg>
+ * <LD [DMA buffer]>
+ *
+ * http://lkml.kernel.org/r/20150622133656.GG1583@arm.com
+ */
+#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
+#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
+#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+
+#define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); })
+#define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); })
+#define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); })
+
+/*
+ * Relaxed API for drivers which can handle any ordering themselves
+ */
+#define readb_relaxed(c) __raw_readb(c)
+#define readw_relaxed(c) __raw_readw(c)
+#define readl_relaxed(c) __raw_readl(c)
+
+#define writeb_relaxed(v,c) __raw_writeb(v,c)
+#define writew_relaxed(v,c) __raw_writew(v,c)
+#define writel_relaxed(v,c) __raw_writel(v,c)
#include <asm-generic/io.h>
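A hedged driver sketch of the ordering the comment above describes: the __iowmb() folded into writel() makes the descriptor stores visible to the device before the doorbell write (GO_REG and struct ring_desc are made up for illustration):

#define GO_REG	0x10			/* hypothetical doorbell offset */

struct ring_desc { u32 addr, len, flags; };

static void kick_dma(void __iomem *regs, struct ring_desc *d, u32 pa, u32 len)
{
	d->addr  = pa;			/* plain stores to the DMA ring */
	d->len   = len;
	d->flags = 1;
	writel(1, regs + GO_REG);	/* wmb() inside orders the above first */
}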
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index f38652fb2ed7..bc5103637326 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -13,8 +13,14 @@
#define NR_IRQS 128 /* allow some CPU external IRQ handling */
/* Platform Independent IRQs */
+#ifdef CONFIG_ISA_ARCOMPACT
#define TIMER0_IRQ 3
#define TIMER1_IRQ 4
+#else
+#define TIMER0_IRQ 16
+#define TIMER1_IRQ 17
+#define IPI_IRQ 19
+#endif
#include <linux/interrupt.h>
#include <asm-generic/irq.h>
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
new file mode 100644
index 000000000000..ad481c24070d
--- /dev/null
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_IRQFLAGS_ARCV2_H
+#define __ASM_IRQFLAGS_ARCV2_H
+
+#include <asm/arcregs.h>
+
+/* status32 Bits */
+#define STATUS_AD_BIT 19 /* Disable Align chk: core supports non-aligned */
+#define STATUS_IE_BIT 31
+
+#define STATUS_AD_MASK (1<<STATUS_AD_BIT)
+#define STATUS_IE_MASK (1<<STATUS_IE_BIT)
+
+#define AUX_USER_SP 0x00D
+#define AUX_IRQ_CTRL 0x00E
+#define AUX_IRQ_ACT 0x043 /* Active Intr across all levels */
+#define AUX_IRQ_LVL_PEND 0x200 /* Pending Intr across all levels */
+#define AUX_IRQ_PRIORITY 0x206
+#define ICAUSE 0x40a
+#define AUX_IRQ_SELECT 0x40b
+#define AUX_IRQ_ENABLE 0x40c
+
+/* Was Intr taken in User Mode */
+#define AUX_IRQ_ACT_BIT_U 31
+
+/* 0 is highest level, but taken by FIRQs, if present in design */
+#define ARCV2_IRQ_DEF_PRIO 0
+
+/* seed value for status register */
+#define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | STATUS_AD_MASK | \
+ (ARCV2_IRQ_DEF_PRIO << 1))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Save IRQ state and disable IRQs
+ */
+static inline long arch_local_irq_save(void)
+{
+ unsigned long flags;
+
+ __asm__ __volatile__(" clri %0 \n" : "=r" (flags) : : "memory");
+
+ return flags;
+}
+
+/*
+ * restore saved IRQ state
+ */
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+ __asm__ __volatile__(" seti %0 \n" : : "r" (flags) : "memory");
+}
+
+/*
+ * Unconditionally Enable IRQs
+ */
+static inline void arch_local_irq_enable(void)
+{
+ unsigned int irqact = read_aux_reg(AUX_IRQ_ACT);
+
+ if (irqact & 0xffff)
+ write_aux_reg(AUX_IRQ_ACT, irqact & ~0xffff);
+
+ __asm__ __volatile__(" seti \n" : : : "memory");
+}
+
+/*
+ * Unconditionally Disable IRQs
+ */
+static inline void arch_local_irq_disable(void)
+{
+ __asm__ __volatile__(" clri \n" : : : "memory");
+}
+
+/*
+ * save IRQ state
+ */
+static inline long arch_local_save_flags(void)
+{
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " lr %0, [status32] \n"
+ : "=&r"(temp)
+ :
+ : "memory");
+
+ return temp;
+}
+
+/*
+ * Query IRQ state
+ */
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & (STATUS_IE_MASK));
+}
+
+static inline int arch_irqs_disabled(void)
+{
+ return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#else
+
+.macro IRQ_DISABLE scratch
+ clri
+.endm
+
+.macro IRQ_ENABLE scratch
+ seti
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif
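Typical use of the clri/seti primitives above, as a sketch; kernel code normally reaches them through the generic local_irq_save()/local_irq_restore() wrappers:

static unsigned long counter;

static void counter_bump(void)
{
	unsigned long flags;

	flags = arch_local_irq_save();	/* clri: disable, return old state */
	counter++;
	arch_local_irq_restore(flags);	/* seti: restore prior IE/priority */
}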
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
new file mode 100644
index 000000000000..aa805575c320
--- /dev/null
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_IRQFLAGS_ARCOMPACT_H
+#define __ASM_IRQFLAGS_ARCOMPACT_H
+
+/* vineetg: March 2010 : local_irq_save( ) optimisation
+ * -Remove explicit mov of current status32 into reg, that is not needed
+ * -Use BIC insn instead of INVERTED + AND
+ * -Conditionally disable interrupts (if they are not enabled, don't disable)
+*/
+
+#include <asm/arcregs.h>
+
+/* status32 Reg bits related to Interrupt Handling */
+#define STATUS_E1_BIT 1 /* Int 1 enable */
+#define STATUS_E2_BIT 2 /* Int 2 enable */
+#define STATUS_A1_BIT 3 /* Int 1 active */
+#define STATUS_A2_BIT 4 /* Int 2 active */
+
+#define STATUS_E1_MASK (1<<STATUS_E1_BIT)
+#define STATUS_E2_MASK (1<<STATUS_E2_BIT)
+#define STATUS_A1_MASK (1<<STATUS_A1_BIT)
+#define STATUS_A2_MASK (1<<STATUS_A2_BIT)
+#define STATUS_IE_MASK (STATUS_E1_MASK | STATUS_E2_MASK)
+
+/* Other Interrupt Handling related Aux regs */
+#define AUX_IRQ_LEV 0x200 /* IRQ Priority: L1 or L2 */
+#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */
+#define AUX_IRQ_LV12 0x43 /* interrupt level register */
+
+#define AUX_IENABLE 0x40c
+#define AUX_ITRIGGER 0x40d
+#define AUX_IPULSE 0x415
+
+#define ISA_INIT_STATUS_BITS STATUS_IE_MASK
+
+#ifndef __ASSEMBLY__
+
+/******************************************************************
+ * IRQ Control Macros
+ *
+ * All of them have "memory" clobber (compiler barrier) which is needed to
+ * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available)
+ * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
+ *
+ * Noted at the time of Abilis Timer List corruption
+ * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67
+ * Reasoning : https://lkml.org/lkml/2013/4/8/15
+ *
+ ******************************************************************/
+
+/*
+ * Save IRQ state and disable IRQs
+ */
+static inline long arch_local_irq_save(void)
+{
+ unsigned long temp, flags;
+
+ __asm__ __volatile__(
+ " lr %1, [status32] \n"
+ " bic %0, %1, %2 \n"
+ " and.f 0, %1, %2 \n"
+ " flag.nz %0 \n"
+ : "=r"(temp), "=r"(flags)
+ : "n"((STATUS_E1_MASK | STATUS_E2_MASK))
+ : "memory", "cc");
+
+ return flags;
+}
+
+/*
+ * restore saved IRQ state
+ */
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+
+ __asm__ __volatile__(
+ " flag %0 \n"
+ :
+ : "r"(flags)
+ : "memory");
+}
+
+/*
+ * Unconditionally Enable IRQs
+ */
+extern void arch_local_irq_enable(void);
+
+/*
+ * Unconditionally Disable IRQs
+ */
+static inline void arch_local_irq_disable(void)
+{
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " lr %0, [status32] \n"
+ " and %0, %0, %1 \n"
+ " flag %0 \n"
+ : "=&r"(temp)
+ : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK))
+ : "memory");
+}
+
+/*
+ * save IRQ state
+ */
+static inline long arch_local_save_flags(void)
+{
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " lr %0, [status32] \n"
+ : "=&r"(temp)
+ :
+ : "memory");
+
+ return temp;
+}
+
+/*
+ * Query IRQ state
+ */
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & (STATUS_E1_MASK
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+ | STATUS_E2_MASK
+#endif
+ ));
+}
+
+static inline int arch_irqs_disabled(void)
+{
+ return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#else
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+.macro TRACE_ASM_IRQ_DISABLE
+ bl trace_hardirqs_off
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+ bl trace_hardirqs_on
+.endm
+
+#else
+
+.macro TRACE_ASM_IRQ_DISABLE
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+.endm
+
+#endif
+
+.macro IRQ_DISABLE scratch
+ lr \scratch, [status32]
+ bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
+ flag \scratch
+ TRACE_ASM_IRQ_DISABLE
+.endm
+
+.macro IRQ_ENABLE scratch
+ lr \scratch, [status32]
+ or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
+ flag \scratch
+ TRACE_ASM_IRQ_ENABLE
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index 27ecc6975a58..59bc6a64f75d 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
@@ -9,171 +10,10 @@
#ifndef __ASM_ARC_IRQFLAGS_H
#define __ASM_ARC_IRQFLAGS_H
-/* vineetg: March 2010 : local_irq_save( ) optimisation
- * -Remove explicit mov of current status32 into reg, that is not needed
- * -Use BIC insn instead of INVERTED + AND
- * -Conditionally disable interrupts (if they are not enabled, don't disable)
-*/
-
-#include <asm/arcregs.h>
-
-/* status32 Reg bits related to Interrupt Handling */
-#define STATUS_E1_BIT 1 /* Int 1 enable */
-#define STATUS_E2_BIT 2 /* Int 2 enable */
-#define STATUS_A1_BIT 3 /* Int 1 active */
-#define STATUS_A2_BIT 4 /* Int 2 active */
-
-#define STATUS_E1_MASK (1<<STATUS_E1_BIT)
-#define STATUS_E2_MASK (1<<STATUS_E2_BIT)
-#define STATUS_A1_MASK (1<<STATUS_A1_BIT)
-#define STATUS_A2_MASK (1<<STATUS_A2_BIT)
-
-/* Other Interrupt Handling related Aux regs */
-#define AUX_IRQ_LEV 0x200 /* IRQ Priority: L1 or L2 */
-#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */
-#define AUX_IRQ_LV12 0x43 /* interrupt level register */
-
-#define AUX_IENABLE 0x40c
-#define AUX_ITRIGGER 0x40d
-#define AUX_IPULSE 0x415
-
-#ifndef __ASSEMBLY__
-
-/******************************************************************
- * IRQ Control Macros
- *
- * All of them have "memory" clobber (compiler barrier) which is needed to
- * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available)
- * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
- *
- * Noted at the time of Abilis Timer List corruption
- * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67
- * Reasoning : https://lkml.org/lkml/2013/4/8/15
- *
- ******************************************************************/
-
-/*
- * Save IRQ state and disable IRQs
- */
-static inline long arch_local_irq_save(void)
-{
- unsigned long temp, flags;
-
- __asm__ __volatile__(
- " lr %1, [status32] \n"
- " bic %0, %1, %2 \n"
- " and.f 0, %1, %2 \n"
- " flag.nz %0 \n"
- : "=r"(temp), "=r"(flags)
- : "n"((STATUS_E1_MASK | STATUS_E2_MASK))
- : "memory", "cc");
-
- return flags;
-}
-
-/*
- * restore saved IRQ state
- */
-static inline void arch_local_irq_restore(unsigned long flags)
-{
-
- __asm__ __volatile__(
- " flag %0 \n"
- :
- : "r"(flags)
- : "memory");
-}
-
-/*
- * Unconditionally Enable IRQs
- */
-extern void arch_local_irq_enable(void);
-
-/*
- * Unconditionally Disable IRQs
- */
-static inline void arch_local_irq_disable(void)
-{
- unsigned long temp;
-
- __asm__ __volatile__(
- " lr %0, [status32] \n"
- " and %0, %0, %1 \n"
- " flag %0 \n"
- : "=&r"(temp)
- : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK))
- : "memory");
-}
-
-/*
- * save IRQ state
- */
-static inline long arch_local_save_flags(void)
-{
- unsigned long temp;
-
- __asm__ __volatile__(
- " lr %0, [status32] \n"
- : "=&r"(temp)
- :
- : "memory");
-
- return temp;
-}
-
-/*
- * Query IRQ state
- */
-static inline int arch_irqs_disabled_flags(unsigned long flags)
-{
- return !(flags & (STATUS_E1_MASK
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
- | STATUS_E2_MASK
-#endif
- ));
-}
-
-static inline int arch_irqs_disabled(void)
-{
- return arch_irqs_disabled_flags(arch_local_save_flags());
-}
-
-#else
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-
-.macro TRACE_ASM_IRQ_DISABLE
- bl trace_hardirqs_off
-.endm
-
-.macro TRACE_ASM_IRQ_ENABLE
- bl trace_hardirqs_on
-.endm
-
+#ifdef CONFIG_ISA_ARCOMPACT
+#include <asm/irqflags-compact.h>
#else
-
-.macro TRACE_ASM_IRQ_DISABLE
-.endm
-
-.macro TRACE_ASM_IRQ_ENABLE
-.endm
-
+#include <asm/irqflags-arcv2.h>
#endif
-.macro IRQ_DISABLE scratch
- lr \scratch, [status32]
- bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
- flag \scratch
- TRACE_ASM_IRQ_DISABLE
-.endm
-
-.macro IRQ_ENABLE scratch
- lr \scratch, [status32]
- or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
- flag \scratch
- TRACE_ASM_IRQ_ENABLE
-.endm
-
-#endif /* __ASSEMBLY__ */
-
#endif
diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h
new file mode 100644
index 000000000000..52c11f0bb0e5
--- /dev/null
+++ b/arch/arc/include/asm/mcip.h
@@ -0,0 +1,94 @@
+/*
+ * ARConnect IP Support (Multi core enabler: Cross core IPI, RTC ...)
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_MCIP_H
+#define __ASM_MCIP_H
+
+#ifdef CONFIG_ISA_ARCV2
+
+#include <asm/arcregs.h>
+
+#define ARC_REG_MCIP_BCR 0x0d0
+#define ARC_REG_MCIP_CMD 0x600
+#define ARC_REG_MCIP_WDATA 0x601
+#define ARC_REG_MCIP_READBACK 0x602
+
+struct mcip_cmd {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:8, param:16, cmd:8;
+#else
+ unsigned int cmd:8, param:16, pad:8;
+#endif
+
+#define CMD_INTRPT_GENERATE_IRQ 0x01
+#define CMD_INTRPT_GENERATE_ACK 0x02
+#define CMD_INTRPT_READ_STATUS 0x03
+#define CMD_INTRPT_CHECK_SOURCE 0x04
+
+/* Semaphore Commands */
+#define CMD_SEMA_CLAIM_AND_READ 0x11
+#define CMD_SEMA_RELEASE 0x12
+
+#define CMD_DEBUG_SET_MASK 0x34
+#define CMD_DEBUG_SET_SELECT 0x36
+
+#define CMD_GRTC_READ_LO 0x42
+#define CMD_GRTC_READ_HI 0x43
+
+#define CMD_IDU_ENABLE 0x71
+#define CMD_IDU_DISABLE 0x72
+#define CMD_IDU_SET_MODE 0x74
+#define CMD_IDU_SET_DEST 0x76
+#define CMD_IDU_SET_MASK 0x7C
+
+#define IDU_M_TRIG_LEVEL 0x0
+#define IDU_M_TRIG_EDGE 0x1
+
+#define IDU_M_DISTRI_RR 0x0
+#define IDU_M_DISTRI_DEST 0x2
+};
+
+/*
+ * MCIP programming model
+ *
+ * - Simple commands write {cmd:8,param:16} to MCIP_CMD aux reg
+ * (param could be irq, common_irq, core_id ...)
+ * - More involved commands setup MCIP_WDATA with cmd specific data
+ * before invoking the simple command
+ */
+static inline void __mcip_cmd(unsigned int cmd, unsigned int param)
+{
+ struct mcip_cmd buf;
+
+ buf.pad = 0;
+ buf.cmd = cmd;
+ buf.param = param;
+
+ WRITE_AUX(ARC_REG_MCIP_CMD, buf);
+}
+
+/*
+ * Setup additional data for a cmd
+ * Callers need to lock to ensure atomicity
+ */
+static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param,
+ unsigned int data)
+{
+ write_aux_reg(ARC_REG_MCIP_WDATA, data);
+
+ __mcip_cmd(cmd, param);
+}
+
+extern void mcip_init_early_smp(void);
+extern void mcip_init_smp(unsigned int cpu);
+
+#endif
+
+#endif
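A hedged sketch of the programming model the comment above describes: firing a cross-core interrupt needs only a simple command (the real sender in arch/arc/kernel/mcip.c also serializes commands and handles acks):

static inline void mcip_ipi_send_sketch(int cpu)
{
	/* simple command: {cmd:8, param:16} into MCIP_CMD, param = core id */
	__mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
}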
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 8c84ae98c337..0f9c3eb5327e 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -15,24 +15,41 @@
#define CONFIG_ARC_MMU_VER 2
#elif defined(CONFIG_ARC_MMU_V3)
#define CONFIG_ARC_MMU_VER 3
+#elif defined(CONFIG_ARC_MMU_V4)
+#define CONFIG_ARC_MMU_VER 4
#endif
/* MMU Management regs */
#define ARC_REG_MMU_BCR 0x06f
+#if (CONFIG_ARC_MMU_VER < 4)
#define ARC_REG_TLBPD0 0x405
#define ARC_REG_TLBPD1 0x406
#define ARC_REG_TLBINDEX 0x407
#define ARC_REG_TLBCOMMAND 0x408
#define ARC_REG_PID 0x409
#define ARC_REG_SCRATCH_DATA0 0x418
+#else
+#define ARC_REG_TLBPD0 0x460
+#define ARC_REG_TLBPD1 0x461
+#define ARC_REG_TLBINDEX 0x464
+#define ARC_REG_TLBCOMMAND 0x465
+#define ARC_REG_PID 0x468
+#define ARC_REG_SCRATCH_DATA0 0x46c
+#endif
/* Bits in MMU PID register */
-#define MMU_ENABLE (1 << 31) /* Enable MMU for process */
+#define __TLB_ENABLE (1 << 31)
+#define __PROG_ENABLE (1 << 30)
+#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
/* Error code if probe fails */
#define TLB_LKUP_ERR 0x80000000
+#if (CONFIG_ARC_MMU_VER < 4)
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
+#else
+#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
+#endif
/* TLB Commands */
#define TLBWrite 0x1
@@ -45,6 +62,11 @@
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
#endif
+#if (CONFIG_ARC_MMU_VER >= 4)
+#define TLBInsertEntry 0x7
+#define TLBDeleteEntry 0x8
+#endif
+
#ifndef __ASSEMBLY__
typedef struct {
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 9615fe1701c6..1281718802f7 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -72,8 +72,18 @@
#define _PAGE_READ (1<<3) /* Page has user read perm (H) */
#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */
#define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */
+
+#if (CONFIG_ARC_MMU_VER >= 4)
+#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */
+#endif
+
#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
#define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */
+
+#if (CONFIG_ARC_MMU_VER >= 4)
+#define _PAGE_SZ (1<<10) /* Page Size indicator (H) */
+#endif
+
#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr
usable for shared TLB entries (H) */
#endif
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 52312cb5dbe2..ee682d8e0213 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -77,7 +77,7 @@ struct task_struct;
*/
#define TSK_K_ESP(tsk) (tsk->thread.ksp)
-#define TSK_K_REG(tsk, off) (*((unsigned int *)(TSK_K_ESP(tsk) + \
+#define TSK_K_REG(tsk, off) (*((unsigned long *)(TSK_K_ESP(tsk) + \
sizeof(struct callee_regs) + off)))
#define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4)
@@ -100,29 +100,26 @@ extern unsigned int get_wchan(struct task_struct *p);
#endif /* !__ASSEMBLY__ */
-/* Kernels Virtual memory area.
- * Unlike other architectures(MIPS, sh, cris ) ARC 700 does not have a
- * "kernel translated" region (like KSEG2 in MIPS). So we use a upper part
- * of the translated bottom 2GB for kernel virtual memory and protect
- * these pages from user accesses by disabling Ru, Eu and Wu.
+/*
+ * System Memory Map on ARC
+ *
+ * ---------------------------- (lower 2G, Translated) -------------------------
+ * 0x0000_0000 0x5FFF_FFFF (user vaddr: TASK_SIZE)
+ * 0x6000_0000 0x6FFF_FFFF (reserved gutter between U/K)
+ * 0x7000_0000 0x7FFF_FFFF (kvaddr: vmalloc/modules/pkmap..)
+ *
+ * PAGE_OFFSET ---------------- (Upper 2G, Untranslated) -----------------------
+ * 0x8000_0000 0xBFFF_FFFF (kernel direct mapped)
+ * 0xC000_0000 0xFFFF_FFFF (peripheral uncached space)
+ * -----------------------------------------------------------------------------
*/
-#define VMALLOC_SIZE (0x10000000) /* 256M */
-#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-#define VMALLOC_END (PAGE_OFFSET)
+#define VMALLOC_START 0x70000000
+#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START)
+#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
-/* Most of the architectures seem to be keeping some kind of padding between
- * userspace TASK_SIZE and PAGE_OFFSET. i.e TASK_SIZE != PAGE_OFFSET.
- */
#define USER_KERNEL_GUTTER 0x10000000
-/* User address space:
- * On ARC700, CPU allows the entire lower half of 32 bit address space to be
- * translated. Thus potentially 2G (0:0x7FFF_FFFF) could be User vaddr space.
- * However we steal 256M for kernel addr (0x7000_0000:0x7FFF_FFFF) and another
- * 256M (0x6000_0000:0x6FFF_FFFF) is gutter between user/kernel spaces
- * Thus total User vaddr space is (0:0x5FFF_FFFF)
- */
-#define TASK_SIZE (PAGE_OFFSET - VMALLOC_SIZE - USER_KERNEL_GUTTER)
+#define TASK_SIZE (VMALLOC_START - USER_KERNEL_GUTTER)
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
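The region sizes in the new map are easy to verify by arithmetic; a standalone compile-time sketch mirroring the constants above (SK_* names are local to the example):

#define SK_PAGE_OFFSET		0x80000000UL
#define SK_VMALLOC_START	0x70000000UL
#define SK_GUTTER		0x10000000UL

_Static_assert(SK_PAGE_OFFSET - SK_VMALLOC_START == 0x10000000UL,
	       "256M of kernel vaddr for vmalloc/modules/pkmap");
_Static_assert(SK_VMALLOC_START - SK_GUTTER == 0x60000000UL,
	       "TASK_SIZE: user vaddr spans 0x0000_0000..0x5FFF_FFFF");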
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 1bfeec2c0558..91755972b9a2 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -16,6 +16,7 @@
/* THE pt_regs: Defines how regs are saved during entry into kernel */
+#ifdef CONFIG_ISA_ARCOMPACT
struct pt_regs {
/* Real registers */
@@ -56,6 +57,48 @@ struct pt_regs {
long user_r25;
};
+#else
+
+struct pt_regs {
+
+ long orig_r0;
+
+ union {
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned long state:8, ecr_vec:8,
+ ecr_cause:8, ecr_param:8;
+#else
+ unsigned long ecr_param:8, ecr_cause:8,
+ ecr_vec:8, state:8;
+#endif
+ };
+ unsigned long event;
+ };
+
+ long bta; /* bta_l1, bta_l2, erbta */
+
+ long user_r25;
+
+ long r26; /* gp */
+ long fp;
+ long sp; /* user/kernel sp depending on where we came from */
+
+ long r12;
+
+ /*------- Below list auto saved by h/w -----------*/
+ long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
+
+ long blink;
+ long lp_end, lp_start, lp_count;
+
+ long ei, ldi, jli;
+
+ long ret;
+ long status32;
+};
+
+#endif
/* Callee saved registers - need to be saved only when you are scheduled out */
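The anonymous union in the ARCv2 pt_regs lets handlers read the raw ECR word or its decoded fields interchangeably. A sketch of the access pattern (assuming a little-endian build):

static void show_ecr(struct pt_regs *regs)
{
	pr_info("ECR: %#lx (vec %#x cause %#x param %#x)\n",
		regs->event, (unsigned)regs->ecr_vec,
		(unsigned)regs->ecr_cause, (unsigned)regs->ecr_param);
}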
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index b6a8c2dfbe6e..e1651df6a93d 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+ /*
+ * This smp_mb() is technically superfluous, we only need the one
+ * after the lock for providing the ACQUIRE semantics.
+ * However doing the "right" thing was regressing hackbench
+ * so keeping this, pending further investigation
+ */
+ smp_mb();
+
__asm__ __volatile__(
"1: ex %0, [%1] \n"
" breq %0, %2, 1b \n"
: "+&r" (tmp)
: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
: "memory");
+
+ /*
+ * ACQUIRE barrier to ensure load/store after taking the lock
+ * don't "bleed-up" out of the critical section (leak-in is allowed)
+ * http://www.spinics.net/lists/kernel/msg2010409.html
+ *
+ * ARCv2 only has load-load, store-store and all-all barrier
+ * thus need the full all-all barrier
+ */
+ smp_mb();
}
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+ smp_mb();
+
__asm__ __volatile__(
"1: ex %0, [%1] \n"
: "+r" (tmp)
: "r"(&(lock->slock))
: "memory");
+ smp_mb();
+
return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
}
@@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
+ /*
+ * RELEASE barrier: given the instructions avail on ARCv2, full barrier
+ * is the only option
+ */
+ smp_mb();
+
__asm__ __volatile__(
" ex %0, [%1] \n"
: "+r" (tmp)
: "r"(&(lock->slock))
: "memory");
+ /*
+ * superfluous, but keeping for now - see pairing version in
+ * arch_spin_lock above
+ */
smp_mb();
}
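In plain C11 the pattern implemented by the asm above looks roughly like the sketch
below: atomic_exchange stands in for the ARC "ex" instruction and a seq-cst fence
models smp_mb(). This illustrates the barrier placement only; it is not the kernel code:

    #include <stdatomic.h>

    #define LOCKED   1u
    #define UNLOCKED 0u

    static void sketch_spin_lock(atomic_uint *slock)
    {
            atomic_thread_fence(memory_order_seq_cst); /* "superfluous" pre-barrier */

            /* "ex" swaps LOCKED into the lock word; spin while the old
             * value shows the lock was already held */
            while (atomic_exchange_explicit(slock, LOCKED,
                                            memory_order_relaxed) == LOCKED)
                    ;

            atomic_thread_fence(memory_order_seq_cst); /* full barrier = ACQUIRE */
    }

    static void sketch_spin_unlock(atomic_uint *slock)
    {
            atomic_thread_fence(memory_order_seq_cst); /* full barrier = RELEASE */
            atomic_exchange_explicit(slock, UNLOCKED, memory_order_relaxed);
            atomic_thread_fence(memory_order_seq_cst); /* pairs with lock side */
    }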
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index aca0d5a45c7b..3af67455659a 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -25,6 +25,7 @@
#endif
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER)
#ifndef __ASSEMBLY__
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 30c9baffa96f..d1da6032b715 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -659,31 +659,30 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
static inline long
__arc_strncpy_from_user(char *dst, const char __user *src, long count)
{
- long res = count;
+ long res = 0;
char val;
- unsigned int hw_count;
if (count == 0)
return 0;
__asm__ __volatile__(
- " lp 2f \n"
+ " lp 3f \n"
"1: ldb.ab %3, [%2, 1] \n"
- " breq.d %3, 0, 2f \n"
+ " breq.d %3, 0, 3f \n"
" stb.ab %3, [%1, 1] \n"
- "2: sub %0, %6, %4 \n"
- "3: ;nop \n"
+ " add %0, %0, 1 # Num of NON NULL bytes copied \n"
+ "3: \n"
" .section .fixup, \"ax\" \n"
" .align 4 \n"
- "4: mov %0, %5 \n"
+ "4: mov %0, %4 # sets @res as -EFAULT \n"
" j 3b \n"
" .previous \n"
" .section __ex_table, \"a\" \n"
" .align 4 \n"
" .word 1b, 4b \n"
" .previous \n"
- : "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count)
- : "g"(-EFAULT), "ir"(count), "4"(count) /* this "4" seeds lp_count */
+ : "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
+ : "g"(-EFAULT), "l"(count)
: "memory");
return res;
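The return-value contract of the rewritten loop, expressed in plain C (fault handling
elided; on a faulting user load the __ex_table fixup returns -EFAULT instead). An
illustrative model only, not the kernel code:

    static long strncpy_model(char *dst, const char *src, long count)
    {
            long res = 0;

            while (count-- > 0) {
                    char val = *src++;
                    *dst++ = val;   /* delay-slot store: the NUL is copied too */
                    if (val == 0)
                            break;  /* ...but not counted */
                    res++;
            }
            return res;
    }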
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index e5d41e08240c..9d129a2a1351 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -30,7 +30,7 @@
#define PAGE_OFFSET (0x80000000)
#else
#define PAGE_SIZE (1UL << PAGE_SHIFT) /* Default 8K */
-#define PAGE_OFFSET (0x80000000UL) /* Kernel starts at 2G onwards */
+#define PAGE_OFFSET (0x80000000UL) /* Kernel starts at 2G onwards */
#endif
#define PAGE_MASK (~(PAGE_SIZE-1))
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 113f2033da9f..e7f3625a19b5 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -8,12 +8,14 @@
# Pass UTS_MACHINE for user_regset definition
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o entry.o process.o
+obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o
obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o
-obj-y += devtree.o
+obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o
+obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o
obj-$(CONFIG_MODULES) += arcksyms.o module.o
obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_ARC_MCIP) += mcip.o
obj-$(CONFIG_ARC_DW2_UNWIND) += unwind.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index 6c3aa0edb9b5..ecaf34e9235c 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -37,6 +37,8 @@ int main(void)
DEFINE(TASK_ACT_MM, offsetof(struct task_struct, active_mm));
DEFINE(TASK_TGID, offsetof(struct task_struct, tgid));
+ DEFINE(TASK_PID, offsetof(struct task_struct, pid));
+ DEFINE(TASK_COMM, offsetof(struct task_struct, comm));
DEFINE(MM_CTXT, offsetof(struct mm_struct, context));
DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
@@ -56,8 +58,11 @@ int main(void)
DEFINE(PT_r5, offsetof(struct pt_regs, r5));
DEFINE(PT_r6, offsetof(struct pt_regs, r6));
DEFINE(PT_r7, offsetof(struct pt_regs, r7));
+ DEFINE(PT_ret, offsetof(struct pt_regs, ret));
DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
+ DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
+
return 0;
}
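For background: asm-offsets.c is never linked into the kernel. It is compiled to
assembly, and the markers emitted by DEFINE() are post-processed into #defines in the
generated asm-offsets.h which the entry code includes. The generic helper in
include/linux/kbuild.h looks roughly like this (the exact form varies across kernel
versions):

    #define DEFINE(sym, val) \
            asm volatile("\n->" #sym " %0 " #val : : "i" (val))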
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index e32b54abff51..7e844fd8213f 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -32,6 +32,8 @@ static void __init arc_set_early_base_baud(unsigned long dt_root)
if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x"))
arc_base_baud = core_clk/3;
+ else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp"))
+ arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x) */
else
arc_base_baud = core_clk;
}
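arc_base_baud feeds the early console setup; with the standard 16x oversampling of an
8250-style UART, the divisor derived from it would be (a sketch, helper name
hypothetical):

    static unsigned int early_uart_divisor(unsigned int base_clk_hz,
                                           unsigned int baud)
    {
            return base_clk_hz / (16 * baud); /* 33333333/(16*115200) ~= 18 */
    }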
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
new file mode 100644
index 000000000000..bd7105d3172f
--- /dev/null
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -0,0 +1,239 @@
+/*
+ * ARCv2 ISA based core Low Level Intr/Traps/Exceptions (non-TLB) Handling
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h> /* ARC_{ENTRY,EXIT} */
+#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,TRAP...} */
+#include <asm/errno.h>
+#include <asm/arcregs.h>
+#include <asm/irqflags.h>
+
+ .cpu HS
+
+#define VECTOR .word
+
+;############################ Vector Table #################################
+
+ .section .vector,"a",@progbits
+ .align 4
+
+# Initial 16 slots are Exception Vectors
+VECTOR stext ; Restart Vector (jump to entry point)
+VECTOR mem_service ; Mem exception
+VECTOR instr_service ; Instrn Error
+VECTOR EV_MachineCheck ; Fatal Machine check
+VECTOR EV_TLBMissI ; Instruction TLB miss
+VECTOR EV_TLBMissD ; Data TLB miss
+VECTOR EV_TLBProtV ; Protection Violation
+VECTOR EV_PrivilegeV ; Privilege Violation
+VECTOR EV_SWI ; Software Breakpoint
+VECTOR EV_Trap ; Trap exception
+VECTOR EV_Extension ; Extn Instruction Exception
+VECTOR EV_DivZero ; Divide by Zero
+VECTOR EV_DCError ; Data Cache Error
+VECTOR EV_Misaligned ; Misaligned Data Access
+VECTOR reserved ; Reserved slots
+VECTOR reserved ; Reserved slots
+
+# Begin Interrupt Vectors
+VECTOR handle_interrupt ; (16) Timer0
+VECTOR handle_interrupt ; unused (Timer1)
+VECTOR handle_interrupt ; unused (WDT)
+VECTOR handle_interrupt ; (19) ICI (inter core interrupt)
+VECTOR handle_interrupt
+VECTOR handle_interrupt
+VECTOR handle_interrupt
+VECTOR handle_interrupt ; (23) End of fixed IRQs
+
+.rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8
+ VECTOR handle_interrupt
+.endr
+
+ .section .text, "ax",@progbits
+
+res_service: ; processor restart
+ flag 0x1 ; not implemented
+ nop
+ nop
+
+reserved: ; reserved vector slots land here
+ rtie ; unexpected event, just return
+
+;##################### Interrupt Handling ##############################
+
+ENTRY(handle_interrupt)
+
+ INTERRUPT_PROLOGUE irq
+
+ clri ; To make status32.IE agree with CPU internal state
+
+ lr r0, [ICAUSE]
+
+ mov blink, ret_from_exception
+
+ b.d arch_do_IRQ
+ mov r1, sp
+
+END(handle_interrupt)
+
+;################### Non TLB Exception Handling #############################
+
+ENTRY(EV_SWI)
+ flag 1
+END(EV_SWI)
+
+ENTRY(EV_DivZero)
+ flag 1
+END(EV_DivZero)
+
+ENTRY(EV_DCError)
+ flag 1
+END(EV_DCError)
+
+ENTRY(EV_Misaligned)
+
+ EXCEPTION_PROLOGUE
+
+ lr r0, [efa] ; Faulting Data address
+ mov r1, sp
+
+ FAKE_RET_FROM_EXCPN
+
+ SAVE_CALLEE_SAVED_USER
+ mov r2, sp ; callee_regs
+
+ bl do_misaligned_access
+
+ ; TBD: optimize - do this only if a callee reg was involved
+ ; either a dst of emulated LD/ST or src with address-writeback
+ RESTORE_CALLEE_SAVED_USER
+
+ b ret_from_exception
+END(EV_Misaligned)
+
+; ---------------------------------------------
+; Protection Violation Exception Handler
+; ---------------------------------------------
+
+ENTRY(EV_TLBProtV)
+
+ EXCEPTION_PROLOGUE
+
+ lr r0, [efa] ; Faulting Data address
+ mov r1, sp ; pt_regs
+
+ FAKE_RET_FROM_EXCPN
+
+ mov blink, ret_from_exception
+ b do_page_fault
+
+END(EV_TLBProtV)
+
+; From Linux standpoint Slow Path I/D TLB Miss is same as ProtV, as they
+; need to call do_page_fault().
+; ECR in pt_regs provides whether access was R/W/X
+
+.global call_do_page_fault
+.set call_do_page_fault, EV_TLBProtV
+
+;############# Common Handlers for ARCompact and ARCv2 ##############
+
+#include "entry.S"
+
+;############# Return from Intr/Excp/Trap (ARCv2 ISA Specifics) ##############
+;
+; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
+; IRQ shd definitely not happen between now and rtie
+; Both entry points to here already disable interrupts
+
+.Lrestore_regs:
+
+ ld r0, [sp, PT_status32] ; U/K mode at time of entry
+ lr r10, [AUX_IRQ_ACT]
+
+ bmsk r11, r10, 15 ; AUX_IRQ_ACT.ACTIVE
+ breq r11, 0, .Lexcept_ret ; No intr active, ret from Exception
+
+;####### Return from Intr #######
+
+debug_marker_l1:
+ bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot
+
+.Lisr_ret_fast_path:
+ ; Handle special case #1: (Entry via Exception, Return via IRQ)
+ ;
+ ; Exception in U mode, preempted in kernel, Intr taken (K mode), orig
+ ; task now returning to U mode (riding the Intr)
+ ; AUX_IRQ_ACT won't have U bit set (since intr in K mode), hence SP
+ ; won't be switched to correct U mode value (from AUX_SP)
+ ; So force AUX_IRQ_ACT.U for such a case
+
+ btst r0, STATUS_U_BIT ; Z flag set if K (Z clear for U)
+ bset.nz r11, r11, AUX_IRQ_ACT_BIT_U ; NZ means U
+ sr r11, [AUX_IRQ_ACT]
+
+ INTERRUPT_EPILOGUE irq
+ rtie
+
+;####### Return from Exception / pure kernel mode #######
+
+.Lexcept_ret: ; Expects r0 has PT_status32
+
+debug_marker_syscall:
+ EXCEPTION_EPILOGUE
+ rtie
+
+;####### Return from Intr to insn in delay slot #######
+
+; Handle special case #2: (Entry via Exception in Delay Slot, Return via IRQ)
+;
+; Intr returning to a Delay Slot (DS) insn
+; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig
+; entry was via Exception in DS which got preempted in kernel).
+;
+; IRQ RTIE won't reliably restore DE bit and/or BTA, needs handling
+.Lintr_ret_to_delay_slot:
+debug_marker_ds:
+
+ ld r2, [@intr_to_DE_cnt]
+ add r2, r2, 1
+ st r2, [@intr_to_DE_cnt]
+
+ ld r2, [sp, PT_ret]
+ ld r3, [sp, PT_status32]
+
+ bic r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK
+ st r0, [sp, PT_status32]
+
+ mov r1, .Lintr_ret_to_delay_slot_2
+ st r1, [sp, PT_ret]
+
+ st r2, [sp, 0]
+ st r3, [sp, 4]
+
+ b .Lisr_ret_fast_path
+
+.Lintr_ret_to_delay_slot_2:
+ sub sp, sp, SZ_PT_REGS
+ st r9, [sp, -4]
+
+ ld r9, [sp, 0]
+ sr r9, [eret]
+
+ ld r9, [sp, 4]
+ sr r9, [erstatus]
+
+ ld r9, [sp, 8]
+ sr r9, [erbta]
+
+ ld r9, [sp, -4]
+ add sp, sp, SZ_PT_REGS
+ rtie
+
+END(ret_from_exception)
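The dispatch at .Lrestore_regs above boils down to the following C sketch
(read_aux_reg as in the kernel; the two return helpers are illustrative names for the
epilogue paths):

    unsigned int act = read_aux_reg(AUX_IRQ_ACT);

    if ((act & 0xffff) == 0)        /* AUX_IRQ_ACT.ACTIVE clear: no intr active */
            exception_return();     /* .Lexcept_ret: EXCEPTION_EPILOGUE + rtie */
    else
            interrupt_return();     /* INTERRUPT_EPILOGUE + rtie, w/ U-bit fixup */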
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
new file mode 100644
index 000000000000..15d457b4403a
--- /dev/null
+++ b/arch/arc/kernel/entry-compact.S
@@ -0,0 +1,393 @@
+/*
+ * Low Level Interrupts/Traps/Exceptions (non-TLB) Handling for ARCompact ISA
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011
+ * -Userspace unaligned access emulation
+ *
+ * vineetg: Feb 2011 (ptrace low level code fixes)
+ * -traced syscall return code (r0) was not saved into pt_regs for restoring
+ * into user reg-file when traded task rets to user space.
+ * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs)
+ * were not invoking post-syscall trace hook (jumping directly into
+ * ret_from_system_call)
+ *
+ * vineetg: Nov 2010:
+ * -Vector table jumps (@8 bytes) converted into branches (@4 bytes)
+ * -To maintain the slot size of 8 bytes/vector, added nop, which is
+ * not executed at runtime.
+ *
+ * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
+ * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well
+ * -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't
+ * need ptregs anymore
+ *
+ * Vineetg: Oct 2009
+ * -In a rare scenario, Process gets a Priv-V exception and gets scheduled
+ * out. Since we don't do FAKE RTIE for Priv-V, CPU exception state remains
+ * active (AE bit enabled). This causes a double fault for a subseq valid
+ * exception. Thus FAKE RTIE needed in low level Priv-Violation handler.
+ * Instr Error could also cause similar scenario, so same there as well.
+ *
+ * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
+ *
+ * Vineetg: Aug 28th 2008: Bug #94984
+ * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
+ * Normally CPU does this automatically, however when doing FAKE rtie,
+ * we need to explicitly do this. The problem in macros
+ * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
+ * was being "CLEARED" rather than "SET". Since it is Loop INHIBIT Bit,
+ * setting it and not clearing it clears ZOL context
+ *
+ * Vineetg: May 16th, 2008
+ * - r25 now contains the Current Task when in kernel
+ *
+ * Vineetg: Dec 22, 2007
+ * Minor Surgery of Low Level ISR to make it SMP safe
+ * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR
+ * - _current_task is made an array of NR_CPUS
+ * - Access of _current_task wrapped inside a macro so that if hardware
+ * team agrees for a dedicated reg, no other code is touched
+ *
+ * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004
+ */
+
+#include <linux/errno.h>
+#include <linux/linkage.h> /* {ENTRY,EXIT} */
+#include <asm/entry.h>
+#include <asm/irqflags.h>
+
+ .cpu A7
+
+;############################ Vector Table #################################
+
+.macro VECTOR lbl
+#if 1 /* Just in case, build breaks */
+ j \lbl
+#else
+ b \lbl
+ nop
+#endif
+.endm
+
+ .section .vector, "ax",@progbits
+ .align 4
+
+/* Each entry in the vector table must occupy 2 words. Since it is a jump
+ * across sections (.vector to .text) we are guaranteed that 'j somewhere'
+ * will use the 'j limm' form of the instruction as long as somewhere is in
+ * a section other than .vector.
+ */
+
+; ********* Critical System Events **********************
+VECTOR res_service ; 0x0, Restart Vector (0x0)
+VECTOR mem_service ; 0x8, Mem exception (0x1)
+VECTOR instr_service ; 0x10, Instrn Error (0x2)
+
+; ******************** Device ISRs **********************
+#ifdef CONFIG_ARC_IRQ3_LV2
+VECTOR handle_interrupt_level2
+#else
+VECTOR handle_interrupt_level1
+#endif
+
+VECTOR handle_interrupt_level1
+
+#ifdef CONFIG_ARC_IRQ5_LV2
+VECTOR handle_interrupt_level2
+#else
+VECTOR handle_interrupt_level1
+#endif
+
+#ifdef CONFIG_ARC_IRQ6_LV2
+VECTOR handle_interrupt_level2
+#else
+VECTOR handle_interrupt_level1
+#endif
+
+.rept 25
+VECTOR handle_interrupt_level1 ; Other devices
+.endr
+
+/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */
+
+; ******************** Exceptions **********************
+VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20)
+VECTOR EV_TLBMissI ; 0x108, Instruction TLB miss (0x21)
+VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22)
+VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23)
+ ; or Misaligned Access
+VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24)
+VECTOR EV_Trap ; 0x128, Trap exception (0x25)
+VECTOR EV_Extension ; 0x130, Extn Instruction Excp (0x26)
+
+.rept 24
+VECTOR reserved ; Reserved Exceptions
+.endr
+
+
+;##################### Scratch Mem for IRQ stack switching #############
+
+ARCFP_DATA int1_saved_reg
+ .align 32
+ .type int1_saved_reg, @object
+ .size int1_saved_reg, 4
+int1_saved_reg:
+ .zero 4
+
+/* Each Interrupt level needs its own scratch */
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+
+ARCFP_DATA int2_saved_reg
+ .type int2_saved_reg, @object
+ .size int2_saved_reg, 4
+int2_saved_reg:
+ .zero 4
+
+#endif
+
+; ---------------------------------------------
+ .section .text, "ax",@progbits
+
+res_service: ; processor restart
+ flag 0x1 ; not implemented
+ nop
+ nop
+
+reserved: ; reserved vector slots land here
+ rtie ; unexpected event, just return
+
+;##################### Interrupt Handling ##############################
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+; ---------------------------------------------
+; Level 2 ISR: Can interrupt a Level 1 ISR
+; ---------------------------------------------
+ENTRY(handle_interrupt_level2)
+
+ INTERRUPT_PROLOGUE 2
+
+ ;------------------------------------------------------
+ ; if L2 IRQ interrupted a L1 ISR, disable preemption
+ ;------------------------------------------------------
+
+ ld r9, [sp, PT_status32] ; get status32_l2 (saved in pt_regs)
+ bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal
+
+ ; A1 is set in status32_l2
+ ; bump thread_info->preempt_count (Disable preemption)
+ GET_CURR_THR_INFO_FROM_SP r10
+ ld r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+ add r9, r9, 1
+ st r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+1:
+ ;------------------------------------------------------
+ ; setup params for Linux common ISR and invoke it
+ ;------------------------------------------------------
+ lr r0, [icause2]
+ and r0, r0, 0x1f
+
+ bl.d @arch_do_IRQ
+ mov r1, sp
+
+ mov r8,0x2
+ sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg
+
+ b ret_from_exception
+
+END(handle_interrupt_level2)
+
+#endif
+
+; ---------------------------------------------
+; Level 1 ISR
+; ---------------------------------------------
+ENTRY(handle_interrupt_level1)
+
+ INTERRUPT_PROLOGUE 1
+
+ lr r0, [icause1]
+ and r0, r0, 0x1f
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ ; icause1 needs to be read early, before calling tracing, which
+ ; can clobber scratch regs, hence use of stack to stash it
+ push r0
+ TRACE_ASM_IRQ_DISABLE
+ pop r0
+#endif
+
+ bl.d @arch_do_IRQ
+ mov r1, sp
+
+ mov r8,0x1
+ sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg
+
+ b ret_from_exception
+END(handle_interrupt_level1)
+
+;################### Non TLB Exception Handling #############################
+
+; ---------------------------------------------
+; Protection Violation Exception Handler
+; ---------------------------------------------
+
+ENTRY(EV_TLBProtV)
+
+ EXCEPTION_PROLOGUE
+
+ lr r2, [ecr]
+ lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above)
+
+ ; Exception auto-disables further Intr/exceptions.
+ ; Re-enable them by pretending to return from exception
+ ; (so rest of handler executes in pure K mode)
+
+ FAKE_RET_FROM_EXCPN
+
+ mov r1, sp ; Handle to pt_regs
+
+ ;------ (5) Type of Protection Violation? ----------
+ ;
+ ; ProtV Hardware Exception is triggered for Access Faults of 2 types
+ ; -Access Violation : 00_23_(00|01|02|03)_00
+ ; x r w r+w
+ ; -Unaligned Access : 00_23_04_00
+ ;
+ bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
+
+ ;========= (6a) Access Violation Processing ========
+ bl do_page_fault
+ b ret_from_exception
+
+ ;========== (6b) Non aligned access ============
+4:
+
+ SAVE_CALLEE_SAVED_USER
+ mov r2, sp ; callee_regs
+
+ bl do_misaligned_access
+
+ ; TBD: optimize - do this only if a callee reg was involved
+ ; either a dst of emulated LD/ST or src with address-writeback
+ RESTORE_CALLEE_SAVED_USER
+
+ b ret_from_exception
+
+END(EV_TLBProtV)
+
+; Wrapper for Linux page fault handler called from EV_TLBMiss*
+; Very similar to ProtV handler case (6a) above, but avoids the extra checks
+; for Misaligned access
+;
+ENTRY(call_do_page_fault)
+
+ EXCEPTION_PROLOGUE
+ lr r0, [efa] ; Faulting Data address
+ mov r1, sp
+ FAKE_RET_FROM_EXCPN
+
+ mov blink, ret_from_exception
+ b do_page_fault
+
+END(call_do_page_fault)
+
+;############# Common Handlers for ARCompact and ARCv2 ##############
+
+#include "entry.S"
+
+;############# Return from Intr/Excp/Trap (ARC Specifics) ##############
+;
+; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
+; IRQ shd definitely not happen between now and rtie
+; Both entry points to here already disable interrupts
+
+.Lrestore_regs:
+
+ TRACE_ASM_IRQ_ENABLE
+
+ lr r10, [status32]
+
+ ; Restore REG File. In case multiple Events outstanding,
+ ; use the same priority as rtie: EXCPN, L2 IRQ, L1 IRQ, None
+ ; Note that we use realtime STATUS32 (not pt_regs->status32) to
+ ; decide that.
+
+ ; if Returning from Exception
+ btst r10, STATUS_AE_BIT
+ bnz .Lexcep_ret
+
+ ; Not Exception so maybe Interrupts (Level 1 or 2)
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+
+ ; Level 2 interrupt return Path - from hardware standpoint
+ bbit0 r10, STATUS_A2_BIT, not_level2_interrupt
+
+ ;------------------------------------------------------------------
+ ; However the context returning might not have taken L2 intr itself
+ ; e.g. Task 'A' user-code -> L2 intr -> schedule -> 'B' user-code ret
+ ; Special considerations needed for the context which took L2 intr
+
+ ld r9, [sp, PT_event] ; Ensure this is L2 intr context
+ brne r9, event_IRQ2, 149f
+
+ ;------------------------------------------------------------------
+ ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier
+ ; so that sched doesn't move to new task, causing L1 to be delayed
+ ; nondeterministically. Now that we've achieved that, let's reset
+ ; things to what they were, before returning from L2 context
+ ;----------------------------------------------------------------
+
+ ld r9, [sp, PT_status32] ; get status32_l2 (saved in pt_regs)
+ bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal
+
+ ; decrement thread_info->preempt_count (re-enable preemption)
+ GET_CURR_THR_INFO_FROM_SP r10
+ ld r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+ ; paranoid check, given A1 was active when A2 happened, preempt count
+ ; must not be 0 because we would have incremented it.
+ ; If this does happen we simply HALT as it means a BUG !!!
+ cmp r9, 0
+ bnz 2f
+ flag 1
+
+2:
+ sub r9, r9, 1
+ st r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+149:
+ ;return from level 2
+ INTERRUPT_EPILOGUE 2
+debug_marker_l2:
+ rtie
+
+not_level2_interrupt:
+
+#endif
+
+ bbit0 r10, STATUS_A1_BIT, .Lpure_k_mode_ret
+
+ ;return from level 1
+ INTERRUPT_EPILOGUE 1
+debug_marker_l1:
+ rtie
+
+.Lexcep_ret:
+.Lpure_k_mode_ret:
+
+ ;this case is for syscalls or Exceptions or pure kernel mode
+
+ EXCEPTION_EPILOGUE
+debug_marker_syscall:
+ rtie
+
+END(ret_from_exception)
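In C terms, the L2-over-L1 guard in handle_interrupt_level2 above amounts to the
following sketch (preempt_count lives in thread_info on this kernel):

    /* on entry to the L2 ISR */
    if (regs->status32 & STATUS_A1_MASK)            /* L1 ISR was active when L2 hit */
            current_thread_info()->preempt_count++; /* keep sched off until L2 rets */

    /* matching decrement on the L2 return path, done only after checking
     * pt_regs->event == event_IRQ2, and halting if the count is already 0 */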
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index d868289c5a26..f7a82fd4d601 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -1,60 +1,13 @@
/*
- * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC
+ * Common Low Level Interrupts/Traps/Exceptions (non-TLB) Handling for ARC
+ * (included from entry-<isa>.S)
*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * vineetg: May 2011
- * -Userspace unaligned access emulation
- *
- * vineetg: Feb 2011 (ptrace low level code fixes)
- * -traced syscall return code (r0) was not saved into pt_regs for restoring
- * into user reg-file when traded task rets to user space.
- * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs)
- * were not invoking post-syscall trace hook (jumping directly into
- * ret_from_system_call)
- *
- * vineetg: Nov 2010:
- * -Vector table jumps (@8 bytes) converted into branches (@4 bytes)
- * -To maintain the slot size of 8 bytes/vector, added nop, which is
- * not executed at runtime.
- *
- * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
- * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well
- * -Wrappers for sys_{,rt_}sigsuspend() nolonger needed as they don't
- * need ptregs anymore
- *
- * Vineetg: Oct 2009
- * -In a rare scenario, Process gets a Priv-V exception and gets scheduled
- * out. Since we don't do FAKE RTIE for Priv-V, CPU excpetion state remains
- * active (AE bit enabled). This causes a double fault for a subseq valid
- * exception. Thus FAKE RTIE needed in low level Priv-Violation handler.
- * Instr Error could also cause similar scenario, so same there as well.
- *
- * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
- *
- * Vineetg: Aug 28th 2008: Bug #94984
- * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
- * Normally CPU does this automatically, however when doing FAKE rtie,
- * we need to explicitly do this. The problem in macros
- * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
- * was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit,
- * setting it and not clearing it clears ZOL context
- *
- * Vineetg: May 16th, 2008
- * - r25 now contains the Current Task when in kernel
- *
- * Vineetg: Dec 22, 2007
- * Minor Surgery of Low Level ISR to make it SMP safe
- * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR
- * - _current_task is made an array of NR_CPUS
- * - Access of _current_task wrapped inside a macro so that if hardware
- * team agrees for a dedicated reg, no other code is touched
- *
- * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004
*/
/*------------------------------------------------------------------
@@ -67,206 +20,59 @@
* Global Pointer (gp) r26
* Frame Pointer (fp) r27
* Stack Pointer (sp) r28
- * Interrupt link register (ilink1) r29
- * Interrupt link register (ilink2) r30
* Branch link register (blink) r31
*------------------------------------------------------------------
*/
- .cpu A7
-
-;############################ Vector Table #################################
-
-.macro VECTOR lbl
-#if 1 /* Just in case, build breaks */
- j \lbl
-#else
- b \lbl
- nop
-#endif
-.endm
-
- .section .vector, "ax",@progbits
- .align 4
-
-/* Each entry in the vector table must occupy 2 words. Since it is a jump
- * across sections (.vector to .text) we are gauranteed that 'j somewhere'
- * will use the 'j limm' form of the intrsuction as long as somewhere is in
- * a section other than .vector.
- */
-
-; ********* Critical System Events **********************
-VECTOR res_service ; 0x0, Restart Vector (0x0)
-VECTOR mem_service ; 0x8, Mem exception (0x1)
-VECTOR instr_service ; 0x10, Instrn Error (0x2)
-
-; ******************** Device ISRs **********************
-#ifdef CONFIG_ARC_IRQ3_LV2
-VECTOR handle_interrupt_level2
-#else
-VECTOR handle_interrupt_level1
-#endif
-
-VECTOR handle_interrupt_level1
-
-#ifdef CONFIG_ARC_IRQ5_LV2
-VECTOR handle_interrupt_level2
-#else
-VECTOR handle_interrupt_level1
-#endif
-
-#ifdef CONFIG_ARC_IRQ6_LV2
-VECTOR handle_interrupt_level2
-#else
-VECTOR handle_interrupt_level1
-#endif
-
-.rept 25
-VECTOR handle_interrupt_level1 ; Other devices
-.endr
-
-/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */
-
-; ******************** Exceptions **********************
-VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20)
-VECTOR EV_TLBMissI ; 0x108, Intruction TLB miss (0x21)
-VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22)
-VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23)
- ; or Misaligned Access
-VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24)
-VECTOR EV_Trap ; 0x128, Trap exception (0x25)
-VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26)
-
-.rept 24
-VECTOR reserved ; Reserved Exceptions
-.endr
-
-#include <linux/linkage.h> /* {EXTRY,EXIT} */
-#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,SYS...} */
-#include <asm/errno.h>
-#include <asm/arcregs.h>
-#include <asm/irqflags.h>
-
-;##################### Scratch Mem for IRQ stack switching #############
-
-ARCFP_DATA int1_saved_reg
- .align 32
- .type int1_saved_reg, @object
- .size int1_saved_reg, 4
-int1_saved_reg:
- .zero 4
-
-/* Each Interrupt level needs its own scratch */
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-
-ARCFP_DATA int2_saved_reg
- .type int2_saved_reg, @object
- .size int2_saved_reg, 4
-int2_saved_reg:
- .zero 4
-
-#endif
-
-; ---------------------------------------------
- .section .text, "ax",@progbits
-
-res_service: ; processor restart
- flag 0x1 ; not implemented
- nop
- nop
-
-reserved: ; processor restart
- rtie ; jump to processor initializations
-
-;##################### Interrupt Handling ##############################
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-; ---------------------------------------------
-; Level 2 ISR: Can interrupt a Level 1 ISR
-; ---------------------------------------------
-ENTRY(handle_interrupt_level2)
+;################### Special Sys Call Wrappers ##########################
- ; TODO-vineetg for SMP this wont work
- ; free up r9 as scratchpad
- st r9, [@int2_saved_reg]
+ENTRY(sys_clone_wrapper)
+ SAVE_CALLEE_SAVED_USER
+ bl @sys_clone
+ DISCARD_CALLEE_SAVED_USER
- ;Which mode (user/kernel) was the system in when intr occured
- lr r9, [status32_l2]
+ GET_CURR_THR_INFO_FLAGS r10
+ btst r10, TIF_SYSCALL_TRACE
+ bnz tracesys_exit
- SWITCH_TO_KERNEL_STK
- SAVE_ALL_INT2
+ b ret_from_system_call
+END(sys_clone_wrapper)
- ;------------------------------------------------------
- ; if L2 IRQ interrupted a L1 ISR, disable preemption
- ;------------------------------------------------------
+ENTRY(ret_from_fork)
+ ; when the forked child comes here from the __switch_to function
+ ; r0 has the last task pointer.
+ ; put last task in scheduler queue
+ bl @schedule_tail
- ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs)
- bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal
+ ld r9, [sp, PT_status32]
+ brne r9, 0, 1f
- ; A1 is set in status32_l2
- ; bump thread_info->preempt_count (Disable preemption)
- GET_CURR_THR_INFO_FROM_SP r10
- ld r9, [r10, THREAD_INFO_PREEMPT_COUNT]
- add r9, r9, 1
- st r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+ jl.d [r14] ; kernel thread entry point
+ mov r0, r13 ; (see PF_KTHREAD block in copy_thread)
1:
- ;------------------------------------------------------
- ; setup params for Linux common ISR and invoke it
- ;------------------------------------------------------
- lr r0, [icause2]
- and r0, r0, 0x1f
-
- bl.d @arch_do_IRQ
- mov r1, sp
-
- mov r8,0x2
- sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg
-
- b ret_from_exception
-
-END(handle_interrupt_level2)
-
-#endif
-
-; ---------------------------------------------
-; Level 1 ISR
-; ---------------------------------------------
-ENTRY(handle_interrupt_level1)
-
- /* free up r9 as scratchpad */
-#ifdef CONFIG_SMP
- sr r9, [ARC_REG_SCRATCH_DATA0]
-#else
- st r9, [@int1_saved_reg]
-#endif
-
- ;Which mode (user/kernel) was the system in when intr occured
- lr r9, [status32_l1]
-
- SWITCH_TO_KERNEL_STK
- SAVE_ALL_INT1
+ ; Return to user space
+ ; 1. Any forked task (Reach here via BRne above)
+ ; 2. First ever init task (Reach here via return from JL above)
+ ; This is the historic "kernel_execve" use-case, to return to init
+ ; user mode, in a roundabout way since that is always done from
+ ; a kernel thread which is executed via JL above but always returns
+ ; out whenever kernel_execve (now inline do_fork()) is involved
+ b ret_from_exception
+END(ret_from_fork)
- lr r0, [icause1]
- and r0, r0, 0x1f
+#ifdef CONFIG_ARC_DW2_UNWIND
+; Workaround for bug 94179 (STAR ):
+; Despite -fasynchronous-unwind-tables, the linker does not mark the dwarf2
+; unwinder section (.debug_frame) as loadable. So we force it here.
+; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag)
+; would not work after a clean build due to kernel build system dependencies.
+.section .debug_frame, "wa",@progbits
-#ifdef CONFIG_TRACE_IRQFLAGS
- ; icause1 needs to be read early, before calling tracing, which
- ; can clobber scratch regs, hence use of stack to stash it
- push r0
- TRACE_ASM_IRQ_DISABLE
- pop r0
+; Reset to .text as this file is included in entry-<isa>.S
+.section .text, "ax",@progbits
#endif
- bl.d @arch_do_IRQ
- mov r1, sp
-
- mov r8,0x1
- sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg
-
- b ret_from_exception
-END(handle_interrupt_level1)
-
;################### Non TLB Exception Handling #############################
; ---------------------------------------------
@@ -280,7 +86,7 @@ ENTRY(instr_service)
lr r0, [efa]
mov r1, sp
- FAKE_RET_FROM_EXCPN r9
+ FAKE_RET_FROM_EXCPN
bl do_insterror_or_kprobe
b ret_from_exception
@@ -297,7 +103,7 @@ ENTRY(mem_service)
lr r0, [efa]
mov r1, sp
- FAKE_RET_FROM_EXCPN r9
+ FAKE_RET_FROM_EXCPN
bl do_memory_error
b ret_from_exception
@@ -334,60 +140,6 @@ ENTRY(EV_MachineCheck)
END(EV_MachineCheck)
; ---------------------------------------------
-; Protection Violation Exception Handler
-; ---------------------------------------------
-
-ENTRY(EV_TLBProtV)
-
- EXCEPTION_PROLOGUE
-
- ;---------(3) Save some more regs-----------------
- ; vineetg: Mar 6th: Random Seg Fault issue #1
- ; ecr and efa were not saved in case an Intr sneaks in
- ; after fake rtie
-
- lr r2, [ecr]
- lr r0, [efa] ; Faulting Data address
-
- ; --------(4) Return from CPU Exception Mode ---------
- ; Fake a rtie, but rtie to next label
- ; That way, subsequently, do_page_fault ( ) executes in pure kernel
- ; mode with further Exceptions enabled
-
- FAKE_RET_FROM_EXCPN r9
-
- mov r1, sp
-
- ;------ (5) Type of Protection Violation? ----------
- ;
- ; ProtV Hardware Exception is triggered for Access Faults of 2 types
- ; -Access Violaton : 00_23_(00|01|02|03)_00
- ; x r w r+w
- ; -Unaligned Access : 00_23_04_00
- ;
- bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
-
- ;========= (6a) Access Violation Processing ========
- bl do_page_fault
- b ret_from_exception
-
- ;========== (6b) Non aligned access ============
-4:
-
- SAVE_CALLEE_SAVED_USER
- mov r2, sp ; callee_regs
-
- bl do_misaligned_access
-
- ; TBD: optimize - do this only if a callee reg was involved
- ; either a dst of emulated LD/ST or src with address-writeback
- RESTORE_CALLEE_SAVED_USER
-
- b ret_from_exception
-
-END(EV_TLBProtV)
-
-; ---------------------------------------------
; Privilege Violation Exception Handler
; ---------------------------------------------
ENTRY(EV_PrivilegeV)
@@ -397,7 +149,7 @@ ENTRY(EV_PrivilegeV)
lr r0, [efa]
mov r1, sp
- FAKE_RET_FROM_EXCPN r9
+ FAKE_RET_FROM_EXCPN
bl do_privilege_fault
b ret_from_exception
@@ -413,14 +165,17 @@ ENTRY(EV_Extension)
lr r0, [efa]
mov r1, sp
- FAKE_RET_FROM_EXCPN r9
+ FAKE_RET_FROM_EXCPN
bl do_extension_fault
b ret_from_exception
END(EV_Extension)
-;######################### System Call Tracing #########################
+;################ Trap Handling (Syscall, Breakpoint) ##################
+; ---------------------------------------------
+; syscall Tracing
+; ---------------------------------------------
tracesys:
; save EFA in case tracer wants the PC of traced task
; using ERET won't work since next-PC has already committed
@@ -463,10 +218,9 @@ tracesys_exit:
b ret_from_exception ; NOT ret_from_system_call as it saves r0 which
; we'd done before calling post hook above
-;################### Break Point TRAP ##########################
-
- ; ======= (5b) Trap is due to Break-Point =========
-
+; ---------------------------------------------
+; Breakpoint TRAP
+; ---------------------------------------------
trap_with_param:
; stop_pc info needed by gdb
@@ -475,7 +229,7 @@ trap_with_param:
; Now that we have read EFA, it is safe to do "fake" rtie
; and get out of CPU exception mode
- FAKE_RET_FROM_EXCPN r11
+ FAKE_RET_FROM_EXCPN
; Save callee regs in case gdb wants to have a look
; SP will grow up by size of CALLEE Reg-File
@@ -494,37 +248,33 @@ trap_with_param:
b ret_from_exception
-;##################### Trap Handling ##############################
-;
-; EV_Trap caused by TRAP_S and TRAP0 instructions.
-;------------------------------------------------------------------
-; (1) System Calls
-; :parameters in r0-r7.
-; :r8 has the system call number
-; (2) Break Points
-;------------------------------------------------------------------
+; ---------------------------------------------
+; syscall TRAP
+; ABI: (r0-r7) up to 8 args, (r8) syscall number
+; ---------------------------------------------
ENTRY(EV_Trap)
EXCEPTION_PROLOGUE
- ;------- (4) What caused the Trap --------------
- lr r12, [ecr]
- bmsk.f 0, r12, 7
+ ;============ TRAP 1 :breakpoints
+ ; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR)
+ bmsk.f 0, r9, 7
bnz trap_with_param
- ; ======= (5a) Trap is due to System Call ========
+ ;============ TRAP (no param): syscall top level
- ; Before doing anything, return from CPU Exception Mode
- FAKE_RET_FROM_EXCPN r11
+ ; First return from Exception to pure K mode (Exception/IRQs re-enabled)
+ FAKE_RET_FROM_EXCPN
- ; If syscall tracing ongoing, invoke pre-pos-hooks
+ ; If syscall tracing ongoing, invoke pre-post-hooks
GET_CURR_THR_INFO_FLAGS r10
btst r10, TIF_SYSCALL_TRACE
bnz tracesys ; this never comes back
- ;============ This is normal System Call case ==========
- ; Sys-call num shd not exceed the total system calls avail
+ ;============ Normal syscall case
+
+ ; syscall num shd not exceed the total system calls avail
cmp r8, NR_syscalls
mov.hi r0, -ENOSYS
bhi ret_from_system_call
@@ -565,7 +315,7 @@ resume_user_mode_begin:
; Fast Path return to user mode if no pending work
GET_CURR_THR_INFO_FLAGS r9
and.f 0, r9, _TIF_WORK_MASK
- bz restore_regs
+ bz .Lrestore_regs
; --- (Slow Path #1) task preemption ---
bbit0 r9, TIF_NEED_RESCHED, .Lchk_pend_signals
@@ -624,11 +374,11 @@ resume_kernel_mode:
; Can't preempt if preemption disabled
GET_CURR_THR_INFO_FROM_SP r10
ld r8, [r10, THREAD_INFO_PREEMPT_COUNT]
- brne r8, 0, restore_regs
+ brne r8, 0, .Lrestore_regs
; check if this task's NEED_RESCHED flag set
ld r9, [r10, THREAD_INFO_FLAGS]
- bbit0 r9, TIF_NEED_RESCHED, restore_regs
+ bbit0 r9, TIF_NEED_RESCHED, .Lrestore_regs
; Invoke PREEMPTION
bl preempt_schedule_irq
@@ -636,142 +386,7 @@ resume_kernel_mode:
; preempt_schedule_irq() always returns with IRQ disabled
#endif
- ; fall through
-
-;############# Return from Intr/Excp/Trap (ARC Specifics) ##############
-;
-; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
-; IRQ shd definitely not happen between now and rtie
-; All 2 entry points to here already disable interrupts
-
-restore_regs :
-
- TRACE_ASM_IRQ_ENABLE
-
- lr r10, [status32]
-
- ; Restore REG File. In case multiple Events outstanding,
- ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
- ; Note that we use realtime STATUS32 (not pt_regs->status32) to
- ; decide that.
-
- ; if Returning from Exception
- bbit0 r10, STATUS_AE_BIT, not_exception
- RESTORE_ALL_SYS
- rtie
-
- ; Not Exception so maybe Interrupts (Level 1 or 2)
-
-not_exception:
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-
- ; Level 2 interrupt return Path - from hardware standpoint
- bbit0 r10, STATUS_A2_BIT, not_level2_interrupt
-
- ;------------------------------------------------------------------
- ; However the context returning might not have taken L2 intr itself
- ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret
- ; Special considerations needed for the context which took L2 intr
-
- ld r9, [sp, PT_event] ; Ensure this is L2 intr context
- brne r9, event_IRQ2, 149f
-
- ;------------------------------------------------------------------
- ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier
- ; so that sched doesn't move to new task, causing L1 to be delayed
- ; undeterministically. Now that we've achieved that, let's reset
- ; things to what they were, before returning from L2 context
- ;----------------------------------------------------------------
-
- ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs)
- bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal
-
- ; decrement thread_info->preempt_count (re-enable preemption)
- GET_CURR_THR_INFO_FROM_SP r10
- ld r9, [r10, THREAD_INFO_PREEMPT_COUNT]
-
- ; paranoid check, given A1 was active when A2 happened, preempt count
- ; must not be 0 because we would have incremented it.
- ; If this does happen we simply HALT as it means a BUG !!!
- cmp r9, 0
- bnz 2f
- flag 1
-
-2:
- sub r9, r9, 1
- st r9, [r10, THREAD_INFO_PREEMPT_COUNT]
-
-149:
- ;return from level 2
- RESTORE_ALL_INT2
-debug_marker_l2:
- rtie
-
-not_level2_interrupt:
-
-#endif
-
- bbit0 r10, STATUS_A1_BIT, not_level1_interrupt
+ b .Lrestore_regs
- ;return from level 1
+##### DONT ADD CODE HERE - .Lrestore_regs actually follows in entry-<isa>.S
- RESTORE_ALL_INT1
-debug_marker_l1:
- rtie
-
-not_level1_interrupt:
-
- ;this case is for syscalls or Exceptions (with fake rtie)
-
- RESTORE_ALL_SYS
-debug_marker_syscall:
- rtie
-
-END(ret_from_exception)
-
-ENTRY(ret_from_fork)
- ; when the forked child comes here from the __switch_to function
- ; r0 has the last task pointer.
- ; put last task in scheduler queue
- bl @schedule_tail
-
- ld r9, [sp, PT_status32]
- brne r9, 0, 1f
-
- jl.d [r14] ; kernel thread entry point
- mov r0, r13 ; (see PF_KTHREAD block in copy_thread)
-
-1:
- ; Return to user space
- ; 1. Any forked task (Reach here via BRne above)
- ; 2. First ever init task (Reach here via return from JL above)
- ; This is the historic "kernel_execve" use-case, to return to init
- ; user mode, in a round about way since that is always done from
- ; a kernel thread which is executed via JL above but always returns
- ; out whenever kernel_execve (now inline do_fork()) is involved
- b ret_from_exception
-END(ret_from_fork)
-
-;################### Special Sys Call Wrappers ##########################
-
-ENTRY(sys_clone_wrapper)
- SAVE_CALLEE_SAVED_USER
- bl @sys_clone
- DISCARD_CALLEE_SAVED_USER
-
- GET_CURR_THR_INFO_FLAGS r10
- btst r10, TIF_SYSCALL_TRACE
- bnz tracesys_exit
-
- b ret_from_system_call
-END(sys_clone_wrapper)
-
-#ifdef CONFIG_ARC_DW2_UNWIND
-; Workaround for bug 94179 (STAR ):
-; Despite -fasynchronous-unwind-tables, linker is not making dwarf2 unwinder
-; section (.debug_frame) as loadable. So we force it here.
-; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag)
-; would not work after a clean build due to kernel build system dependencies.
-.section .debug_frame, "wa",@progbits
-#endif
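The r13/r14 contract used by ret_from_fork above is established in copy_thread(); a
simplified sketch of that side (abridged; the kthread_* names are illustrative stand-ins
for the values copy_thread() receives):

    if (unlikely(p->flags & PF_KTHREAD)) {
            /* ret_from_fork will do "jl.d [r14]; mov r0, r13" */
            c_callee->r13 = kthread_arg;    /* argument */
            c_callee->r14 = kthread_fn;     /* kernel thread entry point */
    }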
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index b0e8666fdccc..812f95e6ae69 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -49,8 +49,6 @@
1:
.endm
- .cpu A7
-
.section .init.text, "ax",@progbits
.type stext, @function
.globl stext
@@ -83,6 +81,7 @@ stext:
st.ab 0, [r5, 4]
1:
+#ifdef CONFIG_ARC_UBOOT_SUPPORT
; Uboot - kernel ABI
; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
; r1 = magic number (board identity, unused as of now)
@@ -90,6 +89,7 @@ stext:
; These are handled later in setup_arch()
st r0, [@uboot_tag]
st r2, [@uboot_arg]
+#endif
; setup "current" tsk and optionally cache it in dedicated r25
mov r9, @init_task
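The uboot_tag/uboot_arg pair stashed above is consumed later in setup_arch();
schematically (a sketch with hypothetical variable names, not the actual code):

    if (uboot_tag == 1)             /* r0 == 1: r2 carried a cmdline */
            strlcat(boot_command_line, (char *)uboot_arg, COMMAND_LINE_SIZE);
    else if (uboot_tag == 2)        /* r0 == 2: r2 carried an external DTB */
            dtb = (void *)uboot_arg;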
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
new file mode 100644
index 000000000000..6208c630abed
--- /dev/null
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include "../../drivers/irqchip/irqchip.h"
+#include <asm/irq.h>
+
+/*
+ * Early Hardware specific Interrupt setup
+ * -Called very early (start_kernel -> setup_arch -> setup_processor)
+ * -Platform Independent (must for any ARC Core)
+ * -Needed for each CPU (hence not foldable into init_IRQ)
+ */
+void arc_init_IRQ(void)
+{
+ unsigned int tmp;
+
+ struct aux_irq_ctrl {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int res3:18, save_idx_regs:1, res2:1,
+ save_u_to_u:1, save_lp_regs:1, save_blink:1,
+ res:4, save_nr_gpr_pairs:5;
+#else
+ unsigned int save_nr_gpr_pairs:5, res:4,
+ save_blink:1, save_lp_regs:1, save_u_to_u:1,
+ res2:1, save_idx_regs:1, res3:18;
+#endif
+ } ictrl;
+
+ *(unsigned int *)&ictrl = 0;
+
+ ictrl.save_nr_gpr_pairs = 6; /* r0 to r11 (r12 saved manually) */
+ ictrl.save_blink = 1;
+ ictrl.save_lp_regs = 1; /* LP_COUNT, LP_START, LP_END */
+ ictrl.save_u_to_u = 0; /* user ctxt saved on kernel stack */
+ ictrl.save_idx_regs = 1; /* JLI, LDI, EI */
+
+ WRITE_AUX(AUX_IRQ_CTRL, ictrl);
+
+ /* setup status32, don't enable intr yet as kernel doesn't want them yet */
+ tmp = read_aux_reg(0xa);
+ tmp |= ISA_INIT_STATUS_BITS;
+ tmp &= ~STATUS_IE_MASK;
+ asm volatile("flag %0 \n"::"r"(tmp));
+
+ /*
+ * ARCv2 core intc provides multiple interrupt priorities (up to 16).
+ * Typical builds though have only two levels (0-high, 1-low)
+ * Linux by default uses lower prio 1 for most irqs, reserving 0 for
+ * NMI style interrupts in future (say perf)
+ *
+ * Read the intc BCR to confirm that Linux default priority is avail
+ * in h/w
+ *
+ * Note:
+ * IRQ_BCR[27..24] contains N-1 (for N priority levels) and prio level
+ * is 0 based.
+ */
+ tmp = (read_aux_reg(ARC_REG_IRQ_BCR) >> 24) & 0xF;
+ if (ARCV2_IRQ_DEF_PRIO > tmp)
+ panic("Linux default irq prio incorrect\n");
+}
+
+static void arcv2_irq_mask(struct irq_data *data)
+{
+ write_aux_reg(AUX_IRQ_SELECT, data->irq);
+ write_aux_reg(AUX_IRQ_ENABLE, 0);
+}
+
+static void arcv2_irq_unmask(struct irq_data *data)
+{
+ write_aux_reg(AUX_IRQ_SELECT, data->irq);
+ write_aux_reg(AUX_IRQ_ENABLE, 1);
+}
+
+void arcv2_irq_enable(struct irq_data *data)
+{
+ /* set default priority */
+ write_aux_reg(AUX_IRQ_SELECT, data->irq);
+ write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO);
+
+ /*
+ * hw auto enables (linux unmask) all by default
+ * So no need to do IRQ_ENABLE here
+ * XXX: However OSCI LAN needs it
+ */
+ write_aux_reg(AUX_IRQ_ENABLE, 1);
+}
+
+static struct irq_chip arcv2_irq_chip = {
+ .name = "ARCv2 core Intc",
+ .irq_mask = arcv2_irq_mask,
+ .irq_unmask = arcv2_irq_unmask,
+ .irq_enable = arcv2_irq_enable
+};
+
+static int arcv2_irq_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hw)
+{
+ if (irq == TIMER0_IRQ || irq == IPI_IRQ)
+ irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq);
+ else
+ irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops arcv2_irq_ops = {
+ .xlate = irq_domain_xlate_onecell,
+ .map = arcv2_irq_map,
+};
+
+static struct irq_domain *root_domain;
+
+static int __init
+init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
+{
+ if (parent)
+ panic("DeviceTree incore intc not a root irq controller\n");
+
+ root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
+ &arcv2_irq_ops, NULL);
+
+ if (!root_domain)
+ panic("root irq domain not avail\n");
+
+ /* with this we don't need to export root_domain */
+ irq_set_default_host(root_domain);
+
+ return 0;
+}
+
+IRQCHIP_DECLARE(arc_intc, "snps,archs-intc", init_onchip_IRQ);
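Per the IRQ_BCR note above (bits [27:24] hold N-1 for N priority levels), the sanity
check in arc_init_IRQ() can equivalently be read as:

    unsigned int nr_levels = ((read_aux_reg(ARC_REG_IRQ_BCR) >> 24) & 0xF) + 1;

    /* Linux wants priority ARCV2_IRQ_DEF_PRIO (1), so at least 2 levels */
    if (ARCV2_IRQ_DEF_PRIO >= nr_levels)
            panic("Linux default irq prio incorrect\n");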
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
new file mode 100644
index 000000000000..fcdddb631766
--- /dev/null
+++ b/arch/arc/kernel/intc-compact.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2011-12 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include "../../drivers/irqchip/irqchip.h"
+#include <asm/irq.h>
+
+/*
+ * Early Hardware specific Interrupt setup
+ * -Platform independent, needed for each CPU (not foldable into init_IRQ)
+ * -Called very early (start_kernel -> setup_arch -> setup_processor)
+ *
+ * what it does?
+ * -Optionally, setup the High priority Interrupts as Level 2 IRQs
+ */
+void arc_init_IRQ(void)
+{
+ int level_mask = 0;
+
+ /* setup any high priority Interrupts (Level2 in ARCompact jargon) */
+ level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3;
+ level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
+ level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
+
+ /*
+ * Write to the register even if no LV2 IRQs are configured, to reset it
+ * in case the bootloader had mucked with it
+ */
+ write_aux_reg(AUX_IRQ_LEV, level_mask);
+
+ if (level_mask)
+ pr_info("Level-2 interrupts bitset %x\n", level_mask);
+}
+
+/*
+ * ARC700 core includes a simple on-chip intc supporting
+ * -per IRQ enable/disable
+ * -2 levels of interrupts (high/low)
+ * -all interrupts being level triggered
+ *
+ * To reduce platform code, we assume all IRQs directly hooked-up into intc.
+ * Platforms with external intc, hence cascaded IRQs, are free to over-ride
+ * below, per IRQ.
+ */
+
+static void arc_irq_mask(struct irq_data *data)
+{
+ unsigned int ienb;
+
+ ienb = read_aux_reg(AUX_IENABLE);
+ ienb &= ~(1 << data->irq);
+ write_aux_reg(AUX_IENABLE, ienb);
+}
+
+static void arc_irq_unmask(struct irq_data *data)
+{
+ unsigned int ienb;
+
+ ienb = read_aux_reg(AUX_IENABLE);
+ ienb |= (1 << data->irq);
+ write_aux_reg(AUX_IENABLE, ienb);
+}
+
+static struct irq_chip onchip_intc = {
+ .name = "ARC In-core Intc",
+ .irq_mask = arc_irq_mask,
+ .irq_unmask = arc_irq_unmask,
+};
+
+static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hw)
+{
+ /*
+ * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core
+ * code doesn't own it (like TIMER0). ISS IDU / ezchip define it
+ * in platform header which can't be included here as it goes
+ * against multi-platform image philosophy
+ */
+ if (irq == TIMER0_IRQ)
+ irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
+ else
+ irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops arc_intc_domain_ops = {
+ .xlate = irq_domain_xlate_onecell,
+ .map = arc_intc_domain_map,
+};
+
+static struct irq_domain *root_domain;
+
+static int __init
+init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
+{
+ if (parent)
+ panic("DeviceTree incore intc not a root irq controller\n");
+
+ root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
+ &arc_intc_domain_ops, NULL);
+
+ if (!root_domain)
+ panic("root irq domain not avail\n");
+
+ /* with this we don't need to export root_domain */
+ irq_set_default_host(root_domain);
+
+ return 0;
+}
+
+IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
+
+/*
+ * arch_local_irq_enable - Enable interrupts.
+ *
+ * 1. Explicitly called to re-enable interrupts
+ * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc
+ * which maybe in hard ISR itself
+ *
+ * Semantics of this function change depending on where it is called from:
+ *
+ * -If called from hard-ISR, it must not invert interrupt priorities
+ * e.g. suppose TIMER is high priority (Level 2) IRQ
+ * Timer hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
+ * Here local_irq_enable( ) shd not re-enable lower priority interrupts
+ * -If called from soft-ISR, it must re-enable all interrupts
+ * soft ISRs are low priority jobs which can be very slow, thus all IRQs
+ * must be enabled while they run.
+ * Now hardware context wise we may still be in L2 ISR (not done rtie)
+ * still we must re-enable both L1 and L2 IRQs
+ * Another twist is prev scenario with flow being
+ * L1 ISR ==> interrupted by L2 ISR ==> L2 soft ISR
+ * here we must not re-enable L1 as prev L1 Interrupt's h/w context will get
+ * over-written (this is deficiency in ARC700 Interrupt mechanism)
+ */
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS /* Complex version for 2 IRQ levels */
+
+void arch_local_irq_enable(void)
+{
+
+ unsigned long flags = arch_local_save_flags();
+
+ /* Allow both L1 and L2 at the onset */
+ flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
+
+ /* Called from hard ISR (between irq_enter and irq_exit) */
+ if (in_irq()) {
+
+ /* If in L2 ISR, don't re-enable any further IRQs as this can
+ * cause IRQ priorities to get upside down. e.g. it could allow
+ * L1 to be taken while in L2 hard ISR, which is wrong not only in
+ * theory, it can also cause the dreaded L1-L2-L1 scenario
+ */
+ if (flags & STATUS_A2_MASK)
+ flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK);
+
+ /* Even if in L1 ISR, allow higher prio L2 IRQs */
+ else if (flags & STATUS_A1_MASK)
+ flags &= ~(STATUS_E1_MASK);
+ }
+
+ /* called from soft IRQ, ideally we want to re-enable all levels */
+
+ else if (in_softirq()) {
+
+ /* However if this is case of L1 interrupted by L2,
+ * re-enabling both may cause a wacky L1-L2-L1 scenario
+ * because ARC700 allows level 1 to interrupt an active L2 ISR
+ * Thus we disable both
+ * However some code, executing in soft ISR wants some IRQs
+ * to be enabled so we re-enable L2 only
+ *
+ * How do we determine if L1 was interrupted by L2:
+ * -A2 is set (means in L2 ISR)
+ * -E1 is set in this ISR's pt_regs->status32 which is
+ * saved copy of status32_l2 when l2 ISR happened
+ */
+ struct pt_regs *pt = get_irq_regs();
+
+ if ((flags & STATUS_A2_MASK) && pt &&
+ (pt->status32 & STATUS_A1_MASK)) {
+ /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */
+ flags &= ~(STATUS_E1_MASK);
+ }
+ }
+
+ arch_local_irq_restore(flags);
+}
+
+#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */
+
+/*
+ * Simpler version for only 1 level of interrupt
+ * Here we only Worry about Level 1 Bits
+ */
+void arch_local_irq_enable(void)
+{
+ unsigned long flags;
+
+ /*
+ * ARC IDE driver tries to re-enable interrupts from hard-isr
+ * context which is simply wrong
+ */
+ if (in_irq()) {
+ WARN_ONCE(1, "IRQ enabled from hard-isr");
+ return;
+ }
+
+ flags = arch_local_save_flags();
+ flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
+ arch_local_irq_restore(flags);
+}
+#endif
+EXPORT_SYMBOL(arch_local_irq_enable);
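Condensed, the two-level policy implemented by the ARCompact arch_local_irq_enable()
above is (E = enable bits left set, A = active bits found in status32):

    context          condition                          result
    hard ISR         A2 set (in L2 ISR)                 E1=0, E2=0
    hard ISR         only A1 set (in L1 ISR)            E1=0, E2=1
    soft ISR         A2 set && saved status32 has A1    E1=0, E2=1
    anywhere else    -                                  E1=1, E2=1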
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 620ec2fe32a9..2989a7bcf8a8 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -8,116 +8,10 @@
*/
#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/irqdomain.h>
#include <linux/irqchip.h>
-#include "../../drivers/irqchip/irqchip.h"
-#include <asm/sections.h>
-#include <asm/irq.h>
#include <asm/mach_desc.h>
/*
- * Early Hardware specific Interrupt setup
- * -Platform independent, needed for each CPU (not foldable into init_IRQ)
- * -Called very early (start_kernel -> setup_arch -> setup_processor)
- *
- * what it does ?
- * -Optionally, setup the High priority Interrupts as Level 2 IRQs
- */
-void arc_init_IRQ(void)
-{
- int level_mask = 0;
-
- /* setup any high priority Interrupts (Level2 in ARCompact jargon) */
- level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3;
- level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
- level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
-
- /*
- * Write to register, even if no LV2 IRQs configured to reset it
- * in case bootloader had mucked with it
- */
- write_aux_reg(AUX_IRQ_LEV, level_mask);
-
- if (level_mask)
- pr_info("Level-2 interrupts bitset %x\n", level_mask);
-}
-
-/*
- * ARC700 core includes a simple on-chip intc supporting
- * -per IRQ enable/disable
- * -2 levels of interrupts (high/low)
- * -all interrupts being level triggered
- *
- * To reduce platform code, we assume all IRQs directly hooked-up into intc.
- * Platforms with external intc, hence cascaded IRQs, are free to over-ride
- * below, per IRQ.
- */
-
-static void arc_irq_mask(struct irq_data *data)
-{
- unsigned int ienb;
-
- ienb = read_aux_reg(AUX_IENABLE);
- ienb &= ~(1 << data->irq);
- write_aux_reg(AUX_IENABLE, ienb);
-}
-
-static void arc_irq_unmask(struct irq_data *data)
-{
- unsigned int ienb;
-
- ienb = read_aux_reg(AUX_IENABLE);
- ienb |= (1 << data->irq);
- write_aux_reg(AUX_IENABLE, ienb);
-}
-
-static struct irq_chip onchip_intc = {
- .name = "ARC In-core Intc",
- .irq_mask = arc_irq_mask,
- .irq_unmask = arc_irq_unmask,
-};
-
-static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
- irq_hw_number_t hw)
-{
- if (irq == TIMER0_IRQ)
- irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
- else
- irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
-
- return 0;
-}
-
-static const struct irq_domain_ops arc_intc_domain_ops = {
- .xlate = irq_domain_xlate_onecell,
- .map = arc_intc_domain_map,
-};
-
-static struct irq_domain *root_domain;
-
-static int __init
-init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
-{
- if (parent)
- panic("DeviceTree incore intc not a root irq controller\n");
-
- root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
- &arc_intc_domain_ops, NULL);
-
- if (!root_domain)
- panic("root irq domain not avail\n");
-
- /* with this we don't need to export root_domain */
- irq_set_default_host(root_domain);
-
- return 0;
-}
-
-IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
-
-/*
* Late Interrupt system init called from start_kernel for Boot CPU only
*
* Since slab must already be initialized, platforms can start doing any
@@ -178,107 +72,3 @@ void arc_request_percpu_irq(int irq, int cpu,
enable_percpu_irq(irq, 0);
}
-
-/*
- * arch_local_irq_enable - Enable interrupts.
- *
- * 1. Explicitly called to re-enable interrupts
- * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc
- * which maybe in hard ISR itself
- *
- * Semantics of this function change depending on where it is called from:
- *
- * -If called from hard-ISR, it must not invert interrupt priorities
- * e.g. suppose TIMER is high priority (Level 2) IRQ
- * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
- * Here local_irq_enable( ) shd not re-enable lower priority interrupts
- * -If called from soft-ISR, it must re-enable all interrupts
- * soft ISR are low prioity jobs which can be very slow, thus all IRQs
- * must be enabled while they run.
- * Now hardware context wise we may still be in L2 ISR (not done rtie)
- * still we must re-enable both L1 and L2 IRQs
- * Another twist is prev scenario with flow being
- * L1 ISR ==> interrupted by L2 ISR ==> L2 soft ISR
- * here we must not re-enable Ll as prev Ll Interrupt's h/w context will get
- * over-written (this is deficiency in ARC700 Interrupt mechanism)
- */
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS /* Complex version for 2 IRQ levels */
-
-void arch_local_irq_enable(void)
-{
-
- unsigned long flags;
- flags = arch_local_save_flags();
-
- /* Allow both L1 and L2 at the onset */
- flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
-
- /* Called from hard ISR (between irq_enter and irq_exit) */
- if (in_irq()) {
-
- /* If in L2 ISR, don't re-enable any further IRQs as this can
- * cause IRQ priorities to get upside down. e.g. it could allow
- * L1 be taken while in L2 hard ISR which is wrong not only in
- * theory, it can also cause the dreaded L1-L2-L1 scenario
- */
- if (flags & STATUS_A2_MASK)
- flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK);
-
- /* Even if in L1 ISR, allowe Higher prio L2 IRQs */
- else if (flags & STATUS_A1_MASK)
- flags &= ~(STATUS_E1_MASK);
- }
-
- /* called from soft IRQ, ideally we want to re-enable all levels */
-
- else if (in_softirq()) {
-
- /* However if this is case of L1 interrupted by L2,
- * re-enabling both may cause whaco L1-L2-L1 scenario
- * because ARC700 allows level 1 to interrupt an active L2 ISR
- * Thus we disable both
- * However some code, executing in soft ISR wants some IRQs
- * to be enabled so we re-enable L2 only
- *
- * How do we determine L1 intr by L2
- * -A2 is set (means in L2 ISR)
- * -E1 is set in this ISR's pt_regs->status32 which is
- * saved copy of status32_l2 when l2 ISR happened
- */
- struct pt_regs *pt = get_irq_regs();
- if ((flags & STATUS_A2_MASK) && pt &&
- (pt->status32 & STATUS_A1_MASK)) {
- /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */
- flags &= ~(STATUS_E1_MASK);
- }
- }
-
- arch_local_irq_restore(flags);
-}
-
-#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */
-
-/*
- * Simpler version for only 1 level of interrupt
- * Here we only Worry about Level 1 Bits
- */
-void arch_local_irq_enable(void)
-{
- unsigned long flags;
-
- /*
- * ARC IDE Drivers tries to re-enable interrupts from hard-isr
- * context which is simply wrong
- */
- if (in_irq()) {
- WARN_ONCE(1, "IRQ enabled from hard-isr");
- return;
- }
-
- flags = arch_local_save_flags();
- flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
- arch_local_irq_restore(flags);
-}
-#endif
-EXPORT_SYMBOL(arch_local_irq_enable);
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
new file mode 100644
index 000000000000..30284e8de6ff
--- /dev/null
+++ b/arch/arc/kernel/mcip.c
@@ -0,0 +1,341 @@
+/*
+ * ARC ARConnect (MultiCore IP) support (formerly known as MCIP)
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <asm/mcip.h>
+
+static char smp_cpuinfo_buf[128];
+static int idu_detected;
+
+static DEFINE_RAW_SPINLOCK(mcip_lock);
+
+/*
+ * Any SMP-specific init a CPU does when it comes up.
+ * Here we set up the CPU to enable Inter-Processor-Interrupts.
+ * Called for each CPU
+ * -Master : init_IRQ()
+ * -Other(s) : start_kernel_secondary()
+ */
+void mcip_init_smp(unsigned int cpu)
+{
+ smp_ipi_irq_setup(cpu, IPI_IRQ);
+}
+
+static void mcip_ipi_send(int cpu)
+{
+ unsigned long flags;
+ int ipi_was_pending;
+
+ /*
+ * NOTE: We must spin here if the other cpu hasn't yet
+ * serviced a previous message. This can burn lots
+ * of time, but we MUST follow this protocol or
+ * ipi messages can be lost!!!
+ * Also, we must release the lock in this loop because
+ * the other side may get to this same loop and not
+ * be able to ack -- thus causing deadlock.
+ */
+
+ do {
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
+ ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
+ if (ipi_was_pending == 0)
+ break; /* break out but keep lock */
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+ } while (1);
+
+ __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
+#ifdef CONFIG_ARC_IPI_DBG
+ if (ipi_was_pending)
+ pr_info("IPI ACK delayed from cpu %d\n", cpu);
+#endif
+}
+
+static void mcip_ipi_clear(int irq)
+{
+ unsigned int cpu, c;
+ unsigned long flags;
+ unsigned int __maybe_unused copy;
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+
+ /* Who sent the IPI */
+ __mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0);
+
+ copy = cpu = read_aux_reg(ARC_REG_MCIP_READBACK); /* 1,2,4,8... */
+
+ /*
+ * In rare cases, multiple concurrent IPIs sent to the same target can
+ * possibly be coalesced by MCIP into 1 asserted IRQ, so @cpu can be
+ * "vectored" (multiple bits set) as opposed to the typical single bit
+ */
+ do {
+ c = __ffs(cpu); /* 0,1,2,3 */
+ __mcip_cmd(CMD_INTRPT_GENERATE_ACK, c);
+ cpu &= ~(1U << c);
+ } while (cpu);
+
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
+#ifdef CONFIG_ARC_IPI_DBG
+ if (c != __ffs(copy))
+ pr_info("IPIs from %x coalesced to %x\n",
+ copy, raw_smp_processor_id());
+#endif
+}
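
The loop above is the interesting part: one asserted IRQ may stand for several senders, and each set bit must be ACKed individually. A stand-alone sketch of that decomposition (hypothetical mask value; __builtin_ctz standing in for the kernel's __ffs):

    #include <stdio.h>

    int main(void)
    {
        /* hypothetical READBACK value: cpus 0 and 2 coalesced into one IRQ */
        unsigned int src = 0x5;

        while (src) {
            int c = __builtin_ctz(src);   /* like __ffs: 0,1,2,3 */
            printf("ACK cpu %d\n", c);    /* models CMD_INTRPT_GENERATE_ACK */
            src &= ~(1U << c);
        }
        return 0;
    }
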
+
+volatile int wake_flag;
+
+static void mcip_wakeup_cpu(int cpu, unsigned long pc)
+{
+ BUG_ON(cpu == 0);
+ wake_flag = cpu;
+}
+
+void arc_platform_smp_wait_to_boot(int cpu)
+{
+ while (wake_flag != cpu)
+ ;
+
+ wake_flag = 0;
+ __asm__ __volatile__("j @first_lines_of_secondary \n");
+}
+
+struct plat_smp_ops plat_smp_ops = {
+ .info = smp_cpuinfo_buf,
+ .cpu_kick = mcip_wakeup_cpu,
+ .ipi_send = mcip_ipi_send,
+ .ipi_clear = mcip_ipi_clear,
+};
+
+void mcip_init_early_smp(void)
+{
+#define IS_AVAIL1(var, str) ((var) ? str : "")
+
+ struct mcip_bcr {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad3:8,
+ idu:1, llm:1, num_cores:6,
+ iocoh:1, grtc:1, dbg:1, pad2:1,
+ msg:1, sem:1, ipi:1, pad:1,
+ ver:8;
+#else
+ unsigned int ver:8,
+ pad:1, ipi:1, sem:1, msg:1,
+ pad2:1, dbg:1, grtc:1, iocoh:1,
+ num_cores:6, llm:1, idu:1,
+ pad3:8;
+#endif
+ } mp;
+
+ READ_BCR(ARC_REG_MCIP_BCR, mp);
+
+ sprintf(smp_cpuinfo_buf,
+ "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
+ mp.ver, mp.num_cores,
+ IS_AVAIL1(mp.ipi, "IPI "),
+ IS_AVAIL1(mp.idu, "IDU "),
+ IS_AVAIL1(mp.dbg, "DEBUG "),
+ IS_AVAIL1(mp.grtc, "GRTC"));
+
+ idu_detected = mp.idu;
+
+ if (mp.dbg) {
+ __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf);
+ __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf);
+ }
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_GRTC) && !mp.grtc)
+ panic("kernel trying to use non-existent GRTC\n");
+}
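
For reference, the little-endian bitfield layout above puts ver in bits [7:0], ipi in bit 9, num_cores in bits [21:16] and idu in bit 23; READ_BCR overlays the raw aux register onto that struct. A hedged user-space decode with explicit shifts (portable where bitfield ordering is not; the raw value is made up):

    #include <stdio.h>

    int main(void)
    {
        unsigned int raw = 0x00840203;               /* hypothetical MCIP BCR */
        unsigned int ver       = raw & 0xff;         /* bits [7:0]   */
        unsigned int ipi       = (raw >> 9) & 0x1;   /* bit 9        */
        unsigned int num_cores = (raw >> 16) & 0x3f; /* bits [21:16] */
        unsigned int idu       = (raw >> 23) & 0x1;  /* bit 23       */

        printf("ARConnect v%u: %u cores, ipi=%u idu=%u\n",
               ver, num_cores, ipi, idu);
        return 0;
    }
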
+
+/***************************************************************************
+ * ARCv2 Interrupt Distribution Unit (IDU)
+ *
+ * Connects external "COMMON" IRQs to core intc, providing:
+ * -dynamic routing (IRQ affinity)
+ * -load balancing (Round Robin interrupt distribution)
+ * -1:N distribution
+ *
+ * It physically resides in the MCIP hw block
+ */
+
+#include <linux/irqchip.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include "../../drivers/irqchip/irqchip.h"
+
+/*
+ * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core)
+ */
+static void idu_set_dest(unsigned int cmn_irq, unsigned int cpu_mask)
+{
+ __mcip_cmd_data(CMD_IDU_SET_DEST, cmn_irq, cpu_mask);
+}
+
+static void idu_set_mode(unsigned int cmn_irq, unsigned int lvl,
+ unsigned int distr)
+{
+ union {
+ unsigned int word;
+ struct {
+ unsigned int distr:2, pad:2, lvl:1, pad2:27;
+ };
+ } data;
+
+ data.distr = distr;
+ data.lvl = lvl;
+ __mcip_cmd_data(CMD_IDU_SET_MODE, cmn_irq, data.word);
+}
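
The anonymous-struct union above amounts to packing distr into bits [1:0] and lvl into bit 4 of the data word. The same thing with explicit shifts, as a quick sanity check (values hypothetical):

    #include <stdio.h>

    int main(void)
    {
        unsigned int distr = 1, lvl = 0;  /* hypothetical mode settings */
        unsigned int word  = (distr & 0x3) | ((lvl & 0x1) << 4);

        printf("CMD_IDU_SET_MODE data word: %#x\n", word);
        return 0;
    }
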
+
+static void idu_irq_mask(struct irq_data *data)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 1);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void idu_irq_unmask(struct irq_data *data)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 0);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static int
+idu_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool f)
+{
+ return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip idu_irq_chip = {
+ .name = "MCIP IDU Intc",
+ .irq_mask = idu_irq_mask,
+ .irq_unmask = idu_irq_unmask,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = idu_irq_set_affinity,
+#endif
+
+};
+
+static int idu_first_irq;
+
+static void idu_cascade_isr(unsigned int core_irq, struct irq_desc *desc)
+{
+ struct irq_domain *domain = irq_desc_get_handler_data(desc);
+ unsigned int idu_irq;
+
+ idu_irq = core_irq - idu_first_irq;
+ generic_handle_irq(irq_find_mapping(domain, idu_irq));
+}
+
+static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
+{
+ irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq);
+ irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
+
+ return 0;
+}
+
+static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+ irq_hw_number_t hwirq = *out_hwirq = intspec[0];
+ int distri = intspec[1];
+ unsigned long flags;
+
+ *out_type = IRQ_TYPE_NONE;
+
+ /* XXX: validate distribution scheme against online cpu mask */
+ if (distri == 0) {
+ /* 0 - Round Robin to all cpus, otherwise 1 bit per core */
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ idu_set_dest(hwirq, BIT(num_online_cpus()) - 1);
+ idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+ } else {
+ /*
+ * DEST based distribution for Level Triggered intr can only
+ * have 1 CPU, so generalize it to always contain 1 cpu
+ */
+ int cpu = ffs(distri);
+
+ if (cpu != fls(distri))
+ pr_warn("IDU irq %lx distri mode set to cpu %x\n",
+ hwirq, cpu);
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ idu_set_dest(hwirq, cpu);
+ idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+ }
+
+ return 0;
+}
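
The cpu = ffs(distri) / fls(distri) comparison above exploits the fact that ffs(x) == fls(x) exactly when x has a single bit set; anything else means the DT asked for multiple CPUs and the code falls back to one. A small demonstration, with user-space builtins standing in for the kernel helpers:

    #include <stdio.h>

    static int ffs1(unsigned int x) { return x ? __builtin_ctz(x) + 1 : 0; }
    static int fls1(unsigned int x) { return x ? 32 - __builtin_clz(x) : 0; }

    int main(void)
    {
        unsigned int masks[] = { 0x4, 0x6 };  /* cpu2 only; cpu1 + cpu2 */

        for (int i = 0; i < 2; i++)
            printf("%#x: ffs=%d fls=%d -> %s\n", masks[i],
                   ffs1(masks[i]), fls1(masks[i]),
                   ffs1(masks[i]) == fls1(masks[i]) ? "single cpu" : "multiple");
        return 0;
    }
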
+
+static const struct irq_domain_ops idu_irq_ops = {
+ .xlate = idu_irq_xlate,
+ .map = idu_irq_map,
+};
+
+/*
+ * [16, 23]: Statically assigned always private-per-core (Timers, WDT, IPI)
+ * [24, 23+C]: If C > 0 then "C" common IRQs
+ * [24+C, N]: Not statically assigned, private-per-core
+ */
+
+
+static int __init
+idu_of_init(struct device_node *intc, struct device_node *parent)
+{
+ struct irq_domain *domain;
+ /* Read IDU BCR to confirm nr_irqs */
+ int nr_irqs = of_irq_count(intc);
+ int i, irq;
+
+ if (!idu_detected)
+ panic("IDU not detected, but DeviceTree using it");
+
+ pr_info("MCIP: IDU referenced from Devicetree %d irqs\n", nr_irqs);
+
+ domain = irq_domain_add_linear(intc, nr_irqs, &idu_irq_ops, NULL);
+
+ /* Parent interrupts (core-intc) are already mapped */
+
+ for (i = 0; i < nr_irqs; i++) {
+ /*
+ * Return parent uplink IRQs (towards core intc) 24,25,.....
+ * This step has already been done before; however, we need
+ * it here to get the parent virq and to set the IDU handler
+ * as the first-level ISR
+ */
+ irq = irq_of_parse_and_map(intc, i);
+ if (!i)
+ idu_first_irq = irq;
+
+ irq_set_handler_data(irq, domain);
+ irq_set_chained_handler(irq, idu_cascade_isr);
+ }
+
+ __mcip_cmd(CMD_IDU_ENABLE, 0);
+
+ return 0;
+}
+IRQCHIP_DECLARE(arcv2_idu_intc, "snps,archs-idu-intc", idu_of_init);
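
Tying the numbering comment to the cascade: with common IRQs starting at core irq 24, idu_cascade_isr recovers the IDU hwirq by subtracting idu_first_irq. A toy rendering of that mapping (numbers per the layout comment above):

    #include <stdio.h>

    int main(void)
    {
        int idu_first_irq = 24;  /* first common IRQ per the map above */

        for (int core_irq = 24; core_irq <= 26; core_irq++)
            printf("core irq %d -> IDU hwirq %d\n",
                   core_irq, core_irq - idu_first_irq);
        return 0;
    }
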
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index fd2ec50102f2..1287388c258a 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -266,10 +266,9 @@ static int arc_pmu_add(struct perf_event *event, int flags)
static int arc_pmu_device_probe(struct platform_device *pdev)
{
- struct arc_pmu *arc_pmu;
struct arc_reg_pct_build pct_bcr;
struct arc_reg_cc_build cc_bcr;
- int i, j, ret;
+ int i, j;
union cc_name {
struct {
@@ -336,9 +335,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
/* ARC 700 PMU does not support sampling events */
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
- ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
-
- return ret;
+ return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
}
#ifdef CONFIG_OF
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index e095c557afdd..44092456776f 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -44,7 +44,11 @@ SYSCALL_DEFINE0(arc_gettls)
void arch_cpu_idle(void)
{
/* sleep, but enable all interrupts before committing */
- __asm__("sleep 0x3");
+ if (is_isa_arcompact()) {
+ __asm__("sleep 0x3");
+ } else {
+ __asm__("sleep 0x10");
+ }
}
asmlinkage void ret_from_fork(void);
@@ -166,8 +170,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
* [L] ZOL loop inhibited to begin with - cleared by a LP insn
* Interrupts enabled
*/
- regs->status32 = STATUS_U_MASK | STATUS_L_MASK |
- STATUS_E1_MASK | STATUS_E2_MASK;
+ regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS;
/* bogus seed values for debugging */
regs->lp_start = 0x10;
@@ -197,8 +200,11 @@ int elf_check_arch(const struct elf32_hdr *x)
{
unsigned int eflags;
- if (x->e_machine != EM_ARCOMPACT)
+ if (x->e_machine != EM_ARC_INUSE) {
+ pr_err("ELF not built for %s ISA\n",
+ is_isa_arcompact() ? "ARCompact":"ARCv2");
return 0;
+ }
eflags = x->e_flags;
if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) {
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
index 13b3ffb27a38..4442204fe238 100644
--- a/arch/arc/kernel/ptrace.c
+++ b/arch/arc/kernel/ptrace.c
@@ -47,10 +47,47 @@ static int genregs_get(struct task_struct *target,
offsetof(struct user_regs_struct, LOC) + 4);
REG_O_ZERO(pad);
- REG_O_CHUNK(scratch, callee, ptregs);
+ REG_O_ONE(scratch.bta, &ptregs->bta);
+ REG_O_ONE(scratch.lp_start, &ptregs->lp_start);
+ REG_O_ONE(scratch.lp_end, &ptregs->lp_end);
+ REG_O_ONE(scratch.lp_count, &ptregs->lp_count);
+ REG_O_ONE(scratch.status32, &ptregs->status32);
+ REG_O_ONE(scratch.ret, &ptregs->ret);
+ REG_O_ONE(scratch.blink, &ptregs->blink);
+ REG_O_ONE(scratch.fp, &ptregs->fp);
+ REG_O_ONE(scratch.gp, &ptregs->r26);
+ REG_O_ONE(scratch.r12, &ptregs->r12);
+ REG_O_ONE(scratch.r11, &ptregs->r11);
+ REG_O_ONE(scratch.r10, &ptregs->r10);
+ REG_O_ONE(scratch.r9, &ptregs->r9);
+ REG_O_ONE(scratch.r8, &ptregs->r8);
+ REG_O_ONE(scratch.r7, &ptregs->r7);
+ REG_O_ONE(scratch.r6, &ptregs->r6);
+ REG_O_ONE(scratch.r5, &ptregs->r5);
+ REG_O_ONE(scratch.r4, &ptregs->r4);
+ REG_O_ONE(scratch.r3, &ptregs->r3);
+ REG_O_ONE(scratch.r2, &ptregs->r2);
+ REG_O_ONE(scratch.r1, &ptregs->r1);
+ REG_O_ONE(scratch.r0, &ptregs->r0);
+ REG_O_ONE(scratch.sp, &ptregs->sp);
+
REG_O_ZERO(pad2);
- REG_O_CHUNK(callee, efa, cregs);
- REG_O_CHUNK(efa, stop_pc, &target->thread.fault_address);
+
+ REG_O_ONE(callee.r25, &cregs->r25);
+ REG_O_ONE(callee.r24, &cregs->r24);
+ REG_O_ONE(callee.r23, &cregs->r23);
+ REG_O_ONE(callee.r22, &cregs->r22);
+ REG_O_ONE(callee.r21, &cregs->r21);
+ REG_O_ONE(callee.r20, &cregs->r20);
+ REG_O_ONE(callee.r19, &cregs->r19);
+ REG_O_ONE(callee.r18, &cregs->r18);
+ REG_O_ONE(callee.r17, &cregs->r17);
+ REG_O_ONE(callee.r16, &cregs->r16);
+ REG_O_ONE(callee.r15, &cregs->r15);
+ REG_O_ONE(callee.r14, &cregs->r14);
+ REG_O_ONE(callee.r13, &cregs->r13);
+
+ REG_O_ONE(efa, &target->thread.fault_address);
if (!ret) {
if (in_brkpt_trap(ptregs)) {
@@ -97,12 +134,51 @@ static int genregs_set(struct task_struct *target,
offsetof(struct user_regs_struct, LOC) + 4);
REG_IGNORE_ONE(pad);
- /* TBD: disallow updates to STATUS32 etc*/
- REG_IN_CHUNK(scratch, pad2, ptregs); /* pt_regs[bta..sp] */
+
+ REG_IN_ONE(scratch.bta, &ptregs->bta);
+ REG_IN_ONE(scratch.lp_start, &ptregs->lp_start);
+ REG_IN_ONE(scratch.lp_end, &ptregs->lp_end);
+ REG_IN_ONE(scratch.lp_count, &ptregs->lp_count);
+
+ REG_IGNORE_ONE(scratch.status32);
+
+ REG_IN_ONE(scratch.ret, &ptregs->ret);
+ REG_IN_ONE(scratch.blink, &ptregs->blink);
+ REG_IN_ONE(scratch.fp, &ptregs->fp);
+ REG_IN_ONE(scratch.gp, &ptregs->r26);
+ REG_IN_ONE(scratch.r12, &ptregs->r12);
+ REG_IN_ONE(scratch.r11, &ptregs->r11);
+ REG_IN_ONE(scratch.r10, &ptregs->r10);
+ REG_IN_ONE(scratch.r9, &ptregs->r9);
+ REG_IN_ONE(scratch.r8, &ptregs->r8);
+ REG_IN_ONE(scratch.r7, &ptregs->r7);
+ REG_IN_ONE(scratch.r6, &ptregs->r6);
+ REG_IN_ONE(scratch.r5, &ptregs->r5);
+ REG_IN_ONE(scratch.r4, &ptregs->r4);
+ REG_IN_ONE(scratch.r3, &ptregs->r3);
+ REG_IN_ONE(scratch.r2, &ptregs->r2);
+ REG_IN_ONE(scratch.r1, &ptregs->r1);
+ REG_IN_ONE(scratch.r0, &ptregs->r0);
+ REG_IN_ONE(scratch.sp, &ptregs->sp);
+
REG_IGNORE_ONE(pad2);
- REG_IN_CHUNK(callee, efa, cregs); /* callee_regs[r25..r13] */
+
+ REG_IN_ONE(callee.r25, &cregs->r25);
+ REG_IN_ONE(callee.r24, &cregs->r24);
+ REG_IN_ONE(callee.r23, &cregs->r23);
+ REG_IN_ONE(callee.r22, &cregs->r22);
+ REG_IN_ONE(callee.r21, &cregs->r21);
+ REG_IN_ONE(callee.r20, &cregs->r20);
+ REG_IN_ONE(callee.r19, &cregs->r19);
+ REG_IN_ONE(callee.r18, &cregs->r18);
+ REG_IN_ONE(callee.r17, &cregs->r17);
+ REG_IN_ONE(callee.r16, &cregs->r16);
+ REG_IN_ONE(callee.r15, &cregs->r15);
+ REG_IN_ONE(callee.r14, &cregs->r14);
+ REG_IN_ONE(callee.r13, &cregs->r13);
+
REG_IGNORE_ONE(efa); /* efa update invalid */
- REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */
+ REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */
return ret;
}
@@ -124,7 +200,7 @@ static const struct user_regset arc_regsets[] = {
static const struct user_regset_view user_arc_view = {
.name = UTS_MACHINE,
- .e_machine = EM_ARCOMPACT,
+ .e_machine = EM_ARC_INUSE,
.regsets = arc_regsets,
.n = ARRAY_SIZE(arc_regsets)
};
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 1d167c6df8ca..a3d186211ed3 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -30,6 +30,8 @@
#define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x))
+unsigned int intr_to_DE_cnt;
+
/* Part of U-boot ABI: see head.S */
int __initdata uboot_tag;
char __initdata *uboot_arg;
@@ -54,7 +56,7 @@ static void read_arc_build_cfg_regs(void)
cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
- cpu->uncached_base = uncached_space.start << 24;
+ BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE);
READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
@@ -96,7 +98,7 @@ static void read_arc_build_cfg_regs(void)
read_decode_mmu_bcr();
read_decode_cache_bcr();
- {
+ if (is_isa_arcompact()) {
struct bcr_fp_arcompact sp, dp;
struct bcr_bpu_arcompact bpu;
@@ -112,6 +114,19 @@ static void read_arc_build_cfg_regs(void)
cpu->bpu.num_cache = 256 << (bpu.ent - 1);
cpu->bpu.num_pred = 256 << (bpu.ent - 1);
}
+ } else {
+ struct bcr_fp_arcv2 spdp;
+ struct bcr_bpu_arcv2 bpu;
+
+ READ_BCR(ARC_REG_FP_V2_BCR, spdp);
+ cpu->extn.fpu_sp = spdp.sp ? 1 : 0;
+ cpu->extn.fpu_dp = spdp.dp ? 1 : 0;
+
+ READ_BCR(ARC_REG_BPU_BCR, bpu);
+ cpu->bpu.ver = bpu.ver;
+ cpu->bpu.full = bpu.ft;
+ cpu->bpu.num_cache = 256 << bpu.bce;
+ cpu->bpu.num_pred = 2048 << bpu.pte;
}
READ_BCR(ARC_REG_AP_BCR, bcr);
@@ -131,6 +146,7 @@ static const struct cpuinfo_data arc_cpu_tbl[] = {
{ {0x30, "ARC 700" }, 0x33},
{ {0x34, "ARC 700 R4.10"}, 0x34},
{ {0x35, "ARC 700 R4.11"}, 0x35},
+ { {0x50, "ARC HS38" }, 0x51},
{ {0x00, NULL } }
};
@@ -149,13 +165,17 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
FIX_PTR(cpu);
- {
+ if (is_isa_arcompact()) {
isa_nm = "ARCompact";
be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
atomic = cpu->isa.atomic1;
if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */
atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+ } else {
+ isa_nm = "ARCv2";
+ be = cpu->isa.be;
+ atomic = cpu->isa.atomic;
}
n += scnprintf(buf + n, len - n,
@@ -183,16 +203,34 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
IS_AVAIL1(cpu->timers.t0, "Timer0 "),
IS_AVAIL1(cpu->timers.t1, "Timer1 "),
- IS_AVAIL2(cpu->timers.rtsc, "64-bit RTSC ", CONFIG_ARC_HAS_RTSC));
+ IS_AVAIL2(cpu->timers.rtc, "64-bit RTC ",
+ CONFIG_ARC_HAS_RTC));
- n += i = scnprintf(buf + n, len - n, "%s%s",
- IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC));
+ n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s",
+ IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+ IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
+ IS_AVAIL1(cpu->isa.unalign, "unalign (not used)"));
if (i)
n += scnprintf(buf + n, len - n, "\n\t\t: ");
+ if (cpu->extn_mpy.ver) {
+ if (cpu->extn_mpy.ver <= 0x2) { /* ARCompact */
+ n += scnprintf(buf + n, len - n, "mpy ");
+ } else {
+ int opt = 2; /* stock MPY/MPYH */
+
+ if (cpu->extn_mpy.dsp) /* OPT 7-9 */
+ opt = cpu->extn_mpy.dsp + 6;
+
+ n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
+ }
+ n += scnprintf(buf + n, len - n, "%s",
+ IS_USED(CONFIG_ARC_HAS_HW_MPY));
+ }
+
n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
- IS_AVAIL1(cpu->extn_mpy.ver, "mpy "),
+ IS_AVAIL1(cpu->isa.div_rem, "div_rem "),
IS_AVAIL1(cpu->extn.norm, "norm "),
IS_AVAIL1(cpu->extn.barrel, "barrel-shift "),
IS_AVAIL1(cpu->extn.swap, "swap "),
@@ -219,7 +257,7 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
n += scnprintf(buf + n, len - n,
"Vector Table\t: %#x\nUncached Base\t: %#x\n",
- cpu->vec_base, cpu->uncached_base);
+ cpu->vec_base, ARC_UNCACHED_ADDR_SPACE);
if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
@@ -254,8 +292,8 @@ static void arc_chk_core_config(void)
if (!cpu->timers.t1)
panic("Timer1 is not present!\n");
- if (IS_ENABLED(CONFIG_ARC_HAS_RTSC) && !cpu->timers.rtsc)
- panic("RTSC is not present\n");
+ if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->timers.rtc)
+ panic("RTC is not present\n");
#ifdef CONFIG_ARC_HAS_DCCM
/*
@@ -323,13 +361,16 @@ static inline int is_kernel(unsigned long addr)
void __init setup_arch(char **cmdline_p)
{
+#ifdef CONFIG_ARC_UBOOT_SUPPORT
/* make sure that uboot passed pointer to cmdline/dtb is valid */
if (uboot_tag && is_kernel((unsigned long)uboot_arg))
panic("Invalid uboot arg\n");
/* See if u-boot passed an external Device Tree blob */
machine_desc = setup_machine_fdt(uboot_arg); /* uboot_tag == 2 */
- if (!machine_desc) {
+ if (!machine_desc)
+#endif
+ {
/* No, so try the embedded one */
machine_desc = setup_machine_fdt(__dtb_start);
if (!machine_desc)
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index 2251fb4bbfd7..004b7f0bc76c 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -67,7 +67,33 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
sigset_t *set)
{
int err;
- err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs,
+ struct user_regs_struct uregs;
+
+ uregs.scratch.bta = regs->bta;
+ uregs.scratch.lp_start = regs->lp_start;
+ uregs.scratch.lp_end = regs->lp_end;
+ uregs.scratch.lp_count = regs->lp_count;
+ uregs.scratch.status32 = regs->status32;
+ uregs.scratch.ret = regs->ret;
+ uregs.scratch.blink = regs->blink;
+ uregs.scratch.fp = regs->fp;
+ uregs.scratch.gp = regs->r26;
+ uregs.scratch.r12 = regs->r12;
+ uregs.scratch.r11 = regs->r11;
+ uregs.scratch.r10 = regs->r10;
+ uregs.scratch.r9 = regs->r9;
+ uregs.scratch.r8 = regs->r8;
+ uregs.scratch.r7 = regs->r7;
+ uregs.scratch.r6 = regs->r6;
+ uregs.scratch.r5 = regs->r5;
+ uregs.scratch.r4 = regs->r4;
+ uregs.scratch.r3 = regs->r3;
+ uregs.scratch.r2 = regs->r2;
+ uregs.scratch.r1 = regs->r1;
+ uregs.scratch.r0 = regs->r0;
+ uregs.scratch.sp = regs->sp;
+
+ err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
sizeof(sf->uc.uc_mcontext.regs.scratch));
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
@@ -78,14 +104,40 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
{
sigset_t set;
int err;
+ struct user_regs_struct uregs;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (!err)
set_current_blocked(&set);
- err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch),
+ err |= __copy_from_user(&uregs.scratch,
+ &(sf->uc.uc_mcontext.regs.scratch),
sizeof(sf->uc.uc_mcontext.regs.scratch));
+ regs->bta = uregs.scratch.bta;
+ regs->lp_start = uregs.scratch.lp_start;
+ regs->lp_end = uregs.scratch.lp_end;
+ regs->lp_count = uregs.scratch.lp_count;
+ regs->status32 = uregs.scratch.status32;
+ regs->ret = uregs.scratch.ret;
+ regs->blink = uregs.scratch.blink;
+ regs->fp = uregs.scratch.fp;
+ regs->r26 = uregs.scratch.gp;
+ regs->r12 = uregs.scratch.r12;
+ regs->r11 = uregs.scratch.r11;
+ regs->r10 = uregs.scratch.r10;
+ regs->r9 = uregs.scratch.r9;
+ regs->r8 = uregs.scratch.r8;
+ regs->r7 = uregs.scratch.r7;
+ regs->r6 = uregs.scratch.r6;
+ regs->r5 = uregs.scratch.r5;
+ regs->r4 = uregs.scratch.r4;
+ regs->r3 = uregs.scratch.r3;
+ regs->r2 = uregs.scratch.r2;
+ regs->r1 = uregs.scratch.r1;
+ regs->r0 = uregs.scratch.r0;
+ regs->sp = uregs.scratch.sp;
+
return err;
}
@@ -284,7 +336,7 @@ static void arc_restart_syscall(struct k_sigaction *ka, struct pt_regs *regs)
* their orig user space value when we ret from kernel
*/
regs->r0 = regs->orig_r0;
- regs->ret -= 4;
+ regs->ret -= is_isa_arcv2() ? 2 : 4;
break;
}
}
@@ -325,10 +377,10 @@ void do_signal(struct pt_regs *regs)
if (regs->r0 == -ERESTARTNOHAND ||
regs->r0 == -ERESTARTSYS || regs->r0 == -ERESTARTNOINTR) {
regs->r0 = regs->orig_r0;
- regs->ret -= 4;
+ regs->ret -= is_isa_arcv2() ? 2 : 4;
} else if (regs->r0 == -ERESTART_RESTARTBLOCK) {
regs->r8 = __NR_restart_syscall;
- regs->ret -= 4;
+ regs->ret -= is_isa_arcv2() ? 2 : 4;
}
syscall_wont_restart(regs); /* No more restarts */
}
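
The ISA-dependent rewind now appears three times in this file. A hypothetical helper (not part of the patch) would keep the magic numbers, presumably the size of the syscall trap instruction on each ISA, in one place:

    /* Hypothetical refactor: rewind user PC so the syscall trap is
     * re-executed; per the expressions above the amount is 2 bytes
     * on ARCv2 and 4 on ARCompact.
     */
    static inline void rewind_syscall(struct pt_regs *regs)
    {
        regs->ret -= is_isa_arcv2() ? 2 : 4;
    }
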
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 6a400b1b0b62..be13d12420ba 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -31,7 +31,7 @@ arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
#endif
-struct plat_smp_ops plat_smp_ops;
+struct plat_smp_ops __weak plat_smp_ops;
/* XXX: per cpu ? Only needed once in early seconday boot */
struct task_struct *secondary_idle_tsk;
@@ -182,7 +182,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
/*
* not supported here
*/
-int __init setup_profiling_timer(unsigned int multiplier)
+int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
@@ -278,8 +278,10 @@ static void ipi_cpu_stop(void)
machine_halt();
}
-static inline void __do_IPI(unsigned long msg)
+static inline int __do_IPI(unsigned long msg)
{
+ int rc = 0;
+
switch (msg) {
case IPI_RESCHEDULE:
scheduler_ipi();
@@ -294,8 +296,10 @@ static inline void __do_IPI(unsigned long msg)
break;
default:
- pr_warn("IPI with unexpected msg %ld\n", msg);
+ rc = 1;
}
+
+ return rc;
}
/*
@@ -305,6 +309,7 @@ static inline void __do_IPI(unsigned long msg)
irqreturn_t do_IPI(int irq, void *dev_id)
{
unsigned long pending;
+ unsigned long __maybe_unused copy;
pr_debug("IPI [%ld] received on cpu %d\n",
*this_cpu_ptr(&ipi_data), smp_processor_id());
@@ -316,11 +321,18 @@ irqreturn_t do_IPI(int irq, void *dev_id)
* "dequeue" the msg corresponding to this IPI (and possibly other
* piggybacked msg from elided IPIs: see ipi_send_msg_one() above)
*/
- pending = xchg(this_cpu_ptr(&ipi_data), 0);
+ copy = pending = xchg(this_cpu_ptr(&ipi_data), 0);
do {
unsigned long msg = __ffs(pending);
- __do_IPI(msg);
+ int rc;
+
+ rc = __do_IPI(msg);
+#ifdef CONFIG_ARC_IPI_DBG
+ /* IPI received but no valid @msg */
+ if (rc)
+ pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
+#endif
pending &= ~(1U << msg);
} while (pending);
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index 92320d6f737c..001de4ce711e 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -122,19 +122,17 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
while (1) {
address = UNW_PC(&frame_info);
- if (address && __kernel_text_address(address)) {
- if (consumer_fn(address, arg) == -1)
- break;
- }
+ if (!address || !__kernel_text_address(address))
+ break;
- ret = arc_unwind(&frame_info);
+ if (consumer_fn(address, arg) == -1)
+ break;
- if (ret == 0) {
- frame_info.regs.r63 = frame_info.regs.r31;
- continue;
- } else {
+ ret = arc_unwind(&frame_info);
+ if (ret)
break;
- }
+
+ frame_info.regs.r63 = frame_info.regs.r31;
}
return address; /* return the last address it saw */
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index dbe74f418019..3364d2bbc515 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -26,6 +26,7 @@
* while TIMER1 for free running (clocksource)
*
* Newer ARC700 cores have 64bit clk fetching RTSC insn, preferred over TIMER1
+ * which, however, is currently broken
*/
#include <linux/spinlock.h>
@@ -44,6 +45,8 @@
#include <asm/clk.h>
#include <asm/mach_desc.h>
+#include <asm/mcip.h>
+
/* Timer related Aux registers */
#define ARC_REG_TIMER0_LIMIT 0x23 /* timer 0 limit */
#define ARC_REG_TIMER0_CTRL 0x22 /* timer 0 control */
@@ -59,14 +62,10 @@
/********** Clock Source Device *********/
-#ifdef CONFIG_ARC_HAS_RTSC
+#ifdef CONFIG_ARC_HAS_GRTC
-int arc_counter_setup(void)
+static int arc_counter_setup(void)
{
- /*
- * For SMP this needs to be 0. However Kconfig glue doesn't
- * enable this option for SMP configs
- */
return 1;
}
@@ -75,45 +74,84 @@ static cycle_t arc_counter_read(struct clocksource *cs)
unsigned long flags;
union {
#ifdef CONFIG_CPU_BIG_ENDIAN
- struct { u32 high, low; };
+ struct { u32 h, l; };
#else
- struct { u32 low, high; };
+ struct { u32 l, h; };
#endif
cycle_t full;
} stamp;
- flags = arch_local_irq_save();
+ local_irq_save(flags);
- __asm__ __volatile(
- " .extCoreRegister tsch, 58, r, cannot_shortcut \n"
- " rtsc %0, 0 \n"
- " mov %1, 0 \n"
- : "=r" (stamp.low), "=r" (stamp.high));
+ __mcip_cmd(CMD_GRTC_READ_LO, 0);
+ stamp.l = read_aux_reg(ARC_REG_MCIP_READBACK);
+
+ __mcip_cmd(CMD_GRTC_READ_HI, 0);
+ stamp.h = read_aux_reg(ARC_REG_MCIP_READBACK);
- arch_local_irq_restore(flags);
+ local_irq_restore(flags);
return stamp.full;
}
static struct clocksource arc_counter = {
- .name = "ARC RTSC",
- .rating = 300,
+ .name = "ARConnect GRTC",
+ .rating = 400,
.read = arc_counter_read,
- .mask = CLOCKSOURCE_MASK(32),
+ .mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-#else /* !CONFIG_ARC_HAS_RTSC */
+#else
+
+#ifdef CONFIG_ARC_HAS_RTC
+
+#define AUX_RTC_CTRL 0x103
+#define AUX_RTC_LOW 0x104
+#define AUX_RTC_HIGH 0x105
-static bool is_usable_as_clocksource(void)
+int arc_counter_setup(void)
{
-#ifdef CONFIG_SMP
- return 0;
+ write_aux_reg(AUX_RTC_CTRL, 1);
+
+ /* Not usable in SMP */
+ return !IS_ENABLED(CONFIG_SMP);
+}
+
+static cycle_t arc_counter_read(struct clocksource *cs)
+{
+ unsigned long status;
+ union {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ struct { u32 high, low; };
#else
- return 1;
+ struct { u32 low, high; };
#endif
+ cycle_t full;
+ } stamp;
+
+
+ __asm__ __volatile(
+ "1: \n"
+ " lr %0, [AUX_RTC_LOW] \n"
+ " lr %1, [AUX_RTC_HIGH] \n"
+ " lr %2, [AUX_RTC_CTRL] \n"
+ " bbit0.nt %2, 31, 1b \n"
+ : "=r" (stamp.low), "=r" (stamp.high), "=r" (status));
+
+ return stamp.full;
}
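
The bbit0 loop re-reads LOW/HIGH until bit 31 of AUX_RTC_CTRL confirms that the two halves form one consistent snapshot. The same retry pattern in C, with a hypothetical read_aux() stub in place of the lr instruction:

    #include <stdint.h>

    #define AUX_RTC_CTRL 0x103
    #define AUX_RTC_LOW  0x104
    #define AUX_RTC_HIGH 0x105

    extern uint32_t read_aux(unsigned int reg);  /* stub for the lr insn */

    static uint64_t rtc_read64(void)
    {
        uint32_t lo, hi;

        do {
            lo = read_aux(AUX_RTC_LOW);
            hi = read_aux(AUX_RTC_HIGH);
            /* bit 31 set => the reads above were an atomic snapshot */
        } while (!(read_aux(AUX_RTC_CTRL) & (1u << 31)));

        return ((uint64_t)hi << 32) | lo;
    }
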
+static struct clocksource arc_counter = {
+ .name = "ARCv2 RTC",
+ .rating = 350,
+ .read = arc_counter_read,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+#else /* !CONFIG_ARC_HAS_RTC */
+
/*
* set 32bit TIMER1 to keep counting monotonically and wraparound
*/
@@ -123,7 +161,8 @@ int arc_counter_setup(void)
write_aux_reg(ARC_REG_TIMER1_CNT, 0);
write_aux_reg(ARC_REG_TIMER1_CTRL, TIMER_CTRL_NH);
- return is_usable_as_clocksource();
+ /* Not usable in SMP */
+ return !IS_ENABLED(CONFIG_SMP);
}
static cycle_t arc_counter_read(struct clocksource *cs)
@@ -140,6 +179,7 @@ static struct clocksource arc_counter = {
};
#endif
+#endif
/********** Clock Event Device *********/
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index e00a01879025..e0cf99893212 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -14,6 +14,7 @@
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <asm/arcregs.h>
+#include <asm/irqflags.h>
/*
* Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
@@ -34,7 +35,10 @@ static noinline void print_reg_file(long *reg_rev, int start_num)
n += scnprintf(buf + n, len - n, "\n");
/* because pt_regs has regs reversed: r12..r0, r25..r13 */
- reg_rev--;
+ if (is_isa_arcv2() && start_num == 0)
+ reg_rev++;
+ else
+ reg_rev--;
}
if (start_num != 0)
@@ -152,6 +156,15 @@ static void show_ecr_verbose(struct pt_regs *regs)
((cause_code == 0x02) ? "Write" : "EX"));
} else if (vec == ECR_V_INSN_ERR) {
pr_cont("Illegal Insn\n");
+#ifdef CONFIG_ISA_ARCV2
+ } else if (vec == ECR_V_MEM_ERR) {
+ if (cause_code == 0x00)
+ pr_cont("Bus Error from Insn Mem\n");
+ else if (cause_code == 0x10)
+ pr_cont("Bus Error from Data Mem\n");
+ else
+ pr_cont("Bus Error, check PRM\n");
+#endif
} else {
pr_cont("Check Programmer's Manual\n");
}
@@ -185,12 +198,20 @@ void show_regs(struct pt_regs *regs)
pr_info("[STAT32]: 0x%08lx", regs->status32);
-#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit : ""
- if (!user_mode(regs))
- pr_cont(" : %2s %2s %2s %2s %2s\n",
- STS_BIT(regs, AE), STS_BIT(regs, A2), STS_BIT(regs, A1),
- STS_BIT(regs, E2), STS_BIT(regs, E1));
+#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
+#ifdef CONFIG_ISA_ARCOMPACT
+ pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n",
+ (regs->status32 & STATUS_U_MASK) ? "U " : "K ",
+ STS_BIT(regs, DE), STS_BIT(regs, AE),
+ STS_BIT(regs, A2), STS_BIT(regs, A1),
+ STS_BIT(regs, E2), STS_BIT(regs, E1));
+#else
+ pr_cont(" : %2s%2s%2s%2s\n",
+ STS_BIT(regs, IE),
+ (regs->status32 & STATUS_U_MASK) ? "U " : "K ",
+ STS_BIT(regs, DE), STS_BIT(regs, AE));
+#endif
pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n",
regs->bta, regs->sp, regs->fp);
pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
index db46e200baba..b1656d156097 100644
--- a/arch/arc/lib/Makefile
+++ b/arch/arc/lib/Makefile
@@ -5,5 +5,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-lib-y := strchr-700.o strcmp.o strcpy-700.o strlen.o
-lib-y += memcmp.o memcpy-700.o memset.o
+lib-y := strchr-700.o strcpy-700.o strlen.o memcmp.o
+
+lib-$(CONFIG_ISA_ARCOMPACT) += memcpy-700.o memset.o strcmp.o
+lib-$(CONFIG_ISA_ARCV2) += memcpy-archs.o memset-archs.o strcmp-archs.o
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
index 978bf8314dfb..a4015e7d9ab7 100644
--- a/arch/arc/lib/memcmp.S
+++ b/arch/arc/lib/memcmp.S
@@ -24,14 +24,32 @@ ENTRY(memcmp)
ld r4,[r0,0]
ld r5,[r1,0]
lsr.f lp_count,r3,3
+#ifdef CONFIG_ISA_ARCV2
+ /* In ARCv2 a branch can't be the last instruction in a zero overhead
+ * loop.
+ * So we move the branch to the start of the loop, duplicate it
+ * after the end, and set up r12 so that the branch isn't taken
+ * initially.
+ */
+ mov_s r12,WORD2
+ lpne .Loop_end
+ brne WORD2,r12,.Lodd
+ ld WORD2,[r0,4]
+#else
lpne .Loop_end
ld_s WORD2,[r0,4]
+#endif
ld_s r12,[r1,4]
brne r4,r5,.Leven
ld.a r4,[r0,8]
ld.a r5,[r1,8]
+#ifdef CONFIG_ISA_ARCV2
+.Loop_end:
+ brne WORD2,r12,.Lodd
+#else
brne WORD2,r12,.Lodd
.Loop_end:
+#endif
asl_s SHIFT,SHIFT,3
bhs_s .Last_cmp
brne r4,r5,.Leven
@@ -89,7 +107,6 @@ ENTRY(memcmp)
bset.cs r0,r0,31
.Lodd:
cmp_s WORD2,r12
-
mov_s r0,1
j_s.d [blink]
bset.cs r0,r0,31
@@ -100,14 +117,25 @@ ENTRY(memcmp)
ldb r4,[r0,0]
ldb r5,[r1,0]
lsr.f lp_count,r3
+#ifdef CONFIG_ISA_ARCV2
+ mov r12,r3
lpne .Lbyte_end
+ brne r3,r12,.Lbyte_odd
+#else
+ lpne .Lbyte_end
+#endif
ldb_s r3,[r0,1]
ldb r12,[r1,1]
brne r4,r5,.Lbyte_even
ldb.a r4,[r0,2]
ldb.a r5,[r1,2]
+#ifdef CONFIG_ISA_ARCV2
+.Lbyte_end:
+ brne r3,r12,.Lbyte_odd
+#else
brne r3,r12,.Lbyte_odd
.Lbyte_end:
+#endif
bcc .Lbyte_even
brne r4,r5,.Lbyte_even
ldb_s r3,[r0,1]
diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S
new file mode 100644
index 000000000000..1b2b3acfed52
--- /dev/null
+++ b/arch/arc/lib/memcpy-archs.S
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#ifdef __LITTLE_ENDIAN__
+# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
+# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
+# define MERGE_2(RX,RY,IMM)
+# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
+# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
+#else
+# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
+# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
+# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
+#endif
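
What the SHIFT_1/SHIFT_2 pairs compute, taking the little-endian source-off-by-1 case (IMM = 24 and 8): each aligned store combines three carried bytes with the next aligned load. A hedged C rendering of one step:

    #include <stdint.h>
    #include <stdio.h>

    /* carry holds the 3 leftover low bytes from the previous load */
    static uint32_t emit_word(uint32_t *carry, uint32_t next)
    {
        uint32_t out = *carry | (next << 24);  /* SHIFT_1 + or */
        *carry = next >> 8;                    /* SHIFT_2      */
        return out;
    }

    int main(void)
    {
        uint32_t carry = 0x00636261;                      /* "abc" so far */
        printf("%08x\n", emit_word(&carry, 0x67666564));  /* 64636261 */
        return 0;
    }
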
+
+#ifdef CONFIG_ARC_HAS_LL64
+# define PREFETCH_READ(RX) prefetch [RX, 56]
+# define PREFETCH_WRITE(RX) prefetchw [RX, 64]
+# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
+# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
+# define ZOLSHFT 5
+# define ZOLAND 0x1F
+#else
+# define PREFETCH_READ(RX) prefetch [RX, 28]
+# define PREFETCH_WRITE(RX) prefetchw [RX, 32]
+# define LOADX(DST,RX) ld.ab DST, [RX, 4]
+# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
+# define ZOLSHFT 4
+# define ZOLAND 0xF
+#endif
+
+ENTRY(memcpy)
+ prefetch [r1] ; Prefetch the read location
+ prefetchw [r0] ; Prefetch the write location
+ mov.f 0, r2
+;;; if size is zero
+ jz.d [blink]
+ mov r3, r0 ; don't clobber ret val
+
+;;; if size <= 8
+ cmp r2, 8
+ bls.d @smallchunk
+ mov.f lp_count, r2
+
+ and.f r4, r0, 0x03
+ rsub lp_count, r4, 4
+ lpnz @aligndestination
+ ;; LOOP BEGIN
+ ldb.ab r5, [r1,1]
+ sub r2, r2, 1
+ stb.ab r5, [r3,1]
+aligndestination:
+
+;;; Check the alignment of the source
+ and.f r4, r1, 0x03
+ bnz.d @sourceunaligned
+
+;;; CASE 0: Both source and destination are 32bit aligned
+;;; Convert len to Dwords, unfold x4
+ lsr.f lp_count, r2, ZOLSHFT
+ lpnz @copy32_64bytes
+ ;; LOOP START
+ LOADX (r6, r1)
+ PREFETCH_READ (r1)
+ PREFETCH_WRITE (r3)
+ LOADX (r8, r1)
+ LOADX (r10, r1)
+ LOADX (r4, r1)
+ STOREX (r6, r3)
+ STOREX (r8, r3)
+ STOREX (r10, r3)
+ STOREX (r4, r3)
+copy32_64bytes:
+
+ and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes
+smallchunk:
+ lpnz @copyremainingbytes
+ ;; LOOP START
+ ldb.ab r5, [r1,1]
+ stb.ab r5, [r3,1]
+copyremainingbytes:
+
+ j [blink]
+;;; END CASE 0
+
+sourceunaligned:
+ cmp r4, 2
+ beq.d @unalignedOffby2
+ sub r2, r2, 1
+
+ bhi.d @unalignedOffby3
+ ldb.ab r5, [r1, 1]
+
+;;; CASE 1: The source is unaligned, off by 1
+ ;; Hence I need to read 1 byte for a 16bit alignment
+ ;; and 2 bytes to reach 32bit alignment
+ ldh.ab r6, [r1, 2]
+ sub r2, r2, 2
+ ;; Convert to words, unfold x2
+ lsr.f lp_count, r2, 3
+ MERGE_1 (r6, r6, 8)
+ MERGE_2 (r5, r5, 24)
+ or r5, r5, r6
+
+ ;; Both src and dst are aligned
+ lpnz @copy8bytes_1
+ ;; LOOP START
+ ld.ab r6, [r1, 4]
+ prefetch [r1, 28] ;Prefetch the next read location
+ ld.ab r8, [r1,4]
+ prefetchw [r3, 32] ;Prefetch the next write location
+
+ SHIFT_1 (r7, r6, 24)
+ or r7, r7, r5
+ SHIFT_2 (r5, r6, 8)
+
+ SHIFT_1 (r9, r8, 24)
+ or r9, r9, r5
+ SHIFT_2 (r5, r8, 8)
+
+ st.ab r7, [r3, 4]
+ st.ab r9, [r3, 4]
+copy8bytes_1:
+
+ ;; Write back the remaining 16bits
+ EXTRACT_1 (r6, r5, 16)
+ sth.ab r6, [r3, 2]
+ ;; Write back the remaining 8bits
+ EXTRACT_2 (r5, r5, 16)
+ stb.ab r5, [r3, 1]
+
+ and.f lp_count, r2, 0x07 ;Last 8bytes
+ lpnz @copybytewise_1
+ ;; LOOP START
+ ldb.ab r6, [r1,1]
+ stb.ab r6, [r3,1]
+copybytewise_1:
+ j [blink]
+
+unalignedOffby2:
+;;; CASE 2: The source is unaligned, off by 2
+ ldh.ab r5, [r1, 2]
+ sub r2, r2, 1
+
+ ;; Both src and dst are aligned
+ ;; Convert to words, unfold x2
+ lsr.f lp_count, r2, 3
+#ifdef __BIG_ENDIAN__
+ asl.nz r5, r5, 16
+#endif
+ lpnz @copy8bytes_2
+ ;; LOOP START
+ ld.ab r6, [r1, 4]
+ prefetch [r1, 28] ;Prefetch the next read location
+ ld.ab r8, [r1,4]
+ prefetchw [r3, 32] ;Prefetch the next write location
+
+ SHIFT_1 (r7, r6, 16)
+ or r7, r7, r5
+ SHIFT_2 (r5, r6, 16)
+
+ SHIFT_1 (r9, r8, 16)
+ or r9, r9, r5
+ SHIFT_2 (r5, r8, 16)
+
+ st.ab r7, [r3, 4]
+ st.ab r9, [r3, 4]
+copy8bytes_2:
+
+#ifdef __BIG_ENDIAN__
+ lsr.nz r5, r5, 16
+#endif
+ sth.ab r5, [r3, 2]
+
+ and.f lp_count, r2, 0x07 ;Last 8bytes
+ lpnz @copybytewise_2
+ ;; LOOP START
+ ldb.ab r6, [r1,1]
+ stb.ab r6, [r3,1]
+copybytewise_2:
+ j [blink]
+
+unalignedOffby3:
+;;; CASE 3: The source is unaligned, off by 3
+;;; Hence, I need to read 1 byte to achieve the 32bit alignment
+
+ ;; Both src and dst are aligned
+ ;; Convert to words, unfold x2
+ lsr.f lp_count, r2, 3
+#ifdef __BIG_ENDIAN__
+ asl.ne r5, r5, 24
+#endif
+ lpnz @copy8bytes_3
+ ;; LOOP START
+ ld.ab r6, [r1, 4]
+ prefetch [r1, 28] ;Prefetch the next read location
+ ld.ab r8, [r1,4]
+ prefetch [r3, 32] ;Prefetch the next write location
+
+ SHIFT_1 (r7, r6, 8)
+ or r7, r7, r5
+ SHIFT_2 (r5, r6, 24)
+
+ SHIFT_1 (r9, r8, 8)
+ or r9, r9, r5
+ SHIFT_2 (r5, r8, 24)
+
+ st.ab r7, [r3, 4]
+ st.ab r9, [r3, 4]
+copy8bytes_3:
+
+#ifdef __BIG_ENDIAN__
+ lsr.nz r5, r5, 24
+#endif
+ stb.ab r5, [r3, 1]
+
+ and.f lp_count, r2, 0x07 ;Last 8bytes
+ lpnz @copybytewise_3
+ ;; LOOP START
+ ldb.ab r6, [r1,1]
+ stb.ab r6, [r3,1]
+copybytewise_3:
+ j [blink]
+
+END(memcpy)
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
new file mode 100644
index 000000000000..92d573c734b5
--- /dev/null
+++ b/arch/arc/lib/memset-archs.S
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#undef PREALLOC_NOT_AVAIL
+
+#ifdef PREALLOC_NOT_AVAIL
+#define PREWRITE(A,B) prefetchw [(A),(B)]
+#else
+#define PREWRITE(A,B) prealloc [(A),(B)]
+#endif
+
+ENTRY(memset)
+ prefetchw [r0] ; Prefetch the write location
+ mov.f 0, r2
+;;; if size is zero
+ jz.d [blink]
+ mov r3, r0 ; don't clobber ret val
+
+;;; if length <= 8
+ brls.d.nt r2, 8, .Lsmallchunk
+ mov.f lp_count,r2
+
+ and.f r4, r0, 0x03
+ rsub lp_count, r4, 4
+ lpnz @.Laligndestination
+ ;; LOOP BEGIN
+ stb.ab r1, [r3,1]
+ sub r2, r2, 1
+.Laligndestination:
+
+;;; Destination is aligned
+ and r1, r1, 0xFF
+ asl r4, r1, 8
+ or r4, r4, r1
+ asl r5, r4, 16
+ or r5, r5, r4
+ mov r4, r5
+
+ sub3 lp_count, r2, 8
+ cmp r2, 64
+ bmsk.hi r2, r2, 5
+ mov.ls lp_count, 0
+ add3.hi r2, r2, 8
+
+;;; Convert len to Dwords, unfold x8
+ lsr.f lp_count, lp_count, 6
+ lpnz @.Lset64bytes
+ ;; LOOP START
+ PREWRITE(r3, 64) ;Prefetch the next write location
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+.Lset64bytes:
+
+ lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
+ lpnz .Lset32bytes
+ ;; LOOP START
+ prefetchw [r3, 32] ;Prefetch the next write location
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+.Lset32bytes:
+
+ and.f lp_count, r2, 0x1F ;Last remaining 31 bytes
+.Lsmallchunk:
+ lpnz .Lcopy3bytes
+ ;; LOOP START
+ stb.ab r1, [r3, 1]
+.Lcopy3bytes:
+
+ j [blink]
+
+END(memset)
+
+ENTRY(memzero)
+ ; adjust bzero args to memset args
+ mov r2, r1
+ b.d memset ;tail call so need to tinker with blink
+ mov r1, 0
+END(memzero)
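
The asl/or sequence after .Laligndestination broadcasts the fill byte to all four byte lanes before the double-word stores; the same construction in C:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t  c   = 0xab;                   /* fill byte         */
        uint32_t pat = c | ((uint32_t)c << 8); /* asl r4,r1,8 ; or  */
        pat |= pat << 16;                      /* asl r5,r4,16; or  */

        printf("%08x\n", pat);                 /* abababab          */
        return 0;
    }
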
diff --git a/arch/arc/lib/strcmp-archs.S b/arch/arc/lib/strcmp-archs.S
new file mode 100644
index 000000000000..4f338eec3365
--- /dev/null
+++ b/arch/arc/lib/strcmp-archs.S
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+ENTRY(strcmp)
+ or r2, r0, r1
+ bmsk_s r2, r2, 1
+ brne r2, 0, @.Lcharloop
+
+;;; s1 and s2 are word aligned
+ ld.ab r2, [r0, 4]
+
+ mov_s r12, 0x01010101
+ ror r11, r12
+ .align 4
+.LwordLoop:
+ ld.ab r3, [r1, 4]
+ ;; Detect NULL char in str1
+ sub r4, r2, r12
+ ld.ab r5, [r0, 4]
+ bic r4, r4, r2
+ and r4, r4, r11
+ brne.d.nt r4, 0, .LfoundNULL
+ ;; Check if the read locations are the same
+ cmp r2, r3
+ beq.d .LwordLoop
+ mov.eq r2, r5
+
+ ;; A difference is found, work out the return value
+#ifdef __LITTLE_ENDIAN__
+ swape r3, r3
+ mov_s r0, 1
+ swape r2, r2
+#else
+ mov_s r0, 1
+#endif
+ cmp_s r2, r3
+ j_s.d [blink]
+ bset.lo r0, r0, 31
+
+ .align 4
+.LfoundNULL:
+#ifdef __BIG_ENDIAN__
+ swape r4, r4
+ swape r2, r2
+ swape r3, r3
+#endif
+ ;; Find null byte
+ ffs r0, r4
+ bmsk r2, r2, r0
+ bmsk r3, r3, r0
+ swape r2, r2
+ swape r3, r3
+ ;; make the return value
+ sub.f r0, r2, r3
+ mov.hi r0, 1
+ j_s.d [blink]
+ bset.lo r0, r0, 31
+
+ .align 4
+.Lcharloop:
+ ldb.ab r2, [r0, 1]
+ ldb.ab r3, [r1, 1]
+ nop
+ breq r2, 0, .Lcmpend
+ breq r2, r3, .Lcharloop
+
+ .align 4
+.Lcmpend:
+ j_s.d [blink]
+ sub r0, r2, r3
+END(strcmp)
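
The sub/bic/and triplet in .LwordLoop, with r12 = 0x01010101 and r11 = ror(r12) = 0x80808080, is the classic zero-byte detector: (w - 0x01010101) & ~w & 0x80808080 is nonzero iff some byte of w is NUL. A stand-alone check:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t has_zero_byte(uint32_t w)
    {
        return (w - 0x01010101u) & ~w & 0x80808080u;  /* sub, bic, and */
    }

    int main(void)
    {
        printf("%08x\n", has_zero_byte(0x61620063));  /* NUL byte -> nonzero */
        printf("%08x\n", has_zero_byte(0x61626364));  /* no NUL   -> 0       */
        return 0;
    }
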
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile
index ac95cc239c1e..7beb941556c3 100644
--- a/arch/arc/mm/Makefile
+++ b/arch/arc/mm/Makefile
@@ -7,4 +7,4 @@
#
obj-y := extable.o ioremap.o dma.o fault.o init.o
-obj-y += tlb.o tlbex.o cache_arc700.o mmap.o
+obj-y += tlb.o tlbex.o cache.o mmap.o
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache.c
index 12b2100db073..b29d62ed4f7e 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache.c
@@ -1,64 +1,12 @@
/*
- * ARC700 VIPT Cache Management
+ * ARC Cache Management
*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
- * -flush_cache_dup_mm (fork)
- * -likewise for flush_cache_mm (exit/execve)
- * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break)
- *
- * vineetg: Apr 2011
- * -Now that MMU can support larger pg sz (16K), the determiniation of
- * aliasing shd not be based on assumption of 8k pg
- *
- * vineetg: Mar 2011
- * -optimised version of flush_icache_range( ) for making I/D coherent
- * when vaddr is available (agnostic of num of aliases)
- *
- * vineetg: Mar 2011
- * -Added documentation about I-cache aliasing on ARC700 and the way it
- * was handled up until MMU V2.
- * -Spotted a three year old bug when killing the 4 aliases, which needs
- * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03}
- * instead of paddr | {0x00, 0x01, 0x10, 0x11}
- * (Rajesh you owe me one now)
- *
- * vineetg: Dec 2010
- * -Off-by-one error when computing num_of_lines to flush
- * This broke signal handling with bionic which uses synthetic sigret stub
- *
- * vineetg: Mar 2010
- * -GCC can't generate ZOL for core cache flush loops.
- * Conv them into iterations based as opposed to while (start < end) types
- *
- * Vineetg: July 2009
- * -In I-cache flush routine we used to chk for aliasing for every line INV.
- * Instead now we setup routines per cache geometry and invoke them
- * via function pointers.
- *
- * Vineetg: Jan 2009
- * -Cache Line flush routines used to flush an extra line beyond end addr
- * because check was while (end >= start) instead of (end > start)
- * =Some call sites had to work around by doing -1, -4 etc to end param
- * =Some callers didnt care. This was spec bad in case of INV routines
- * which would discard valid data (cause of the horrible ext2 bug
- * in ARC IDE driver)
- *
- * vineetg: June 11th 2008: Fixed flush_icache_range( )
- * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need
- * to be flushed, which it was not doing.
- * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API,
- * however ARC cache maintenance OPs require PHY addr. Thus need to do
- * vmalloc_to_phy.
- * -Also added optimisation there, that for range > PAGE SIZE we flush the
- * entire cache in one shot rather than line by line. For e.g. a module
- * with Code sz 600k, old code flushed 600k worth of cache (line-by-line),
- * while cache is only 16 or 32k.
*/
#include <linux/module.h>
@@ -73,9 +21,15 @@
#include <asm/cachectl.h>
#include <asm/setup.h>
+static int l2_line_sz;
+
+void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int cacheop);
+
char *arc_cache_mumbojumbo(int c, char *buf, int len)
{
int n = 0;
+ struct cpuinfo_arc_cache *p;
#define PR_CACHE(p, cfg, str) \
if (!(p)->ver) \
@@ -91,6 +45,11 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
+ p = &cpuinfo_arc700[c].slc;
+ if (p->ver)
+ n += scnprintf(buf + n, len - n,
+ "SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len);
+
return buf;
}
@@ -101,7 +60,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
*/
void read_decode_cache_bcr(void)
{
- struct cpuinfo_arc_cache *p_ic, *p_dc;
+ struct cpuinfo_arc_cache *p_ic, *p_dc, *p_slc;
unsigned int cpu = smp_processor_id();
struct bcr_cache {
#ifdef CONFIG_CPU_BIG_ENDIAN
@@ -111,14 +70,29 @@ void read_decode_cache_bcr(void)
#endif
} ibcr, dbcr;
+ struct bcr_generic sbcr;
+
+ struct bcr_slc_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:24, way:2, lsz:2, sz:4;
+#else
+ unsigned int sz:4, lsz:2, way:2, pad:24;
+#endif
+ } slc_cfg;
+
p_ic = &cpuinfo_arc700[cpu].icache;
READ_BCR(ARC_REG_IC_BCR, ibcr);
if (!ibcr.ver)
goto dc_chk;
- BUG_ON(ibcr.config != 3);
- p_ic->assoc = 2; /* Fixed to 2w set assoc */
+ if (ibcr.ver <= 3) {
+ BUG_ON(ibcr.config != 3);
+ p_ic->assoc = 2; /* Fixed to 2w set assoc */
+ } else if (ibcr.ver >= 4) {
+ p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */
+ }
+
p_ic->line_len = 8 << ibcr.line_len;
p_ic->sz_k = 1 << (ibcr.sz - 1);
p_ic->ver = ibcr.ver;
@@ -130,94 +104,140 @@ dc_chk:
READ_BCR(ARC_REG_DC_BCR, dbcr);
if (!dbcr.ver)
- return;
+ goto slc_chk;
+
+ if (dbcr.ver <= 3) {
+ BUG_ON(dbcr.config != 2);
+ p_dc->assoc = 4; /* Fixed to 4w set assoc */
+ p_dc->vipt = 1;
+ p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
+ } else if (dbcr.ver >= 4) {
+ p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */
+ p_dc->vipt = 0;
+ p_dc->alias = 0; /* PIPT so can't VIPT alias */
+ }
- BUG_ON(dbcr.config != 2);
- p_dc->assoc = 4; /* Fixed to 4w set assoc */
p_dc->line_len = 16 << dbcr.line_len;
p_dc->sz_k = 1 << (dbcr.sz - 1);
p_dc->ver = dbcr.ver;
- p_dc->vipt = 1;
- p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
+
+slc_chk:
+ if (!is_isa_arcv2())
+ return;
+
+ p_slc = &cpuinfo_arc700[cpu].slc;
+ READ_BCR(ARC_REG_SLC_BCR, sbcr);
+ if (sbcr.ver) {
+ READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
+ p_slc->ver = sbcr.ver;
+ p_slc->sz_k = 128 << slc_cfg.sz;
+ l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+ }
}
/*
- * 1. Validate the Cache Geomtery (compile time config matches hardware)
- * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn)
- * (aliasing D-cache configurations are not supported YET)
- * 3. Enable the Caches, setup default flush mode for D-Cache
- * 3. Calculate the SHMLBA used by user space
+ * Line Operations on {I,D}-Cache
*/
-void arc_cache_init(void)
-{
- unsigned int __maybe_unused cpu = smp_processor_id();
- char str[256];
-
- printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
- if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
- struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+#define OP_INV 0x1
+#define OP_FLUSH 0x2
+#define OP_FLUSH_N_INV 0x3
+#define OP_INV_IC 0x4
- if (!ic->ver)
- panic("cache support enabled but non-existent cache\n");
+/*
+ * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
+ *
+ * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
+ * The orig Cache Management Module "CDU" only required paddr to invalidate a
+ * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
+ * In fact, for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
+ * the exact same line.
+ *
+ * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
+ * paddr alone could not be used to correctly index the cache.
+ *
+ * ------------------
+ * MMU v1/v2 (Fixed Page Size 8k)
+ * ------------------
+ * The solution was to provide CDU with these additonal vaddr bits. These
+ * would be bits [x:13], x would depend on cache-geometry, 13 comes from
+ * standard page size of 8k.
+ * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
+ * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
+ * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
+ * represent the offset within cache-line. The adv of using this "clumsy"
+ * interface for additional info was no new reg was needed in CDU programming
+ * model.
+ *
+ * 17:13 represented the max num of bits passable, actual bits needed were
+ * fewer, based on the num-of-aliases possible.
+ * -for 2 alias possibility, only bit 13 needed (32K cache)
+ * -for 4 alias possibility, bits 14:13 needed (64K cache)
+ *
+ * ------------------
+ * MMU v3
+ * ------------------
+ * This ver of MMU supports variable page sizes (1k-16k), although Linux will
+ * only support 8k (default), 16k and 4k.
+ * However, from a hardware perspective, smaller page sizes aggravate aliasing,
+ * meaning more vaddr bits are needed to disambiguate the cache-line op;
+ * the existing scheme of piggybacking won't work for certain configurations.
+ * Two new registers, IC_PTAG and DC_PTAG, were introduced.
+ * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
+ */
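
A numeric illustration of the MMU v1/v2 "stuffing" just described, with made-up addresses and the standard 8k page (PAGE_SHIFT = 13):

    #include <stdio.h>

    #define PAGE_SHIFT 13  /* 8k page, per the comment above */

    int main(void)
    {
        unsigned long paddr = 0x80a02000;  /* hypothetical, line aligned */
        unsigned long vaddr = 0x2000a000;  /* hypothetical user alias    */

        /* vaddr bits [17:13] ride in the otherwise-ignored low paddr bits */
        unsigned long cmd = paddr | ((vaddr >> PAGE_SHIFT) & 0x1f);

        printf("CDU line-op operand: %#lx\n", cmd);  /* 0x80a02005 */
        return 0;
    }
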
- if (ic->line_len != L1_CACHE_BYTES)
- panic("ICache line [%d] != kernel Config [%d]",
- ic->line_len, L1_CACHE_BYTES);
+static inline
+void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int op)
+{
+ unsigned int aux_cmd;
+ int num_lines;
+ const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
- if (ic->ver != CONFIG_ARC_MMU_VER)
- panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
- ic->ver, CONFIG_ARC_MMU_VER);
+ if (op == OP_INV_IC) {
+ aux_cmd = ARC_REG_IC_IVIL;
+ } else {
+ /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+ aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
}
- if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
- struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
- int handled;
-
- if (!dc->ver)
- panic("cache support enabled but non-existent cache\n");
+ /* Ensure we properly floor/ceil the non-line aligned/sized requests
+ * and have @paddr - aligned to cache line and integral @num_lines.
+ * This however can be avoided for page-sized requests since:
+ * -@paddr will be cache-line aligned already (being page aligned)
+ * -@sz will be integral multiple of line size (being page sized).
+ */
+ if (!full_page) {
+ sz += paddr & ~CACHE_LINE_MASK;
+ paddr &= CACHE_LINE_MASK;
+ vaddr &= CACHE_LINE_MASK;
+ }
- if (dc->line_len != L1_CACHE_BYTES)
- panic("DCache line [%d] != kernel Config [%d]",
- dc->line_len, L1_CACHE_BYTES);
+ num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
- /* check for D-Cache aliasing */
- handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
+ /* MMUv2 and before: paddr contains stuffed vaddrs bits */
+ paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
- if (dc->alias && !handled)
- panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- else if (!dc->alias && handled)
- panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+ while (num_lines-- > 0) {
+ write_aux_reg(aux_cmd, paddr);
+ paddr += L1_CACHE_BYTES;
}
}
-#define OP_INV 0x1
-#define OP_FLUSH 0x2
-#define OP_FLUSH_N_INV 0x3
-#define OP_INV_IC 0x4
-
-/*
- * Common Helper for Line Operations on {I,D}-Cache
- */
-static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
- unsigned long sz, const int cacheop)
+static inline
+void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int op)
{
unsigned int aux_cmd, aux_tag;
int num_lines;
- const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+ const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
- if (cacheop == OP_INV_IC) {
+ if (op == OP_INV_IC) {
aux_cmd = ARC_REG_IC_IVIL;
-#if (CONFIG_ARC_MMU_VER > 2)
aux_tag = ARC_REG_IC_PTAG;
-#endif
- }
- else {
- /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
- aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
-#if (CONFIG_ARC_MMU_VER > 2)
+ } else {
+ aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
aux_tag = ARC_REG_DC_PTAG;
-#endif
}
/* Ensure we properly floor/ceil the non-line aligned/sized requests
@@ -226,177 +246,169 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
* -@paddr will be cache-line aligned already (being page aligned)
* -@sz will be integral multiple of line size (being page sized).
*/
- if (!full_page_op) {
+ if (!full_page) {
sz += paddr & ~CACHE_LINE_MASK;
paddr &= CACHE_LINE_MASK;
vaddr &= CACHE_LINE_MASK;
}
-
num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
-#if (CONFIG_ARC_MMU_VER <= 2)
- /* MMUv2 and before: paddr contains stuffed vaddrs bits */
- paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#else
- /* if V-P const for loop, PTAG can be written once outside loop */
- if (full_page_op)
+ /*
+ * MMUv3, cache ops require paddr in PTAG reg
+ * if V-P const for loop, PTAG can be written once outside loop
+ */
+ if (full_page)
write_aux_reg(aux_tag, paddr);
-#endif
while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
- /* MMUv3, cache ops require paddr seperately */
- if (!full_page_op) {
+ if (!full_page) {
write_aux_reg(aux_tag, paddr);
paddr += L1_CACHE_BYTES;
}
write_aux_reg(aux_cmd, vaddr);
vaddr += L1_CACHE_BYTES;
-#else
+ }
+}
+
+/*
+ * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache
+ * maintenance ops (in IVIL reg), as long as icache doesn't alias.
+ *
+ * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is
+ * specified in PTAG (similar to MMU v3)
+ */
+static inline
+void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int cacheop)
+{
+ unsigned int aux_cmd;
+ int num_lines;
+ const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+ if (cacheop == OP_INV_IC) {
+ aux_cmd = ARC_REG_IC_IVIL;
+ } else {
+ /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+ aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+ }
+
+ /* Ensure we properly floor/ceil the non-line aligned/sized requests
+ * and have @paddr aligned to the cache line and an integral @num_lines.
+ * This however can be avoided for page-sized requests since:
+ * -@paddr will be cache-line aligned already (being page aligned)
+ * -@sz will be an integral multiple of the line size (being page sized).
+ */
+ if (!full_page_op) {
+ sz += paddr & ~CACHE_LINE_MASK;
+ paddr &= CACHE_LINE_MASK;
+ }
+
+ num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+ while (num_lines-- > 0) {
write_aux_reg(aux_cmd, paddr);
paddr += L1_CACHE_BYTES;
-#endif
}
}
+#if (CONFIG_ARC_MMU_VER < 3)
+#define __cache_line_loop __cache_line_loop_v2
+#elif (CONFIG_ARC_MMU_VER == 3)
+#define __cache_line_loop __cache_line_loop_v3
+#elif (CONFIG_ARC_MMU_VER > 3)
+#define __cache_line_loop __cache_line_loop_v4
+#endif
+
#ifdef CONFIG_ARC_HAS_DCACHE
/***************************************************************
* Machine specific helpers for Entire D-Cache or Per Line ops
*/
-static inline unsigned int __before_dc_op(const int op)
+static inline void __before_dc_op(const int op)
{
- unsigned int reg = reg;
-
if (op == OP_FLUSH_N_INV) {
/* Dcache provides 2 cmd: FLUSH or INV
* INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
* flush-n-inv is achieved by INV cmd but with IM=1
* So toggle INV sub-mode depending on op request and default
*/
- reg = read_aux_reg(ARC_REG_DC_CTRL);
- write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH)
- ;
+ const unsigned int ctl = ARC_REG_DC_CTRL;
+ write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH);
}
-
- return reg;
}
-static inline void __after_dc_op(const int op, unsigned int reg)
+static inline void __after_dc_op(const int op)
{
- if (op & OP_FLUSH) /* flush / flush-n-inv both wait */
- while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
+ if (op & OP_FLUSH) {
+ const unsigned int ctl = ARC_REG_DC_CTRL;
+ unsigned int reg;
- /* Switch back to default Invalidate mode */
- if (op == OP_FLUSH_N_INV)
- write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH);
+ /* flush / flush-n-inv both wait */
+ while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS)
+ ;
+
+ /* Switch back to default Invalidate mode */
+ if (op == OP_FLUSH_N_INV)
+ write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH);
+ }
}
/*
* Operation on Entire D-Cache
- * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
+ * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
* Note that constant propagation ensures all the checks are gone
* in generated code
*/
-static inline void __dc_entire_op(const int cacheop)
+static inline void __dc_entire_op(const int op)
{
- unsigned int ctrl_reg;
int aux;
- ctrl_reg = __before_dc_op(cacheop);
+ __before_dc_op(op);
- if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */
+ if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */
aux = ARC_REG_DC_IVDC;
else
aux = ARC_REG_DC_FLSH;
write_aux_reg(aux, 0x1);
- __after_dc_op(cacheop, ctrl_reg);
+ __after_dc_op(op);
}
/* For kernel mappings cache operation: index is same as paddr */
#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op)
/*
- * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
+ * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback)
*/
static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
- unsigned long sz, const int cacheop)
+ unsigned long sz, const int op)
{
unsigned long flags;
- unsigned int ctrl_reg;
local_irq_save(flags);
- ctrl_reg = __before_dc_op(cacheop);
+ __before_dc_op(op);
- __cache_line_loop(paddr, vaddr, sz, cacheop);
+ __cache_line_loop(paddr, vaddr, sz, op);
- __after_dc_op(cacheop, ctrl_reg);
+ __after_dc_op(op);
local_irq_restore(flags);
}
#else
-#define __dc_entire_op(cacheop)
-#define __dc_line_op(paddr, vaddr, sz, cacheop)
-#define __dc_line_op_k(paddr, sz, cacheop)
+#define __dc_entire_op(op)
+#define __dc_line_op(paddr, vaddr, sz, op)
+#define __dc_line_op_k(paddr, sz, op)
#endif /* CONFIG_ARC_HAS_DCACHE */
-
#ifdef CONFIG_ARC_HAS_ICACHE
-/*
- * I-Cache Aliasing in ARC700 VIPT caches
- *
- * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
- * The orig Cache Management Module "CDU" only required paddr to invalidate a
- * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
- * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
- * the exact same line.
- *
- * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
- * paddr alone could not be used to correctly index the cache.
- *
- * ------------------
- * MMU v1/v2 (Fixed Page Size 8k)
- * ------------------
- * The solution was to provide CDU with these additonal vaddr bits. These
- * would be bits [x:13], x would depend on cache-geometry, 13 comes from
- * standard page size of 8k.
- * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
- * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
- * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
- * represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU programming
- * model.
- *
- * 17:13 represented the max num of bits passable, actual bits needed were
- * fewer, based on the num-of-aliases possible.
- * -for 2 alias possibility, only bit 13 needed (32K cache)
- * -for 4 alias possibility, bits 14:13 needed (64K cache)
- *
- * ------------------
- * MMU v3
- * ------------------
- * This ver of MMU supports variable page sizes (1k-16k): although Linux will
- * only support 8k (default), 16k and 4k.
- * However from hardware perspective, smaller page sizes aggrevate aliasing
- * meaning more vaddr bits needed to disambiguate the cache-line-op ;
- * the existing scheme of piggybacking won't work for certain configurations.
- * Two new registers IC_PTAG and DC_PTAG inttoduced.
- * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
- */
-
-/***********************************************************
- * Machine specific helper for per line I-Cache invalidate.
- */
-
static inline void __ic_entire_inv(void)
{
write_aux_reg(ARC_REG_IC_IVIC, 1);
@@ -410,7 +422,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
unsigned long flags;
local_irq_save(flags);
- __cache_line_loop(paddr, vaddr, sz, OP_INV_IC);
+ (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC);
local_irq_restore(flags);
}
@@ -453,6 +465,53 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
#endif /* CONFIG_ARC_HAS_ICACHE */
+noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
+{
+#ifdef CONFIG_ISA_ARCV2
+ unsigned long flags;
+ unsigned int ctrl;
+
+ local_irq_save(flags);
+
+ /*
+ * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
+ * - b'000 (default) is Flush,
+ * - b'001 is Invalidate if CTRL.IM == 0
+ * - b'001 is Flush-n-Invalidate if CTRL.IM == 1
+ */
+ ctrl = read_aux_reg(ARC_REG_SLC_CTRL);
+
+ /* Don't rely on default value of IM bit */
+ if (!(op & OP_FLUSH)) /* i.e. OP_INV */
+ ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */
+ else
+ ctrl |= SLC_CTRL_IM;
+
+ if (op & OP_INV)
+ ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */
+ else
+ ctrl &= ~SLC_CTRL_RGN_OP_INV;
+
+ write_aux_reg(ARC_REG_SLC_CTRL, ctrl);
+
+ /*
+ * Lower bits are ignored, no need to clip
+ * END needs to be set up before START (the latter triggers the operation)
+ * END can't be the same as START, so add (l2_line_sz - 1) to sz
+ */
+ write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1));
+ write_aux_reg(ARC_REG_SLC_RGN_START, paddr);
+
+ while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+
+ local_irq_restore(flags);
+#endif
+}
+
+static inline int need_slc_flush(void)
+{
+ return is_isa_arcv2() && l2_line_sz;
+}
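
A quick stand-alone check of the region arithmetic in slc_op(); the 64-byte SLC line size and addresses below are assumptions for illustration, not values from hardware documentation:

#include <assert.h>

int main(void)
{
	unsigned long l2_line_sz = 64;		/* assumed SLC line size */
	unsigned long paddr = 0x80000040UL, sz = 0x10;

	/* END must strictly exceed START, hence the (l2_line_sz - 1) slack */
	unsigned long end = paddr + sz + l2_line_sz - 1;	/* 0x8000008f */

	assert(end > paddr);		/* holds even for sz == 0 */
	assert(end - paddr >= sz);	/* region covers the whole request */
	return 0;
}
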
/***********************************************************
* Exported APIs
@@ -493,7 +552,7 @@ void flush_dcache_page(struct page *page)
} else if (page_mapped(page)) {
/* kernel reading from page with U-mapping */
- void *paddr = page_address(page);
+ unsigned long paddr = (unsigned long)page_address(page);
unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
if (addr_not_cache_congruent(paddr, vaddr))
@@ -502,22 +561,30 @@ void flush_dcache_page(struct page *page)
}
EXPORT_SYMBOL(flush_dcache_page);
-
void dma_cache_wback_inv(unsigned long start, unsigned long sz)
{
__dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+
+ if (need_slc_flush())
+ slc_op(start, sz, OP_FLUSH_N_INV);
}
EXPORT_SYMBOL(dma_cache_wback_inv);
void dma_cache_inv(unsigned long start, unsigned long sz)
{
__dc_line_op_k(start, sz, OP_INV);
+
+ if (need_slc_flush())
+ slc_op(start, sz, OP_INV);
}
EXPORT_SYMBOL(dma_cache_inv);
void dma_cache_wback(unsigned long start, unsigned long sz)
{
__dc_line_op_k(start, sz, OP_FLUSH);
+
+ if (need_slc_flush())
+ slc_op(start, sz, OP_FLUSH);
}
EXPORT_SYMBOL(dma_cache_wback);
@@ -605,7 +672,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
* wrapper to clearout kernel or userspace mappings of a page
* For kernel mappings @vaddr == @paddr
*/
-void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr)
+void __flush_dcache_page(unsigned long paddr, unsigned long vaddr)
{
__dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
}
@@ -637,7 +704,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
u_vaddr &= PAGE_MASK;
- ___flush_dcache_page(paddr, u_vaddr);
+ __flush_dcache_page(paddr, u_vaddr);
if (vma->vm_flags & VM_EXEC)
__inv_icache_page(paddr, u_vaddr);
@@ -663,8 +730,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
void copy_user_highpage(struct page *to, struct page *from,
unsigned long u_vaddr, struct vm_area_struct *vma)
{
- void *kfrom = page_address(from);
- void *kto = page_address(to);
+ unsigned long kfrom = (unsigned long)page_address(from);
+ unsigned long kto = (unsigned long)page_address(to);
int clean_src_k_mappings = 0;
/*
@@ -680,7 +747,7 @@ void copy_user_highpage(struct page *to, struct page *from,
clean_src_k_mappings = 1;
}
- copy_page(kto, kfrom);
+ copy_page((void *)kto, (void *)kfrom);
/*
* Mark DST page K-mapping as dirty for a later finalization by
@@ -721,3 +788,56 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
flush_cache_all();
return 0;
}
+
+void arc_cache_init(void)
+{
+ unsigned int __maybe_unused cpu = smp_processor_id();
+ char str[256];
+
+ printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
+ struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+
+ if (!ic->ver)
+ panic("cache support enabled but non-existent cache\n");
+
+ if (ic->line_len != L1_CACHE_BYTES)
+ panic("ICache line [%d] != kernel Config [%d]",
+ ic->line_len, L1_CACHE_BYTES);
+
+ if (ic->ver != CONFIG_ARC_MMU_VER)
+ panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
+ ic->ver, CONFIG_ARC_MMU_VER);
+
+ /*
+ * In MMU v4 (HS38x) the aliasing icache config uses the IVIL/PTAG
+ * pair to provide vaddr/paddr respectively, just as in MMU v3
+ */
+ if (is_isa_arcv2() && ic->alias)
+ _cache_line_loop_ic_fn = __cache_line_loop_v3;
+ else
+ _cache_line_loop_ic_fn = __cache_line_loop;
+ }
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
+ struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+
+ if (!dc->ver)
+ panic("cache support enabled but non-existent cache\n");
+
+ if (dc->line_len != L1_CACHE_BYTES)
+ panic("DCache line [%d] != kernel Config [%d]",
+ dc->line_len, L1_CACHE_BYTES);
+
+ /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
+ if (is_isa_arcompact()) {
+ int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
+
+ if (dc->alias && !handled)
+ panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+ else if (!dc->alias && handled)
+ panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+ }
+ }
+}
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 12cc6485b218..74a637a1cfc4 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -14,8 +14,6 @@
* Cache bit off in the TLB entry.
*
* The default DMA address == Phy address which is 0x8000_0000 based.
- * A platform/device can make it zero based, by over-riding
- * plat_{dma,kernel}_addr_to_{kernel,dma}
*/
#include <linux/dma-mapping.h>
@@ -37,7 +35,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
return NULL;
/* This is bus address, platform dependent */
- *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+ *dma_handle = (dma_addr_t)paddr;
return paddr;
}
@@ -46,8 +44,7 @@ EXPORT_SYMBOL(dma_alloc_noncoherent);
void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle)
{
- free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
- size);
+ free_pages_exact((void *)dma_handle, size);
}
EXPORT_SYMBOL(dma_free_noncoherent);
@@ -67,7 +64,19 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
memset(kvaddr, 0, size);
/* This is bus address, platform dependent */
- *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+ *dma_handle = (dma_addr_t)paddr;
+
+ /*
+ * Evict any existing L1 and/or L2 lines for the backing page
+ * in case it was used earlier as a normal "cached" page.
+ * Yeah this bit us - STAR 9000898266
+ *
+ * Although the core does call flush_cache_vmap(), it gets kvaddr, hence
+ * can't be used to efficiently flush L1 and/or L2, which need paddr.
+ * Currently flush_cache_vmap() nukes the L1 cache completely; this
+ * will be optimized in a separate commit.
+ */
+ dma_cache_wback_inv((unsigned long)paddr, size);
return kvaddr;
}
@@ -78,8 +87,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
{
iounmap((void __force __iomem *)kvaddr);
- free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
- size);
+ free_pages_exact((void *)dma_handle, size);
}
EXPORT_SYMBOL(dma_free_coherent);
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 7f47d2a56f44..2c7ce8bb7475 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -113,6 +113,8 @@ static inline void __tlb_entry_erase(void)
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
+#if (CONFIG_ARC_MMU_VER < 4)
+
static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
{
unsigned int idx;
@@ -210,6 +212,28 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
+#else /* CONFIG_ARC_MMU_VER >= 4) */
+
+static void utlb_invalidate(void)
+{
+ /* No need since uTLB is always in sync with JTLB */
+}
+
+static void tlb_entry_erase(unsigned int vaddr_n_asid)
+{
+ write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT);
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
+}
+
+static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+{
+ write_aux_reg(ARC_REG_TLBPD0, pd0);
+ write_aux_reg(ARC_REG_TLBPD1, pd1);
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
+}
+
+#endif
+
/*
* Un-conditionally (without lookup) erase the entire MMU contents
*/
@@ -582,23 +606,42 @@ void read_decode_mmu_bcr(void)
#endif
} *mmu3;
+ struct bcr_mmu_4 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
+ n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
+#else
+ /* DTLB ITLB JES JE JA */
+ unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
+ pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
+#endif
+ } *mmu4;
+
tmp = read_aux_reg(ARC_REG_MMU_BCR);
mmu->ver = (tmp >> 24);
if (mmu->ver <= 2) {
mmu2 = (struct bcr_mmu_1_2 *)&tmp;
- mmu->pg_sz = PAGE_SIZE;
+ mmu->pg_sz_k = TO_KB(PAGE_SIZE);
mmu->sets = 1 << mmu2->sets;
mmu->ways = 1 << mmu2->ways;
mmu->u_dtlb = mmu2->u_dtlb;
mmu->u_itlb = mmu2->u_itlb;
- } else {
+ } else if (mmu->ver == 3) {
mmu3 = (struct bcr_mmu_3 *)&tmp;
- mmu->pg_sz = 512 << mmu3->pg_sz;
+ mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
mmu->sets = 1 << mmu3->sets;
mmu->ways = 1 << mmu3->ways;
mmu->u_dtlb = mmu3->u_dtlb;
mmu->u_itlb = mmu3->u_itlb;
+ } else {
+ mmu4 = (struct bcr_mmu_4 *)&tmp;
+ mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
+ mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
+ mmu->sets = 64 << mmu4->n_entry;
+ mmu->ways = mmu4->n_ways * 2;
+ mmu->u_dtlb = mmu4->u_dtlb * 4;
+ mmu->u_itlb = mmu4->u_itlb * 4;
}
mmu->num_tlb = mmu->sets * mmu->ways;
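
As a worked example of the MMUv4 field arithmetic above; the BCR field values are made up for illustration, not read from a real HS38x:

#include <stdio.h>

int main(void)
{
	/* hypothetical bcr_mmu_4 fields */
	unsigned int sz0 = 4, sz1 = 12, n_entry = 2, n_ways = 2;
	unsigned int u_dtlb = 2, u_itlb = 1;

	printf("page %uk, super page %uM, JTLB %u (%ux%u), uDTLB %u, uITLB %u\n",
	       1u << (sz0 - 1),			/* 8k normal page */
	       1u << (sz1 - 11),		/* 2M super page */
	       (64u << n_entry) * (n_ways * 2),	/* 1024 = 256 x 4 */
	       64u << n_entry,			/* 256 sets */
	       n_ways * 2,			/* 4 ways */
	       u_dtlb * 4, u_itlb * 4);		/* 8 and 4 micro-TLB entries */
	return 0;
}
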
@@ -608,10 +651,15 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
{
int n = 0;
struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
+ char super_pg[64] = "";
+
+ if (p_mmu->s_pg_sz_m)
+ scnprintf(super_pg, 64, "%dM Super Page%s, ",
+ p_mmu->s_pg_sz_m, " (not used)");
n += scnprintf(buf + n, len - n,
- "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
- p_mmu->ver, TO_KB(p_mmu->pg_sz),
+ "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
+ p_mmu->ver, p_mmu->pg_sz_k, super_pg,
p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
p_mmu->u_dtlb, p_mmu->u_itlb,
IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
@@ -639,7 +687,7 @@ void arc_mmu_init(void)
mmu->ver, CONFIG_ARC_MMU_VER);
}
- if (mmu->pg_sz != PAGE_SIZE)
+ if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
/* Enable the MMU */
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index d572f1c2c724..f6f4c3cb505d 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -35,8 +35,6 @@
* Rahul Trivedi, Amit Bhor: Codito Technologies 2004
*/
- .cpu A7
-
#include <linux/linkage.h>
#include <asm/entry.h>
#include <asm/mmu.h>
@@ -46,6 +44,7 @@
#include <asm/processor.h>
#include <asm/tlb-mmu1.h>
+#ifdef CONFIG_ISA_ARCOMPACT
;-----------------------------------------------------------------
; ARC700 Exception Handling doesn't auto-switch stack and it only provides
; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
@@ -123,6 +122,24 @@ ex_saved_reg1:
#endif
.endm
+#else /* ARCv2 */
+
+.macro TLBMISS_FREEUP_REGS
+ PUSH r0
+ PUSH r1
+ PUSH r2
+ PUSH r3
+.endm
+
+.macro TLBMISS_RESTORE_REGS
+ POP r3
+ POP r2
+ POP r1
+ POP r0
+.endm
+
+#endif
+
;============================================================================
; Troubleshooting Stuff
;============================================================================
@@ -241,6 +258,7 @@ ex_saved_reg1:
; Commit the TLB entry into MMU
.macro COMMIT_ENTRY_TO_MMU
+#if (CONFIG_ARC_MMU_VER < 4)
/* Get free TLB slot: Set = computed from vaddr, way = random */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
@@ -251,6 +269,10 @@ ex_saved_reg1:
#else
sr TLBWrite, [ARC_REG_TLBCOMMAND]
#endif
+
+#else
+ sr TLBInsertEntry, [ARC_REG_TLBCOMMAND]
+#endif
.endm
@@ -291,6 +313,7 @@ ENTRY(EV_TLBMissI)
CONV_PTE_TO_TLB
COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS
+EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation
rtie
END(EV_TLBMissI)
@@ -356,6 +379,7 @@ ENTRY(EV_TLBMissD)
COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS
+EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation
rtie
;-------- Common routine to call Linux Page Fault Handler -----------
@@ -366,19 +390,5 @@ do_slow_path_pf:
; Slow path TLB Miss handled as a regular ARC Exception
; (stack switching / save the complete reg-file).
- EXCEPTION_PROLOGUE
-
- ; ------- setup args for Linux Page fault Hanlder ---------
- mov_s r1, sp
- lr r0, [efa]
-
- ; We don't want exceptions to be disabled while the fault is handled.
- ; Now that we have saved the context we return from exception hence
- ; exceptions get re-enable
-
- FAKE_RET_FROM_EXCPN r9
-
- bl do_page_fault
- b ret_from_exception
-
+ b call_do_page_fault
END(EV_TLBMissD)
diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig
deleted file mode 100644
index 217593a70751..000000000000
--- a/arch/arc/plat-arcfpga/Kconfig
+++ /dev/null
@@ -1,33 +0,0 @@
-#
-# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-
-menuconfig ARC_PLAT_FPGA_LEGACY
- bool "\"Legacy\" ARC FPGA dev Boards"
- select ARC_HAS_COH_CACHES if SMP
- help
- Support for ARC development boards, provided by Synopsys.
- These are based on FPGA or ISS. e.g.
- - ARCAngel4
- - ML509
- - MetaWare ISS
-
-if ARC_PLAT_FPGA_LEGACY
-
-config ISS_SMP_EXTN
- bool "ARC SMP Extensions (ISS Models only)"
- default n
- depends on SMP
- help
- SMP Extensions to ARC700, in a "simulation only" Model, supported in
- ARC ISS (Instruction Set Simulator).
- The SMP extensions include:
- -IDU (Interrupt Distribution Unit)
- -XTL (To enable CPU start/stop/set-PC for another CPU)
- It doesn't provide coherent Caches and/or Atomic Ops (LLOCK/SCOND)
-
-endif
diff --git a/arch/arc/plat-arcfpga/include/plat/smp.h b/arch/arc/plat-arcfpga/include/plat/smp.h
deleted file mode 100644
index c09eb4cfc77c..000000000000
--- a/arch/arc/plat-arcfpga/include/plat/smp.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Rajeshwar Ranga: Interrupt Distribution Unit API's
- */
-
-#ifndef __PLAT_ARCFPGA_SMP_H
-#define __PLAT_ARCFPGA_SMP_H
-
-#ifdef CONFIG_SMP
-
-#include <linux/types.h>
-#include <asm/arcregs.h>
-
-#define ARC_AUX_IDU_REG_CMD 0x2000
-#define ARC_AUX_IDU_REG_PARAM 0x2001
-
-#define ARC_AUX_XTL_REG_CMD 0x2002
-#define ARC_AUX_XTL_REG_PARAM 0x2003
-
-#define ARC_REG_MP_BCR 0x2021
-
-#define ARC_XTL_CMD_WRITE_PC 0x04
-#define ARC_XTL_CMD_CLEAR_HALT 0x02
-
-/*
- * Build Configuration Register which identifies the sub-components
- */
-struct bcr_mp {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int mp_arch:16, pad:5, sdu:1, idu:1, scu:1, ver:8;
-#else
- unsigned int ver:8, scu:1, idu:1, sdu:1, pad:5, mp_arch:16;
-#endif
-};
-
-/* IDU supports 256 common interrupts */
-#define NR_IDU_IRQS 256
-
-/*
- * The Aux Regs layout is same bit-by-bit in both BE/LE modes.
- * However when casted as a bitfield encoded "C" struct, gcc treats it as
- * memory, generating different code for BE/LE, requiring strcture adj (see
- * include/asm/arcregs.h)
- *
- * However when manually "carving" the value for a Aux, no special handling
- * of BE is needed because of the property discribed above
- */
-#define IDU_SET_COMMAND(irq, cmd) \
-do { \
- uint32_t __val; \
- __val = (((irq & 0xFF) << 8) | (cmd & 0xFF)); \
- write_aux_reg(ARC_AUX_IDU_REG_CMD, __val); \
-} while (0)
-
-#define IDU_SET_PARAM(par) write_aux_reg(ARC_AUX_IDU_REG_PARAM, par)
-#define IDU_GET_PARAM() read_aux_reg(ARC_AUX_IDU_REG_PARAM)
-
-/* IDU Commands */
-#define IDU_DISABLE 0x00
-#define IDU_ENABLE 0x01
-#define IDU_IRQ_CLEAR 0x02
-#define IDU_IRQ_ASSERT 0x03
-#define IDU_IRQ_WMODE 0x04
-#define IDU_IRQ_STATUS 0x05
-#define IDU_IRQ_ACK 0x06
-#define IDU_IRQ_PEND 0x07
-#define IDU_IRQ_RMODE 0x08
-#define IDU_IRQ_WBITMASK 0x09
-#define IDU_IRQ_RBITMASK 0x0A
-
-#define idu_enable() IDU_SET_COMMAND(0, IDU_ENABLE)
-#define idu_disable() IDU_SET_COMMAND(0, IDU_DISABLE)
-
-#define idu_irq_assert(irq) IDU_SET_COMMAND((irq), IDU_IRQ_ASSERT)
-#define idu_irq_clear(irq) IDU_SET_COMMAND((irq), IDU_IRQ_CLEAR)
-
-/* IDU Interrupt Mode - Destination Encoding */
-#define IDU_IRQ_MOD_DISABLE 0x00
-#define IDU_IRQ_MOD_ROUND_RECP 0x01
-#define IDU_IRQ_MOD_TCPU_FIRSTRECP 0x02
-#define IDU_IRQ_MOD_TCPU_ALLRECP 0x03
-
-/* IDU Interrupt Mode - Triggering Mode */
-#define IDU_IRQ_MODE_LEVEL_TRIG 0x00
-#define IDU_IRQ_MODE_PULSE_TRIG 0x01
-
-#define IDU_IRQ_MODE_PARAM(dest_mode, trig_mode) \
- (((trig_mode & 0x01) << 15) | (dest_mode & 0xFF))
-
-struct idu_irq_config {
- uint8_t irq;
- uint8_t dest_mode;
- uint8_t trig_mode;
-};
-
-struct idu_irq_status {
- uint8_t irq;
- bool enabled;
- bool status;
- bool ack;
- bool pend;
- uint8_t next_rr;
-};
-
-extern void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask);
-extern void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode);
-
-extern void iss_model_init_smp(unsigned int cpu);
-extern void iss_model_init_early_smp(void);
-
-#endif /* CONFIG_SMP */
-
-#endif
diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c
deleted file mode 100644
index 64797ba3bbe3..000000000000
--- a/arch/arc/plat-arcfpga/smp.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * ARC700 Simulation-only Extensions for SMP
- *
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Vineet Gupta - 2012 : split off arch common and plat specific SMP
- * Rajeshwar Ranga - 2007 : Interrupt Distribution Unit API's
- */
-
-#include <linux/smp.h>
-#include <linux/irq.h>
-#include <plat/smp.h>
-
-#define IDU_INTERRUPT_0 16
-
-static char smp_cpuinfo_buf[128];
-
-/*
- *-------------------------------------------------------------------
- * Platform specific callbacks expected by arch SMP code
- *-------------------------------------------------------------------
- */
-
-/*
- * Master kick starting another CPU
- */
-static void iss_model_smp_wakeup_cpu(int cpu, unsigned long pc)
-{
- /* setup the start PC */
- write_aux_reg(ARC_AUX_XTL_REG_PARAM, pc);
-
- /* Trigger WRITE_PC cmd for this cpu */
- write_aux_reg(ARC_AUX_XTL_REG_CMD,
- (ARC_XTL_CMD_WRITE_PC | (cpu << 8)));
-
- /* Take the cpu out of Halt */
- write_aux_reg(ARC_AUX_XTL_REG_CMD,
- (ARC_XTL_CMD_CLEAR_HALT | (cpu << 8)));
-
-}
-
-static inline int get_hw_config_num_irq(void)
-{
- uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR);
-
- switch (val & 0x03) {
- case 0:
- return 16;
- case 1:
- return 32;
- case 2:
- return 8;
- default:
- return 0;
- }
-
- return 0;
-}
-
-/*
- * Any SMP specific init any CPU does when it comes up.
- * Here we setup the CPU to enable Inter-Processor-Interrupts
- * Called for each CPU
- * -Master : init_IRQ()
- * -Other(s) : start_kernel_secondary()
- */
-void iss_model_init_smp(unsigned int cpu)
-{
- /* Check if CPU is configured for more than 16 interrupts */
- if (NR_IRQS <= 16 || get_hw_config_num_irq() <= 16)
- panic("[arcfpga] IRQ system can't support IDU IPI\n");
-
- idu_disable();
-
- /****************************************************************
- * IDU provides a set of Common IRQs, each of which can be dynamically
- * attached to (1|many|all) CPUs.
- * The Common IRQs [0-15] are mapped as CPU pvt [16-31]
- *
- * Here we use a simple 1:1 mapping:
- * A CPU 'x' is wired to Common IRQ 'x'.
- * So an IDU ASSERT on IRQ 'x' will trigger Interupt on CPU 'x', which
- * makes up for our simple IPI plumbing.
- *
- * TBD: Have a dedicated multicast IRQ for sending IPIs to all CPUs
- * w/o having to do one-at-a-time
- ******************************************************************/
-
- /*
- * Claim an IRQ which would trigger IPI on this CPU.
- * In IDU parlance it involves setting up a cpu bitmask for the IRQ
- * The bitmap here contains only 1 CPU (self).
- */
- idu_irq_set_tgtcpu(cpu, 0x1 << cpu);
-
- /* Set the IRQ destination to use the bitmask above */
- idu_irq_set_mode(cpu, 7, /* XXX: IDU_IRQ_MOD_TCPU_ALLRECP: ISS bug */
- IDU_IRQ_MODE_PULSE_TRIG);
-
- idu_enable();
-
- /* Attach the arch-common IPI ISR to our IDU IRQ */
- smp_ipi_irq_setup(cpu, IDU_INTERRUPT_0 + cpu);
-}
-
-static void iss_model_ipi_send(int cpu)
-{
- idu_irq_assert(cpu);
-}
-
-static void iss_model_ipi_clear(int irq)
-{
- idu_irq_clear(IDU_INTERRUPT_0 + smp_processor_id());
-}
-
-void iss_model_init_early_smp(void)
-{
-#define IS_AVAIL1(var, str) ((var) ? str : "")
-
- struct bcr_mp mp;
-
- READ_BCR(ARC_REG_MP_BCR, mp);
-
- sprintf(smp_cpuinfo_buf, "Extn [ISS-SMP]: v%d, arch(%d) %s %s %s\n",
- mp.ver, mp.mp_arch, IS_AVAIL1(mp.scu, "SCU"),
- IS_AVAIL1(mp.idu, "IDU"), IS_AVAIL1(mp.sdu, "SDU"));
-
- plat_smp_ops.info = smp_cpuinfo_buf;
-
- plat_smp_ops.cpu_kick = iss_model_smp_wakeup_cpu;
- plat_smp_ops.ipi_send = iss_model_ipi_send;
- plat_smp_ops.ipi_clear = iss_model_ipi_clear;
-}
-
-/*
- *-------------------------------------------------------------------
- * Low level Platform IPI Providers
- *-------------------------------------------------------------------
- */
-
-/* Set the Mode for the Common IRQ */
-void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode)
-{
- uint32_t par = IDU_IRQ_MODE_PARAM(dest_mode, trig_mode);
-
- IDU_SET_PARAM(par);
- IDU_SET_COMMAND(irq, IDU_IRQ_WMODE);
-}
-
-/* Set the target cpu Bitmask for Common IRQ */
-void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask)
-{
- IDU_SET_PARAM(mask);
- IDU_SET_COMMAND(irq, IDU_IRQ_WBITMASK);
-}
-
-/* Get the Interrupt Acknowledged status for IRQ (as CPU Bitmask) */
-bool idu_irq_get_ack(uint8_t irq)
-{
- uint32_t val;
-
- IDU_SET_COMMAND(irq, IDU_IRQ_ACK);
- val = IDU_GET_PARAM();
-
- return val & (1 << irq);
-}
-
-/*
- * Get the Interrupt Pending status for IRQ (as CPU Bitmask)
- * -Pending means CPU has not yet noticed the IRQ (e.g. disabled)
- * -After Interrupt has been taken, the IPI expcitily needs to be
- * cleared, to be acknowledged.
- */
-bool idu_irq_get_pend(uint8_t irq)
-{
- uint32_t val;
-
- IDU_SET_COMMAND(irq, IDU_IRQ_PEND);
- val = IDU_GET_PARAM();
-
- return val & (1 << irq);
-}
diff --git a/arch/arc/plat-axs10x/Kconfig b/arch/arc/plat-axs10x/Kconfig
new file mode 100644
index 000000000000..d475f9d4847c
--- /dev/null
+++ b/arch/arc/plat-axs10x/Kconfig
@@ -0,0 +1,46 @@
+#
+# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+menuconfig ARC_PLAT_AXS10X
+ bool "Synopsys ARC AXS10x Software Development Platforms"
+ select DW_APB_ICTL
+ select GPIO_DWAPB
+ select OF_GPIO
+ select GENERIC_IRQ_CHIP
+ select ARCH_REQUIRE_GPIOLIB
+ help
+ Support for the ARC AXS10x Software Development Platforms.
+
+ The AXS10x Platforms consist of a mainboard with peripherals,
+ on which several daughter cards can be placed. The daughter cards
+ typically contain a CPU and memory.
+
+if ARC_PLAT_AXS10X
+
+config AXS101
+ depends on ISA_ARCOMPACT
+ bool "AXS101 with AXC001 CPU Card (ARC 770D/EM6/AS221)"
+ help
+ This adds support for the 770D/EM6/AS221 CPU Card. Only the ARC
+ 770D is supported in Linux.
+
+ The AXS101 Platform consists of an AXS10x mainboard with
+ this daughtercard. Please use the axs101.dts device tree
+ with this configuration.
+
+config AXS103
+ bool "AXS103 with AXC003 CPU Card (ARC HS38x)"
+ depends on ISA_ARCV2
+ help
+ This adds support for the HS38x CPU Card.
+
+ The AXS103 Platform consists of an AXS10x mainboard with
+ this daughtercard. Please use the axs103.dts device tree
+ with this configuration.
+
+endif
diff --git a/arch/arc/plat-axs10x/Makefile b/arch/arc/plat-axs10x/Makefile
new file mode 100644
index 000000000000..d4748f27f86e
--- /dev/null
+++ b/arch/arc/plat-axs10x/Makefile
@@ -0,0 +1,9 @@
+#
+# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-$(CONFIG_ARC_PLAT_AXS10X) += axs10x.o
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
new file mode 100644
index 000000000000..99f7da513a48
--- /dev/null
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -0,0 +1,484 @@
+/*
+ * AXS101/AXS103 Software Development Platform
+ *
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/of_platform.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/clk.h>
+#include <asm/io.h>
+#include <asm/mach_desc.h>
+#include <asm/mcip.h>
+
+#define AXS_MB_CGU 0xE0010000
+#define AXS_MB_CREG 0xE0011000
+
+#define CREG_MB_IRQ_MUX (AXS_MB_CREG + 0x214)
+#define CREG_MB_SW_RESET (AXS_MB_CREG + 0x220)
+#define CREG_MB_VER (AXS_MB_CREG + 0x230)
+#define CREG_MB_CONFIG (AXS_MB_CREG + 0x234)
+
+#define AXC001_CREG 0xF0001000
+#define AXC001_GPIO_INTC 0xF0003000
+
+static void __init axs10x_enable_gpio_intc_wire(void)
+{
+ /*
+ * Peripherals on CPU Card and Mother Board are wired to cpu intc via
+ * intermediate DW APB GPIO blocks (mainly for debouncing)
+ *
+ * ---------------------
+ * | snps,arc700-intc |
+ * ---------------------
+ * | #7 | #15
+ * ------------------- -------------------
+ * | snps,dw-apb-gpio | | snps,dw-apb-gpio |
+ * ------------------- -------------------
+ * | |
+ * | [ Debug UART on cpu card ]
+ * |
+ * ------------------------
+ * | snps,dw-apb-intc (MB)|
+ * ------------------------
+ * | | | |
+ * [eth] [uart] [... other perip on Main Board]
+ *
+ * The current implementation of the "irq-dw-apb-ictl" driver doesn't work
+ * well with stacked INTCs. In particular, a problem arises if its master
+ * INTC is not yet instantiated. See discussion here -
+ * https://lkml.org/lkml/2015/3/4/755
+ *
+ * So set up the first gpio block as a passive pass-through and hide it from
+ * the DT hardware topology - connect the MB intc directly to the cpu intc.
+ * The GPIO "wire" nevertheless needs to be initialized (here).
+ *
+ * One side benefit is that peripheral interrupt handling avoids one nested
+ * intc ISR hop.
+ */
+#define GPIO_INTEN (AXC001_GPIO_INTC + 0x30)
+#define GPIO_INTMASK (AXC001_GPIO_INTC + 0x34)
+#define GPIO_INTTYPE_LEVEL (AXC001_GPIO_INTC + 0x38)
+#define GPIO_INT_POLARITY (AXC001_GPIO_INTC + 0x3c)
+#define MB_TO_GPIO_IRQ 12
+
+ iowrite32(~(1 << MB_TO_GPIO_IRQ), (void __iomem *) GPIO_INTMASK);
+ iowrite32(0, (void __iomem *) GPIO_INTTYPE_LEVEL);
+ iowrite32(~0, (void __iomem *) GPIO_INT_POLARITY);
+ iowrite32(1 << MB_TO_GPIO_IRQ, (void __iomem *) GPIO_INTEN);
+}
+
+static inline void __init
+write_cgu_reg(uint32_t value, void __iomem *reg, void __iomem *lock_reg)
+{
+ unsigned int loops = 128 * 1024, ctr;
+
+ iowrite32(value, reg);
+
+ ctr = loops;
+ while (((ioread32(lock_reg) & 1) == 1) && ctr--) /* wait for unlock */
+ cpu_relax();
+
+ ctr = loops;
+ while (((ioread32(lock_reg) & 1) == 0) && ctr--) /* wait for re-lock */
+ cpu_relax();
+}
+
+static void __init axs10x_print_board_ver(unsigned int creg, const char *str)
+{
+ union ver {
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:11, y:12, m:4, d:5;
+#else
+ unsigned int d:5, m:4, y:12, pad:11;
+#endif
+ };
+ unsigned int val;
+ } board;
+
+ board.val = ioread32((void __iomem *)creg);
+ pr_info("AXS: %s FPGA Date: %u-%u-%u\n", str, board.d, board.m,
+ board.y);
+}
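
The board-version word packs the FPGA build date into bitfields; a stand-alone decode matching the little-endian layout of the union above (the sample value is invented):

#include <stdio.h>

int main(void)
{
	/* d:5 m:4 y:12 from the LSB, per the non-big-endian branch above */
	unsigned int val = (2015u << 9) | (6u << 5) | 18u;	/* 18-6-2015 */

	printf("FPGA Date: %u-%u-%u\n",
	       val & 0x1f,		/* day   */
	       (val >> 5) & 0xf,	/* month */
	       (val >> 9) & 0xfff);	/* year  */
	return 0;
}
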
+
+static void __init axs10x_early_init(void)
+{
+ int mb_rev;
+ char mb[32];
+
+ /* Determine motherboard version */
+ if (ioread32((void __iomem *) CREG_MB_CONFIG) & (1 << 28))
+ mb_rev = 3; /* HT-3 (rev3.0) */
+ else
+ mb_rev = 2; /* HT-2 (rev2.0) */
+
+ axs10x_enable_gpio_intc_wire();
+
+ scnprintf(mb, 32, "MainBoard v%d", mb_rev);
+ axs10x_print_board_ver(CREG_MB_VER, mb);
+}
+
+#ifdef CONFIG_AXS101
+
+#define CREG_CPU_ADDR_770 (AXC001_CREG + 0x20)
+#define CREG_CPU_ADDR_TUNN (AXC001_CREG + 0x60)
+#define CREG_CPU_ADDR_770_UPD (AXC001_CREG + 0x34)
+#define CREG_CPU_ADDR_TUNN_UPD (AXC001_CREG + 0x74)
+
+#define CREG_CPU_ARC770_IRQ_MUX (AXC001_CREG + 0x114)
+#define CREG_CPU_GPIO_UART_MUX (AXC001_CREG + 0x120)
+
+/*
+ * Set up the System Memory Map for the ARC cpu / peripheral controllers
+ *
+ * Each AXI master has a 4GB memory map specified as 16 apertures of 256MB, each
+ * of which maps to a corresponding 256MB aperture in Target slave memory map.
+ *
+ * e.g. ARC cpu AXI Master's aperture 8 (0x8000_0000) is mapped to aperture 0
+ * (0x0000_0000) of DDR Port 0 (slave #1)
+ *
+ * Access from the cpu to MB controllers such as GMAC is set up using the AXI Tunnel,
+ * which has master/slaves on both ends.
+ * e.g. aperture 14 (0xE000_0000) of ARC cpu is mapped to aperture 14
+ * (0xE000_0000) of CPU Card AXI Tunnel slave (slave #3) which is mapped to
+ * MB AXI Tunnel Master, which also has a mem map setup
+ *
+ * In the reverse direction, the mem map of MB AXI Masters (e.g. GMAC) is set up
+ * to map to MB AXI Tunnel slave which connects to CPU Card AXI Tunnel Master
+ */
+struct aperture {
+ unsigned int slave_sel:4, slave_off:4, pad:24;
+};
+
+/* CPU Card target slaves */
+#define AXC001_SLV_NONE 0
+#define AXC001_SLV_DDR_PORT0 1
+#define AXC001_SLV_SRAM 2
+#define AXC001_SLV_AXI_TUNNEL 3
+#define AXC001_SLV_AXI2APB 6
+#define AXC001_SLV_DDR_PORT1 7
+
+/* MB AXI Target slaves */
+#define AXS_MB_SLV_NONE 0
+#define AXS_MB_SLV_AXI_TUNNEL_CPU 1
+#define AXS_MB_SLV_AXI_TUNNEL_HAPS 2
+#define AXS_MB_SLV_SRAM 3
+#define AXS_MB_SLV_CONTROL 4
+
+/* MB AXI masters */
+#define AXS_MB_MST_TUNNEL_CPU 0
+#define AXS_MB_MST_USB_OHCI 10
+
+/*
+ * memmap for ARC core on CPU Card
+ */
+static const struct aperture axc001_memmap[16] = {
+ {AXC001_SLV_AXI_TUNNEL, 0x0},
+ {AXC001_SLV_AXI_TUNNEL, 0x1},
+ {AXC001_SLV_SRAM, 0x0}, /* 0x2000_0000: Local SRAM */
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_DDR_PORT0, 0x0}, /* 0x8000_0000: DDR 0..256M */
+ {AXC001_SLV_DDR_PORT0, 0x1}, /* 0x9000_0000: DDR 256..512M */
+ {AXC001_SLV_DDR_PORT0, 0x2},
+ {AXC001_SLV_DDR_PORT0, 0x3},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_AXI_TUNNEL, 0xD},
+ {AXC001_SLV_AXI_TUNNEL, 0xE}, /* MB: CREG, CGU... */
+ {AXC001_SLV_AXI2APB, 0x0}, /* CPU Card local CREG, CGU... */
+};
+
+/*
+ * memmap for CPU Card AXI Tunnel Master (for access by MB controllers)
+ * GMAC (MB) -> MB AXI Tunnel slave -> CPU Card AXI Tunnel Master -> DDR
+ */
+static const struct aperture axc001_axi_tunnel_memmap[16] = {
+ {AXC001_SLV_AXI_TUNNEL, 0x0},
+ {AXC001_SLV_AXI_TUNNEL, 0x1},
+ {AXC001_SLV_SRAM, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_DDR_PORT1, 0x0},
+ {AXC001_SLV_DDR_PORT1, 0x1},
+ {AXC001_SLV_DDR_PORT1, 0x2},
+ {AXC001_SLV_DDR_PORT1, 0x3},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_AXI_TUNNEL, 0xD},
+ {AXC001_SLV_AXI_TUNNEL, 0xE},
+ {AXC001_SLV_AXI2APB, 0x0},
+};
+
+/*
+ * memmap for MB AXI Masters
+ * Same mem map for all perip controllers as well as MB AXI Tunnel Master
+ */
+static const struct aperture axs_mb_memmap[16] = {
+ {AXS_MB_SLV_SRAM, 0x0},
+ {AXS_MB_SLV_SRAM, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0x8}, /* DDR on CPU Card */
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0x9}, /* DDR on CPU Card */
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xA},
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xB},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_AXI_TUNNEL_HAPS, 0xD},
+ {AXS_MB_SLV_CONTROL, 0x0}, /* MB Local CREG, CGU... */
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xF},
+};
+
+static noinline void __init
+axs101_set_memmap(void __iomem *base, const struct aperture map[16])
+{
+ unsigned int slave_select, slave_offset;
+ int i;
+
+ slave_select = slave_offset = 0;
+ for (i = 0; i < 8; i++) {
+ slave_select |= map[i].slave_sel << (i << 2);
+ slave_offset |= map[i].slave_off << (i << 2);
+ }
+
+ iowrite32(slave_select, base + 0x0); /* SLV0 */
+ iowrite32(slave_offset, base + 0x8); /* OFFSET0 */
+
+ slave_select = slave_offset = 0;
+ for (i = 0; i < 8; i++) {
+ slave_select |= map[i+8].slave_sel << (i << 2);
+ slave_offset |= map[i+8].slave_off << (i << 2);
+ }
+
+ iowrite32(slave_select, base + 0x4); /* SLV1 */
+ iowrite32(slave_offset, base + 0xC); /* OFFSET1 */
+}
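
To see what axs101_set_memmap() actually programs for the axc001_memmap table above, here is a host-side re-computation; the expected register values are derived by hand from the table, not taken from AXS documentation:

#include <stdio.h>

int main(void)
{
	/* slave_sel of axc001_memmap apertures 0..15, copied from the table */
	unsigned int sel[16] = { 3, 3, 2, 0, 0, 0, 0, 0,
				 1, 1, 1, 1, 0, 3, 3, 6 };
	unsigned int slv0 = 0, slv1 = 0;
	int i;

	/* aperture i's 4-bit slave select lands in nibble i, as in the loop */
	for (i = 0; i < 8; i++) {
		slv0 |= sel[i] << (i << 2);
		slv1 |= sel[i + 8] << (i << 2);
	}

	printf("SLV0=%08x SLV1=%08x\n", slv0, slv1);
	return 0;	/* prints SLV0=00000233 SLV1=63301111 */
}
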
+
+static void __init axs101_early_init(void)
+{
+ int i;
+
+ /* ARC 770D memory view */
+ axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_770, axc001_memmap);
+ iowrite32(1, (void __iomem *) CREG_CPU_ADDR_770_UPD);
+
+ /* AXI tunnel memory map (incoming traffic from MB into CPU Card) */
+ axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_TUNN,
+ axc001_axi_tunnel_memmap);
+ iowrite32(1, (void __iomem *) CREG_CPU_ADDR_TUNN_UPD);
+
+ /* MB peripherals memory map */
+ for (i = AXS_MB_MST_TUNNEL_CPU; i <= AXS_MB_MST_USB_OHCI; i++)
+ axs101_set_memmap((void __iomem *) AXS_MB_CREG + (i << 4),
+ axs_mb_memmap);
+
+ iowrite32(0x3ff, (void __iomem *) AXS_MB_CREG + 0x100); /* Update */
+
+ /* GPIO pins 18 and 19 are used as UART rx and tx, respectively. */
+ iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX);
+
+ /* Set up the MB interrupt system: mux interrupts to GPIO7 */
+ iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX);
+
+ /* reset ethernet and ULPI interfaces */
+ iowrite32(0x18, (void __iomem *) CREG_MB_SW_RESET);
+
+ /* map GPIO 14:10 to ARC 9:5 (IRQ mux change for MB v2 onwards) */
+ iowrite32(0x52, (void __iomem *) CREG_CPU_ARC770_IRQ_MUX);
+
+ axs10x_early_init();
+}
+
+#endif /* CONFIG_AXS101 */
+
+#ifdef CONFIG_AXS103
+
+#define AXC003_CGU 0xF0000000
+#define AXC003_CREG 0xF0001000
+#define AXC003_MST_AXI_TUNNEL 0
+#define AXC003_MST_HS38 1
+
+#define CREG_CPU_AXI_M0_IRQ_MUX (AXC003_CREG + 0x440)
+#define CREG_CPU_GPIO_UART_MUX (AXC003_CREG + 0x480)
+#define CREG_CPU_TUN_IO_CTRL (AXC003_CREG + 0x494)
+
+
+union pll_reg {
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:17, noupd:1, bypass:1, edge:1, high:6, low:6;
+#else
+ unsigned int low:6, high:6, edge:1, bypass:1, noupd:1, pad:17;
+#endif
+ };
+ unsigned int val;
+};
+
+static unsigned int __init axs103_get_freq(void)
+{
+ union pll_reg idiv, fbdiv, odiv;
+ unsigned int f = 33333333;
+
+ idiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 0);
+ fbdiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 4);
+ odiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 8);
+
+ if (idiv.bypass != 1)
+ f = f / (idiv.low + idiv.high);
+
+ if (fbdiv.bypass != 1)
+ f = f * (fbdiv.low + fbdiv.high);
+
+ if (odiv.bypass != 1)
+ f = f / (odiv.low + odiv.high);
+
+ f = (f + 500000) / 1000000; /* Rounding */
+ return f;
+}
+
+static inline unsigned int __init encode_div(unsigned int id, int upd)
+{
+ union pll_reg div;
+
+ div.val = 0;
+
+ div.noupd = !upd;
+ div.bypass = id == 1 ? 1 : 0;
+ div.edge = (id%2 == 0) ? 0 : 1; /* 0 = rising */
+ div.low = (id%2 == 0) ? id >> 1 : (id >> 1)+1;
+ div.high = id >> 1;
+
+ return div.val;
+}
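
A stand-alone sketch of the resulting PLL arithmetic; it assumes the 33333333 Hz reference used by axs103_get_freq(), and relies on the fact that for any non-bypass id, low + high in encode_div() sums back to id, so the effective divisor is simply id:

#include <stdio.h>

static unsigned int apply_div(unsigned int f, unsigned int id, int is_fb)
{
	if (id == 1)			/* id 1 encodes bypass, per encode_div() */
		return f;
	return is_fb ? f * id : f / id;	/* fbdiv multiplies, i/o divs divide */
}

int main(void)
{
	unsigned int f = 33333333;	/* reference clock (assumption) */

	/* mirrors axs103_set_freq(1, 30, 20), the 50 MHz case above */
	f = apply_div(f, 1, 0);		/* idiv:  bypass */
	f = apply_div(f, 30, 1);	/* fbdiv: x30    */
	f = apply_div(f, 20, 0);	/* odiv:  /20    */

	printf("%u MHz\n", (f + 500000) / 1000000);	/* prints 50 MHz */
	return 0;
}
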
+
+noinline static void __init
+axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od)
+{
+ write_cgu_reg(encode_div(id, 0),
+ (void __iomem *)AXC003_CGU + 0x80 + 0,
+ (void __iomem *)AXC003_CGU + 0x110);
+
+ write_cgu_reg(encode_div(fd, 0),
+ (void __iomem *)AXC003_CGU + 0x80 + 4,
+ (void __iomem *)AXC003_CGU + 0x110);
+
+ write_cgu_reg(encode_div(od, 1),
+ (void __iomem *)AXC003_CGU + 0x80 + 8,
+ (void __iomem *)AXC003_CGU + 0x110);
+}
+
+static void __init axs103_early_init(void)
+{
+ switch (arc_get_core_freq()/1000000) {
+ case 33:
+ axs103_set_freq(1, 1, 1);
+ break;
+ case 50:
+ axs103_set_freq(1, 30, 20);
+ break;
+ case 75:
+ axs103_set_freq(2, 45, 10);
+ break;
+ case 90:
+ axs103_set_freq(2, 54, 10);
+ break;
+ case 100:
+ axs103_set_freq(1, 30, 10);
+ break;
+ case 125:
+ axs103_set_freq(2, 45, 6);
+ break;
+ default:
+ /*
+ * In this case, the core frequency derived from the
+ * DT "clock-frequency" might not match the board value.
+ * Hence update it to match the board value.
+ */
+ arc_set_core_freq(axs103_get_freq() * 1000000);
+ break;
+ }
+
+ pr_info("Freq is %dMHz\n", axs103_get_freq());
+
+ /* Memory maps are already configured in the pre-bootloader */
+
+ /* set GPIO mux to UART */
+ iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX);
+
+ iowrite32((0x00100000U | 0x000C0000U | 0x00003322U),
+ (void __iomem *) CREG_CPU_TUN_IO_CTRL);
+
+ /* Set up the AXS_MB interrupt system. */
+ iowrite32(12, (void __iomem *) (CREG_CPU_AXI_M0_IRQ_MUX
+ + (AXC003_MST_HS38 << 2)));
+
+ /* connect ICTL - Main Board with GPIO line */
+ iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX);
+
+ axs10x_print_board_ver(AXC003_CREG + 4088, "AXC003 CPU Card");
+
+ axs10x_early_init();
+
+#ifdef CONFIG_ARC_MCIP
+ /* No Hardware init, but filling the smp ops callbacks */
+ mcip_init_early_smp();
+#endif
+}
+#endif
+
+#ifdef CONFIG_AXS101
+
+static const char *axs101_compat[] __initconst = {
+ "snps,axs101",
+ NULL,
+};
+
+MACHINE_START(AXS101, "axs101")
+ .dt_compat = axs101_compat,
+ .init_early = axs101_early_init,
+MACHINE_END
+
+#endif /* CONFIG_AXS101 */
+
+#ifdef CONFIG_AXS103
+
+static const char *axs103_compat[] __initconst = {
+ "snps,axs103",
+ NULL,
+};
+
+MACHINE_START(AXS103, "axs103")
+ .dt_compat = axs103_compat,
+ .init_early = axs103_early_init,
+#ifdef CONFIG_ARC_MCIP
+ .init_smp = mcip_init_smp,
+#endif
+MACHINE_END
+
+/*
+ * For the VDK OS-kit, to get the offset to pid and command fields
+ */
+char coware_swa_pid_offset[TASK_PID];
+char coware_swa_comm_offset[TASK_COMM];
+
+#endif /* CONFIG_AXS103 */
diff --git a/arch/arc/plat-sim/Kconfig b/arch/arc/plat-sim/Kconfig
new file mode 100644
index 000000000000..18e39fcc488a
--- /dev/null
+++ b/arch/arc/plat-sim/Kconfig
@@ -0,0 +1,14 @@
+#
+# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+menuconfig ARC_PLAT_SIM
+ bool "ARC nSIM based simulation virtual platforms"
+ select ARC_HAS_COH_CACHES if SMP
+ help
+ Support for nSIM based ARC simulation platforms.
+ This includes both the standalone nSIM (UART only) and the SystemC OSCI VP.
diff --git a/arch/arc/plat-arcfpga/Makefile b/arch/arc/plat-sim/Makefile
index 66fd0ecd68b3..00b1a958cec7 100644
--- a/arch/arc/plat-arcfpga/Makefile
+++ b/arch/arc/plat-sim/Makefile
@@ -6,7 +6,4 @@
# published by the Free Software Foundation.
#
-KBUILD_CFLAGS += -Iarch/arc/plat-arcfpga/include
-
obj-y := platform.o
-obj-$(CONFIG_ISS_SMP_EXTN) += smp.o
diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-sim/platform.c
index afc88254acc1..d9e35b4a2f08 100644
--- a/arch/arc/plat-arcfpga/platform.c
+++ b/arch/arc/plat-sim/platform.c
@@ -1,5 +1,5 @@
/*
- * ARC FPGA Platform support code
+ * ARC simulation Platform support code
*
* Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
*
@@ -10,7 +10,7 @@
#include <linux/init.h>
#include <asm/mach_desc.h>
-#include <plat/smp.h>
+#include <asm/mcip.h>
/*----------------------- Machine Descriptions ------------------------------
*
@@ -20,26 +20,18 @@
* callback set, by matching the DT compatible name.
*/
-static const char *legacy_fpga_compat[] __initconst = {
- "snps,arc-angel4",
- "snps,arc-ml509",
- NULL,
-};
-
-MACHINE_START(LEGACY_FPGA, "legacy_fpga")
- .dt_compat = legacy_fpga_compat,
-#ifdef CONFIG_ISS_SMP_EXTN
- .init_early = iss_model_init_early_smp,
- .init_smp = iss_model_init_smp,
-#endif
-MACHINE_END
-
static const char *simulation_compat[] __initconst = {
"snps,nsim",
+ "snps,nsim_hs",
"snps,nsimosci",
+ "snps,nsimosci_hs",
NULL,
};
MACHINE_START(SIMULATION, "simulation")
.dt_compat = simulation_compat,
+#ifdef CONFIG_ARC_MCIP
+ .init_early = mcip_init_early_smp,
+ .init_smp = mcip_init_smp,
+#endif
MACHINE_END
diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi
index 7f0252c580e4..a718866ba52d 100644
--- a/arch/arm/boot/dts/armada-370-xp.dtsi
+++ b/arch/arm/boot/dts/armada-370-xp.dtsi
@@ -268,7 +268,6 @@
};
eth0: ethernet@70000 {
- compatible = "marvell,armada-370-neta";
reg = <0x70000 0x4000>;
interrupts = <8>;
clocks = <&gateclk 4>;
@@ -284,7 +283,6 @@
};
eth1: ethernet@74000 {
- compatible = "marvell,armada-370-neta";
reg = <0x74000 0x4000>;
interrupts = <10>;
clocks = <&gateclk 3>;
diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi
index 3f036bd635f4..53a1a5abe147 100644
--- a/arch/arm/boot/dts/armada-370.dtsi
+++ b/arch/arm/boot/dts/armada-370.dtsi
@@ -311,6 +311,14 @@
dmacap,memset;
};
};
+
+ ethernet@70000 {
+ compatible = "marvell,armada-370-neta";
+ };
+
+ ethernet@74000 {
+ compatible = "marvell,armada-370-neta";
+ };
};
};
};
diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
index 8479fdc9e9c2..c5fdc99f0dbe 100644
--- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
@@ -318,7 +318,7 @@
};
eth3: ethernet@34000 {
- compatible = "marvell,armada-370-neta";
+ compatible = "marvell,armada-xp-neta";
reg = <0x34000 0x4000>;
interrupts = <14>;
clocks = <&gateclk 1>;
diff --git a/arch/arm/boot/dts/armada-xp-mv78460.dtsi b/arch/arm/boot/dts/armada-xp-mv78460.dtsi
index 661d54c81580..0e24f1a38540 100644
--- a/arch/arm/boot/dts/armada-xp-mv78460.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78460.dtsi
@@ -356,7 +356,7 @@
};
eth3: ethernet@34000 {
- compatible = "marvell,armada-370-neta";
+ compatible = "marvell,armada-xp-neta";
reg = <0x34000 0x4000>;
interrupts = <14>;
clocks = <&gateclk 1>;
diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi
index e78ce4ab6b75..0854d4493da7 100644
--- a/arch/arm/boot/dts/armada-xp.dtsi
+++ b/arch/arm/boot/dts/armada-xp.dtsi
@@ -185,7 +185,7 @@
};
eth2: ethernet@30000 {
- compatible = "marvell,armada-370-neta";
+ compatible = "marvell,armada-xp-neta";
reg = <0x30000 0x4000>;
interrupts = <12>;
clocks = <&gateclk 2>;
@@ -228,6 +228,14 @@
};
};
+ ethernet@70000 {
+ compatible = "marvell,armada-xp-neta";
+ };
+
+ ethernet@74000 {
+ compatible = "marvell,armada-xp-neta";
+ };
+
xor@f0900 {
compatible = "marvell,orion-xor";
reg = <0xF0900 0x100
diff --git a/arch/arm/include/asm/xen/hypervisor.h b/arch/arm/include/asm/xen/hypervisor.h
index 1317ee40f4df..04ff8e7b37df 100644
--- a/arch/arm/include/asm/xen/hypervisor.h
+++ b/arch/arm/include/asm/xen/hypervisor.h
@@ -1,6 +1,8 @@
#ifndef _ASM_ARM_XEN_HYPERVISOR_H
#define _ASM_ARM_XEN_HYPERVISOR_H
+#include <linux/init.h>
+
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
@@ -18,4 +20,10 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
extern struct dma_map_ops *xen_dma_ops;
+#ifdef CONFIG_XEN
+void __init xen_early_init(void);
+#else
+static inline void xen_early_init(void) { return; }
+#endif
+
#endif /* _ASM_ARM_XEN_HYPERVISOR_H */
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 0b579b2f4e0e..1bee8ca12494 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -12,7 +12,6 @@
#include <xen/interface/grant_table.h>
#define phys_to_machine_mapping_valid(pfn) (1)
-#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
#define pte_mfn pte_pfn
#define mfn_pte pfn_pte
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index e6d8c7658ffd..36c18b73c1f4 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -46,6 +46,7 @@
#include <asm/cacheflush.h>
#include <asm/cachetype.h>
#include <asm/tlbflush.h>
+#include <asm/xen/hypervisor.h>
#include <asm/prom.h>
#include <asm/mach/arch.h>
@@ -972,6 +973,7 @@ void __init setup_arch(char **cmdline_p)
arm_dt_init_cpu_maps();
psci_init();
+ xen_early_init();
#ifdef CONFIG_SMP
if (is_smp()) {
if (!mdesc->smp_init || !mdesc->smp_init()) {
diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c
index 9ecb8f9c4ef5..d4f7dc87042b 100644
--- a/arch/arm/mach-lpc32xx/irq.c
+++ b/arch/arm/mach-lpc32xx/irq.c
@@ -283,25 +283,25 @@ static int lpc32xx_set_irq_type(struct irq_data *d, unsigned int type)
case IRQ_TYPE_EDGE_RISING:
/* Rising edge sensitive */
__lpc32xx_set_irq_type(d->hwirq, 1, 1);
- __irq_set_handler_locked(d->hwirq, handle_edge_irq);
+ __irq_set_handler_locked(d->irq, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_FALLING:
/* Falling edge sensitive */
__lpc32xx_set_irq_type(d->hwirq, 0, 1);
- __irq_set_handler_locked(d->hwirq, handle_edge_irq);
+ __irq_set_handler_locked(d->irq, handle_edge_irq);
break;
case IRQ_TYPE_LEVEL_LOW:
/* Low level sensitive */
__lpc32xx_set_irq_type(d->hwirq, 0, 0);
- __irq_set_handler_locked(d->hwirq, handle_level_irq);
+ __irq_set_handler_locked(d->irq, handle_level_irq);
break;
case IRQ_TYPE_LEVEL_HIGH:
/* High level sensitive */
__lpc32xx_set_irq_type(d->hwirq, 1, 0);
- __irq_set_handler_locked(d->hwirq, handle_level_irq);
+ __irq_set_handler_locked(d->irq, handle_level_irq);
break;
/* Other modes are not supported */
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 7d0f07020c80..6c09cc440a2b 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -24,6 +24,7 @@
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
+#include <linux/console.h>
#include <linux/mm.h>
@@ -51,7 +52,9 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
int xen_platform_pci_unplug = XEN_UNPLUG_ALL;
EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
-static __read_mostly int xen_events_irq = -1;
+static __read_mostly unsigned int xen_events_irq;
+
+static __initdata struct device_node *xen_node;
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
@@ -150,40 +153,28 @@ static irqreturn_t xen_arm_callback(int irq, void *arg)
* documentation of the Xen Device Tree format.
*/
#define GRANT_TABLE_PHYSADDR 0
-static int __init xen_guest_init(void)
+void __init xen_early_init(void)
{
- struct xen_add_to_physmap xatp;
- static struct shared_info *shared_info_page = 0;
- struct device_node *node;
int len;
const char *s = NULL;
const char *version = NULL;
const char *xen_prefix = "xen,xen-";
- struct resource res;
- phys_addr_t grant_frames;
- node = of_find_compatible_node(NULL, NULL, "xen,xen");
- if (!node) {
+ xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
+ if (!xen_node) {
pr_debug("No Xen support\n");
- return 0;
+ return;
}
- s = of_get_property(node, "compatible", &len);
+ s = of_get_property(xen_node, "compatible", &len);
if (strlen(xen_prefix) + 3 < len &&
!strncmp(xen_prefix, s, strlen(xen_prefix)))
version = s + strlen(xen_prefix);
if (version == NULL) {
pr_debug("Xen version not found\n");
- return 0;
+ return;
}
- if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res))
- return 0;
- grant_frames = res.start;
- xen_events_irq = irq_of_parse_and_map(node, 0);
- pr_info("Xen %s support found, events_irq=%d gnttab_frame=%pa\n",
- version, xen_events_irq, &grant_frames);
- if (xen_events_irq < 0)
- return -ENODEV;
+ pr_info("Xen %s support found\n", version);
xen_domain_type = XEN_HVM_DOMAIN;
@@ -194,9 +185,34 @@ static int __init xen_guest_init(void)
else
xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED);
- if (!shared_info_page)
- shared_info_page = (struct shared_info *)
- get_zeroed_page(GFP_KERNEL);
+ if (!console_set_on_cmdline && !xen_initial_domain())
+ add_preferred_console("hvc", 0, NULL);
+}
+
+static int __init xen_guest_init(void)
+{
+ struct xen_add_to_physmap xatp;
+ struct shared_info *shared_info_page = NULL;
+ struct resource res;
+ phys_addr_t grant_frames;
+
+ if (!xen_domain())
+ return 0;
+
+ if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) {
+ pr_err("Xen grant table base address not found\n");
+ return -ENODEV;
+ }
+ grant_frames = res.start;
+
+ xen_events_irq = irq_of_parse_and_map(xen_node, 0);
+ if (!xen_events_irq) {
+ pr_err("Xen event channel interrupt not found\n");
+ return -ENODEV;
+ }
+
+ shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL);
+
if (!shared_info_page) {
pr_err("not enough memory\n");
return -ENOMEM;
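
The enlighten.c restructuring above splits Xen bring-up into two phases: xen_early_init() runs from setup_arch(), where neither the allocator nor the IRQ code is ready, so it only walks the device tree and records what it found; xen_guest_init() stays an initcall and does the allocation and IRQ mapping. A reduced sketch of the shape (helper names are stand-ins, not the real API):

/* Stand-ins for the DT lookup and the later resource setup. */
struct dt_node;
static struct dt_node *find_xen_dt_node(void) { return 0; }
static int map_xen_resources(struct dt_node *n) { return n ? 0 : -1; }

static struct dt_node *xen_node_sketch;	/* stashed by the early phase */
static int in_xen_domain;

void xen_early_init_sketch(void)
{
	/* DT walk only: safe before allocators and irq domains exist. */
	xen_node_sketch = find_xen_dt_node();
	if (xen_node_sketch)
		in_xen_domain = 1;	/* lets the initcall know to run */
}

int xen_guest_init_sketch(void)
{
	if (!in_xen_domain)
		return 0;		/* not running under Xen */
	/* Allocation and irq_of_parse_and_map() are legal by now. */
	return map_xen_resources(xen_node_sketch);
}
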
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 498325074a06..03e75fef15b8 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -15,10 +15,10 @@
#include <xen/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/memory.h>
+#include <xen/page.h>
#include <xen/swiotlb-xen.h>
#include <asm/cacheflush.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index cb7a14c5cd69..887596c67b12 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -10,10 +10,10 @@
#include <xen/xen.h>
#include <xen/interface/memory.h>
+#include <xen/page.h>
#include <xen/swiotlb-xen.h>
#include <asm/cacheflush.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index ffd3970721bf..f3067d4d4e35 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -64,6 +64,7 @@
#include <asm/psci.h>
#include <asm/efi.h>
#include <asm/virt.h>
+#include <asm/xen/hypervisor.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -401,6 +402,7 @@ void __init setup_arch(char **cmdline_p)
} else {
psci_acpi_init();
}
+ xen_early_init();
cpu_read_bootcpu_ops();
#ifdef CONFIG_SMP
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index cfb298d66305..2d48b6a46166 100644
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -231,8 +231,7 @@ static int __init eic_probe(struct platform_device *pdev)
irq_set_chip_data(eic->first_irq + i, eic);
}
- irq_set_chained_handler(int_irq, demux_eic_irq);
- irq_set_handler_data(int_irq, eic);
+ irq_set_chained_handler_and_data(int_irq, demux_eic_irq, eic);
if (pdev->id == 0) {
nmi_eic = eic;
diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c
index 835fa04511c8..272dde481d17 100644
--- a/arch/m68k/mac/psc.c
+++ b/arch/m68k/mac/psc.c
@@ -148,14 +148,10 @@ static void psc_irq(unsigned int irq, struct irq_desc *desc)
void __init psc_register_interrupts(void)
{
- irq_set_chained_handler(IRQ_AUTO_3, psc_irq);
- irq_set_handler_data(IRQ_AUTO_3, (void *)0x30);
- irq_set_chained_handler(IRQ_AUTO_4, psc_irq);
- irq_set_handler_data(IRQ_AUTO_4, (void *)0x40);
- irq_set_chained_handler(IRQ_AUTO_5, psc_irq);
- irq_set_handler_data(IRQ_AUTO_5, (void *)0x50);
- irq_set_chained_handler(IRQ_AUTO_6, psc_irq);
- irq_set_handler_data(IRQ_AUTO_6, (void *)0x60);
+ irq_set_chained_handler_and_data(IRQ_AUTO_3, psc_irq, (void *)0x30);
+ irq_set_chained_handler_and_data(IRQ_AUTO_4, psc_irq, (void *)0x40);
+ irq_set_chained_handler_and_data(IRQ_AUTO_5, psc_irq, (void *)0x50);
+ irq_set_chained_handler_and_data(IRQ_AUTO_6, psc_irq, (void *)0x60);
}
void psc_irq_enable(int irq) {
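
The irq_set_chained_handler_and_data() conversions here and in the following files close a small race: irq_set_chained_handler() makes the handler live immediately, so with the old two-call sequence there is a window in which the handler can run while its data is still NULL. The combined helper publishes both together. A side-by-side sketch (the genirq calls are real; the wrapper is illustrative):

#include <linux/irq.h>

static void install_demux(unsigned int irq, irq_flow_handler_t demux,
			  void *cookie)
{
	/* Old two-step form: the chained handler is live after the first
	 * call, but its data stays NULL until the second, so an interrupt
	 * landing in between dereferences a NULL cookie. */
	irq_set_chained_handler(irq, demux);
	irq_set_handler_data(irq, cookie);

	/* Combined form: handler and data are set together, closing the
	 * window. */
	irq_set_chained_handler_and_data(irq, demux, cookie);
}
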
diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c
index 8742e1cee492..ec9a371f1e62 100644
--- a/arch/mips/ath25/ar2315.c
+++ b/arch/mips/ath25/ar2315.c
@@ -161,8 +161,8 @@ void __init ar2315_arch_init_irq(void)
irq = irq_create_mapping(domain, AR2315_MISC_IRQ_AHB);
setup_irq(irq, &ar2315_ahb_err_interrupt);
- irq_set_chained_handler(AR2315_IRQ_MISC, ar2315_misc_irq_handler);
- irq_set_handler_data(AR2315_IRQ_MISC, domain);
+ irq_set_chained_handler_and_data(AR2315_IRQ_MISC,
+ ar2315_misc_irq_handler, domain);
ar2315_misc_irq_domain = domain;
}
diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c
index 094b938fd603..e63e38fa4880 100644
--- a/arch/mips/ath25/ar5312.c
+++ b/arch/mips/ath25/ar5312.c
@@ -156,8 +156,8 @@ void __init ar5312_arch_init_irq(void)
irq = irq_create_mapping(domain, AR5312_MISC_IRQ_AHB_PROC);
setup_irq(irq, &ar5312_ahb_err_interrupt);
- irq_set_chained_handler(AR5312_IRQ_MISC, ar5312_misc_irq_handler);
- irq_set_handler_data(AR5312_IRQ_MISC, domain);
+ irq_set_chained_handler_and_data(AR5312_IRQ_MISC,
+ ar5312_misc_irq_handler, domain);
ar5312_misc_irq_domain = domain;
}
diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c
index dadb30306a0a..f8d0acb4f973 100644
--- a/arch/mips/pci/pci-ar2315.c
+++ b/arch/mips/pci/pci-ar2315.c
@@ -384,8 +384,8 @@ static void ar2315_pci_irq_init(struct ar2315_pci_ctrl *apc)
apc->irq_ext = irq_create_mapping(apc->domain, AR2315_PCI_IRQ_EXT);
- irq_set_chained_handler(apc->irq, ar2315_pci_irq_handler);
- irq_set_handler_data(apc->irq, apc);
+ irq_set_chained_handler_and_data(apc->irq, ar2315_pci_irq_handler,
+ apc);
/* Clear any pending Abort or external Interrupts
* and enable interrupt processing */
diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c
index da301e0a2f1f..53707aacc0f8 100644
--- a/arch/mips/ralink/irq.c
+++ b/arch/mips/ralink/irq.c
@@ -184,8 +184,7 @@ static int __init intc_of_init(struct device_node *node,
rt_intc_w32(INTC_INT_GLOBAL, INTC_REG_ENABLE);
- irq_set_chained_handler(irq, ralink_intc_irq_handler);
- irq_set_handler_data(irq, domain);
+ irq_set_chained_handler_and_data(irq, ralink_intc_irq_handler, domain);
/* tell the kernel which irq is used for performance monitoring */
rt_perfcount_irq = irq_create_mapping(domain, 9);
diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
index 6ab3b73efcf8..480de70f4059 100644
--- a/arch/mn10300/kernel/irq.c
+++ b/arch/mn10300/kernel/irq.c
@@ -320,11 +320,11 @@ void migrate_irqs(void)
if (irqd_is_per_cpu(data))
continue;
- if (cpumask_test_cpu(self, &data->affinity) &&
+ if (cpumask_test_cpu(self, data->affinity) &&
!cpumask_intersects(&irq_affinity[irq], cpu_online_mask)) {
int cpu_id;
cpu_id = cpumask_first(cpu_online_mask);
- cpumask_set_cpu(cpu_id, &data->affinity);
+ cpumask_set_cpu(cpu_id, data->affinity);
}
/* We need to operate irq_affinity_online atomically. */
arch_local_cli_save(flags);
@@ -335,7 +335,7 @@ void migrate_irqs(void)
GxICR(irq) = x & GxICR_LEVEL;
tmp = GxICR(irq);
- new = cpumask_any_and(&data->affinity,
+ new = cpumask_any_and(data->affinity,
cpu_online_mask);
irq_affinity_online[irq] = new;
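
The mn10300 hunks only drop an '&': they track the irq core's affinity field having become a pointer to a cpumask rather than an embedded cpumask_t, so the cpumask helpers take it directly. Sketched from memory, so treat the field layout as an assumption:

#include <linux/cpumask.h>

struct irq_data_affinity_sketch {
	struct cpumask *affinity;	/* was: cpumask_t affinity; */
};

static int on_this_cpu(struct irq_data_affinity_sketch *d, int cpu)
{
	return cpumask_test_cpu(cpu, d->affinity);	/* no '&' needed */
}
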
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 64707750c780..940cbddd9237 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -17,13 +17,15 @@ CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_PERF=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
@@ -44,6 +46,7 @@ CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=256
@@ -242,9 +245,9 @@ CONFIG_NF_CONNTRACK_IPV4=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_NF_TABLES_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NF_TABLES_ARP=m
CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -264,8 +267,8 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -353,7 +356,6 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_VIRTIO_BLK=y
@@ -458,7 +460,6 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
@@ -544,7 +545,6 @@ CONFIG_FRAME_WARN=1024
CONFIG_READABLE_ASM=y
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_SELFTEST=y
@@ -558,6 +558,7 @@ CONFIG_SLUB_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_DEBUG_SHIRQ=y
@@ -575,7 +576,6 @@ CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_DEBUG_CREDENTIALS=y
-CONFIG_PROVE_RCU=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=300
CONFIG_NOTIFIER_ERROR_INJECTION=m
@@ -611,7 +611,6 @@ CONFIG_TEST_BPF=m
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 5c3097272cd8..d793fec91797 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -17,11 +17,13 @@ CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_PERF=y
CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
@@ -240,9 +242,9 @@ CONFIG_NF_CONNTRACK_IPV4=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_NF_TABLES_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NF_TABLES_ARP=m
CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -262,8 +264,8 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -350,7 +352,6 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_VIRTIO_BLK=y
@@ -455,7 +456,6 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
@@ -538,7 +538,7 @@ CONFIG_DEBUG_INFO=y
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_PANIC_ON_OOPS=y
CONFIG_TIMER_STATS=y
@@ -558,7 +558,6 @@ CONFIG_ATOMIC64_SELFTEST=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index bda70f1ffd2c..38a77e9c8aa6 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -17,11 +17,13 @@ CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_PERF=y
CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
@@ -42,9 +44,10 @@ CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
-CONFIG_NR_CPUS=256
+CONFIG_NR_CPUS=512
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -238,9 +241,9 @@ CONFIG_NF_CONNTRACK_IPV4=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_NF_TABLES_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NF_TABLES_ARP=m
CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -260,8 +263,8 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -348,7 +351,6 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_VIRTIO_BLK=y
@@ -453,7 +455,6 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
@@ -536,7 +537,7 @@ CONFIG_DEBUG_INFO=y
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_TIMER_STATS=y
CONFIG_RCU_TORTURE_TEST=m
@@ -553,7 +554,6 @@ CONFIG_ATOMIC64_SELFTEST=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 83ef702d2403..9256b48e7e43 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -8,7 +8,6 @@ CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
@@ -31,9 +30,11 @@ CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
+CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_NR_CPUS=256
CONFIG_HZ_100=y
@@ -41,7 +42,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CMA=y
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
@@ -125,6 +125,7 @@ CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_PROVE_LOCKING=y
@@ -135,12 +136,16 @@ CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_PI_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_PROVE_RCU=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_RCU_CPU_STALL_INFO is not set
CONFIG_RCU_TRACE=y
CONFIG_LATENCYTOP=y
CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENT=y
CONFIG_KPROBES_SANITY_TEST=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
@@ -187,6 +192,7 @@ CONFIG_CRYPTO_ZLIB=m
CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_ZCRYPT=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index f5a8e2fcde0c..91541000378e 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -8,8 +8,6 @@
#ifndef _ASM_S390_CPU_H
#define _ASM_S390_CPU_H
-#define MAX_CPU_ADDRESS 255
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index ece606c2ee86..39ae6a359747 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -94,7 +94,6 @@ struct dump_save_areas {
};
extern struct dump_save_areas dump_save_areas;
-struct save_area_ext *dump_save_area_create(int cpu);
extern void do_reipl(void);
extern void do_halt(void);
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index c891f41b2753..f6ff06077631 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -11,6 +11,7 @@
#include <asm/cpu.h>
#define SCLP_CHP_INFO_MASK_SIZE 32
+#define SCLP_MAX_CORES 256
struct sclp_chp_info {
u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
@@ -26,7 +27,7 @@ struct sclp_ipl_info {
char loadparm[LOADPARM_LEN];
};
-struct sclp_cpu_entry {
+struct sclp_core_entry {
u8 core_id;
u8 reserved0[2];
u8 : 3;
@@ -38,12 +39,11 @@ struct sclp_cpu_entry {
u8 reserved1;
} __attribute__((packed));
-struct sclp_cpu_info {
+struct sclp_core_info {
unsigned int configured;
unsigned int standby;
unsigned int combined;
- int has_cpu_type;
- struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1];
+ struct sclp_core_entry core[SCLP_MAX_CORES];
};
struct sclp_info {
@@ -51,7 +51,7 @@ struct sclp_info {
unsigned char has_vt220 : 1;
unsigned char has_siif : 1;
unsigned char has_sigpif : 1;
- unsigned char has_cpu_type : 1;
+ unsigned char has_core_type : 1;
unsigned char has_sprp : 1;
unsigned int ibc;
unsigned int mtid;
@@ -60,15 +60,15 @@ struct sclp_info {
unsigned long long rzm;
unsigned long long rnmax;
unsigned long long hamax;
- unsigned int max_cpu;
+ unsigned int max_cores;
unsigned long hsa_size;
unsigned long long facilities;
};
extern struct sclp_info sclp;
-int sclp_get_cpu_info(struct sclp_cpu_info *info);
-int sclp_cpu_configure(u8 cpu);
-int sclp_cpu_deconfigure(u8 cpu);
+int sclp_get_core_info(struct sclp_core_info *info);
+int sclp_core_configure(u8 core);
+int sclp_core_deconfigure(u8 core);
int sclp_sdias_blk_count(void);
int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
int sclp_chp_configure(struct chp_id chpid);
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index b3bd0282dd98..5df26b11cf47 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -29,6 +29,7 @@ extern void smp_call_ipl_cpu(void (*func)(void *), void *);
extern int smp_find_processor_id(u16 address);
extern int smp_store_status(int cpu);
+extern void smp_save_dump_cpus(void);
extern int smp_vcpu_scheduled(int cpu);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
@@ -54,6 +55,7 @@ static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_fill_possible_mask(void) { }
+static inline void smp_save_dump_cpus(void) { }
#endif /* CONFIG_SMP */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index daed3fde42ec..326f717df587 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -78,15 +78,20 @@ s390_base_pgm_handler_fn:
#
# Calls diag 308 subcode 1 and continues execution
#
-# The following conditions must be ensured before calling this function:
-# * Prefix register = 0
-# * Lowcore protection is disabled
-#
ENTRY(diag308_reset)
larl %r4,.Lctlregs # Save control registers
stctg %c0,%c15,0(%r4)
+ lg %r2,0(%r4) # Disable lowcore protection
+ nilh %r2,0xefff
+ larl %r4,.Lctlreg0
+ stg %r2,0(%r4)
+ lctlg %c0,%c0,0(%r4)
larl %r4,.Lfpctl # Floating point control register
stfpc 0(%r4)
+ larl %r4,.Lprefix # Save prefix register
+ stpx 0(%r4)
+ larl %r4,.Lprefix_zero # Set prefix register to 0
+ spx 0(%r4)
larl %r4,.Lcontinue_psw # Save PSW flags
epsw %r2,%r3
stm %r2,%r3,0(%r4)
@@ -106,6 +111,8 @@ ENTRY(diag308_reset)
lctlg %c0,%c15,0(%r4)
larl %r4,.Lfpctl # Restore floating point ctl register
lfpc 0(%r4)
+ larl %r4,.Lprefix # Restore prefix register
+ spx 0(%r4)
larl %r4,.Lcontinue_psw # Restore PSW flags
lpswe 0(%r4)
.Lcontinue:
@@ -122,10 +129,16 @@ ENTRY(diag308_reset)
.section .bss
.align 8
+.Lctlreg0:
+ .quad 0
.Lctlregs:
.rept 16
.quad 0
.endr
.Lfpctl:
.long 0
+.Lprefix:
+ .long 0
+.Lprefix_zero:
+ .long 0
.previous
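
With the extra save/restore sequences added above, diag308_reset() no longer depends on its callers pre-zeroing the prefix register and lifting lowcore protection; it saves, adjusts, and restores both itself. A caller-side sketch of the simplification (the helper names are hypothetical, standing in for the old documented preconditions):

/* Hypothetical helpers standing in for the old caller-side duties. */
static void disable_lowcore_protection(void) { }
static void set_prefix_register(unsigned int pfx) { (void)pfx; }
extern void diag308_reset(void);

static void reipl_old_sketch(void)
{
	disable_lowcore_protection();	/* was a documented precondition */
	set_prefix_register(0);		/* ditto */
	diag308_reset();
}

static void reipl_new_sketch(void)
{
	diag308_reset();	/* now handles prefix and lowcore protection */
}
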
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 7a75ad4594e3..0c6c01eb3613 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -45,31 +45,6 @@ static struct memblock_type oldmem_type = {
struct dump_save_areas dump_save_areas;
/*
- * Allocate and add a save area for a CPU
- */
-struct save_area_ext *dump_save_area_create(int cpu)
-{
- struct save_area_ext **save_areas, *save_area;
-
- save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
- if (!save_area)
- return NULL;
- if (cpu + 1 > dump_save_areas.count) {
- dump_save_areas.count = cpu + 1;
- save_areas = krealloc(dump_save_areas.areas,
- dump_save_areas.count * sizeof(void *),
- GFP_KERNEL | __GFP_ZERO);
- if (!save_areas) {
- kfree(save_area);
- return NULL;
- }
- dump_save_areas.areas = save_areas;
- }
- dump_save_areas.areas[cpu] = save_area;
- return save_area;
-}
-
-/*
* Return physical address for virtual address
*/
static inline void *load_real_addr(void *addr)
@@ -416,7 +391,7 @@ static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs)
ptr += len;
/* Copy lower halves of SIMD registers 0-15 */
for (i = 0; i < 16; i++) {
- memcpy(ptr, &vx_regs[i], 8);
+ memcpy(ptr, &vx_regs[i].u[2], 8);
ptr += 8;
}
return ptr;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index e6a1578fc000..afe05bfb7e00 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1572,7 +1572,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
}
#define param_check_sfb_size(name, p) __param_check(name, p, void)
-static struct kernel_param_ops param_ops_sfb_size = {
+static const struct kernel_param_ops param_ops_sfb_size = {
.set = param_set_sfb_size,
.get = param_get_sfb_size,
};
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 73941bf42350..f7f027caaaaa 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -868,6 +868,11 @@ void __init setup_arch(char **cmdline_p)
check_initrd();
reserve_crashkernel();
+ /*
+ * Be aware that smp_save_dump_cpus() triggers a system reset.
+ * Therefore CPU and device initialization should be done afterwards.
+ */
+ smp_save_dump_cpus();
setup_resources();
setup_vmcoreinfo();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0d9d59d4710e..6f54c175f5c9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -31,6 +31,7 @@
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/crash_dump.h>
+#include <linux/memblock.h>
#include <asm/asm-offsets.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
@@ -69,7 +70,7 @@ struct pcpu {
u16 address; /* physical cpu address */
};
-static u8 boot_cpu_type;
+static u8 boot_core_type;
static struct pcpu pcpu_devices[NR_CPUS];
unsigned int smp_cpu_mt_shift;
@@ -531,15 +532,12 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
#ifdef CONFIG_CRASH_DUMP
-static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
+static void __smp_store_cpu_state(struct save_area_ext *sa_ext, u16 address,
+ int is_boot_cpu)
{
- void *lc = pcpu_devices[0].lowcore;
- struct save_area_ext *sa_ext;
+ void *lc = (void *)(unsigned long) store_prefix();
unsigned long vx_sa;
- sa_ext = dump_save_area_create(cpu);
- if (!sa_ext)
- panic("could not allocate memory for save area\n");
if (is_boot_cpu) {
/* Copy the registers of the boot CPU. */
copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
@@ -554,14 +552,33 @@ static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
if (!MACHINE_HAS_VX)
return;
/* Get the VX registers */
- vx_sa = __get_free_page(GFP_KERNEL);
+ vx_sa = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!vx_sa)
panic("could not allocate memory for VX save area\n");
__pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL);
memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs));
- free_page(vx_sa);
+ memblock_free(vx_sa, PAGE_SIZE);
}
+int smp_store_status(int cpu)
+{
+ unsigned long vx_sa;
+ struct pcpu *pcpu;
+
+ pcpu = pcpu_devices + cpu;
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
+ 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+ if (!MACHINE_HAS_VX)
+ return 0;
+ vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
+ __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+ vx_sa, NULL);
+ return 0;
+}
+
+#endif /* CONFIG_CRASH_DUMP */
+
/*
* Collect CPU state of the previous, crashed system.
* There are four cases:
@@ -589,10 +606,12 @@ static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
* old system. The ELF sections are picked up by the crash_dump code
* via elfcorehdr_addr.
*/
-static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
+void __init smp_save_dump_cpus(void)
{
- unsigned int cpu, address, i, j;
- int is_boot_cpu;
+#ifdef CONFIG_CRASH_DUMP
+ int addr, cpu, boot_cpu_addr, max_cpu_addr;
+ struct save_area_ext *sa_ext;
+ bool is_boot_cpu;
if (is_kdump_kernel())
/* Previous system stored the CPU states. Nothing to do. */
@@ -602,42 +621,36 @@ static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
return;
/* Set multi-threading state to the previous system. */
pcpu_set_smt(sclp.mtid_prev);
- /* Collect CPU states. */
- cpu = 0;
- for (i = 0; i < info->configured; i++) {
- /* Skip CPUs with different CPU type. */
- if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
+ for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
+ if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
+ SIGP_CC_NOT_OPERATIONAL)
continue;
- for (j = 0; j <= smp_cpu_mtid; j++, cpu++) {
- address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j;
- is_boot_cpu = (address == pcpu_devices[0].address);
- if (is_boot_cpu && !OLDMEM_BASE)
- /* Skip boot CPU for standard zfcp dump. */
- continue;
- /* Get state for this CPu. */
- __smp_store_cpu_state(cpu, address, is_boot_cpu);
- }
+ cpu += 1;
}
-}
-
-int smp_store_status(int cpu)
-{
- unsigned long vx_sa;
- struct pcpu *pcpu;
-
- pcpu = pcpu_devices + cpu;
- if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
- 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
- return -EIO;
- if (!MACHINE_HAS_VX)
- return 0;
- vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
- __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
- vx_sa, NULL);
- return 0;
-}
-
+ dump_save_areas.areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8);
+ dump_save_areas.count = cpu;
+ boot_cpu_addr = stap();
+ for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
+ if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
+ SIGP_CC_NOT_OPERATIONAL)
+ continue;
+ sa_ext = (void *) memblock_alloc(sizeof(*sa_ext), 8);
+ dump_save_areas.areas[cpu] = sa_ext;
+ if (!sa_ext)
+ panic("could not allocate memory for save area\n");
+ is_boot_cpu = (addr == boot_cpu_addr);
+ cpu += 1;
+ if (is_boot_cpu && !OLDMEM_BASE)
+ /* Skip boot CPU for standard zfcp dump. */
+ continue;
+ /* Get state for this CPU. */
+ __smp_store_cpu_state(sa_ext, addr, is_boot_cpu);
+ }
+ diag308_reset();
+ pcpu_set_smt(0);
#endif /* CONFIG_CRASH_DUMP */
+}
void smp_cpu_set_polarization(int cpu, int val)
{
@@ -649,21 +662,22 @@ int smp_cpu_get_polarization(int cpu)
return pcpu_devices[cpu].polarization;
}
-static struct sclp_cpu_info *smp_get_cpu_info(void)
+static struct sclp_core_info *smp_get_core_info(void)
{
static int use_sigp_detection;
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
int address;
info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
+ if (info && (use_sigp_detection || sclp_get_core_info(info))) {
use_sigp_detection = 1;
- for (address = 0; address <= MAX_CPU_ADDRESS;
+ for (address = 0;
+ address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
address += (1U << smp_cpu_mt_shift)) {
if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- info->cpu[info->configured].core_id =
+ info->core[info->configured].core_id =
address >> smp_cpu_mt_shift;
info->configured++;
}
@@ -674,7 +688,7 @@ static struct sclp_cpu_info *smp_get_cpu_info(void)
static int smp_add_present_cpu(int cpu);
-static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
+static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
{
struct pcpu *pcpu;
cpumask_t avail;
@@ -685,9 +699,9 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
cpu = cpumask_first(&avail);
for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
- if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ if (sclp.has_core_type && info->core[i].type != boot_core_type)
continue;
- address = info->cpu[i].core_id << smp_cpu_mt_shift;
+ address = info->core[i].core_id << smp_cpu_mt_shift;
for (j = 0; j <= smp_cpu_mtid; j++) {
if (pcpu_find_address(cpu_present_mask, address + j))
continue;
@@ -713,41 +727,37 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
static void __init smp_detect_cpus(void)
{
unsigned int cpu, mtid, c_cpus, s_cpus;
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
u16 address;
/* Get CPU information */
- info = smp_get_cpu_info();
+ info = smp_get_core_info();
if (!info)
panic("smp_detect_cpus failed to allocate memory\n");
/* Find boot CPU type */
- if (info->has_cpu_type) {
+ if (sclp.has_core_type) {
address = stap();
for (cpu = 0; cpu < info->combined; cpu++)
- if (info->cpu[cpu].core_id == address) {
+ if (info->core[cpu].core_id == address) {
/* The boot cpu dictates the cpu type. */
- boot_cpu_type = info->cpu[cpu].type;
+ boot_core_type = info->core[cpu].type;
break;
}
if (cpu >= info->combined)
panic("Could not find boot CPU type");
}
-#ifdef CONFIG_CRASH_DUMP
- /* Collect CPU state of previous system */
- smp_store_cpu_states(info);
-#endif
-
/* Set multi-threading state for the current system */
- mtid = boot_cpu_type ? sclp.mtid : sclp.mtid_cp;
+ mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
pcpu_set_smt(mtid);
/* Print number of CPUs */
c_cpus = s_cpus = 0;
for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
+ if (sclp.has_core_type &&
+ info->core[cpu].type != boot_core_type)
continue;
if (cpu < info->configured)
c_cpus += smp_cpu_mtid + 1;
@@ -884,7 +894,7 @@ void __init smp_fill_possible_mask(void)
sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
sclp_max = min(smp_max_threads, sclp_max);
- sclp_max = sclp.max_cpu * sclp_max ?: nr_cpu_ids;
+ sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids;
possible = setup_possible_cpus ?: nr_cpu_ids;
possible = min(possible, sclp_max);
for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
@@ -977,7 +987,7 @@ static ssize_t cpu_configure_store(struct device *dev,
case 0:
if (pcpu->state != CPU_STATE_CONFIGURED)
break;
- rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift);
+ rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
for (i = 0; i <= smp_cpu_mtid; i++) {
@@ -992,7 +1002,7 @@ static ssize_t cpu_configure_store(struct device *dev,
case 1:
if (pcpu->state != CPU_STATE_STANDBY)
break;
- rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift);
+ rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
for (i = 0; i <= smp_cpu_mtid; i++) {
@@ -1107,10 +1117,10 @@ out:
int __ref smp_rescan_cpus(void)
{
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
int nr;
- info = smp_get_cpu_info();
+ info = smp_get_core_info();
if (!info)
return -ENOMEM;
get_online_cpus();
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index d3766dd67e23..fee782acc2ee 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -250,7 +250,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
({ \
/* Branch instruction needs 6 bytes */ \
int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
- _EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask); \
+ _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
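
The one-line JIT fix masks the 16-bit relative displacement before OR-ing it into the first instruction word: a negative rel (backward branch) is sign-extended through bit 31 and would otherwise smash the opcode and register fields. A tiny userspace demonstration of the effect (the opcode value is invented for illustration):

#include <stdio.h>

int main(void)
{
	unsigned int op1 = 0xec000000u;	/* invented opcode word */
	int rel = -4;			/* backward branch displacement */

	printf("%08x\n", op1 | rel);		/* fffffffc: opcode gone */
	printf("%08x\n", op1 | (rel & 0xffff));	/* ec00fffc: field only */
	return 0;
}
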
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 59cf0b911898..9def1f52d03a 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -24,11 +24,14 @@ config TILE
select MODULES_USE_ELF_RELA
select HAVE_ARCH_TRACEHOOK
select HAVE_SYSCALL_TRACEPOINTS
+ select USER_STACKTRACE_SUPPORT
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_DEBUG_STACKOVERFLOW
select ARCH_WANT_FRAME_POINTERS
select HAVE_CONTEXT_TRACKING
select EDAC_SUPPORT
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
# FIXME: investigate whether we need/want these options.
# select HAVE_IOREMAP_PROT
@@ -125,8 +128,10 @@ config HVC_TILE
select HVC_IRQ if TILEGX
def_bool y
+# Building with ARCH=tilegx (or ARCH=tile) implies using the
+# 64-bit TILE-Gx toolchain, so force CONFIG_TILEGX on.
config TILEGX
- bool "Building for TILE-Gx (64-bit) processor"
+ def_bool ARCH != "tilepro"
select SPARSE_IRQ
select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
select HAVE_FUNCTION_TRACER
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
index 1fe86911838b..84a924034bdb 100644
--- a/arch/tile/include/asm/irq.h
+++ b/arch/tile/include/asm/irq.h
@@ -78,4 +78,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type);
void setup_irq_regs(void);
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+#endif
+
#endif /* _ASM_TILE_IRQ_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index dd4f9f17e30a..139dfdee0134 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -111,8 +111,6 @@ struct thread_struct {
unsigned long long interrupt_mask;
/* User interrupt-control 0 state */
unsigned long intctrl_0;
- /* Is this task currently doing a backtrace? */
- bool in_backtrace;
/* Any other miscellaneous processor state bits */
unsigned long proc_status;
#if !CHIP_HAS_FIXED_INTVEC_BASE()
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index c0a77b38d39a..b14b1ba5bf9c 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -41,8 +41,12 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
* to claim the lock is held, since it will be momentarily
* if not already. There's no need to wait for a "valid"
* lock->next_ticket to become available.
+ * Use READ_ONCE() to ensure that calling this in a loop is OK.
*/
- return lock->next_ticket != lock->current_ticket;
+ int curr = READ_ONCE(lock->current_ticket);
+ int next = READ_ONCE(lock->next_ticket);
+
+ return next != curr;
}
void arch_spin_lock(arch_spinlock_t *lock);
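
The READ_ONCE() additions in both tile spinlock headers keep arch_spin_is_locked() usable in a polling loop: plain loads could legally be hoisted out of the loop by the compiler, leaving the caller spinning on stale registers. The intended usage pattern, as a sketch:

static void wait_until_unlocked(arch_spinlock_t *lock)
{
	/* Each pass performs fresh loads of the ticket fields because
	 * arch_spin_is_locked() now uses READ_ONCE(); without it the
	 * compiler could read them once and loop forever. */
	while (arch_spin_is_locked(lock))
		cpu_relax();
}
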
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index 9a12b9c7e5d3..b9718fb4e74a 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -18,6 +18,8 @@
#ifndef _ASM_TILE_SPINLOCK_64_H
#define _ASM_TILE_SPINLOCK_64_H
+#include <linux/compiler.h>
+
/* Shifts and masks for the various fields in "lock". */
#define __ARCH_SPIN_CURRENT_SHIFT 17
#define __ARCH_SPIN_NEXT_MASK 0x7fff
@@ -44,7 +46,8 @@ static inline u32 arch_spin_next(u32 val)
/* The lock is locked if a task would have to wait to get it. */
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- u32 val = lock->lock;
+ /* Use READ_ONCE() to ensure that calling this in a loop is OK. */
+ u32 val = READ_ONCE(lock->lock);
return arch_spin_current(val) != arch_spin_next(val);
}
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index 0e9d382a2d45..c3cb42615a9f 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -58,17 +58,14 @@ extern int KBacktraceIterator_end(struct KBacktraceIterator *kbt);
/* Advance to the next frame. */
extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt);
+/* Dump just the contents of the pt_regs structure. */
+extern void tile_show_regs(struct pt_regs *);
+
/*
* Dump stack given complete register info. Use only from the
* architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
*/
-extern void tile_show_stack(struct KBacktraceIterator *, int headers);
-
-/* Dump stack of current process, with registers to seed the backtrace. */
-extern void dump_stack_regs(struct pt_regs *);
-
-/* Helper method for assembly dump_stack(). */
-extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+extern void tile_show_stack(struct KBacktraceIterator *);
#endif /* _ASM_TILE_STACK_H */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index f804c39a5e4d..dc1fb28d9636 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -42,6 +42,7 @@ struct thread_info {
unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */
void __user *unalign_jit_base; /* unalign fixup JIT base */
#endif
+ bool in_backtrace; /* currently doing backtrace? */
};
/*
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 4b99a1c3aab2..11c82270c1f5 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -52,6 +52,14 @@ void do_timer_interrupt(struct pt_regs *, int fault_num);
/* kernel/messaging.c */
void hv_message_intr(struct pt_regs *, int intnum);
+#define TILE_NMI_DUMP_STACK 1 /* Dump stack for sysrq+'l' */
+
+/* kernel/process.c */
+void do_nmi_dump_stack(struct pt_regs *regs);
+
+/* kernel/traps.c */
+void do_nmi(struct pt_regs *, int fault_num, unsigned long reason);
+
/* kernel/irq.c */
void tile_dev_intr(struct pt_regs *, int intnum);
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index a33276bf5ca1..0a9c4265763b 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -65,6 +65,13 @@ static inline int is_arch_mappable_range(unsigned long addr,
#endif
/*
+ * Note that using this definition ignores is_arch_mappable_range(),
+ * so on tilepro, code that uses user_addr_max() is constrained not
+ * to reference the tilepro user-interrupt region.
+ */
+#define user_addr_max() (current_thread_info()->addr_limit.seg)
+
+/*
* Test whether a block of memory is a valid user space address.
* Returns 0 if the range is valid, nonzero otherwise.
*/
@@ -471,62 +478,9 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
#endif
-/**
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- *
- * Context: User context only. This function may sleep.
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- *
- * If there is a limit on the length of a valid string, you may wish to
- * consider using strnlen_user() instead.
- */
-extern long strnlen_user_asm(const char __user *str, long n);
-static inline long __must_check strnlen_user(const char __user *str, long n)
-{
- might_fault();
- return strnlen_user_asm(str, n);
-}
-#define strlen_user(str) strnlen_user(str, LONG_MAX)
-
-/**
- * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
- * @dst: Destination address, in kernel space. This buffer must be at
- * least @count bytes long.
- * @src: Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- * Caller must check the specified block with access_ok() before calling
- * this function.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-extern long strncpy_from_user_asm(char *dst, const char __user *src, long);
-static inline long __must_check __strncpy_from_user(
- char *dst, const char __user *src, long count)
-{
- might_fault();
- return strncpy_from_user_asm(dst, src, count);
-}
-static inline long __must_check strncpy_from_user(
- char *dst, const char __user *src, long count)
-{
- if (access_ok(VERIFY_READ, src, 1))
- return __strncpy_from_user(dst, src, count);
- return -EFAULT;
-}
+extern long strnlen_user(const char __user *str, long n);
+extern long strlen_user(const char __user *str);
+extern long strncpy_from_user(char *dst, const char __user *src, long);
/**
* clear_user: - Zero a block of memory in user space.
diff --git a/arch/tile/include/asm/word-at-a-time.h b/arch/tile/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..9e5ce0d7b292
--- /dev/null
+++ b/arch/tile/include/asm/word-at-a-time.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <asm/byteorder.h>
+
+struct word_at_a_time { /* unused */ };
+#define WORD_AT_A_TIME_CONSTANTS {}
+
+/* Generate 0x01 byte values for non-zero bytes using a SIMD instruction. */
+static inline unsigned long has_zero(unsigned long val, unsigned long *data,
+ const struct word_at_a_time *c)
+{
+#ifdef __tilegx__
+ unsigned long mask = __insn_v1cmpeqi(val, 0);
+#else /* tilepro */
+ unsigned long mask = __insn_seqib(val, 0);
+#endif
+ *data = mask;
+ return mask;
+}
+
+/* These operations are both nops. */
+#define prep_zero_mask(val, data, c) (data)
+#define create_zero_mask(data) (data)
+
+/* And this operation just depends on endianness. */
+static inline long find_zero(unsigned long mask)
+{
+#ifdef __BIG_ENDIAN
+ return __builtin_clzl(mask) >> 3;
+#else
+ return __builtin_ctzl(mask) >> 3;
+#endif
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
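
This new header is what lets tile use the generic word-at-a-time string routines selected in the Kconfig hunk earlier (GENERIC_STRNCPY_FROM_USER, GENERIC_STRNLEN_USER). How a consumer drives the API, in reduced form; the real loops in lib/ also handle alignment and the user-access window, so this sketch assumes an aligned, NUL-terminated buffer:

#include <asm/word-at-a-time.h>

static long wordwise_strlen(const unsigned long *p)
{
	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
	unsigned long data, mask;
	long len = 0;

	for (;; len += sizeof(unsigned long), p++) {
		mask = has_zero(*p, &data, &constants);
		if (mask) {
			/* Both of these are no-ops on tile. */
			data = prep_zero_mask(*p, data, &constants);
			data = create_zero_mask(data);
			/* Byte index of the first zero byte. */
			return len + find_zero(data);
		}
	}
}
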
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index e0e6af4e783b..f10b332b3b65 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -321,8 +321,11 @@
/** hv_console_set_ipi */
#define HV_DISPATCH_CONSOLE_SET_IPI 63
+/** hv_send_nmi */
+#define HV_DISPATCH_SEND_NMI 65
+
/** One more than the largest dispatch value */
-#define _HV_DISPATCH_END 64
+#define _HV_DISPATCH_END 66
#ifndef __ASSEMBLER__
@@ -1253,6 +1256,11 @@ void hv_downcall_dispatch(void);
#define INT_DMATLB_ACCESS_DWNCL INT_DMA_CPL
/** Device interrupt downcall interrupt vector */
#define INT_DEV_INTR_DWNCL INT_WORLD_ACCESS
+/** NMI downcall interrupt vector */
+#define INT_NMI_DWNCL 64
+
+#define HV_NMI_FLAG_FORCE 0x1 /**< Force an NMI downcall regardless of
+ the ICS bit of the client. */
#ifndef __ASSEMBLER__
@@ -1780,6 +1788,56 @@ int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
int hv_dev_poll_cancel(int devhdl);
+/** NMI information */
+typedef struct
+{
+ /** Result: negative error, or HV_NMI_RESULT_xxx. */
+ int result;
+
+ /** PC from interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */
+ HV_VirtAddr pc;
+
+} HV_NMI_Info;
+
+/** NMI issued successfully. */
+#define HV_NMI_RESULT_OK 0
+
+/** NMI not issued: remote tile running at client PL with ICS set. */
+#define HV_NMI_RESULT_FAIL_ICS 1
+
+/** NMI not issued: remote tile waiting in hypervisor. */
+#define HV_NMI_RESULT_FAIL_HV 2
+
+/** Force an NMI downcall regardless of the ICS bit of the client. */
+#define HV_NMI_FLAG_FORCE 0x1
+
+/** Send an NMI interrupt request to a particular tile.
+ *
+ * This will cause the NMI to be issued on the remote tile regardless
+ * of the state of the client interrupt mask. However, if the remote
+ * tile is in the hypervisor, it will not execute the NMI, and
+ * HV_NMI_RESULT_FAIL_HV will be returned. Similarly, if the remote
+ * tile is in a client interrupt critical section at the time of the
+ * NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will
+ * be returned. In this second case, however, if HV_NMI_FLAG_FORCE
+ * is set in flags, then the remote tile will enter its NMI interrupt
+ * vector regardless. Forcing the NMI vector during an interrupt
+ * critical section will mean that the client cannot safely continue
+ * execution after handling the interrupt.
+ *
+ * @param tile Tile to which the NMI request is sent.
+ * @param info NMI information which is defined by and interpreted by the
+ * supervisor, is passed to the specified tile, and is
+ * stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the
+ * specified tile when entering the NMI handler routine.
+ * Typically, this parameter stores the NMI type, or an aligned
+ * VA plus some special bits, etc.
+ * @param flags Flags (HV_NMI_FLAG_xxx).
+ * @return Information about the requested NMI.
+ */
+HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags);
+
+
/** Scatter-gather list for preada/pwritea calls. */
typedef struct
#if CHIP_VA_WIDTH() <= 32
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index 3d9175992a20..670a3569450f 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -27,13 +27,6 @@ STD_ENTRY(current_text_addr)
{ move r0, lr; jrp lr }
STD_ENDPROC(current_text_addr)
-STD_ENTRY(dump_stack)
- { move r2, lr; lnk r1 }
- { move r4, r52; addli r1, r1, dump_stack - . }
- { move r3, sp; j _dump_stack }
- jrp lr /* keep backtracer happy */
- STD_ENDPROC(dump_stack)
-
STD_ENTRY(KBacktraceIterator_init_current)
{ move r2, lr; lnk r1 }
{ move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . }
diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S
index 2ab456622391..d78ee2ad610c 100644
--- a/arch/tile/kernel/hvglue.S
+++ b/arch/tile/kernel/hvglue.S
@@ -71,4 +71,5 @@ gensym hv_flush_all, 0x6e0, 32
gensym hv_get_ipi_pte, 0x700, 32
gensym hv_set_pte_super_shift, 0x720, 32
gensym hv_console_set_ipi, 0x7e0, 32
-gensym hv_glue_internals, 0x800, 30720
+gensym hv_send_nmi, 0x820, 32
+gensym hv_glue_internals, 0x840, 30656
diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c
index 85c74ad29312..add0d71395c6 100644
--- a/arch/tile/kernel/hvglue_trace.c
+++ b/arch/tile/kernel/hvglue_trace.c
@@ -75,6 +75,7 @@
#define hv_get_ipi_pte _hv_get_ipi_pte
#define hv_set_pte_super_shift _hv_set_pte_super_shift
#define hv_console_set_ipi _hv_console_set_ipi
+#define hv_send_nmi _hv_send_nmi
#include <hv/hypervisor.h>
#undef hv_init
#undef hv_install_context
@@ -134,6 +135,7 @@
#undef hv_get_ipi_pte
#undef hv_set_pte_super_shift
#undef hv_console_set_ipi
+#undef hv_send_nmi
/*
* Provide macros based on <linux/syscalls.h> to provide a wrapper
@@ -264,3 +266,5 @@ HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa,
HV_VirtAddr, tlb_va, unsigned long, tlb_length,
unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
HV_Remote_ASID*, asids, int, asidcount)
+HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info,
+ __hv64, flags)
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 5b67efcecabd..800b91d3f9dc 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -515,6 +515,10 @@ intvec_\vecname:
.ifc \c_routine, handle_perf_interrupt
mfspr r2, AUX_PERF_COUNT_STS
.endif
+ .ifc \c_routine, do_nmi
+ mfspr r2, SPR_SYSTEM_SAVE_K_2 /* nmi type */
+ .else
+ .endif
.endif
.endif
.endif
@@ -1571,3 +1575,5 @@ intrpt_start:
/* Synthetic interrupt delivered only by the simulator */
int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
+ /* Synthetic interrupt delivered by hv */
+ int_hand INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index b403c2e3e263..a45213781ad0 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -27,6 +27,7 @@
#include <linux/kernel.h>
#include <linux/tracehook.h>
#include <linux/signal.h>
+#include <linux/delay.h>
#include <linux/context_tracking.h>
#include <asm/stack.h>
#include <asm/switch_to.h>
@@ -132,7 +133,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
(CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long));
callee_regs[0] = sp; /* r30 = function */
callee_regs[1] = arg; /* r31 = arg */
- childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
p->thread.pc = (unsigned long) ret_from_kernel_thread;
return 0;
}
@@ -546,31 +546,141 @@ void exit_thread(void)
#endif
}
-void show_regs(struct pt_regs *regs)
+void tile_show_regs(struct pt_regs *regs)
{
- struct task_struct *tsk = validate_current();
int i;
-
- if (tsk != &corrupt_current)
- show_regs_print_info(KERN_ERR);
#ifdef __tilegx__
for (i = 0; i < 17; i++)
- pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+ pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
i, regs->regs[i], i+18, regs->regs[i+18],
i+36, regs->regs[i+36]);
- pr_err(" r17: " REGFMT " r35: " REGFMT " tp : " REGFMT "\n",
+ pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n",
regs->regs[17], regs->regs[35], regs->tp);
- pr_err(" sp : " REGFMT " lr : " REGFMT "\n", regs->sp, regs->lr);
+ pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
#else
for (i = 0; i < 13; i++)
- pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+ pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
+ " r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
i, regs->regs[i], i+14, regs->regs[i+14],
i+27, regs->regs[i+27], i+40, regs->regs[i+40]);
- pr_err(" r13: " REGFMT " tp : " REGFMT " sp : " REGFMT " lr : " REGFMT "\n",
+ pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
regs->regs[13], regs->tp, regs->sp, regs->lr);
#endif
- pr_err(" pc : " REGFMT " ex1: %ld faultnum: %ld\n",
- regs->pc, regs->ex1, regs->faultnum);
+ pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld flags:%s%s%s%s\n",
+ regs->pc, regs->ex1, regs->faultnum,
+ is_compat_task() ? " compat" : "",
+ (regs->flags & PT_FLAGS_DISABLE_IRQ) ? " noirq" : "",
+ !(regs->flags & PT_FLAGS_CALLER_SAVES) ? " nocallersave" : "",
+ (regs->flags & PT_FLAGS_RESTORE_REGS) ? " restoreregs" : "");
+}
+
+void show_regs(struct pt_regs *regs)
+{
+ struct KBacktraceIterator kbt;
+
+ show_regs_print_info(KERN_DEFAULT);
+ tile_show_regs(regs);
+
+ KBacktraceIterator_init(&kbt, NULL, regs);
+ tile_show_stack(&kbt);
+}
+
+/* To ensure stack dumps on different tiles occur one at a time. */
+static DEFINE_SPINLOCK(backtrace_lock);
+/* To ensure no backtrace starts before all of the stack dumps are done. */
+static atomic_t backtrace_cpus;
+/* The cpu mask used to avoid reentrance. */
+static struct cpumask backtrace_mask;
- dump_stack_regs(regs);
+void do_nmi_dump_stack(struct pt_regs *regs)
+{
+ int is_idle = is_idle_task(current) && !in_interrupt();
+ int cpu;
+
+ nmi_enter();
+ cpu = smp_processor_id();
+ if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
+ goto done;
+
+ spin_lock(&backtrace_lock);
+ if (is_idle)
+ pr_info("CPU: %d idle\n", cpu);
+ else
+ show_regs(regs);
+ spin_unlock(&backtrace_lock);
+ atomic_dec(&backtrace_cpus);
+done:
+ nmi_exit();
+}
+
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self)
+{
+ struct cpumask mask;
+ HV_Coord tile;
+ unsigned int timeout;
+ int cpu;
+ int ongoing;
+ HV_NMI_Info info[NR_CPUS];
+
+ ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
+ if (ongoing != 0) {
+ pr_err("Trying to do all-cpu backtrace.\n");
+ pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
+ ongoing);
+ if (self) {
+ pr_err("Reporting the stack on this cpu only.\n");
+ dump_stack();
+ }
+ return;
+ }
+
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ cpumask_copy(&backtrace_mask, &mask);
+
+ /* Backtrace for myself first. */
+ if (self)
+ dump_stack();
+
+ /* Tentatively dump stack on remote tiles via NMI. */
+ timeout = 100;
+ while (!cpumask_empty(&mask) && timeout) {
+ for_each_cpu(cpu, &mask) {
+ tile.x = cpu_x(cpu);
+ tile.y = cpu_y(cpu);
+ info[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);
+ if (info[cpu].result == HV_NMI_RESULT_OK)
+ cpumask_clear_cpu(cpu, &mask);
+ }
+
+ mdelay(10);
+ timeout--;
+ }
+
+ /* Warn about cpus stuck in ICS and decrement their counts here. */
+ if (!cpumask_empty(&mask)) {
+ for_each_cpu(cpu, &mask) {
+ switch (info[cpu].result) {
+ case HV_NMI_RESULT_FAIL_ICS:
+ pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
+ cpu, info[cpu].pc);
+ break;
+ case HV_NMI_RESULT_FAIL_HV:
+ pr_warn("Skipping stack dump of cpu %d in hypervisor\n",
+ cpu);
+ break;
+ case HV_ENOSYS:
+ pr_warn("Hypervisor too old to allow remote stack dumps.\n");
+ goto skip_for_each;
+ default: /* should not happen */
+ pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
+ cpu, info[cpu].result, info[cpu].pc);
+ break;
+ }
+ }
+skip_for_each:
+ atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
+ }
}
+#endif /* __tilegx__ */
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index d366675e4bf8..99c9ff87e018 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -71,7 +71,7 @@ static unsigned long __initdata node_percpu[MAX_NUMNODES];
* per-CPU stack and boot info.
*/
DEFINE_PER_CPU(unsigned long, boot_sp) =
- (unsigned long)init_stack + THREAD_SIZE;
+ (unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA;
#ifdef CONFIG_SMP
DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index c42dce50acd8..35d34635e4f1 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -23,6 +23,7 @@
#include <linux/mmzone.h>
#include <linux/dcache.h>
#include <linux/fs.h>
+#include <linux/hardirq.h>
#include <linux/string.h>
#include <asm/backtrace.h>
#include <asm/page.h>
@@ -109,7 +110,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
if (kbt->verbose)
pr_err(" <%s while in user mode>\n", fault);
} else {
- if (kbt->verbose)
+ if (kbt->verbose && (p->pc != 0 || p->sp != 0 || p->ex1 != 0))
pr_err(" (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
p->pc, p->sp, p->ex1);
return NULL;
@@ -119,10 +120,12 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
return p;
}
-/* Is the pc pointing to a sigreturn trampoline? */
-static int is_sigreturn(unsigned long pc)
+/* Is the iterator pointing to a sigreturn trampoline? */
+static int is_sigreturn(struct KBacktraceIterator *kbt)
{
- return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn));
+ return kbt->task->mm &&
+ (kbt->it.pc == ((ulong)kbt->task->mm->context.vdso_base +
+ (ulong)&__vdso_rt_sigreturn));
}
/* Return a pt_regs pointer for a valid signal handler frame */
@@ -131,7 +134,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
{
BacktraceIterator *b = &kbt->it;
- if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET &&
+ if (is_sigreturn(kbt) && b->sp < PAGE_OFFSET &&
b->sp % sizeof(long) == 0) {
int retval;
pagefault_disable();
@@ -151,11 +154,6 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
return NULL;
}
-static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
-{
- return is_sigreturn(kbt->it.pc);
-}
-
static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
{
struct pt_regs *p;
@@ -178,7 +176,7 @@ static int KBacktraceIterator_next_item_inclusive(
{
for (;;) {
do {
- if (!KBacktraceIterator_is_sigreturn(kbt))
+ if (!is_sigreturn(kbt))
return KBT_ONGOING;
} while (backtrace_next(&kbt->it));
@@ -357,51 +355,50 @@ static void describe_addr(struct KBacktraceIterator *kbt,
*/
static bool start_backtrace(void)
{
- if (current->thread.in_backtrace) {
+ if (current_thread_info()->in_backtrace) {
pr_err("Backtrace requested while in backtrace!\n");
return false;
}
- current->thread.in_backtrace = true;
+ current_thread_info()->in_backtrace = true;
return true;
}
static void end_backtrace(void)
{
- current->thread.in_backtrace = false;
+ current_thread_info()->in_backtrace = false;
}
/*
* This method wraps the backtracer's more generic support.
* It is only invoked from the architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
*/
-void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
+void tile_show_stack(struct KBacktraceIterator *kbt)
{
int i;
int have_mmap_sem = 0;
if (!start_backtrace())
return;
- if (headers) {
- /*
- * Add a blank line since if we are called from panic(),
- * then bust_spinlocks() spit out a space in front of us
- * and it will mess up our KERN_ERR.
- */
- pr_err("Starting stack dump of tid %d, pid %d (%s) on cpu %d at cycle %lld\n",
- kbt->task->pid, kbt->task->tgid, kbt->task->comm,
- raw_smp_processor_id(), get_cycles());
- }
kbt->verbose = 1;
i = 0;
for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
char namebuf[KSYM_NAME_LEN+100];
unsigned long address = kbt->it.pc;
- /* Try to acquire the mmap_sem as we pass into userspace. */
- if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
+ /*
+ * Try to acquire the mmap_sem as we pass into userspace.
+ * If we're in an interrupt context, don't even try, since
+ * it's not safe to call e.g. d_path() from an interrupt, as it
+ * takes spinlocks without disabling interrupts.
+ * Note we test "kbt->task == current", not "kbt->is_current",
+ * since we're checking that "current" will work in d_path().
+ */
+ if (kbt->task == current && address < PAGE_OFFSET &&
+ !have_mmap_sem && kbt->task->mm && !in_interrupt()) {
have_mmap_sem =
down_read_trylock(&kbt->task->mm->mmap_sem);
+ }
describe_addr(kbt, address, have_mmap_sem,
namebuf, sizeof(namebuf));
@@ -416,24 +413,12 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
}
if (kbt->end == KBT_LOOP)
pr_err("Stack dump stopped; next frame identical to this one\n");
- if (headers)
- pr_err("Stack dump complete\n");
if (have_mmap_sem)
up_read(&kbt->task->mm->mmap_sem);
end_backtrace();
}
EXPORT_SYMBOL(tile_show_stack);
-
-/* This is called from show_regs() and _dump_stack() */
-void dump_stack_regs(struct pt_regs *regs)
-{
- struct KBacktraceIterator kbt;
- KBacktraceIterator_init(&kbt, NULL, regs);
- tile_show_stack(&kbt, 1);
-}
-EXPORT_SYMBOL(dump_stack_regs);
-
static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
ulong pc, ulong lr, ulong sp, ulong r52)
{
@@ -445,11 +430,15 @@ static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
return regs;
}
-/* This is called from dump_stack() and just converts to pt_regs */
+/* Deprecated function currently only used by kernel_double_fault(). */
void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
{
+ struct KBacktraceIterator kbt;
struct pt_regs regs;
- dump_stack_regs(regs_to_pt_regs(&regs, pc, lr, sp, r52));
+
+ regs_to_pt_regs(&regs, pc, lr, sp, r52);
+ KBacktraceIterator_init(&kbt, NULL, &regs);
+ tile_show_stack(&kbt);
}
/* This is called from KBacktraceIterator_init_current() */
@@ -461,22 +450,30 @@ void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc,
regs_to_pt_regs(&regs, pc, lr, sp, r52));
}
-/* This is called only from kernel/sched/core.c, with esp == NULL */
+/*
+ * Called from sched_show_task() with task != NULL, or dump_stack()
+ * with task == NULL. The esp argument is always NULL.
+ */
void show_stack(struct task_struct *task, unsigned long *esp)
{
struct KBacktraceIterator kbt;
- if (task == NULL || task == current)
+ if (task == NULL || task == current) {
KBacktraceIterator_init_current(&kbt);
- else
+ KBacktraceIterator_next(&kbt); /* don't show first frame */
+ } else {
KBacktraceIterator_init(&kbt, task, NULL);
- tile_show_stack(&kbt, 0);
+ }
+ tile_show_stack(&kbt);
}
#ifdef CONFIG_STACKTRACE
/* Support generic Linux stack API too */
-void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+static void save_stack_trace_common(struct task_struct *task,
+ struct pt_regs *regs,
+ bool user,
+ struct stack_trace *trace)
{
struct KBacktraceIterator kbt;
int skip = trace->skip;
@@ -484,31 +481,57 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
if (!start_backtrace())
goto done;
- if (task == NULL || task == current)
+ if (regs != NULL) {
+ KBacktraceIterator_init(&kbt, NULL, regs);
+ } else if (task == NULL || task == current) {
KBacktraceIterator_init_current(&kbt);
- else
+ skip++; /* don't show KBacktraceIterator_init_current */
+ } else {
KBacktraceIterator_init(&kbt, task, NULL);
+ }
for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) {
if (skip) {
--skip;
continue;
}
- if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET)
+ if (i >= trace->max_entries ||
+ (!user && kbt.it.pc < PAGE_OFFSET))
break;
trace->entries[i++] = kbt.it.pc;
}
end_backtrace();
done:
+ if (i < trace->max_entries)
+ trace->entries[i++] = ULONG_MAX;
trace->nr_entries = i;
}
+
+void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+{
+ save_stack_trace_common(task, NULL, false, trace);
+}
EXPORT_SYMBOL(save_stack_trace_tsk);
void save_stack_trace(struct stack_trace *trace)
{
- save_stack_trace_tsk(NULL, trace);
+ save_stack_trace_common(NULL, NULL, false, trace);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+ save_stack_trace_common(NULL, regs, false, trace);
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+ /* Trace user stack if we are not a kernel thread. */
+ if (current->mm)
+ save_stack_trace_common(NULL, task_pt_regs(current),
+ true, trace);
+ else if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
#endif
/* In entry.S */
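The save_stack_trace_common() refactor above keeps the generic <linux/stacktrace.h> contract intact: callers pass an entries[] buffer, max_entries and a skip count, and get nr_entries program counters back. A hedged usage sketch against that API as it existed in this era (log_current_stack() is an invented caller, not part of the patch):

#include <linux/kernel.h>
#include <linux/stacktrace.h>

static void log_current_stack(void)
{
	unsigned long entries[16];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
		.skip		= 1,	/* drop this function's own frame */
	};

	save_stack_trace(&trace);	/* current task, kernel PCs only */
	print_stack_trace(&trace, 0);	/* 0 = no extra indentation */
}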
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 312fc134c1cb..0011a9ff0525 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -395,6 +395,21 @@ done:
exception_exit(prev_state);
}
+void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
+{
+ switch (reason) {
+ case TILE_NMI_DUMP_STACK:
+ do_nmi_dump_stack(regs);
+ break;
+ default:
+ panic("Unexpected do_nmi type %ld", reason);
+ return;
+ }
+}
+
+/* Deprecated function currently only used here. */
+extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
{
_dump_stack(dummy, pc, lr, sp, r52);
diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c
index 8bb21eda07d8..e63310c49742 100644
--- a/arch/tile/kernel/vdso/vgettimeofday.c
+++ b/arch/tile/kernel/vdso/vgettimeofday.c
@@ -67,7 +67,7 @@ static inline int do_realtime(struct vdso_data *vdso, struct timespec *ts)
u64 ns;
do {
- count = read_seqcount_begin(&vdso->tb_seq);
+ count = raw_read_seqcount_begin(&vdso->tb_seq);
ts->tv_sec = vdso->wall_time_sec;
ns = vdso->wall_time_snsec;
ns += vgetsns(vdso);
@@ -86,7 +86,7 @@ static inline int do_monotonic(struct vdso_data *vdso, struct timespec *ts)
u64 ns;
do {
- count = read_seqcount_begin(&vdso->tb_seq);
+ count = raw_read_seqcount_begin(&vdso->tb_seq);
ts->tv_sec = vdso->monotonic_time_sec;
ns = vdso->monotonic_time_snsec;
ns += vgetsns(vdso);
@@ -105,7 +105,7 @@ static inline int do_realtime_coarse(struct vdso_data *vdso,
unsigned count;
do {
- count = read_seqcount_begin(&vdso->tb_seq);
+ count = raw_read_seqcount_begin(&vdso->tb_seq);
ts->tv_sec = vdso->wall_time_coarse_sec;
ts->tv_nsec = vdso->wall_time_coarse_nsec;
} while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
@@ -119,7 +119,7 @@ static inline int do_monotonic_coarse(struct vdso_data *vdso,
unsigned count;
do {
- count = read_seqcount_begin(&vdso->tb_seq);
+ count = raw_read_seqcount_begin(&vdso->tb_seq);
ts->tv_sec = vdso->monotonic_time_coarse_sec;
ts->tv_nsec = vdso->monotonic_time_coarse_nsec;
} while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
@@ -137,7 +137,7 @@ struct syscall_return_value __vdso_gettimeofday(struct timeval *tv,
/* The use of the timezone is obsolete, normally tz is NULL. */
if (unlikely(tz != NULL)) {
do {
- count = read_seqcount_begin(&vdso->tz_seq);
+ count = raw_read_seqcount_begin(&vdso->tz_seq);
tz->tz_minuteswest = vdso->tz_minuteswest;
tz->tz_dsttime = vdso->tz_dsttime;
} while (unlikely(read_seqcount_retry(&vdso->tz_seq, count)));
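Every hunk in this file makes the same substitution: read_seqcount_begin() may pull in lockdep instrumentation, which must not run from user-mapped vDSO text, while raw_read_seqcount_begin() performs the identical begin/retry protocol without the checks. The canonical read-side loop, sketched with an illustrative helper (read_snapshot() is not a kernel function):

#include <linux/seqlock.h>
#include <linux/types.h>

static u64 read_snapshot(const seqcount_t *seq, const u64 *value)
{
	unsigned int count;
	u64 v;

	do {
		count = raw_read_seqcount_begin(seq);	/* lockdep-free begin */
		v = *value;				/* read protected data */
	} while (read_seqcount_retry(seq, count));	/* retry if a writer ran */

	return v;
}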
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 82733c87d67e..9d171ca4302c 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -18,8 +18,6 @@
/* arch/tile/lib/usercopy.S */
#include <linux/uaccess.h>
-EXPORT_SYMBOL(strnlen_user_asm);
-EXPORT_SYMBOL(strncpy_from_user_asm);
EXPORT_SYMBOL(clear_user_asm);
EXPORT_SYMBOL(flush_user_asm);
EXPORT_SYMBOL(finv_user_asm);
@@ -28,7 +26,6 @@ EXPORT_SYMBOL(finv_user_asm);
#include <linux/kernel.h>
#include <asm/processor.h>
EXPORT_SYMBOL(current_text_addr);
-EXPORT_SYMBOL(dump_stack);
/* arch/tile/kernel/head.S */
EXPORT_SYMBOL(empty_zero_page);
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index b34f79aada48..88c2a53362e7 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock);
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
u32 iterations = 0;
- while (arch_spin_is_locked(lock))
+ int curr = READ_ONCE(lock->current_ticket);
+ int next = READ_ONCE(lock->next_ticket);
+
+ /* Return immediately if unlocked. */
+ if (next == curr)
+ return;
+
+ /* Wait until the current locker has released the lock. */
+ do {
delay_backoff(iterations++);
+ } while (READ_ONCE(lock->current_ticket) == curr);
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
index d6fb9581e980..c8d1f94ff1fe 100644
--- a/arch/tile/lib/spinlock_64.c
+++ b/arch/tile/lib/spinlock_64.c
@@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock);
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
u32 iterations = 0;
- while (arch_spin_is_locked(lock))
+ u32 val = READ_ONCE(lock->lock);
+ u32 curr = arch_spin_current(val);
+
+ /* Return immediately if unlocked. */
+ if (arch_spin_next(val) == curr)
+ return;
+
+ /* Wait until the current locker has released the lock. */
+ do {
delay_backoff(iterations++);
+ } while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
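Both unlock_wait() rewrites lean on the ticket encoding: when next equals current the lock is free, and otherwise it is enough to spin until the ticket observed as current changes, since that means the holder seen on entry has released the lock; later acquirers are of no interest to unlock_wait(). A reduced sketch over a hypothetical ticket pair (struct ticket_lock and its fields are illustrative, not tile's layout):

#include <linux/compiler.h>	/* READ_ONCE() */
#include <asm/processor.h>	/* cpu_relax() */

struct ticket_lock {
	unsigned short current_ticket;
	unsigned short next_ticket;
};

static void ticket_unlock_wait(struct ticket_lock *lock)
{
	unsigned short curr = READ_ONCE(lock->current_ticket);

	if (READ_ONCE(lock->next_ticket) == curr)
		return;		/* unlocked: nothing to wait for */

	while (READ_ONCE(lock->current_ticket) == curr)
		cpu_relax();	/* only the observed holder matters */
}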
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
index 1bc162224638..db93ad5fae25 100644
--- a/arch/tile/lib/usercopy_32.S
+++ b/arch/tile/lib/usercopy_32.S
@@ -20,52 +20,6 @@
/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
/*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
- { bz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */
-1: { lb_u r4, r0; addi r1, r1, -1 }
- bz r4, 2f
- { bnzt r1, 1b; addi r0, r0, 1 }
-2: { sub r0, r0, r3; jrp lr }
- STD_ENDPROC(strnlen_user_asm)
- .pushsection .fixup,"ax"
-strnlen_user_fault:
- { move r0, zero; jrp lr }
- ENDPROC(strnlen_user_fault)
- .section __ex_table,"a"
- .align 4
- .word 1b, strnlen_user_fault
- .popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2. On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
- { bz r2, 2f; move r3, r0 }
-1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
- { sb r0, r4; addi r0, r0, 1 }
- bz r4, 2f
- bnzt r2, 1b
- { sub r0, r0, r3; jrp lr }
-2: addi r0, r0, -1 /* don't count the trailing NUL */
- { sub r0, r0, r3; jrp lr }
- STD_ENDPROC(strncpy_from_user_asm)
- .pushsection .fixup,"ax"
-strncpy_from_user_fault:
- { movei r0, -EFAULT; jrp lr }
- ENDPROC(strncpy_from_user_fault)
- .section __ex_table,"a"
- .align 4
- .word 1b, strncpy_from_user_fault
- .popsection
-
-/*
* clear_user_asm takes the user target address in r0 and the
* number of bytes to zero in r1.
* It returns the number of uncopiable bytes (hopefully zero) in r0.
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
index b3b31a3306f8..9322dc551e91 100644
--- a/arch/tile/lib/usercopy_64.S
+++ b/arch/tile/lib/usercopy_64.S
@@ -20,52 +20,6 @@
/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
/*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
- { beqz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */
-1: { ld1u r4, r0; addi r1, r1, -1 }
- beqz r4, 2f
- { bnezt r1, 1b; addi r0, r0, 1 }
-2: { sub r0, r0, r3; jrp lr }
- STD_ENDPROC(strnlen_user_asm)
- .pushsection .fixup,"ax"
-strnlen_user_fault:
- { move r0, zero; jrp lr }
- ENDPROC(strnlen_user_fault)
- .section __ex_table,"a"
- .align 8
- .quad 1b, strnlen_user_fault
- .popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2. On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
- { beqz r2, 2f; move r3, r0 }
-1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
- { st1 r0, r4; addi r0, r0, 1 }
- beqz r4, 2f
- bnezt r2, 1b
- { sub r0, r0, r3; jrp lr }
-2: addi r0, r0, -1 /* don't count the trailing NUL */
- { sub r0, r0, r3; jrp lr }
- STD_ENDPROC(strncpy_from_user_asm)
- .pushsection .fixup,"ax"
-strncpy_from_user_fault:
- { movei r0, -EFAULT; jrp lr }
- ENDPROC(strncpy_from_user_fault)
- .section __ex_table,"a"
- .align 8
- .quad 1b, strncpy_from_user_fault
- .popsection
-
-/*
* clear_user_asm takes the user target address in r0 and the
* number of bytes to zero in r1.
* It returns the number of uncopiable bytes (hopefully zero) in r0.
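With the tile-specific byte loops removed, the architecture can fall back to the generic strncpy_from_user()/strnlen_user() implementations in lib/, which preserve the caller-visible contract the deleted comments describe. A hedged usage sketch of that contract (copy_name() is an invented helper):

#include <linux/errno.h>
#include <linux/uaccess.h>

static long copy_name(char *dst, const char __user *src, long size)
{
	long len = strncpy_from_user(dst, src, size);

	if (len < 0)
		return len;		/* -EFAULT on a faulting pointer */
	if (len == size)
		return -ENAMETOOLONG;	/* no NUL found within the bound */
	return len;			/* length, excluding the NUL */
}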
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 3f4f58d34a92..13eac59bf16a 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -699,11 +699,10 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
* interrupt away appropriately and return immediately. We can't do
* page faults for user code while in kernel mode.
*/
-void do_page_fault(struct pt_regs *regs, int fault_num,
- unsigned long address, unsigned long write)
+static inline void __do_page_fault(struct pt_regs *regs, int fault_num,
+ unsigned long address, unsigned long write)
{
int is_page_fault;
- enum ctx_state prev_state = exception_enter();
#ifdef CONFIG_KPROBES
/*
@@ -713,7 +712,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
*/
if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
regs->faultnum, SIGSEGV) == NOTIFY_STOP)
- goto done;
+ return;
#endif
#ifdef __tilegx__
@@ -835,18 +834,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
async->is_fault = is_page_fault;
async->is_write = write;
async->address = address;
- goto done;
+ return;
}
}
#endif
handle_page_fault(regs, fault_num, is_page_fault, address, write);
+}
-done:
+void do_page_fault(struct pt_regs *regs, int fault_num,
+ unsigned long address, unsigned long write)
+{
+ enum ctx_state prev_state = exception_enter();
+ __do_page_fault(regs, fault_num, address, write);
exception_exit(prev_state);
}
-
#if CHIP_HAS_TILE_DMA()
/*
* This routine effectively re-issues asynchronous page faults
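The do_page_fault() change above is a mechanical split worth naming as a pattern: move the body into a static helper so early exits become plain returns, and keep the context-tracking bookkeeping in a thin wrapper so exception_exit() runs on every path. A generic sketch of the shape (handle_event()/__handle_event() and struct event are invented names):

#include <linux/context_tracking.h>

struct event;				/* illustrative opaque argument */

static void __handle_event(struct event *ev)
{
	if (!ev)
		return;			/* early exits need no goto */
	/* ... the actual work ... */
}

void handle_event(struct event *ev)
{
	enum ctx_state prev_state = exception_enter();

	__handle_event(ev);
	exception_exit(prev_state);	/* reached on every return path */
}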
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index 9b90fdc4b151..f6b911cc3923 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -185,9 +185,9 @@ static int hostaudio_open(struct inode *inode, struct file *file)
int ret;
#ifdef DEBUG
- kparam_block_sysfs_write(dsp);
+ kernel_param_lock(THIS_MODULE);
printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp);
- kparam_unblock_sysfs_write(dsp);
+ kernel_param_unlock(THIS_MODULE);
#endif
state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL);
@@ -199,11 +199,11 @@ static int hostaudio_open(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE)
w = 1;
- kparam_block_sysfs_write(dsp);
+ kernel_param_lock(THIS_MODULE);
mutex_lock(&hostaudio_mutex);
ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
mutex_unlock(&hostaudio_mutex);
- kparam_unblock_sysfs_write(dsp);
+ kernel_param_unlock(THIS_MODULE);
if (ret < 0) {
kfree(state);
@@ -260,17 +260,17 @@ static int hostmixer_open_mixdev(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE)
w = 1;
- kparam_block_sysfs_write(mixer);
+ kernel_param_lock(THIS_MODULE);
mutex_lock(&hostaudio_mutex);
ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0);
mutex_unlock(&hostaudio_mutex);
- kparam_unblock_sysfs_write(mixer);
+ kernel_param_unlock(THIS_MODULE);
if (ret < 0) {
- kparam_block_sysfs_write(dsp);
+ kernel_param_lock(THIS_MODULE);
printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', "
"err = %d\n", dsp, -ret);
- kparam_unblock_sysfs_write(dsp);
+ kernel_param_unlock(THIS_MODULE);
kfree(state);
return ret;
}
@@ -326,10 +326,10 @@ MODULE_LICENSE("GPL");
static int __init hostaudio_init_module(void)
{
- __kernel_param_lock();
+ kernel_param_lock(THIS_MODULE);
printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n",
dsp, mixer);
- __kernel_param_unlock();
+ kernel_param_unlock(THIS_MODULE);
module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
if (module_data.dev_audio < 0) {
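All of these conversions follow one rule from the reworked param locking: a read of a module parameter that sysfs may rewrite concurrently is bracketed by kernel_param_lock()/kernel_param_unlock() on the owning module, replacing the removed per-parameter kparam_block_sysfs_write() helpers. A minimal sketch of the pattern (the 'path' parameter is invented):

#include <linux/module.h>
#include <linux/moduleparam.h>

static char *path = "/dev/dsp";
module_param(path, charp, 0644);	/* writable through sysfs */

static void use_path(void)
{
	kernel_param_lock(THIS_MODULE);	/* pin 'path' against sysfs writes */
	pr_info("using %s\n", path);
	kernel_param_unlock(THIS_MODULE);
}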
diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config
new file mode 100644
index 000000000000..d9fc7139fd46
--- /dev/null
+++ b/arch/x86/configs/xen.config
@@ -0,0 +1,28 @@
+# Globally required x86-specific options.
+# On 32-bit, HIGHMEM4G is not allowed.
+CONFIG_HIGHMEM64G=y
+CONFIG_64BIT=y
+
+# These enable us to allow some of the
+# not-so-generic options below
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_X86_MCE=y
+CONFIG_ACPI_PROCESSOR=y
+CONFIG_CPU_FREQ=y
+
+# x86 xen specific config options
+CONFIG_XEN_PVH=y
+CONFIG_XEN_MAX_DOMAIN_MEMORY=500
+CONFIG_XEN_SAVE_RESTORE=y
+# CONFIG_XEN_DEBUG_FS is not set
+CONFIG_XEN_MCE_LOG=y
+CONFIG_XEN_ACPI_PROCESSOR=m
+# x86 specific backend drivers
+CONFIG_XEN_PCIDEV_BACKEND=m
+# x86 specific frontend drivers
+CONFIG_XEN_PCIDEV_FRONTEND=m
+# Depends on MEMORY_HOTPLUG; arm64 doesn't enable this yet.
+# Move to the generic config if it ever does.
+CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2bfc8a7c88c1..dccad38b59a8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1537,7 +1537,7 @@ static void __exit aesni_exit(void)
crypto_fpu_exit();
}
-module_init(aesni_init);
+late_initcall(aesni_init);
module_exit(aesni_exit);
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index a4f62e6f2db2..03d518e499a6 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -297,7 +297,7 @@ static int mmu_audit_set(const char *val, const struct kernel_param *kp)
return 0;
}
-static struct kernel_param_ops audit_param_ops = {
+static const struct kernel_param_ops audit_param_ops = {
.set = mmu_audit_set,
.get = param_get_bool,
};
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 7488cafab955..020c101c255f 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -104,7 +104,7 @@ static int param_set_local64(const char *val, const struct kernel_param *kp)
return 0;
}
-static struct kernel_param_ops param_ops_local64 = {
+static const struct kernel_param_ops param_ops_local64 = {
.get = param_get_local64,
.set = param_set_local64,
};
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 55b6f15dac90..dda653ce7b24 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -326,8 +326,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
goto out_put_request;
}
- ret = -EFAULT;
- if (blk_fill_sghdr_rq(q, rq, hdr, mode))
+ ret = blk_fill_sghdr_rq(q, rq, hdr, mode);
+ if (ret < 0)
goto out_free_cdb;
ret = 0;
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index fcb7807ea8b7..10561ce16ed1 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -660,8 +660,10 @@ static int add_region_before(u64 start, u64 end, u8 space_id,
return -ENOMEM;
error = request_range(start, end, space_id, flags, desc);
- if (error)
+ if (error) {
+ kfree(reg);
return error;
+ }
reg->start = start;
reg->end = end;
diff --git a/drivers/base/property.c b/drivers/base/property.c
index e645852396ba..f3f6d167f3f1 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -129,9 +129,9 @@ EXPORT_SYMBOL_GPL(device_property_present);
bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
{
if (is_of_node(fwnode))
- return of_property_read_bool(of_node(fwnode), propname);
+ return of_property_read_bool(to_of_node(fwnode), propname);
else if (is_acpi_node(fwnode))
- return !acpi_dev_prop_get(acpi_node(fwnode), propname, NULL);
+ return !acpi_dev_prop_get(to_acpi_node(fwnode), propname, NULL);
return !!pset_prop_get(to_pset(fwnode), propname);
}
@@ -286,10 +286,10 @@ EXPORT_SYMBOL_GPL(device_property_read_string);
({ \
int _ret_; \
if (is_of_node(_fwnode_)) \
- _ret_ = OF_DEV_PROP_READ_ARRAY(of_node(_fwnode_), _propname_, \
+ _ret_ = OF_DEV_PROP_READ_ARRAY(to_of_node(_fwnode_), _propname_, \
_type_, _val_, _nval_); \
else if (is_acpi_node(_fwnode_)) \
- _ret_ = acpi_dev_prop_read(acpi_node(_fwnode_), _propname_, \
+ _ret_ = acpi_dev_prop_read(to_acpi_node(_fwnode_), _propname_, \
_proptype_, _val_, _nval_); \
else \
_ret_ = pset_prop_read_array(to_pset(_fwnode_), _propname_, \
@@ -425,11 +425,11 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode,
{
if (is_of_node(fwnode))
return val ?
- of_property_read_string_array(of_node(fwnode), propname,
- val, nval) :
- of_property_count_strings(of_node(fwnode), propname);
+ of_property_read_string_array(to_of_node(fwnode),
+ propname, val, nval) :
+ of_property_count_strings(to_of_node(fwnode), propname);
else if (is_acpi_node(fwnode))
- return acpi_dev_prop_read(acpi_node(fwnode), propname,
+ return acpi_dev_prop_read(to_acpi_node(fwnode), propname,
DEV_PROP_STRING, val, nval);
return pset_prop_read_array(to_pset(fwnode), propname,
@@ -456,9 +456,9 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode,
const char *propname, const char **val)
{
if (is_of_node(fwnode))
- return of_property_read_string(of_node(fwnode), propname, val);
+ return of_property_read_string(to_of_node(fwnode), propname, val);
else if (is_acpi_node(fwnode))
- return acpi_dev_prop_read(acpi_node(fwnode), propname,
+ return acpi_dev_prop_read(to_acpi_node(fwnode), propname,
DEV_PROP_STRING, val, 1);
return -ENXIO;
@@ -476,13 +476,13 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev,
if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
struct device_node *node;
- node = of_get_next_available_child(dev->of_node, of_node(child));
+ node = of_get_next_available_child(dev->of_node, to_of_node(child));
if (node)
return &node->fwnode;
} else if (IS_ENABLED(CONFIG_ACPI)) {
struct acpi_device *node;
- node = acpi_get_next_child(dev, acpi_node(child));
+ node = acpi_get_next_child(dev, to_acpi_node(child));
if (node)
return acpi_fwnode_handle(node);
}
@@ -501,7 +501,7 @@ EXPORT_SYMBOL_GPL(device_get_next_child_node);
void fwnode_handle_put(struct fwnode_handle *fwnode)
{
if (is_of_node(fwnode))
- of_node_put(of_node(fwnode));
+ of_node_put(to_of_node(fwnode));
}
EXPORT_SYMBOL_GPL(fwnode_handle_put);
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 6f9b7534928e..69de41a87b74 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -99,7 +99,7 @@ static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}
-static struct kernel_param_ops null_queue_mode_param_ops = {
+static const struct kernel_param_ops null_queue_mode_param_ops = {
.set = null_set_queue_mode,
.get = param_get_int,
};
@@ -127,7 +127,7 @@ static int null_set_irqmode(const char *str, const struct kernel_param *kp)
NULL_IRQ_TIMER);
}
-static struct kernel_param_ops null_irqmode_param_ops = {
+static const struct kernel_param_ops null_irqmode_param_ops = {
.set = null_set_irqmode,
.get = param_get_int,
};
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index e5112714188f..34338d7438f5 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -193,6 +193,13 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
return 0;
}
+static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
+ struct nvme_queue *nvmeq = hctx->driver_data;
+
+ nvmeq->tags = NULL;
+}
+
static int nvme_admin_init_request(void *data, struct request *req,
unsigned int hctx_idx, unsigned int rq_idx,
unsigned int numa_node)
@@ -606,7 +613,10 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
return;
}
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
- req->errors = status;
+ if (cmd_rq->ctx == CMD_CTX_CANCELLED)
+ req->errors = -EINTR;
+ else
+ req->errors = status;
} else {
req->errors = nvme_error_status(status);
}
@@ -1161,12 +1171,13 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
{
- struct nvme_command c = {
- .identify.opcode = nvme_admin_identify,
- .identify.cns = cpu_to_le32(1),
- };
+ struct nvme_command c = { };
int error;
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = cpu_to_le32(1);
+
*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
if (!*id)
return -ENOMEM;
@@ -1181,12 +1192,13 @@ int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
struct nvme_id_ns **id)
{
- struct nvme_command c = {
- .identify.opcode = nvme_admin_identify,
- .identify.nsid = cpu_to_le32(nsid),
- };
+ struct nvme_command c = { };
int error;
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.nsid = cpu_to_le32(nsid);
+
*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
if (!*id)
return -ENOMEM;
@@ -1230,14 +1242,14 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
{
- struct nvme_command c = {
- .common.opcode = nvme_admin_get_log_page,
- .common.nsid = cpu_to_le32(0xFFFFFFFF),
- .common.cdw10[0] = cpu_to_le32(
+ struct nvme_command c = { };
+ int error;
+
+ c.common.opcode = nvme_admin_get_log_page,
+ c.common.nsid = cpu_to_le32(0xFFFFFFFF),
+ c.common.cdw10[0] = cpu_to_le32(
(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
NVME_LOG_SMART),
- };
- int error;
*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
if (!*log)
@@ -1606,6 +1618,7 @@ static struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_admin_init_hctx,
+ .exit_hctx = nvme_admin_exit_hctx,
.init_request = nvme_admin_init_request,
.timeout = nvme_timeout,
};
@@ -1648,6 +1661,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
}
if (!blk_get_queue(dev->admin_q)) {
nvme_dev_remove_admin(dev);
+ dev->admin_q = NULL;
return -ENODEV;
}
} else
@@ -2349,19 +2363,20 @@ static int nvme_dev_add(struct nvme_dev *dev)
}
kfree(ctrl);
- dev->tagset.ops = &nvme_mq_ops;
- dev->tagset.nr_hw_queues = dev->online_queues - 1;
- dev->tagset.timeout = NVME_IO_TIMEOUT;
- dev->tagset.numa_node = dev_to_node(dev->dev);
- dev->tagset.queue_depth =
+ if (!dev->tagset.tags) {
+ dev->tagset.ops = &nvme_mq_ops;
+ dev->tagset.nr_hw_queues = dev->online_queues - 1;
+ dev->tagset.timeout = NVME_IO_TIMEOUT;
+ dev->tagset.numa_node = dev_to_node(dev->dev);
+ dev->tagset.queue_depth =
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
- dev->tagset.cmd_size = nvme_cmd_size(dev);
- dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
- dev->tagset.driver_data = dev;
-
- if (blk_mq_alloc_tag_set(&dev->tagset))
- return 0;
+ dev->tagset.cmd_size = nvme_cmd_size(dev);
+ dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
+ dev->tagset.driver_data = dev;
+ if (blk_mq_alloc_tag_set(&dev->tagset))
+ return 0;
+ }
schedule_work(&dev->scan_work);
return 0;
}
@@ -2734,8 +2749,10 @@ static void nvme_free_dev(struct kref *kref)
put_device(dev->device);
nvme_free_namespaces(dev);
nvme_release_instance(dev);
- blk_mq_free_tag_set(&dev->tagset);
- blk_put_queue(dev->admin_q);
+ if (dev->tagset.tags)
+ blk_mq_free_tag_set(&dev->tagset);
+ if (dev->admin_q)
+ blk_put_queue(dev->admin_q);
kfree(dev->queues);
kfree(dev->entry);
kfree(dev);
@@ -2866,6 +2883,9 @@ static int nvme_dev_start(struct nvme_dev *dev)
free_tags:
nvme_dev_remove_admin(dev);
+ blk_put_queue(dev->admin_q);
+ dev->admin_q = NULL;
+ dev->queues[0]->tags = NULL;
disable:
nvme_disable_queue(dev, 0);
nvme_dev_list_remove(dev);
@@ -2907,25 +2927,43 @@ static int nvme_dev_resume(struct nvme_dev *dev)
spin_unlock(&dev_list_lock);
} else {
nvme_unfreeze_queues(dev);
- schedule_work(&dev->scan_work);
+ nvme_dev_add(dev);
nvme_set_irq_hints(dev);
}
return 0;
}
+static void nvme_dead_ctrl(struct nvme_dev *dev)
+{
+ dev_warn(dev->dev, "Device failed to resume\n");
+ kref_get(&dev->kref);
+ if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
+ dev->instance))) {
+ dev_err(dev->dev,
+ "Failed to start controller remove task\n");
+ kref_put(&dev->kref, nvme_free_dev);
+ }
+}
+
static void nvme_dev_reset(struct nvme_dev *dev)
{
+ bool in_probe = work_busy(&dev->probe_work);
+
nvme_dev_shutdown(dev);
- if (nvme_dev_resume(dev)) {
- dev_warn(dev->dev, "Device failed to resume\n");
- kref_get(&dev->kref);
- if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
- dev->instance))) {
- dev_err(dev->dev,
- "Failed to start controller remove task\n");
- kref_put(&dev->kref, nvme_free_dev);
- }
+
+ /* Synchronize with device probe so that the work will see the failure
+ * status and exit gracefully without trying to schedule another reset */
+ flush_work(&dev->probe_work);
+
+ /* Fail this device if a reset occurred during probe, to avoid
+ * infinite initialization loops. */
+ if (in_probe) {
+ nvme_dead_ctrl(dev);
+ return;
}
+ /* Schedule device resume asynchronously so the reset work is available
+ * to clean up errors that may occur during reinitialization */
+ schedule_work(&dev->probe_work);
}
static void nvme_reset_failed_dev(struct work_struct *ws)
@@ -2957,6 +2995,7 @@ static int nvme_reset(struct nvme_dev *dev)
if (!ret) {
flush_work(&dev->reset_work);
+ flush_work(&dev->probe_work);
return 0;
}
@@ -3053,26 +3092,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void nvme_async_probe(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
- int result;
- result = nvme_dev_start(dev);
- if (result)
- goto reset;
-
- if (dev->online_queues > 1)
- result = nvme_dev_add(dev);
- if (result)
- goto reset;
-
- nvme_set_irq_hints(dev);
- return;
- reset:
- spin_lock(&dev_list_lock);
- if (!work_busy(&dev->reset_work)) {
- dev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &dev->reset_work);
- }
- spin_unlock(&dev_list_lock);
+ if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work))
+ nvme_dead_ctrl(dev);
}
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
@@ -3104,8 +3126,8 @@ static void nvme_remove(struct pci_dev *pdev)
flush_work(&dev->reset_work);
flush_work(&dev->scan_work);
device_remove_file(dev->device, &dev_attr_reset_controller);
- nvme_dev_shutdown(dev);
nvme_dev_remove(dev);
+ nvme_dev_shutdown(dev);
nvme_dev_remove_admin(dev);
device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
nvme_free_queues(dev, 0);
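The "struct nvme_command c = { };" hunks above all dodge the same compiler bug: gcc 4.4.4 miscompiles designated initializers that reach through an anonymous union, so the command is zeroed first and the union members assigned afterwards. A reduced illustration with an invented type (struct cmd is not the real nvme_command layout):

struct cmd {
	unsigned char opcode;
	union {				/* anonymous union, as in nvme_command */
		struct { unsigned int cns; } identify;
		struct { unsigned int nsid; } rw;
	};
};

static void build_identify(struct cmd *out)
{
	/* gcc-4.4.4 would miscompile: struct cmd c = { .identify.cns = 1 }; */
	struct cmd c = { };		/* zero everything first... */

	c.opcode = 0x06;		/* ...then assign through the union */
	c.identify.cns = 1;
	*out = c;
}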
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 713fc9ff1149..ced96777b677 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -84,6 +84,13 @@ MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
/*
+ * Maximum order of pages to be used for the shared ring between frontend
+ * and backend; 4KB page granularity is used.
+ */
+unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+/*
* The LRU mechanism to clean the lists of persistent grants needs to
* be executed periodically. The time interval between consecutive executions
* of the purge mechanism is set in ms.
@@ -729,7 +736,7 @@ static void xen_blkbk_unmap_and_respond(struct pending_req *req)
struct grant_page **pages = req->segments;
unsigned int invcount;
- invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages,
+ invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs,
req->unmap, req->unmap_pages);
work->data = req;
@@ -915,7 +922,7 @@ static int xen_blkbk_map_seg(struct pending_req *pending_req)
int rc;
rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
- pending_req->nr_pages,
+ pending_req->nr_segs,
(pending_req->operation != BLKIF_OP_READ));
return rc;
@@ -931,7 +938,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
int indirect_grefs, rc, n, nseg, i;
struct blkif_request_segment *segments = NULL;
- nseg = pending_req->nr_pages;
+ nseg = pending_req->nr_segs;
indirect_grefs = INDIRECT_PAGES(nseg);
BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
@@ -1251,7 +1258,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
pending_req->id = req->u.rw.id;
pending_req->operation = req_operation;
pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nseg;
+ pending_req->nr_segs = nseg;
if (req->operation != BLKIF_OP_INDIRECT) {
preq.dev = req->u.rw.handle;
@@ -1372,7 +1379,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
fail_flush:
xen_blkbk_unmap(blkif, pending_req->segments,
- pending_req->nr_pages);
+ pending_req->nr_segs);
fail_response:
/* Haven't submitted any bio's yet. */
make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
@@ -1438,6 +1445,12 @@ static int __init xen_blkif_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+ pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+ xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+ xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+ }
+
rc = xen_blkif_interface_init();
if (rc)
goto failed_init;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index f620b5d3f77c..45a044a53d1e 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -44,6 +44,7 @@
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>
+extern unsigned int xen_blkif_max_ring_order;
/*
* This is the maximum number of segments that would be allowed in indirect
* requests. This value will also be passed to the frontend.
@@ -248,7 +249,7 @@ struct backend_info;
#define PERSISTENT_GNT_WAS_ACTIVE 1
/* Number of requests that we can fit in a ring */
-#define XEN_BLKIF_REQS 32
+#define XEN_BLKIF_REQS_PER_PAGE 32
struct persistent_gnt {
struct page *page;
@@ -320,6 +321,7 @@ struct xen_blkif {
struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
+ unsigned int nr_ring_pages;
};
struct seg_buf {
@@ -343,7 +345,7 @@ struct grant_page {
struct pending_req {
struct xen_blkif *blkif;
u64 id;
- int nr_pages;
+ int nr_segs;
atomic_t pendcnt;
unsigned short operation;
int status;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 6ab69ad61ee1..deb3f001791f 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -25,6 +25,7 @@
/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)
+#define RINGREF_NAME_LEN (20)
struct backend_info {
struct xenbus_device *dev;
@@ -124,8 +125,6 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
struct xen_blkif *blkif;
- struct pending_req *req, *n;
- int i, j;
BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
@@ -151,55 +150,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
INIT_LIST_HEAD(&blkif->pending_free);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
-
- for (i = 0; i < XEN_BLKIF_REQS; i++) {
- req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
- goto fail;
- list_add_tail(&req->free_list,
- &blkif->pending_free);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
- req->segments[j] = kzalloc(sizeof(*req->segments[0]),
- GFP_KERNEL);
- if (!req->segments[j])
- goto fail;
- }
- for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
- req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
- GFP_KERNEL);
- if (!req->indirect_pages[j])
- goto fail;
- }
- }
spin_lock_init(&blkif->pending_free_lock);
init_waitqueue_head(&blkif->pending_free_wq);
init_waitqueue_head(&blkif->shutdown_wq);
return blkif;
-
-fail:
- list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
- list_del(&req->free_list);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
- if (!req->segments[j])
- break;
- kfree(req->segments[j]);
- }
- for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
- if (!req->indirect_pages[j])
- break;
- kfree(req->indirect_pages[j]);
- }
- kfree(req);
- }
-
- kmem_cache_free(xen_blkif_cachep, blkif);
-
- return ERR_PTR(-ENOMEM);
}
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
- unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+ unsigned int nr_grefs, unsigned int evtchn)
{
int err;
@@ -207,7 +166,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
if (blkif->irq)
return 0;
- err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
+ err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
&blkif->blk_ring);
if (err < 0)
return err;
@@ -217,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
{
struct blkif_sring *sring;
sring = (struct blkif_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
break;
}
default:
@@ -312,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
i++;
}
- WARN_ON(i != XEN_BLKIF_REQS);
+ WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
kmem_cache_free(xen_blkif_cachep, blkif);
}
@@ -597,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
if (err)
goto fail;
+ err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
+ xen_blkif_max_ring_order);
+ if (err)
+ pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
+
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
@@ -860,22 +824,66 @@ again:
static int connect_ring(struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
- unsigned long ring_ref;
- unsigned int evtchn;
+ unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
+ unsigned int evtchn, nr_grefs, ring_page_order;
unsigned int pers_grants;
char protocol[64] = "";
- int err;
+ struct pending_req *req, *n;
+ int err, i, j;
pr_debug("%s %s\n", __func__, dev->otherend);
- err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
- &ring_ref, "event-channel", "%u", &evtchn, NULL);
- if (err) {
- xenbus_dev_fatal(dev, err,
- "reading %s/ring-ref and event-channel",
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+ &evtchn);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/event-channel",
dev->otherend);
return err;
}
+ pr_info("event-channel %u\n", evtchn);
+
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+ &ring_page_order);
+ if (err != 1) {
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+ "%u", &ring_ref[0]);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+ dev->otherend);
+ return err;
+ }
+ nr_grefs = 1;
+ pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
+ ring_ref[0]);
+ } else {
+ unsigned int i;
+
+ if (ring_page_order > xen_blkif_max_ring_order) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
+ dev->otherend, ring_page_order,
+ xen_blkif_max_ring_order);
+ return err;
+ }
+
+ nr_grefs = 1 << ring_page_order;
+ for (i = 0; i < nr_grefs; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+ "%u", &ring_ref[i]);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/%s",
+ dev->otherend, ring_ref_name);
+ return err;
+ }
+ pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
+ }
+ }
be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
@@ -900,20 +908,55 @@ static int connect_ring(struct backend_info *be)
be->blkif->vbd.feature_gnt_persistent = pers_grants;
be->blkif->vbd.overflow_max_grants = 0;
+ be->blkif->nr_ring_pages = nr_grefs;
- pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
- ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+ pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
+ nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
pers_grants ? "persistent grants" : "");
+ for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ goto fail;
+ list_add_tail(&req->free_list, &be->blkif->pending_free);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+ req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
+ if (!req->segments[j])
+ goto fail;
+ }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+ req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+ GFP_KERNEL);
+ if (!req->indirect_pages[j])
+ goto fail;
+ }
+ }
+
/* Map the shared frame, irq etc. */
- err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+ err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
if (err) {
- xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
- ring_ref, evtchn);
+ xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
return err;
}
return 0;
+
+fail:
+ list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
+ list_del(&req->free_list);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+ if (!req->segments[j])
+ break;
+ kfree(req->segments[j]);
+ }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+ if (!req->indirect_pages[j])
+ break;
+ kfree(req->indirect_pages[j]);
+ }
+ kfree(req);
+ }
+ return -ENOMEM;
}
static const struct xenbus_device_id xen_blkbk_ids[] = {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2c61cf8c6f61..6d89ed35d80c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,7 +98,21 @@ static unsigned int xen_blkif_max_segments = 32;
module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+/*
+ * Maximum order of pages to be used for the shared ring between frontend
+ * and backend; 4KB page granularity is used.
+ */
+static unsigned int xen_blkif_max_ring_order;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+
+#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages)
+#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES)
+/*
+ * "%i" with i = (-1UL) would take 11 characters, and "ring-ref" is 8,
+ * so 19 characters are enough. Define to 20 to stay consistent with
+ * the backend.
+ */
+#define RINGREF_NAME_LEN (20)
/*
* We have one of these per vbd, whether ide, scsi or 'other'. They
@@ -114,13 +128,14 @@ struct blkfront_info
int vdevice;
blkif_vdev_t handle;
enum blkif_state connected;
- int ring_ref;
+ int ring_ref[XENBUS_MAX_RING_PAGES];
+ unsigned int nr_ring_pages;
struct blkif_front_ring ring;
unsigned int evtchn, irq;
struct request_queue *rq;
struct work_struct work;
struct gnttab_free_callback callback;
- struct blk_shadow shadow[BLK_RING_SIZE];
+ struct blk_shadow shadow[BLK_MAX_RING_SIZE];
struct list_head grants;
struct list_head indirect_pages;
unsigned int persistent_gnts_c;
@@ -139,8 +154,6 @@ static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);
-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
- (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF 0
#define PARTS_PER_DISK 16
@@ -170,7 +183,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info);
static int get_id_from_freelist(struct blkfront_info *info)
{
unsigned long free = info->shadow_free;
- BUG_ON(free >= BLK_RING_SIZE);
+ BUG_ON(free >= BLK_RING_SIZE(info));
info->shadow_free = info->shadow[free].req.u.rw.id;
info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
return free;
@@ -983,7 +996,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
}
}
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
/*
* Clear persistent grants present in requests already
* on the shared ring
@@ -1033,12 +1046,15 @@ free_shadow:
flush_work(&info->work);
/* Free resources associated with old device channel. */
- if (info->ring_ref != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(info->ring_ref, 0,
- (unsigned long)info->ring.sring);
- info->ring_ref = GRANT_INVALID_REF;
- info->ring.sring = NULL;
+ for (i = 0; i < info->nr_ring_pages; i++) {
+ if (info->ring_ref[i] != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+ info->ring_ref[i] = GRANT_INVALID_REF;
+ }
}
+ free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+ info->ring.sring = NULL;
+
if (info->irq)
unbind_from_irqhandler(info->irq, info);
info->evtchn = info->irq = 0;
@@ -1058,12 +1074,6 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
- /*
- * Copy the data received from the backend into the bvec.
- * Since bv_offset can be different than 0, and bv_len different
- * than PAGE_SIZE, we have to keep track of the current offset,
- * to be sure we are copying the data from the right shared page.
- */
for_each_sg(s->sg, sg, nseg, i) {
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
shared_data = kmap_atomic(
@@ -1157,7 +1167,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
* never have given to it (we stamp it up to BLK_RING_SIZE -
 * look in get_id_from_freelist).
*/
- if (id >= BLK_RING_SIZE) {
+ if (id >= BLK_RING_SIZE(info)) {
WARN(1, "%s: response to %s has incorrect id (%ld)\n",
info->gd->disk_name, op_name(bret->operation), id);
/* We can't safely get the 'struct request' as
@@ -1245,26 +1255,30 @@ static int setup_blkring(struct xenbus_device *dev,
struct blkfront_info *info)
{
struct blkif_sring *sring;
- grant_ref_t gref;
- int err;
+ int err, i;
+ unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE;
+ grant_ref_t gref[XENBUS_MAX_RING_PAGES];
- info->ring_ref = GRANT_INVALID_REF;
+ for (i = 0; i < info->nr_ring_pages; i++)
+ info->ring_ref[i] = GRANT_INVALID_REF;
- sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+ sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+ get_order(ring_size));
if (!sring) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
return -ENOMEM;
}
SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+ FRONT_RING_INIT(&info->ring, sring, ring_size);
- err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref);
+ err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref);
if (err < 0) {
- free_page((unsigned long)sring);
+ free_pages((unsigned long)sring, get_order(ring_size));
info->ring.sring = NULL;
goto fail;
}
- info->ring_ref = gref;
+ for (i = 0; i < info->nr_ring_pages; i++)
+ info->ring_ref[i] = gref[i];
err = xenbus_alloc_evtchn(dev, &info->evtchn);
if (err)
@@ -1292,7 +1306,18 @@ static int talk_to_blkback(struct xenbus_device *dev,
{
const char *message = NULL;
struct xenbus_transaction xbt;
- int err;
+ int err, i;
+ unsigned int max_page_order = 0;
+ unsigned int ring_page_order = 0;
+
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "max-ring-page-order", "%u", &max_page_order);
+ if (err != 1)
+ info->nr_ring_pages = 1;
+ else {
+ ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
+ info->nr_ring_pages = 1 << ring_page_order;
+ }
/* Create shared ring, alloc event channel. */
err = setup_blkring(dev, info);
@@ -1306,11 +1331,32 @@ again:
goto destroy_blkring;
}
- err = xenbus_printf(xbt, dev->nodename,
- "ring-ref", "%u", info->ring_ref);
- if (err) {
- message = "writing ring-ref";
- goto abort_transaction;
+ if (info->nr_ring_pages == 1) {
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-ref", "%u", info->ring_ref[0]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ } else {
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-page-order", "%u", ring_page_order);
+ if (err) {
+ message = "writing ring-page-order";
+ goto abort_transaction;
+ }
+
+ for (i = 0; i < info->nr_ring_pages; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
+ "%u", info->ring_ref[i]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ }
}
err = xenbus_printf(xbt, dev->nodename,
"event-channel", "%u", info->evtchn);
@@ -1338,6 +1384,9 @@ again:
goto destroy_blkring;
}
+ for (i = 0; i < BLK_RING_SIZE(info); i++)
+ info->shadow[i].req.u.rw.id = i+1;
+ info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
xenbus_switch_state(dev, XenbusStateInitialised);
return 0;
@@ -1361,7 +1410,7 @@ again:
static int blkfront_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
- int err, vdevice, i;
+ int err, vdevice;
struct blkfront_info *info;
/* FIXME: Use dynamic device id if this is not set. */
@@ -1422,21 +1471,10 @@ static int blkfront_probe(struct xenbus_device *dev,
info->connected = BLKIF_STATE_DISCONNECTED;
INIT_WORK(&info->work, blkif_restart_queue);
- for (i = 0; i < BLK_RING_SIZE; i++)
- info->shadow[i].req.u.rw.id = i+1;
- info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
-
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
- err = talk_to_blkback(dev, info);
- if (err) {
- kfree(info);
- dev_set_drvdata(&dev->dev, NULL);
- return err;
- }
-
return 0;
}
@@ -1476,10 +1514,10 @@ static int blkif_recover(struct blkfront_info *info)
/* Stage 2: Set up free list. */
memset(&info->shadow, 0, sizeof(info->shadow));
- for (i = 0; i < BLK_RING_SIZE; i++)
+ for (i = 0; i < BLK_RING_SIZE(info); i++)
info->shadow[i].req.u.rw.id = i+1;
info->shadow_free = info->ring.req_prod_pvt;
- info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+ info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
rc = blkfront_setup_indirect(info);
if (rc) {
@@ -1491,7 +1529,7 @@ static int blkif_recover(struct blkfront_info *info)
blk_queue_max_segments(info->rq, segs);
bio_list_init(&bio_list);
INIT_LIST_HEAD(&requests);
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
/* Not in use? */
if (!copy[i].request)
continue;
@@ -1697,7 +1735,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
segs = info->max_indirect_segments;
}
- err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+ err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info));
if (err)
goto out_of_memory;
@@ -1707,7 +1745,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
* grants, we need to allocate a set of pages that can be
* used for mapping indirect grefs
*/
- int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE;
+ int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info);
BUG_ON(!list_empty(&info->indirect_pages));
for (i = 0; i < num; i++) {
@@ -1718,7 +1756,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
}
}
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
info->shadow[i].grants_used = kzalloc(
sizeof(info->shadow[i].grants_used[0]) * segs,
GFP_NOIO);
@@ -1740,7 +1778,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
return 0;
out_of_memory:
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
kfree(info->shadow[i].grants_used);
info->shadow[i].grants_used = NULL;
kfree(info->shadow[i].sg);
@@ -1906,8 +1944,15 @@ static void blkback_changed(struct xenbus_device *dev,
dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
switch (backend_state) {
- case XenbusStateInitialising:
case XenbusStateInitWait:
+ if (dev->state != XenbusStateInitialising)
+ break;
+ if (talk_to_blkback(dev, info)) {
+ kfree(info);
+ dev_set_drvdata(&dev->dev, NULL);
+ break;
+ }
+ case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
case XenbusStateReconfigured:
@@ -2091,6 +2136,12 @@ static int __init xlblk_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+ pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+ xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+ xen_blkif_max_ring_order = 0;
+ }
+
if (!xen_has_pv_disk_devices())
return -ENODEV;
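Taken together, the blkfront and blkback hunks define a small xenstore negotiation: the backend advertises max-ring-page-order, the frontend clamps its module parameter to that and publishes either the legacy single ring-ref or ring-page-order plus ring-ref0..N-1, and connect_ring() reads the keys back in the same order. A condensed sketch of the frontend's write side under those assumptions (publish_ring() is an invented helper; error handling elided):

#include <linux/kernel.h>
#include <xen/xenbus.h>

#define RINGREF_NAME_LEN 20	/* matches the definitions above */

static int publish_ring(struct xenbus_transaction xbt, const char *dir,
			unsigned int order, const int *ring_ref)
{
	char key[RINGREF_NAME_LEN];
	unsigned int i;

	if (order == 0)		/* legacy single-page protocol */
		return xenbus_printf(xbt, dir, "ring-ref", "%u", ring_ref[0]);

	xenbus_printf(xbt, dir, "ring-page-order", "%u", order);
	for (i = 0; i < (1U << order); i++) {
		snprintf(key, sizeof(key), "ring-ref%u", i);
		xenbus_printf(xbt, dir, key, "%u", ring_ref[i]);
	}
	return 0;
}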
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 37b8be7cba95..0ac3bd1a5497 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -208,7 +208,7 @@ static int set_param_timeout(const char *val, const struct kernel_param *kp)
return rv;
}
-static struct kernel_param_ops param_ops_timeout = {
+static const struct kernel_param_ops param_ops_timeout = {
.set = set_param_timeout,
.get = param_get_int,
};
@@ -270,14 +270,14 @@ static int set_param_wdog_ifnum(const char *val, const struct kernel_param *kp)
return 0;
}
-static struct kernel_param_ops param_ops_wdog_ifnum = {
+static const struct kernel_param_ops param_ops_wdog_ifnum = {
.set = set_param_wdog_ifnum,
.get = param_get_int,
};
#define param_check_wdog_ifnum param_check_int
-static struct kernel_param_ops param_ops_str = {
+static const struct kernel_param_ops param_ops_str = {
.set = set_param_str,
.get = get_param_str,
};
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 935b05936dbd..9064ff743598 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -462,15 +462,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt)
exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET);
if (mct_int_type == MCT_INT_SPI) {
- evt->irq = mct_irqs[MCT_L0_IRQ + cpu];
- if (request_irq(evt->irq, exynos4_mct_tick_isr,
- IRQF_TIMER | IRQF_NOBALANCING,
- evt->name, mevt)) {
- pr_err("exynos-mct: cannot register IRQ %d\n",
- evt->irq);
+
+ if (evt->irq == -1)
return -EIO;
- }
- irq_force_affinity(mct_irqs[MCT_L0_IRQ + cpu], cpumask_of(cpu));
+
+ irq_force_affinity(evt->irq, cpumask_of(cpu));
+ enable_irq(evt->irq);
} else {
enable_percpu_irq(mct_irqs[MCT_L0_IRQ], 0);
}
@@ -483,10 +480,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt)
static void exynos4_local_timer_stop(struct clock_event_device *evt)
{
evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
- if (mct_int_type == MCT_INT_SPI)
- free_irq(evt->irq, this_cpu_ptr(&percpu_mct_tick));
- else
+ if (mct_int_type == MCT_INT_SPI) {
+ if (evt->irq != -1)
+ disable_irq_nosync(evt->irq);
+ } else {
disable_percpu_irq(mct_irqs[MCT_L0_IRQ]);
+ }
}
static int exynos4_mct_cpu_notify(struct notifier_block *self,
@@ -518,7 +517,7 @@ static struct notifier_block exynos4_mct_cpu_nb = {
static void __init exynos4_timer_resources(struct device_node *np, void __iomem *base)
{
- int err;
+ int err, cpu;
struct mct_clock_event_device *mevt = this_cpu_ptr(&percpu_mct_tick);
struct clk *mct_clk, *tick_clk;
@@ -545,7 +544,25 @@ static void __init exynos4_timer_resources(struct device_node *np, void __iomem
WARN(err, "MCT: can't request IRQ %d (%d)\n",
mct_irqs[MCT_L0_IRQ], err);
} else {
- irq_set_affinity(mct_irqs[MCT_L0_IRQ], cpumask_of(0));
+ for_each_possible_cpu(cpu) {
+ int mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
+ struct mct_clock_event_device *pcpu_mevt =
+ per_cpu_ptr(&percpu_mct_tick, cpu);
+
+ pcpu_mevt->evt.irq = -1;
+
+ irq_set_status_flags(mct_irq, IRQ_NOAUTOEN);
+ if (request_irq(mct_irq,
+ exynos4_mct_tick_isr,
+ IRQF_TIMER | IRQF_NOBALANCING,
+ pcpu_mevt->name, pcpu_mevt)) {
+ pr_err("exynos-mct: cannot register IRQ (cpu%d)\n",
+ cpu);
+
+ continue;
+ }
+ pcpu_mevt->evt.irq = mct_irq;
+ }
}
err = register_cpu_notifier(&exynos4_mct_cpu_nb);
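
The exynos_mct rework above requests every local timer IRQ once at probe
time but leaves it disabled until the owning CPU sets up its tick. The
core pattern, reduced to a sketch (irq, my_isr and my_dev are
placeholders, not driver symbols):

	/* Request the IRQ without auto-enabling it; enable_irq() runs
	 * later from the setup path on the CPU that services it.
	 */
	irq_set_status_flags(irq, IRQ_NOAUTOEN);
	if (request_irq(irq, my_isr, IRQF_TIMER | IRQF_NOBALANCING,
			"my-timer", my_dev))
		pr_err("cannot register IRQ %d\n", irq);

	/* ... later, in the per-CPU setup ... */
	irq_force_affinity(irq, cpumask_of(cpu));
	enable_irq(irq);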
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 1e3ef5ec4784..845bafcfa792 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -67,6 +67,8 @@ static int nap_loop(struct cpuidle_device *dev,
return index;
}
+/* Register fastsleep only in oneshot broadcast mode */
+#ifdef CONFIG_TICK_ONESHOT
static int fastsleep_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
@@ -90,7 +92,7 @@ static int fastsleep_loop(struct cpuidle_device *dev,
return index;
}
-
+#endif
/*
* States for dedicated partition case.
*/
@@ -216,7 +218,14 @@ static int powernv_add_idle_states(void)
powernv_states[nr_idle_states].flags = 0;
powernv_states[nr_idle_states].target_residency = 100;
powernv_states[nr_idle_states].enter = &nap_loop;
- } else if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
+ }
+
+ /*
+ * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must be
+ * registered within this config dependency check.
+ */
+#ifdef CONFIG_TICK_ONESHOT
+ if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
/* Add FASTSLEEP state */
strcpy(powernv_states[nr_idle_states].name, "FastSleep");
@@ -225,7 +234,7 @@ static int powernv_add_idle_states(void)
powernv_states[nr_idle_states].target_residency = 300000;
powernv_states[nr_idle_states].enter = &fastsleep_loop;
}
-
+#endif
powernv_states[nr_idle_states].exit_latency =
((unsigned int)latency_ns[i]) / 1000;
diff --git a/drivers/crypto/qat/qat_common/adf_accel_engine.c b/drivers/crypto/qat/qat_common/adf_accel_engine.c
index 7f8b66c915ed..fdda8e7ae302 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_engine.c
+++ b/drivers/crypto/qat/qat_common/adf_accel_engine.c
@@ -88,10 +88,7 @@ void adf_ae_fw_release(struct adf_accel_dev *accel_dev)
qat_uclo_del_uof_obj(loader_data->fw_loader);
qat_hal_deinit(loader_data->fw_loader);
-
- if (loader_data->uof_fw)
- release_firmware(loader_data->uof_fw);
-
+ release_firmware(loader_data->uof_fw);
loader_data->uof_fw = NULL;
loader_data->fw_loader = NULL;
}
diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c
index ccec327489da..db2926bff8a5 100644
--- a/drivers/crypto/qat/qat_common/adf_transport.c
+++ b/drivers/crypto/qat/qat_common/adf_transport.c
@@ -449,7 +449,7 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev,
err:
for (i = 0; i < ADF_ETR_MAX_RINGS_PER_BANK; i++) {
ring = &bank->rings[i];
- if (hw_data->tx_rings_mask & (1 << i) && ring->inflights)
+ if (hw_data->tx_rings_mask & (1 << i))
kfree(ring->inflights);
}
return -ENOMEM;
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 220ee49633e4..b8576fd6bd0e 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -120,7 +120,7 @@ static struct dmatest_info {
static int dmatest_run_set(const char *val, const struct kernel_param *kp);
static int dmatest_run_get(char *val, const struct kernel_param *kp);
-static struct kernel_param_ops run_ops = {
+static const struct kernel_param_ops run_ops = {
.set = dmatest_run_set,
.get = dmatest_run_get,
};
@@ -195,7 +195,7 @@ static int dmatest_wait_get(char *val, const struct kernel_param *kp)
return param_get_bool(val, kp);
}
-static struct kernel_param_ops wait_ops = {
+static const struct kernel_param_ops wait_ops = {
.get = dmatest_wait_get,
.set = param_set_bool,
};
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index 8333f878919c..40343fa92c7b 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -657,8 +657,9 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev)
}
for (i = 0; i < kona_gpio->num_bank; i++) {
bank = &kona_gpio->banks[i];
- irq_set_chained_handler(bank->irq, bcm_kona_gpio_irq_handler);
- irq_set_handler_data(bank->irq, bank);
+ irq_set_chained_handler_and_data(bank->irq,
+ bcm_kona_gpio_irq_handler,
+ bank);
}
spin_lock_init(&kona_gpio->lock);
diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c
index 58faf04fce5d..55fa9853a7f2 100644
--- a/drivers/gpio/gpio-dwapb.c
+++ b/drivers/gpio/gpio-dwapb.c
@@ -348,8 +348,8 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio,
irq_gc->chip_types[1].handler = handle_edge_irq;
if (!pp->irq_shared) {
- irq_set_chained_handler(pp->irq, dwapb_irq_handler);
- irq_set_handler_data(pp->irq, gpio);
+ irq_set_chained_handler_and_data(pp->irq, dwapb_irq_handler,
+ gpio);
} else {
/*
* Request a shared IRQ since where MFD would have devices
diff --git a/drivers/gpio/gpio-msic.c b/drivers/gpio/gpio-msic.c
index 01acf0a8cdb1..7bcfb87a5fa6 100644
--- a/drivers/gpio/gpio-msic.c
+++ b/drivers/gpio/gpio-msic.c
@@ -309,8 +309,7 @@ static int platform_msic_gpio_probe(struct platform_device *pdev)
&msic_irqchip,
handle_simple_irq);
}
- irq_set_chained_handler(mg->irq, msic_gpio_irq_handler);
- irq_set_handler_data(mg->irq, mg);
+ irq_set_chained_handler_and_data(mg->irq, msic_gpio_irq_handler, mg);
return 0;
err:
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index be42ab368a80..bf4bd1d120c3 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2052,14 +2052,14 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
if (is_of_node(fwnode)) {
enum of_gpio_flags flags;
- desc = of_get_named_gpiod_flags(of_node(fwnode), propname, 0,
+ desc = of_get_named_gpiod_flags(to_of_node(fwnode), propname, 0,
&flags);
if (!IS_ERR(desc))
active_low = flags & OF_GPIO_ACTIVE_LOW;
} else if (is_acpi_node(fwnode)) {
struct acpi_gpio_info info;
- desc = acpi_get_gpiod_by_index(acpi_node(fwnode), propname, 0,
+ desc = acpi_get_gpiod_by_index(to_acpi_node(fwnode), propname, 0,
&info);
if (!IS_ERR(desc))
active_low = info.active_low;
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index e29b02ca9e91..f086ef387475 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -199,7 +199,7 @@ static int ide_set_dev_param_mask(const char *s, const struct kernel_param *kp)
return 0;
}
-static struct kernel_param_ops param_ops_ide_dev_mask = {
+static const struct kernel_param_ops param_ops_ide_dev_mask = {
.set = ide_set_dev_param_mask
};
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index eada8f758ad4..267dc4f75502 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -99,7 +99,7 @@ module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
"Use memory registration even for contiguous memory regions");
-static struct kernel_param_ops srp_tmo_ops;
+static const struct kernel_param_ops srp_tmo_ops;
static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
@@ -184,7 +184,7 @@ out:
return res;
}
-static struct kernel_param_ops srp_tmo_ops = {
+static const struct kernel_param_ops srp_tmo_ops = {
.get = srp_tmo_get,
.set = srp_tmo_set,
};
diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index f63341f20b91..cfd58e87da26 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -94,7 +94,7 @@ static int ati_remote2_get_mode_mask(char *buffer,
static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK;
#define param_check_channel_mask(name, p) __param_check(name, p, unsigned int)
-static struct kernel_param_ops param_ops_channel_mask = {
+static const struct kernel_param_ops param_ops_channel_mask = {
.set = ati_remote2_set_channel_mask,
.get = ati_remote2_get_channel_mask,
};
@@ -103,7 +103,7 @@ MODULE_PARM_DESC(channel_mask, "Bitmask of channels to accept <15:Channel16>...<
static unsigned int mode_mask = ATI_REMOTE2_MAX_MODE_MASK;
#define param_check_mode_mask(name, p) __param_check(name, p, unsigned int)
-static struct kernel_param_ops param_ops_mode_mask = {
+static const struct kernel_param_ops param_ops_mode_mask = {
.set = ati_remote2_set_mode_mask,
.get = ati_remote2_get_mode_mask,
};
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 7c4ba43d253e..ec3477036150 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -47,7 +47,7 @@ MODULE_LICENSE("GPL");
static unsigned int psmouse_max_proto = PSMOUSE_AUTO;
static int psmouse_set_maxproto(const char *val, const struct kernel_param *);
static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp);
-static struct kernel_param_ops param_ops_proto_abbrev = {
+static const struct kernel_param_ops param_ops_proto_abbrev = {
.set = psmouse_set_maxproto,
.get = psmouse_get_maxproto,
};
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index d3e5e9abe3b6..a57e9b749895 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -117,6 +117,7 @@ struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
static int alloc_passthrough_domain(void);
+static int protection_domain_init(struct protection_domain *domain);
/****************************************************************************
*
@@ -1881,12 +1882,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
if (!dma_dom)
return NULL;
- spin_lock_init(&dma_dom->domain.lock);
-
- dma_dom->domain.id = domain_id_alloc();
- if (dma_dom->domain.id == 0)
+ if (protection_domain_init(&dma_dom->domain))
goto free_dma_dom;
- INIT_LIST_HEAD(&dma_dom->domain.dev_list);
+
dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -2916,6 +2914,18 @@ static void protection_domain_free(struct protection_domain *domain)
kfree(domain);
}
+static int protection_domain_init(struct protection_domain *domain)
+{
+ spin_lock_init(&domain->lock);
+ mutex_init(&domain->api_lock);
+ domain->id = domain_id_alloc();
+ if (!domain->id)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&domain->dev_list);
+
+ return 0;
+}
+
static struct protection_domain *protection_domain_alloc(void)
{
struct protection_domain *domain;
@@ -2924,12 +2934,8 @@ static struct protection_domain *protection_domain_alloc(void)
if (!domain)
return NULL;
- spin_lock_init(&domain->lock);
- mutex_init(&domain->api_lock);
- domain->id = domain_id_alloc();
- if (!domain->id)
+ if (protection_domain_init(domain))
goto out_err;
- INIT_LIST_HEAD(&domain->dev_list);
add_domain_to_list(domain);
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index f14130121298..8e9ec81ce4bb 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1389,8 +1389,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->pgtbl_ops)
- free_io_pgtable_ops(smmu_domain->pgtbl_ops);
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops);
/* Free the CD and ASID, if we allocated them */
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index dce041b1c139..4cd0c29cb585 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1566,7 +1566,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
return -ENODEV;
}
- if ((id & ID0_S1TS) && ((smmu->version == 1) || (id & ID0_ATOSNS))) {
+ if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) {
smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
dev_notice(smmu->dev, "\taddress translation ops\n");
}
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 49e7542510d1..f286090931cc 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -847,13 +847,24 @@ static int add_iommu_group(struct device *dev, void *data)
{
struct iommu_callback_data *cb = data;
const struct iommu_ops *ops = cb->ops;
+ int ret;
if (!ops->add_device)
return 0;
WARN_ON(dev->iommu_group);
- return ops->add_device(dev);
+ ret = ops->add_device(dev);
+
+ /*
+ * We ignore -ENODEV errors for now, as they just mean that the
+ * device is not translated by an IOMMU. We still care about
+ * other errors and fail to initialize when they happen.
+ */
+ if (ret == -ENODEV)
+ ret = 0;
+
+ return ret;
}
static int remove_iommu_group(struct device *dev, void *data)
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 15eb3f86f670..d2d54d62afee 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -191,7 +191,7 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev)
goto err;
}
- np = of_node(child);
+ np = to_of_node(child);
if (fwnode_property_present(child, "label")) {
fwnode_property_read_string(child, "label", &led.name);
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 977bd3a3eed0..120df5c08741 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -417,9 +417,8 @@ static int __init asic3_irq_probe(struct platform_device *pdev)
asic3_write_register(asic, ASIC3_OFFSET(INTR, INT_MASK),
ASIC3_INTMASK_GINTMASK);
- irq_set_chained_handler(asic->irq_nr, asic3_irq_demux);
+ irq_set_chained_handler_and_data(asic->irq_nr, asic3_irq_demux, asic);
irq_set_irq_type(asic->irq_nr, IRQ_TYPE_EDGE_RISING);
- irq_set_handler_data(asic->irq_nr, asic);
return 0;
}
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
index 4739689d23ad..fb8705fc3aca 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -115,7 +115,7 @@ static int param_set_axis(const char *val, const struct kernel_param *kp)
return ret;
}
-static struct kernel_param_ops param_ops_axis = {
+static const struct kernel_param_ops param_ops_axis = {
.set = param_set_axis,
.get = param_get_int,
};
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 1a92d30689e7..ebf46ad2d513 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -162,7 +162,7 @@ static int __init ubiblock_set_param(const char *val,
return 0;
}
-static struct kernel_param_ops ubiblock_param_ops = {
+static const struct kernel_param_ops ubiblock_param_ops = {
.set = ubiblock_set_param,
};
module_param_cb(block, &ubiblock_param_ops, NULL, 0);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index dd03ad865caf..661cdaa7ea96 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -268,7 +268,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata,
int ret;
/* Try to obtain pages, decreasing order if necessary */
- gfp |= __GFP_COLD | __GFP_COMP;
+ gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
while (order >= 0) {
pages = alloc_pages(gfp, order);
if (pages)
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 95153b234c71..299eb4315fe6 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -948,7 +948,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
struct resource *res;
void __iomem *base_addr;
u32 offset;
- int ret;
+ int ret = 0;
pdev = pdata->pdev;
dev = &pdev->dev;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 7a4aaa3c01b6..cd4ae76bbff2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -530,7 +530,6 @@ enum bnx2x_tpa_mode_t {
struct bnx2x_alloc_pool {
struct page *page;
- dma_addr_t dma;
unsigned int offset;
};
@@ -2418,10 +2417,13 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR | \
AEU_INPUTS_ATTN_BITS_MISC_PARITY_ERROR)
-#define HW_PRTY_ASSERT_SET_3 (AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY | \
- AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY | \
- AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY | \
- AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY)
+#define HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD \
+ (AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY | \
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY | \
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY)
+
+#define HW_PRTY_ASSERT_SET_3 (HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD | \
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY)
#define HW_PRTY_ASSERT_SET_4 (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | \
AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index e2a65334708d..a90d7364334f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -563,23 +563,20 @@ static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp,
return -ENOMEM;
}
- pool->dma = dma_map_page(&bp->pdev->dev, pool->page, 0,
- PAGE_SIZE, DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(&bp->pdev->dev,
- pool->dma))) {
- __free_pages(pool->page, PAGES_PER_SGE_SHIFT);
- pool->page = NULL;
- BNX2X_ERR("Can't map sge\n");
- return -ENOMEM;
- }
pool->offset = 0;
}
+ mapping = dma_map_page(&bp->pdev->dev, pool->page,
+ pool->offset, SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
+ BNX2X_ERR("Can't map sge\n");
+ return -ENOMEM;
+ }
+
get_page(pool->page);
sw_buf->page = pool->page;
sw_buf->offset = pool->offset;
- mapping = pool->dma + sw_buf->offset;
dma_unmap_addr_set(sw_buf, mapping, mapping);
sge->addr_hi = cpu_to_le32(U64_HI(mapping));
@@ -648,9 +645,9 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
return err;
}
- dma_unmap_single(&bp->pdev->dev,
- dma_unmap_addr(&old_rx_pg, mapping),
- SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+ dma_unmap_page(&bp->pdev->dev,
+ dma_unmap_addr(&old_rx_pg, mapping),
+ SGE_PAGE_SIZE, DMA_FROM_DEVICE);
/* Add one frag and update the appropriate fields in the skb */
if (fp->mode == TPA_MODE_LRO)
skb_fill_page_desc(skb, j, old_rx_pg.page,
@@ -3421,8 +3418,13 @@ static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
u32 wnd_sum = 0;
/* Headers length */
- hlen = (int)(skb_transport_header(skb) - skb->data) +
- tcp_hdrlen(skb);
+ if (xmit_type & XMIT_GSO_ENC)
+ hlen = (int)(skb_inner_transport_header(skb) -
+ skb->data) +
+ inner_tcp_hdrlen(skb);
+ else
+ hlen = (int)(skb_transport_header(skb) -
+ skb->data) + tcp_hdrlen(skb);
/* Amount of data (w/o headers) on linear part of SKB */
first_bd_sz = skb_headlen(skb) - hlen;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 2b30081ec26d..03b7404d5b9b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -807,8 +807,8 @@ static inline void bnx2x_free_rx_sge(struct bnx2x *bp,
/* Since many fragments can share the same page, make sure to
* only unmap and free the page once.
*/
- dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping),
- SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+ dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping),
+ SGE_PAGE_SIZE, DMA_FROM_DEVICE);
put_page(page);
@@ -974,14 +974,6 @@ static inline void bnx2x_free_rx_mem_pool(struct bnx2x *bp,
if (!pool->page)
return;
- /* Page was not fully fragmented. Unmap unused space */
- if (pool->offset < PAGE_SIZE) {
- dma_addr_t dma = pool->dma + pool->offset;
- int size = PAGE_SIZE - pool->offset;
-
- dma_unmap_single(&bp->pdev->dev, dma, size, DMA_FROM_DEVICE);
- }
-
put_page(pool->page);
pool->page = NULL;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 48ed005ba73f..76b9052a961c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -257,14 +257,15 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
struct bnx2x *bp = netdev_priv(dev);
int cfg_idx = bnx2x_get_link_cfg_idx(bp);
+ u32 media_type;
/* Dual Media boards present all available port types */
cmd->supported = bp->port.supported[cfg_idx] |
(bp->port.supported[cfg_idx ^ 1] &
(SUPPORTED_TP | SUPPORTED_FIBRE));
cmd->advertising = bp->port.advertising[cfg_idx];
- if (bp->link_params.phy[bnx2x_get_cur_phy_idx(bp)].media_type ==
- ETH_PHY_SFP_1G_FIBER) {
+ media_type = bp->link_params.phy[bnx2x_get_cur_phy_idx(bp)].media_type;
+ if (media_type == ETH_PHY_SFP_1G_FIBER) {
cmd->supported &= ~(SUPPORTED_10000baseT_Full);
cmd->advertising &= ~(ADVERTISED_10000baseT_Full);
}
@@ -312,12 +313,26 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
cmd->lp_advertising |= ADVERTISED_100baseT_Full;
if (status & LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE)
cmd->lp_advertising |= ADVERTISED_1000baseT_Half;
- if (status & LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE)
- cmd->lp_advertising |= ADVERTISED_1000baseT_Full;
+ if (status & LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE) {
+ if (media_type == ETH_PHY_KR) {
+ cmd->lp_advertising |=
+ ADVERTISED_1000baseKX_Full;
+ } else {
+ cmd->lp_advertising |=
+ ADVERTISED_1000baseT_Full;
+ }
+ }
if (status & LINK_STATUS_LINK_PARTNER_2500XFD_CAPABLE)
cmd->lp_advertising |= ADVERTISED_2500baseX_Full;
- if (status & LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE)
- cmd->lp_advertising |= ADVERTISED_10000baseT_Full;
+ if (status & LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE) {
+ if (media_type == ETH_PHY_KR) {
+ cmd->lp_advertising |=
+ ADVERTISED_10000baseKR_Full;
+ } else {
+ cmd->lp_advertising |=
+ ADVERTISED_10000baseT_Full;
+ }
+ }
if (status & LINK_STATUS_LINK_PARTNER_20GXFD_CAPABLE)
cmd->lp_advertising |= ADVERTISED_20000baseKR2_Full;
}
@@ -564,15 +579,20 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
return -EINVAL;
}
- if (!(bp->port.supported[cfg_idx] &
- SUPPORTED_1000baseT_Full)) {
+ if (bp->port.supported[cfg_idx] &
+ SUPPORTED_1000baseT_Full) {
+ advertising = (ADVERTISED_1000baseT_Full |
+ ADVERTISED_TP);
+
+ } else if (bp->port.supported[cfg_idx] &
+ SUPPORTED_1000baseKX_Full) {
+ advertising = ADVERTISED_1000baseKX_Full;
+ } else {
DP(BNX2X_MSG_ETHTOOL,
"1G full not supported\n");
return -EINVAL;
}
- advertising = (ADVERTISED_1000baseT_Full |
- ADVERTISED_TP);
break;
case SPEED_2500:
@@ -600,17 +620,22 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
return -EINVAL;
}
phy_idx = bnx2x_get_cur_phy_idx(bp);
- if (!(bp->port.supported[cfg_idx]
- & SUPPORTED_10000baseT_Full) ||
- (bp->link_params.phy[phy_idx].media_type ==
+ if ((bp->port.supported[cfg_idx] &
+ SUPPORTED_10000baseT_Full) &&
+ (bp->link_params.phy[phy_idx].media_type !=
ETH_PHY_SFP_1G_FIBER)) {
+ advertising = (ADVERTISED_10000baseT_Full |
+ ADVERTISED_FIBRE);
+ } else if (bp->port.supported[cfg_idx] &
+ SUPPORTED_10000baseKR_Full) {
+ advertising = (ADVERTISED_10000baseKR_Full |
+ ADVERTISED_FIBRE);
+ } else {
DP(BNX2X_MSG_ETHTOOL,
"10G full not supported\n");
return -EINVAL;
}
- advertising = (ADVERTISED_10000baseT_Full |
- ADVERTISED_FIBRE);
break;
default:
@@ -633,6 +658,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
bp->link_params.multi_phy_config = new_multi_phy_config;
if (netif_running(dev)) {
bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+ bnx2x_force_link_reset(bp);
bnx2x_link_set(bp);
}
@@ -1204,6 +1230,7 @@ static int bnx2x_acquire_nvram_lock(struct bnx2x *bp)
if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) {
DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM,
"cannot get access to nvram interface\n");
+ bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_NVRAM);
return -EBUSY;
}
@@ -1944,6 +1971,7 @@ static int bnx2x_set_pauseparam(struct net_device *dev,
if (netif_running(dev)) {
bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+ bnx2x_force_link_reset(bp);
bnx2x_link_set(bp);
}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 21a0d6afca4a..a0b03c27e0a3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -3392,9 +3392,9 @@ static void bnx2x_calc_ieee_aneg_adv(struct bnx2x_phy *phy,
case BNX2X_FLOW_CTRL_AUTO:
switch (params->req_fc_auto_adv) {
case BNX2X_FLOW_CTRL_BOTH:
+ case BNX2X_FLOW_CTRL_RX:
*ieee_fc |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH;
break;
- case BNX2X_FLOW_CTRL_RX:
case BNX2X_FLOW_CTRL_TX:
*ieee_fc |=
MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC;
@@ -3488,14 +3488,21 @@ static void bnx2x_ext_phy_set_pause(struct link_params *params,
bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_ADV_PAUSE, val);
}
-static void bnx2x_pause_resolve(struct link_vars *vars, u32 pause_result)
-{ /* LD LP */
+static void bnx2x_pause_resolve(struct bnx2x_phy *phy,
+ struct link_params *params,
+ struct link_vars *vars,
+ u32 pause_result)
+{
+ struct bnx2x *bp = params->bp;
+ /* LD LP */
switch (pause_result) { /* ASYM P ASYM P */
case 0xb: /* 1 0 1 1 */
+ DP(NETIF_MSG_LINK, "Flow Control: TX only\n");
vars->flow_ctrl = BNX2X_FLOW_CTRL_TX;
break;
case 0xe: /* 1 1 1 0 */
+ DP(NETIF_MSG_LINK, "Flow Control: RX only\n");
vars->flow_ctrl = BNX2X_FLOW_CTRL_RX;
break;
@@ -3503,10 +3510,22 @@ static void bnx2x_pause_resolve(struct link_vars *vars, u32 pause_result)
case 0x7: /* 0 1 1 1 */
case 0xd: /* 1 1 0 1 */
case 0xf: /* 1 1 1 1 */
- vars->flow_ctrl = BNX2X_FLOW_CTRL_BOTH;
+ /* If the user selected to advertise RX ONLY,
+ * although we advertised both, we need to
+ * enable RX only.
+ */
+ if (params->req_fc_auto_adv == BNX2X_FLOW_CTRL_BOTH) {
+ DP(NETIF_MSG_LINK, "Flow Control: RX & TX\n");
+ vars->flow_ctrl = BNX2X_FLOW_CTRL_BOTH;
+ } else {
+ DP(NETIF_MSG_LINK, "Flow Control: RX only\n");
+ vars->flow_ctrl = BNX2X_FLOW_CTRL_RX;
+ }
break;
default:
+ DP(NETIF_MSG_LINK, "Flow Control: None\n");
+ vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
break;
}
if (pause_result & (1<<0))
@@ -3567,7 +3586,7 @@ static void bnx2x_ext_phy_update_adv_fc(struct bnx2x_phy *phy,
pause_result |= (lp_pause &
MDIO_AN_REG_ADV_PAUSE_MASK) >> 10;
DP(NETIF_MSG_LINK, "Ext PHY pause result 0x%x\n", pause_result);
- bnx2x_pause_resolve(vars, pause_result);
+ bnx2x_pause_resolve(phy, params, vars, pause_result);
}
@@ -5396,7 +5415,7 @@ static void bnx2x_update_adv_fc(struct bnx2x_phy *phy,
MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK)>>7;
DP(NETIF_MSG_LINK, "pause_result CL37 0x%x\n", pause_result);
}
- bnx2x_pause_resolve(vars, pause_result);
+ bnx2x_pause_resolve(phy, params, vars, pause_result);
}
@@ -7129,7 +7148,7 @@ static void bnx2x_8073_resolve_fc(struct bnx2x_phy *phy,
pause_result |= (lp_pause &
MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) >> 7;
- bnx2x_pause_resolve(vars, pause_result);
+ bnx2x_pause_resolve(phy, params, vars, pause_result);
DP(NETIF_MSG_LINK, "Ext PHY CL37 pause result 0x%x\n",
pause_result);
}
@@ -11474,7 +11493,9 @@ static const struct bnx2x_phy phy_warpcore = {
SUPPORTED_100baseT_Half |
SUPPORTED_100baseT_Full |
SUPPORTED_1000baseT_Full |
+ SUPPORTED_1000baseKX_Full |
SUPPORTED_10000baseT_Full |
+ SUPPORTED_10000baseKR_Full |
SUPPORTED_20000baseKR2_Full |
SUPPORTED_20000baseMLD2_Full |
SUPPORTED_FIBRE |
@@ -11980,8 +12001,8 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port,
break;
case PORT_HW_CFG_NET_SERDES_IF_KR:
phy->media_type = ETH_PHY_KR;
- phy->supported &= (SUPPORTED_1000baseT_Full |
- SUPPORTED_10000baseT_Full |
+ phy->supported &= (SUPPORTED_1000baseKX_Full |
+ SUPPORTED_10000baseKR_Full |
SUPPORTED_FIBRE |
SUPPORTED_Autoneg |
SUPPORTED_Pause |
@@ -11999,8 +12020,8 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port,
phy->media_type = ETH_PHY_KR;
phy->flags |= FLAGS_WC_DUAL_MODE;
phy->supported &= (SUPPORTED_20000baseKR2_Full |
- SUPPORTED_10000baseT_Full |
- SUPPORTED_1000baseT_Full |
+ SUPPORTED_10000baseKR_Full |
+ SUPPORTED_1000baseKX_Full |
SUPPORTED_Autoneg |
SUPPORTED_FIBRE |
SUPPORTED_Pause |
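
For reference, the pause_result nibble decoded by bnx2x_pause_resolve()
earlier in this file packs the local-device (LD) and link-partner (LP)
pause advertisements as bit3=LD ASYM, bit2=LD PAUSE, bit1=LP ASYM,
bit0=LP PAUSE, so the cases read:

	0xb = 1011: LD asym only,  LP pause+asym -> BNX2X_FLOW_CTRL_TX
	0xe = 1110: LD pause+asym, LP asym only  -> BNX2X_FLOW_CTRL_RX
	0x5/0x7/0xd/0xf: both sides set PAUSE    -> BOTH, downgraded to
			 RX when the user requested RX only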
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 33501bcddc48..c27af12314ed 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -2287,13 +2287,11 @@ static int bnx2x_set_spio(struct bnx2x *bp, int spio, u32 mode)
void bnx2x_calc_fc_adv(struct bnx2x *bp)
{
u8 cfg_idx = bnx2x_get_link_cfg_idx(bp);
+
+ bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause |
+ ADVERTISED_Pause);
switch (bp->link_vars.ieee_fc &
MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) {
- case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_NONE:
- bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause |
- ADVERTISED_Pause);
- break;
-
case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH:
bp->port.advertising[cfg_idx] |= (ADVERTISED_Asym_Pause |
ADVERTISED_Pause);
@@ -2304,8 +2302,6 @@ void bnx2x_calc_fc_adv(struct bnx2x *bp)
break;
default:
- bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause |
- ADVERTISED_Pause);
break;
}
}
@@ -2351,12 +2347,16 @@ int bnx2x_initial_phy_init(struct bnx2x *bp, int load_mode)
if (load_mode == LOAD_DIAG) {
struct link_params *lp = &bp->link_params;
lp->loopback_mode = LOOPBACK_XGXS;
- /* do PHY loopback at 10G speed, if possible */
- if (lp->req_line_speed[cfx_idx] < SPEED_10000) {
+ /* Prefer doing PHY loopback at highest speed */
+ if (lp->req_line_speed[cfx_idx] < SPEED_20000) {
if (lp->speed_cap_mask[cfx_idx] &
- PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
+ PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)
lp->req_line_speed[cfx_idx] =
- SPEED_10000;
+ SPEED_20000;
+ else if (lp->speed_cap_mask[cfx_idx] &
+ PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
+ lp->req_line_speed[cfx_idx] =
+ SPEED_10000;
else
lp->req_line_speed[cfx_idx] =
SPEED_1000;
@@ -4867,9 +4867,7 @@ static bool bnx2x_check_blocks_with_parity3(struct bnx2x *bp, u32 sig,
res = true;
break;
case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY:
- if (print)
- _print_next_block((*par_num)++,
- "MCP SCPAD");
+ (*par_num)++;
/* clear latched SCPAD PARITY from MCP */
REG_WR(bp, MISC_REG_AEU_CLR_LATCH_SIGNAL,
1UL << 10);
@@ -4931,6 +4929,7 @@ static bool bnx2x_parity_attn(struct bnx2x *bp, bool *global, bool print,
(sig[3] & HW_PRTY_ASSERT_SET_3) ||
(sig[4] & HW_PRTY_ASSERT_SET_4)) {
int par_num = 0;
+
DP(NETIF_MSG_HW, "Was parity error: HW block parity attention:\n"
"[0]:0x%08x [1]:0x%08x [2]:0x%08x [3]:0x%08x [4]:0x%08x\n",
sig[0] & HW_PRTY_ASSERT_SET_0,
@@ -4938,9 +4937,18 @@ static bool bnx2x_parity_attn(struct bnx2x *bp, bool *global, bool print,
sig[2] & HW_PRTY_ASSERT_SET_2,
sig[3] & HW_PRTY_ASSERT_SET_3,
sig[4] & HW_PRTY_ASSERT_SET_4);
- if (print)
- netdev_err(bp->dev,
- "Parity errors detected in blocks: ");
+ if (print) {
+ if (((sig[0] & HW_PRTY_ASSERT_SET_0) ||
+ (sig[1] & HW_PRTY_ASSERT_SET_1) ||
+ (sig[2] & HW_PRTY_ASSERT_SET_2) ||
+ (sig[4] & HW_PRTY_ASSERT_SET_4)) ||
+ (sig[3] & HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD)) {
+ netdev_err(bp->dev,
+ "Parity errors detected in blocks: ");
+ } else {
+ print = false;
+ }
+ }
res |= bnx2x_check_blocks_with_parity0(bp,
sig[0] & HW_PRTY_ASSERT_SET_0, &par_num, print);
res |= bnx2x_check_blocks_with_parity1(bp,
@@ -8431,7 +8439,7 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set)
BNX2X_ETH_MAC, &ramrod_flags);
} else { /* vf */
return bnx2x_vfpf_config_mac(bp, bp->dev->dev_addr,
- bp->fp->index, true);
+ bp->fp->index, set);
}
}
@@ -9323,7 +9331,8 @@ unload_error:
* function stop ramrod is sent, since as part of this ramrod FW access
* PTP registers.
*/
- bnx2x_stop_ptp(bp);
+ if (bp->flags & PTP_SUPPORTED)
+ bnx2x_stop_ptp(bp);
/* Disable HW interrupts, NAPI */
bnx2x_netif_stop(bp, 1);
@@ -11147,6 +11156,12 @@ static void bnx2x_link_settings_requested(struct bnx2x *bp)
bp->port.advertising[idx] |=
(ADVERTISED_1000baseT_Full |
ADVERTISED_TP);
+ } else if (bp->port.supported[idx] &
+ SUPPORTED_1000baseKX_Full) {
+ bp->link_params.req_line_speed[idx] =
+ SPEED_1000;
+ bp->port.advertising[idx] |=
+ ADVERTISED_1000baseKX_Full;
} else {
BNX2X_ERR("NVRAM config error. Invalid link_config 0x%x speed_cap_mask 0x%x\n",
link_config,
@@ -11179,6 +11194,13 @@ static void bnx2x_link_settings_requested(struct bnx2x *bp)
bp->port.advertising[idx] |=
(ADVERTISED_10000baseT_Full |
ADVERTISED_FIBRE);
+ } else if (bp->port.supported[idx] &
+ SUPPORTED_10000baseKR_Full) {
+ bp->link_params.req_line_speed[idx] =
+ SPEED_10000;
+ bp->port.advertising[idx] |=
+ (ADVERTISED_10000baseKR_Full |
+ ADVERTISED_FIBRE);
} else {
BNX2X_ERR("NVRAM config error. Invalid link_config 0x%x speed_cap_mask 0x%x\n",
link_config,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 07cdf9bbffef..4ad415ac8cfe 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -424,7 +424,7 @@ static void __bnx2x_vlan_mac_h_exec_pending(struct bnx2x *bp,
o->head_exe_request = false;
o->saved_ramrod_flags = 0;
rc = bnx2x_exe_queue_step(bp, &o->exe_queue, &ramrod_flags);
- if (rc != 0) {
+ if ((rc != 0) && (rc != 1)) {
BNX2X_ERR("execution of pending commands failed with rc %d\n",
rc);
#ifdef BNX2X_STOP_ON_ERROR
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 6f2887a5e0be..6159deab8c98 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -594,6 +594,7 @@ struct bcmgenet_priv {
wait_queue_head_t wq;
struct phy_device *phydev;
struct device_node *phy_dn;
+ struct device_node *mdio_dn;
struct mii_bus *mii_bus;
u16 gphy_rev;
struct clk *clk_eee;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 6bef04e2f735..adf23d2ac488 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -408,6 +408,52 @@ static int bcmgenet_mii_probe(struct net_device *dev)
return 0;
}
+/* Workaround for integrated BCM7xxx Gigabit PHYs whose internal MDIO
+ * management controller makes the first read or write transaction to
+ * them fail. We insert a dummy
+ * BMSR read here to make sure that phy_get_device() and get_phy_id() can
+ * correctly read the PHY MII_PHYSID1/2 registers and successfully register a
+ * PHY device for this peripheral.
+ *
+ * Once the PHY driver is registered, we can work around subsequent
+ * reads from there (e.g.: during system-wide power management).
+ *
+ * bus->reset is invoked before mdiobus_scan during mdiobus_register and is
+ * therefore the right location to stick that workaround. Since we do not want
+ * to read from non-existing PHYs, we either use bus->phy_mask or do a manual
+ * Device Tree scan to limit the search area.
+ */
+static int bcmgenet_mii_bus_reset(struct mii_bus *bus)
+{
+ struct net_device *dev = bus->priv;
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct device_node *np = priv->mdio_dn;
+ struct device_node *child = NULL;
+ u32 read_mask = 0;
+ int addr = 0;
+
+ if (!np) {
+ read_mask = 1 << priv->phy_addr;
+ } else {
+ for_each_available_child_of_node(np, child) {
+ addr = of_mdio_parse_addr(&dev->dev, child);
+ if (addr < 0)
+ continue;
+
+ read_mask |= 1 << addr;
+ }
+ }
+
+ for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+ if (read_mask & 1 << addr) {
+ dev_dbg(&dev->dev, "Workaround for PHY @ %d\n", addr);
+ mdiobus_read(bus, addr, MII_BMSR);
+ }
+ }
+
+ return 0;
+}
+
static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv)
{
struct mii_bus *bus;
@@ -427,6 +473,7 @@ static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv)
bus->parent = &priv->pdev->dev;
bus->read = bcmgenet_mii_read;
bus->write = bcmgenet_mii_write;
+ bus->reset = bcmgenet_mii_bus_reset;
snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d",
priv->pdev->name, priv->pdev->id);
@@ -443,7 +490,6 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
{
struct device_node *dn = priv->pdev->dev.of_node;
struct device *kdev = &priv->pdev->dev;
- struct device_node *mdio_dn;
char *compat;
int ret;
@@ -451,14 +497,14 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
if (!compat)
return -ENOMEM;
- mdio_dn = of_find_compatible_node(dn, NULL, compat);
+ priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
kfree(compat);
- if (!mdio_dn) {
+ if (!priv->mdio_dn) {
dev_err(kdev, "unable to find MDIO bus node\n");
return -ENODEV;
}
- ret = of_mdiobus_register(priv->mii_bus, mdio_dn);
+ ret = of_mdiobus_register(priv->mii_bus, priv->mdio_dn);
if (ret) {
dev_err(kdev, "failed to register MDIO bus\n");
return ret;
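
The new bus->reset hook relies on the MDIO core calling it before any
address is probed. Simplified control flow of mdiobus_register() from
this era (paraphrased, not part of the patch):

	/* The reset hook runs first, so the dummy BMSR reads above
	 * complete before phy_get_device()/get_phy_id() ever run.
	 */
	if (bus->reset)
		bus->reset(bus);

	for (i = 0; i < PHY_MAX_ADDR; i++) {
		if ((bus->phy_mask & (1 << i)) == 0) {
			struct phy_device *phydev;

			phydev = mdiobus_scan(bus, i);
			if (IS_ERR(phydev) && (PTR_ERR(phydev) != -ENODEV))
				goto error;
		}
	}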
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 160f8077692c..29f330831784 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -434,8 +434,9 @@ static int lio_set_phys_id(struct net_device *netdev,
if (ret)
return ret;
- octnet_mdio45_access(lio, 1, LIO68XX_LED_BEACON_ADDR,
- &lio->phy_beacon_val);
+ ret = octnet_mdio45_access(lio, 1,
+ LIO68XX_LED_BEACON_ADDR,
+ &lio->phy_beacon_val);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 0d3106b464b2..f67641a2ff9e 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -650,14 +650,12 @@ void octeon_free_device_mem(struct octeon_device *oct)
for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES; i++) {
/* could check mask as well */
- if (oct->droq[i])
- vfree(oct->droq[i]);
+ vfree(oct->droq[i]);
}
for (i = 0; i < MAX_OCTEON_INSTR_QUEUES; i++) {
/* could check mask as well */
- if (oct->instr_queue[i])
- vfree(oct->instr_queue[i]);
+ vfree(oct->instr_queue[i]);
}
i = oct->octeon_id;
@@ -1078,10 +1076,7 @@ octeon_unregister_dispatch_fn(struct octeon_device *oct, u16 opcode,
oct->dispatch.count--;
spin_unlock_bh(&oct->dispatch.lock);
-
- if (dfree)
- vfree(dfree);
-
+ vfree(dfree);
return retval;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 94b502a0cf33..4dba86eaa045 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -216,9 +216,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no)
dev_dbg(&oct->pci_dev->dev, "%s[%d]\n", __func__, q_no);
octeon_droq_destroy_ring_buffers(oct, droq);
-
- if (droq->recv_buf_list)
- vfree(droq->recv_buf_list);
+ vfree(droq->recv_buf_list);
if (droq->info_base_addr)
cnnic_free_aligned_dma(oct->pci_dev, droq->info_list,
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 356796bf9b87..a2a24652c8f3 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -175,8 +175,7 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
desc_size =
CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf));
- if (iq->request_list)
- vfree(iq->request_list);
+ vfree(iq->request_list);
if (iq->base_addr) {
q_size = iq->max_count * desc_size;
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index eadae1b412c6..da2004e2a741 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1208,7 +1208,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
napi_complete(napi);
vnic_intr_unmask(&enic->intr[intr]);
}
- enic_poll_unlock_napi(&enic->rq[cq_rq]);
+ enic_poll_unlock_napi(&enic->rq[cq_rq], napi);
return rq_work_done;
}
@@ -1414,7 +1414,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
*/
enic_calc_int_moderation(enic, &enic->rq[rq]);
- enic_poll_unlock_napi(&enic->rq[rq]);
+ enic_poll_unlock_napi(&enic->rq[rq], napi);
if (work_done < work_to_do) {
/* Some work done, but not enough to stay in polling,
diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.h b/drivers/net/ethernet/cisco/enic/vnic_rq.h
index 8111d5202df2..b9c82f143d7e 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_rq.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_rq.h
@@ -21,6 +21,7 @@
#define _VNIC_RQ_H_
#include <linux/pci.h>
+#include <linux/netdevice.h>
#include "vnic_dev.h"
#include "vnic_cq.h"
@@ -75,6 +76,12 @@ struct vnic_rq_buf {
uint64_t wr_id;
};
+enum enic_poll_state {
+ ENIC_POLL_STATE_IDLE,
+ ENIC_POLL_STATE_NAPI,
+ ENIC_POLL_STATE_POLL
+};
+
struct vnic_rq {
unsigned int index;
struct vnic_dev *vdev;
@@ -86,19 +93,7 @@ struct vnic_rq {
void *os_buf_head;
unsigned int pkts_outstanding;
#ifdef CONFIG_NET_RX_BUSY_POLL
-#define ENIC_POLL_STATE_IDLE 0
-#define ENIC_POLL_STATE_NAPI (1 << 0) /* NAPI owns this poll */
-#define ENIC_POLL_STATE_POLL (1 << 1) /* poll owns this poll */
-#define ENIC_POLL_STATE_NAPI_YIELD (1 << 2) /* NAPI yielded this poll */
-#define ENIC_POLL_STATE_POLL_YIELD (1 << 3) /* poll yielded this poll */
-#define ENIC_POLL_YIELD (ENIC_POLL_STATE_NAPI_YIELD | \
- ENIC_POLL_STATE_POLL_YIELD)
-#define ENIC_POLL_LOCKED (ENIC_POLL_STATE_NAPI | \
- ENIC_POLL_STATE_POLL)
-#define ENIC_POLL_USER_PEND (ENIC_POLL_STATE_POLL | \
- ENIC_POLL_STATE_POLL_YIELD)
- unsigned int bpoll_state;
- spinlock_t bpoll_lock;
+ atomic_t bpoll_state;
#endif /* CONFIG_NET_RX_BUSY_POLL */
};
@@ -215,76 +210,43 @@ static inline int vnic_rq_fill(struct vnic_rq *rq,
#ifdef CONFIG_NET_RX_BUSY_POLL
static inline void enic_busy_poll_init_lock(struct vnic_rq *rq)
{
- spin_lock_init(&rq->bpoll_lock);
- rq->bpoll_state = ENIC_POLL_STATE_IDLE;
+ atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE);
}
static inline bool enic_poll_lock_napi(struct vnic_rq *rq)
{
- bool rc = true;
-
- spin_lock(&rq->bpoll_lock);
- if (rq->bpoll_state & ENIC_POLL_LOCKED) {
- WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI);
- rq->bpoll_state |= ENIC_POLL_STATE_NAPI_YIELD;
- rc = false;
- } else {
- rq->bpoll_state = ENIC_POLL_STATE_NAPI;
- }
- spin_unlock(&rq->bpoll_lock);
+ int rc = atomic_cmpxchg(&rq->bpoll_state, ENIC_POLL_STATE_IDLE,
+ ENIC_POLL_STATE_NAPI);
- return rc;
+ return (rc == ENIC_POLL_STATE_IDLE);
}
-static inline bool enic_poll_unlock_napi(struct vnic_rq *rq)
+static inline void enic_poll_unlock_napi(struct vnic_rq *rq,
+ struct napi_struct *napi)
{
- bool rc = false;
-
- spin_lock(&rq->bpoll_lock);
- WARN_ON(rq->bpoll_state &
- (ENIC_POLL_STATE_POLL | ENIC_POLL_STATE_NAPI_YIELD));
- if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD)
- rc = true;
- rq->bpoll_state = ENIC_POLL_STATE_IDLE;
- spin_unlock(&rq->bpoll_lock);
-
- return rc;
+ WARN_ON(atomic_read(&rq->bpoll_state) != ENIC_POLL_STATE_NAPI);
+ napi_gro_flush(napi, false);
+ atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE);
}
static inline bool enic_poll_lock_poll(struct vnic_rq *rq)
{
- bool rc = true;
-
- spin_lock_bh(&rq->bpoll_lock);
- if (rq->bpoll_state & ENIC_POLL_LOCKED) {
- rq->bpoll_state |= ENIC_POLL_STATE_POLL_YIELD;
- rc = false;
- } else {
- rq->bpoll_state |= ENIC_POLL_STATE_POLL;
- }
- spin_unlock_bh(&rq->bpoll_lock);
+ int rc = atomic_cmpxchg(&rq->bpoll_state, ENIC_POLL_STATE_IDLE,
+ ENIC_POLL_STATE_POLL);
- return rc;
+ return (rc == ENIC_POLL_STATE_IDLE);
}
-static inline bool enic_poll_unlock_poll(struct vnic_rq *rq)
-{
- bool rc = false;
- spin_lock_bh(&rq->bpoll_lock);
- WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI);
- if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD)
- rc = true;
- rq->bpoll_state = ENIC_POLL_STATE_IDLE;
- spin_unlock_bh(&rq->bpoll_lock);
-
- return rc;
+static inline void enic_poll_unlock_poll(struct vnic_rq *rq)
+{
+ WARN_ON(atomic_read(&rq->bpoll_state) != ENIC_POLL_STATE_POLL);
+ atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE);
}
static inline bool enic_poll_busy_polling(struct vnic_rq *rq)
{
- WARN_ON(!(rq->bpoll_state & ENIC_POLL_LOCKED));
- return rq->bpoll_state & ENIC_POLL_USER_PEND;
+ return atomic_read(&rq->bpoll_state) & ENIC_POLL_STATE_POLL;
}
#else
@@ -298,7 +260,8 @@ static inline bool enic_poll_lock_napi(struct vnic_rq *rq)
return true;
}
-static inline bool enic_poll_unlock_napi(struct vnic_rq *rq)
+static inline bool enic_poll_unlock_napi(struct vnic_rq *rq,
+ struct napi_struct *napi)
{
return false;
}
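
The enic conversion above collapses a spinlock plus four yield/lock flag
bits into one three-state atomic. The ownership pattern in isolation, as
a sketch with placeholder names:

	#include <linux/atomic.h>

	enum owner { IDLE, NAPI_OWNED, POLL_OWNED };

	static atomic_t state = ATOMIC_INIT(IDLE);

	/* Ownership is only ever taken from IDLE; a lost race just
	 * returns false instead of recording a yield flag as the old
	 * spinlock scheme did.
	 */
	static bool try_lock(enum owner who)
	{
		return atomic_cmpxchg(&state, IDLE, who) == IDLE;
	}

	static void unlock(enum owner who)
	{
		WARN_ON(atomic_read(&state) != who);
		atomic_set(&state, IDLE);
	}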
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index b8de87b03046..ff76d4e9dc1b 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -83,12 +83,12 @@ config UGETH_TX_ON_DEMAND
config GIANFAR
tristate "Gianfar Ethernet"
- depends on FSL_SOC
select FSL_PQ_MDIO
select PHYLIB
select CRC32
---help---
This driver supports the Gigabit TSEC on the MPC83xx, MPC85xx,
- and MPC86xx family of chips, and the FEC on the 8540.
+ and MPC86xx family of chips, the eTSEC on LS1021A and the FEC
+ on the 8540.
endif # NET_VENDOR_FREESCALE
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index a86af8a7485d..1eee73cccdf5 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -428,6 +428,8 @@ struct bufdesc_ex {
#define FEC_QUIRK_BUG_CAPTURE (1 << 10)
/* Controller has only one MDIO bus */
#define FEC_QUIRK_SINGLE_MDIO (1 << 11)
+/* Controller supports RACC register */
+#define FEC_QUIRK_HAS_RACC (1 << 12)
struct fec_enet_priv_tx_q {
int index;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index e464aeaeed2c..1f89c59b4353 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -85,28 +85,30 @@ static struct platform_device_id fec_devtype[] = {
.driver_data = 0,
}, {
.name = "imx25-fec",
- .driver_data = FEC_QUIRK_USE_GASKET,
+ .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC,
}, {
.name = "imx27-fec",
- .driver_data = 0,
+ .driver_data = FEC_QUIRK_HAS_RACC,
}, {
.name = "imx28-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
- FEC_QUIRK_SINGLE_MDIO,
+ FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC,
}, {
.name = "imx6q-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
- FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358,
+ FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 |
+ FEC_QUIRK_HAS_RACC,
}, {
.name = "mvf600-fec",
- .driver_data = FEC_QUIRK_ENET_MAC,
+ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC,
}, {
.name = "imx6sx-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
- FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE,
+ FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
+ FEC_QUIRK_HAS_RACC,
}, {
/* sentinel */
}
@@ -970,13 +972,15 @@ fec_restart(struct net_device *ndev)
writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
#if !defined(CONFIG_M5272)
- /* set RX checksum */
- val = readl(fep->hwp + FEC_RACC);
- if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
- val |= FEC_RACC_OPTIONS;
- else
- val &= ~FEC_RACC_OPTIONS;
- writel(val, fep->hwp + FEC_RACC);
+ if (fep->quirks & FEC_QUIRK_HAS_RACC) {
+ /* set RX checksum */
+ val = readl(fep->hwp + FEC_RACC);
+ if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
+ val |= FEC_RACC_OPTIONS;
+ else
+ val &= ~FEC_RACC_OPTIONS;
+ writel(val, fep->hwp + FEC_RACC);
+ }
#endif
/*
diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c
index ff2903652f4b..c3b6af83f070 100644
--- a/drivers/net/ethernet/icplus/ipg.c
+++ b/drivers/net/ethernet/icplus/ipg.c
@@ -1028,7 +1028,7 @@ static struct net_device_stats *ipg_nic_get_stats(struct net_device *dev)
/* detailed rx_errors */
sp->stats.rx_length_errors += ipg_r16(IPG_INRANGELENGTHERRORS) +
- ipg_r16(IPG_FRAMETOOLONGERRRORS);
+ ipg_r16(IPG_FRAMETOOLONGERRORS);
sp->stats.rx_crc_errors += ipg_r16(IPG_FRAMECHECKSEQERRORS);
/* Unutilized IPG statistic registers. */
diff --git a/drivers/net/ethernet/icplus/ipg.h b/drivers/net/ethernet/icplus/ipg.h
index a21e4f5702b5..de606281f97b 100644
--- a/drivers/net/ethernet/icplus/ipg.h
+++ b/drivers/net/ethernet/icplus/ipg.h
@@ -102,7 +102,7 @@ enum ipg_regs {
#define IPG_MCSTFRAMESRCVDOK 0xB8
#define IPG_BCSTFRAMESRCVDOK 0xBE
#define IPG_MACCONTROLFRAMESRCVD 0xC6
-#define IPG_FRAMETOOLONGERRRORS 0xC8
+#define IPG_FRAMETOOLONGERRORS 0xC8
#define IPG_INRANGELENGTHERRORS 0xCA
#define IPG_FRAMECHECKSEQERRORS 0xCC
#define IPG_FRAMESLOSTRXERRORS 0xCE
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index b074b9a667b3..91a5a0ae9cd7 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -237,17 +237,19 @@ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)
if (ret_val)
return false;
out:
- if ((hw->mac.type == e1000_pch_lpt) ||
- (hw->mac.type == e1000_pch_spt)) {
- /* Unforce SMBus mode in PHY */
- e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
- phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
- e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
+ if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) {
+ /* Only unforce SMBus if ME is not active */
+ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+ /* Unforce SMBus mode in PHY */
+ e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
+ phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
+ e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
- /* Unforce SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
+ /* Unforce SMBus mode in MAC */
+ mac_reg = er32(CTRL_EXT);
+ mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, mac_reg);
+ }
}
return true;
@@ -1087,6 +1089,7 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
u32 mac_reg;
s32 ret_val = 0;
u16 phy_reg;
+ u16 oem_reg = 0;
if ((hw->mac.type < e1000_pch_lpt) ||
(hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPT_I217_LM) ||
@@ -1128,33 +1131,37 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
if (ret_val)
goto out;
+ /* Force SMBus mode in PHY */
+ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
+ if (ret_val)
+ goto release;
+ phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+
+ /* Force SMBus mode in MAC */
+ mac_reg = er32(CTRL_EXT);
+ mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, mac_reg);
+
/* Si workaround for ULP entry flow on i217/rev6 h/w. Enable
* LPLU and disable Gig speed when entering ULP
*/
if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) {
ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS,
- &phy_reg);
+ &oem_reg);
if (ret_val)
goto release;
+
+ phy_reg = oem_reg;
phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS;
+
ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
phy_reg);
+
if (ret_val)
goto release;
}
- /* Force SMBus mode in PHY */
- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
- if (ret_val)
- goto release;
- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
- /* Force SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
-
/* Set Inband ULP Exit, Reset to SMBus mode and
* Disable SMBus Release on PERST# in PHY
*/
@@ -1166,10 +1173,15 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
if (to_sx) {
if (er32(WUFC) & E1000_WUFC_LNKC)
phy_reg |= I218_ULP_CONFIG1_WOL_HOST;
+ else
+ phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
phy_reg |= I218_ULP_CONFIG1_STICKY_ULP;
+ phy_reg &= ~I218_ULP_CONFIG1_INBAND_EXIT;
} else {
phy_reg |= I218_ULP_CONFIG1_INBAND_EXIT;
+ phy_reg &= ~I218_ULP_CONFIG1_STICKY_ULP;
+ phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
}
e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
@@ -1181,6 +1193,15 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
/* Commit ULP changes in PHY by starting auto ULP configuration */
phy_reg |= I218_ULP_CONFIG1_START;
e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
+
+ if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) &&
+ to_sx && (er32(STATUS) & E1000_STATUS_LU)) {
+ ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
+ oem_reg);
+ if (ret_val)
+ goto release;
+ }
+
release:
hw->phy.ops.release(hw);
out:
@@ -1379,16 +1400,20 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
if (((hw->mac.type == e1000_pch2lan) ||
(hw->mac.type == e1000_pch_lpt) ||
(hw->mac.type == e1000_pch_spt)) && link) {
- u32 reg;
+ u16 speed, duplex;
- reg = er32(STATUS);
+ e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
tipg_reg = er32(TIPG);
tipg_reg &= ~E1000_TIPG_IPGT_MASK;
- if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) {
+ if (duplex == HALF_DUPLEX && speed == SPEED_10) {
tipg_reg |= 0xFF;
/* Reduce Rx latency in analog PHY */
emi_val = 0;
+ } else if (hw->mac.type == e1000_pch_spt &&
+ duplex == FULL_DUPLEX && speed != SPEED_1000) {
+ tipg_reg |= 0xC;
+ emi_val = 1;
} else {
/* Roll back the default values */
@@ -1412,14 +1437,59 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
if (ret_val)
return ret_val;
+
+ if (hw->mac.type == e1000_pch_spt) {
+ u16 data;
+ u16 ptr_gap;
+
+ if (speed == SPEED_1000) {
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = e1e_rphy_locked(hw,
+ PHY_REG(776, 20),
+ &data);
+ if (ret_val) {
+ hw->phy.ops.release(hw);
+ return ret_val;
+ }
+
+ ptr_gap = (data & (0x3FF << 2)) >> 2;
+ if (ptr_gap < 0x18) {
+ data &= ~(0x3FF << 2);
+ data |= (0x18 << 2);
+ ret_val =
+ e1e_wphy_locked(hw,
+ PHY_REG(776, 20),
+ data);
+ }
+ hw->phy.ops.release(hw);
+ if (ret_val)
+ return ret_val;
+ }
+ }
+ }
+
+ /* I217 Packet Loss issue:
+ * ensure that FEXTNVM4 Beacon Duration is set correctly
+ * on power up.
+ * Set the Beacon Duration for I217 to 8 usec
+ */
+ if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) {
+ u32 mac_reg;
+
+ mac_reg = er32(FEXTNVM4);
+ mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
+ mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
+ ew32(FEXTNVM4, mac_reg);
}
/* Work-around I218 hang issue */
if ((hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
(hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_V) ||
(hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_LM3) ||
- (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3) ||
- (hw->mac.type == e1000_pch_spt)) {
+ (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) {
ret_val = e1000_k1_workaround_lpt_lp(hw, link);
if (ret_val)
return ret_val;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index e62b9dcb91fe..89d788d8f263 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6354,13 +6354,14 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
}
/**
- * e1000e_disable_aspm - Disable ASPM states
+ * __e1000e_disable_aspm - Disable ASPM states
* @pdev: pointer to PCI device struct
* @state: bit-mask of ASPM states to disable
+ * @locked: indicates whether this context already holds pci_bus_sem.
*
* Some devices *must* have certain ASPM states disabled per hardware errata.
**/
-static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
+static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state, int locked)
{
struct pci_dev *parent = pdev->bus->self;
u16 aspm_dis_mask = 0;
@@ -6399,7 +6400,10 @@ static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
"L1" : "");
#ifdef CONFIG_PCIEASPM
- pci_disable_link_state_locked(pdev, state);
+ if (locked)
+ pci_disable_link_state_locked(pdev, state);
+ else
+ pci_disable_link_state(pdev, state);
/* Double-check ASPM control. If not disabled by the above, the
* BIOS is preventing that from happening (or CONFIG_PCIEASPM is
@@ -6422,6 +6426,32 @@ static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
aspm_dis_mask);
}
+/**
+ * e1000e_disable_aspm - Disable ASPM states.
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ *
+ * This function acquires the pci_bus_sem!
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
+{
+ __e1000e_disable_aspm(pdev, state, 0);
+}
+
+/**
+ * e1000e_disable_aspm_locked - Disable ASPM states.
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ *
+ * This function must be called with pci_bus_sem acquired!
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void e1000e_disable_aspm_locked(struct pci_dev *pdev, u16 state)
+{
+ __e1000e_disable_aspm(pdev, state, 1);
+}
+
#ifdef CONFIG_PM
static int __e1000_resume(struct pci_dev *pdev)
{
@@ -6435,7 +6465,7 @@ static int __e1000_resume(struct pci_dev *pdev)
if (adapter->flags2 & FLAG2_DISABLE_ASPM_L1)
aspm_disable_flag |= PCIE_LINK_STATE_L1;
if (aspm_disable_flag)
- e1000e_disable_aspm(pdev, aspm_disable_flag);
+ e1000e_disable_aspm_locked(pdev, aspm_disable_flag);
pci_set_master(pdev);
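
[Editor's note: a sketch of the locked/unlocked wrapper pattern introduced for __e1000e_disable_aspm() above. A pthread mutex stands in for pci_bus_sem, and the lock is taken inline rather than being delegated to the two pci_disable_link_state*() API variants as in the driver.]

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bus_lock = PTHREAD_MUTEX_INITIALIZER;

static void __do_work(int state, int locked)
{
	/* the caller already holds the lock when locked != 0 */
	if (!locked)
		pthread_mutex_lock(&bus_lock);
	printf("disabling state mask 0x%x\n", state);
	if (!locked)
		pthread_mutex_unlock(&bus_lock);
}

static void do_work(int state)		{ __do_work(state, 0); }
static void do_work_locked(int state)	{ __do_work(state, 1); }

int main(void)
{
	do_work(0x3);			/* takes the lock itself */
	pthread_mutex_lock(&bus_lock);
	do_work_locked(0x1);		/* lock held by this context */
	pthread_mutex_unlock(&bus_lock);
	return 0;
}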
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index f54996f19629..395f32f226c0 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -484,6 +484,8 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
if (!dev)
return -ENOMEM;
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(tx_ring->tx_bi);
bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
if (!tx_ring->tx_bi)
@@ -644,6 +646,8 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring)
struct device *dev = rx_ring->dev;
int bi_size;
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(rx_ring->rx_bi);
bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
if (!rx_ring->rx_bi)
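
[Editor's note: a small sketch of the "warn before overwrite" guard added above. A setup path that allocates into a cached pointer can silently leak memory if it runs twice without a teardown, so a non-NULL pointer is flagged before the new allocation; fprintf() stands in for WARN_ON().]

#include <stdio.h>
#include <stdlib.h>

struct ring { void *buf_info; };

static int setup_ring(struct ring *r, size_t n)
{
	/* a second setup without teardown would leak the old buffer */
	if (r->buf_info)
		fprintf(stderr, "warning: buf_info already allocated\n");
	r->buf_info = calloc(n, sizeof(long));
	return r->buf_info ? 0 : -1;
}

int main(void)
{
	struct ring r = { 0 };
	setup_ring(&r, 16);	/* ok */
	setup_ring(&r, 16);	/* warns: previous buffer is leaked */
	free(r.buf_info);
	return 0;
}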
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 1b98c25b3092..fea3b75a9a35 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -264,7 +264,6 @@ extern const char i40evf_driver_version[];
int i40evf_up(struct i40evf_adapter *adapter);
void i40evf_down(struct i40evf_adapter *adapter);
-void i40evf_reinit_locked(struct i40evf_adapter *adapter);
void i40evf_reset(struct i40evf_adapter *adapter);
void i40evf_set_ethtool_ops(struct net_device *netdev);
void i40evf_update_stats(struct i40evf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index f4e77665bc54..2b53c870e7f1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -267,8 +267,10 @@ static int i40evf_set_ringparam(struct net_device *netdev,
adapter->tx_desc_count = new_tx_count;
adapter->rx_desc_count = new_rx_count;
- if (netif_running(netdev))
- i40evf_reinit_locked(adapter);
+ if (netif_running(netdev)) {
+ adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
+ schedule_work(&adapter->reset_task);
+ }
return 0;
}
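
[Editor's note: a sketch of the "set a flag and kick the reset worker" pattern that replaces the direct reinit call above. The flag bits and the worker invocation are illustrative stand-ins for I40EVF_FLAG_RESET_NEEDED and schedule_work().]

#include <stdio.h>

#define FLAG_RESET_NEEDED	(1u << 0)
#define FLAG_RESET_PENDING	(1u << 1)

struct adapter { unsigned int flags; };

static void reset_task(struct adapter *a)
{
	if (a->flags & FLAG_RESET_NEEDED) {
		a->flags &= ~FLAG_RESET_NEEDED;
		a->flags |= FLAG_RESET_PENDING;
		printf("performing deferred reset\n");
		a->flags &= ~FLAG_RESET_PENDING;
	}
}

static void change_ring_size(struct adapter *a)
{
	/* don't reinit inline; mark the need, let one worker do it */
	a->flags |= FLAG_RESET_NEEDED;
	reset_task(a);	/* in the driver this is schedule_work() */
}

int main(void)
{
	struct adapter a = { 0 };
	change_ring_size(&a);
	return 0;
}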
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 7c53aca4b5a6..4ab4ebba07a1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -170,7 +170,8 @@ static void i40evf_tx_timeout(struct net_device *netdev)
struct i40evf_adapter *adapter = netdev_priv(netdev);
adapter->tx_timeout_count++;
- if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
+ if (!(adapter->flags & (I40EVF_FLAG_RESET_PENDING |
+ I40EVF_FLAG_RESET_NEEDED))) {
adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
schedule_work(&adapter->reset_task);
}
@@ -1460,7 +1461,7 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter)
for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) {
lut = 0;
for (j = 0; j < 4; j++) {
- if (cqueue == adapter->vsi_res->num_queue_pairs)
+ if (cqueue == adapter->num_active_queues)
cqueue = 0;
lut |= ((cqueue) << (8 * j));
cqueue++;
@@ -1470,8 +1471,8 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter)
i40e_flush(hw);
}
-#define I40EVF_RESET_WAIT_MS 100
-#define I40EVF_RESET_WAIT_COUNT 200
+#define I40EVF_RESET_WAIT_MS 10
+#define I40EVF_RESET_WAIT_COUNT 500
/**
* i40evf_reset_task - Call-back task to handle hardware reset
* @work: pointer to work_struct
@@ -1495,10 +1496,17 @@ static void i40evf_reset_task(struct work_struct *work)
&adapter->crit_section))
usleep_range(500, 1000);
+ i40evf_misc_irq_disable(adapter);
if (adapter->flags & I40EVF_FLAG_RESET_NEEDED) {
- dev_info(&adapter->pdev->dev, "Requesting reset from PF\n");
+ adapter->flags &= ~I40EVF_FLAG_RESET_NEEDED;
+ /* Restart the AQ here. If we have been reset but didn't
+ * detect it, or if the PF had to reinit, our AQ will be hosed.
+ */
+ i40evf_shutdown_adminq(hw);
+ i40evf_init_adminq(hw);
i40evf_request_reset(adapter);
}
+ adapter->flags |= I40EVF_FLAG_RESET_PENDING;
/* poll until we see the reset actually happen */
for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
@@ -1507,10 +1515,10 @@ static void i40evf_reset_task(struct work_struct *work)
if ((rstat_val != I40E_VFR_VFACTIVE) &&
(rstat_val != I40E_VFR_COMPLETED))
break;
- msleep(I40EVF_RESET_WAIT_MS);
+ usleep_range(500, 1000);
}
if (i == I40EVF_RESET_WAIT_COUNT) {
- adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+ dev_info(&adapter->pdev->dev, "Never saw reset\n");
goto continue_reset; /* act like the reset happened */
}
@@ -1518,11 +1526,12 @@ static void i40evf_reset_task(struct work_struct *work)
for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
I40E_VFGEN_RSTAT_VFR_STATE_MASK;
- if ((rstat_val == I40E_VFR_VFACTIVE) ||
- (rstat_val == I40E_VFR_COMPLETED))
+ if (rstat_val == I40E_VFR_VFACTIVE)
break;
msleep(I40EVF_RESET_WAIT_MS);
}
+ /* extra wait to make sure minimum wait is met */
+ msleep(I40EVF_RESET_WAIT_MS);
if (i == I40EVF_RESET_WAIT_COUNT) {
struct i40evf_mac_filter *f, *ftmp;
struct i40evf_vlan_filter *fv, *fvtmp;
@@ -1534,11 +1543,10 @@ static void i40evf_reset_task(struct work_struct *work)
if (netif_running(adapter->netdev)) {
set_bit(__I40E_DOWN, &adapter->vsi.state);
- i40evf_irq_disable(adapter);
- i40evf_napi_disable_all(adapter);
- netif_tx_disable(netdev);
- netif_tx_stop_all_queues(netdev);
netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+ i40evf_napi_disable_all(adapter);
+ i40evf_irq_disable(adapter);
i40evf_free_traffic_irqs(adapter);
i40evf_free_all_tx_resources(adapter);
i40evf_free_all_rx_resources(adapter);
@@ -1550,6 +1558,7 @@ static void i40evf_reset_task(struct work_struct *work)
list_del(&f->list);
kfree(f);
}
+
list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list,
list) {
list_del(&fv->list);
@@ -1564,22 +1573,27 @@ static void i40evf_reset_task(struct work_struct *work)
i40evf_shutdown_adminq(hw);
adapter->netdev->flags &= ~IFF_UP;
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+ dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
return; /* Do not attempt to reinit. It's dead, Jim. */
}
continue_reset:
- adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
-
- i40evf_irq_disable(adapter);
-
if (netif_running(adapter->netdev)) {
- i40evf_napi_disable_all(adapter);
- netif_tx_disable(netdev);
- netif_tx_stop_all_queues(netdev);
netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+ i40evf_napi_disable_all(adapter);
}
+ i40evf_irq_disable(adapter);
adapter->state = __I40EVF_RESETTING;
+ adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+
+ /* free the Tx/Rx rings and descriptors, might be better to just
+ * re-use them sometime in the future
+ */
+ i40evf_free_all_rx_resources(adapter);
+ i40evf_free_all_tx_resources(adapter);
/* kill and reinit the admin queue */
if (i40evf_shutdown_adminq(hw))
@@ -1603,6 +1617,7 @@ continue_reset:
adapter->aq_required = I40EVF_FLAG_AQ_ADD_MAC_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ i40evf_misc_irq_enable(adapter);
mod_timer(&adapter->watchdog_timer, jiffies + 2);
@@ -1624,7 +1639,10 @@ continue_reset:
goto reset_err;
i40evf_irq_enable(adapter, true);
+ } else {
+ adapter->state = __I40EVF_DOWN;
}
+
return;
reset_err:
dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
@@ -1667,6 +1685,11 @@ static void i40evf_adminq_task(struct work_struct *work)
memset(event.msg_buf, 0, I40EVF_MAX_AQ_BUF_SIZE);
} while (pending);
+ if ((adapter->flags &
+ (I40EVF_FLAG_RESET_PENDING | I40EVF_FLAG_RESET_NEEDED)) ||
+ adapter->state == __I40EVF_RESETTING)
+ goto freedom;
+
/* check for error indications */
val = rd32(hw, hw->aq.arq.len);
oldval = val;
@@ -1702,6 +1725,7 @@ static void i40evf_adminq_task(struct work_struct *work)
if (oldval != val)
wr32(hw, hw->aq.asq.len, val);
+freedom:
kfree(event.msg_buf);
out:
/* re-enable Admin queue interrupt cause */
@@ -1897,47 +1921,6 @@ static struct net_device_stats *i40evf_get_stats(struct net_device *netdev)
}
/**
- * i40evf_reinit_locked - Software reinit
- * @adapter: board private structure
- *
- * Reinititalizes the ring structures in response to a software configuration
- * change. Roughly the same as close followed by open, but skips releasing
- * and reallocating the interrupts.
- **/
-void i40evf_reinit_locked(struct i40evf_adapter *adapter)
-{
- struct net_device *netdev = adapter->netdev;
- int err;
-
- WARN_ON(in_interrupt());
-
- i40evf_down(adapter);
-
- /* allocate transmit descriptors */
- err = i40evf_setup_all_tx_resources(adapter);
- if (err)
- goto err_reinit;
-
- /* allocate receive descriptors */
- err = i40evf_setup_all_rx_resources(adapter);
- if (err)
- goto err_reinit;
-
- i40evf_configure(adapter);
-
- err = i40evf_up_complete(adapter);
- if (err)
- goto err_reinit;
-
- i40evf_irq_enable(adapter, true);
- return;
-
-err_reinit:
- dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
- i40evf_close(netdev);
-}
-
-/**
* i40evf_change_mtu - Change the Maximum Transfer Unit
* @netdev: network interface device structure
* @new_mtu: new value for maximum frame size
@@ -1952,9 +1935,10 @@ static int i40evf_change_mtu(struct net_device *netdev, int new_mtu)
if ((new_mtu < 68) || (max_frame > I40E_MAX_RXBUFFER))
return -EINVAL;
- /* must set new MTU before calling down or up */
netdev->mtu = new_mtu;
- i40evf_reinit_locked(adapter);
+ adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
+ schedule_work(&adapter->reset_task);
+
return 0;
}
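
[Editor's note: a standalone sketch of the RSS LUT packing in the i40evf_configure_rss() hunk above: four 8-bit queue indices per 32-bit LUT word, with the queue counter wrapping at the number of active queues rather than the PF-wide pair count, which is what the fix corrects.]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int num_active_queues = 3, cqueue = 0;

	for (int i = 0; i < 4; i++) {		/* 4 LUT registers shown */
		uint32_t lut = 0;

		for (int j = 0; j < 4; j++) {
			if (cqueue == num_active_queues)
				cqueue = 0;
			lut |= cqueue << (8 * j);
			cqueue++;
		}
		printf("VFQF_HLUT[%d] = 0x%08x\n", i, lut);
	}
	return 0;
}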
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 0f69ef81751a..b0182dd31346 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -1,5 +1,5 @@
/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
+ * Copyright(c) 2007-2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -1900,8 +1900,8 @@ static void igb_clear_hw_cntrs_82575(struct e1000_hw *hw)
* igb_rx_fifo_flush_82575 - Clean rx fifo after RX enable
* @hw: pointer to the HW structure
*
- * After rx enable if managability is enabled then there is likely some
- * bad data at the start of the fifo and possibly in the DMA fifo. This
+ * After rx enable if manageability is enabled then there is likely some
+ * bad data at the start of the fifo and possibly in the DMA fifo. This
* function clears the fifos and flushes any packets that came in as rx was
* being enabled.
**/
@@ -1910,6 +1910,11 @@ void igb_rx_fifo_flush_82575(struct e1000_hw *hw)
u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
int i, ms_wait;
+ /* disable IPv6 options as per hardware errata */
+ rfctl = rd32(E1000_RFCTL);
+ rfctl |= E1000_RFCTL_IPV6_EX_DIS;
+ wr32(E1000_RFCTL, rfctl);
+
if (hw->mac.type != e1000_82575 ||
!(rd32(E1000_MANC) & E1000_MANC_RCV_TCO_EN))
return;
@@ -1937,7 +1942,6 @@ void igb_rx_fifo_flush_82575(struct e1000_hw *hw)
* incoming packets are rejected. Set enable and wait 2ms so that
* any packet that was coming in as RCTL.EN was set is flushed
*/
- rfctl = rd32(E1000_RFCTL);
wr32(E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF);
rlpml = rd32(E1000_RLPML);
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 217f8138851b..f8684aa285be 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -344,7 +344,8 @@
#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
/* Header split receive */
-#define E1000_RFCTL_LEF 0x00040000
+#define E1000_RFCTL_IPV6_EX_DIS 0x00010000
+#define E1000_RFCTL_LEF 0x00040000
/* Collision related configuration parameters */
#define E1000_COLLISION_THRESHOLD 15
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index f287186192bb..2f70a9b152bd 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -58,7 +58,7 @@
#define MAJ 5
#define MIN 2
-#define BUILD 15
+#define BUILD 18
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
char igb_driver_name[] = "igb";
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 5bdf78231a4e..370e20ed224c 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -310,6 +310,7 @@ struct mvneta_port {
unsigned int link;
unsigned int duplex;
unsigned int speed;
+ unsigned int tx_csum_limit;
int use_inband_status:1;
};
@@ -2508,8 +2509,10 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
dev->mtu = mtu;
- if (!netif_running(dev))
+ if (!netif_running(dev)) {
+ netdev_update_features(dev);
return 0;
+ }
/* The interface is running, so we have to force a
* reallocation of the queues
@@ -2538,9 +2541,26 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
mvneta_start_dev(pp);
mvneta_port_up(pp);
+ netdev_update_features(dev);
+
return 0;
}
+static netdev_features_t mvneta_fix_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ struct mvneta_port *pp = netdev_priv(dev);
+
+ if (pp->tx_csum_limit && dev->mtu > pp->tx_csum_limit) {
+ features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ netdev_info(dev,
+ "Disable IP checksum for MTU greater than %dB\n",
+ pp->tx_csum_limit);
+ }
+
+ return features;
+}
+
/* Get mac address */
static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr)
{
@@ -2862,6 +2882,7 @@ static const struct net_device_ops mvneta_netdev_ops = {
.ndo_set_rx_mode = mvneta_set_rx_mode,
.ndo_set_mac_address = mvneta_set_mac_addr,
.ndo_change_mtu = mvneta_change_mtu,
+ .ndo_fix_features = mvneta_fix_features,
.ndo_get_stats64 = mvneta_get_stats64,
.ndo_do_ioctl = mvneta_ioctl,
};
@@ -3107,6 +3128,9 @@ static int mvneta_probe(struct platform_device *pdev)
}
}
+ if (of_device_is_compatible(dn, "marvell,armada-370-neta"))
+ pp->tx_csum_limit = 1600;
+
pp->tx_ring_size = MVNETA_MAX_TXD;
pp->rx_ring_size = MVNETA_MAX_RXD;
@@ -3185,6 +3209,7 @@ static int mvneta_remove(struct platform_device *pdev)
static const struct of_device_id mvneta_match[] = {
{ .compatible = "marvell,armada-370-neta" },
+ { .compatible = "marvell,armada-xp-neta" },
{ }
};
MODULE_DEVICE_TABLE(of, mvneta_match);
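
[Editor's note: a sketch of the ndo_fix_features logic added above. When the MTU exceeds the hardware's TX checksum limit, the checksum and TSO feature bits are masked out before the stack commits them. The bit values are illustrative, not the real netdev_features_t layout.]

#include <stdint.h>
#include <stdio.h>

#define F_IP_CSUM	(1u << 0)
#define F_TSO		(1u << 1)

static uint32_t fix_features(uint32_t features, int mtu, int tx_csum_limit)
{
	if (tx_csum_limit && mtu > tx_csum_limit)
		features &= ~(F_IP_CSUM | F_TSO);
	return features;
}

int main(void)
{
	printf("0x%x\n", fix_features(F_IP_CSUM | F_TSO, 1500, 1600)); /* kept */
	printf("0x%x\n", fix_features(F_IP_CSUM | F_TSO, 9000, 1600)); /* cleared */
	return 0;
}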
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 77179d7ae4cc..e0de2fd1ce12 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1977,10 +1977,6 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
}
- if (priv->base_tx_qpn) {
- mlx4_qp_release_range(priv->mdev->dev, priv->base_tx_qpn, priv->tx_ring_num);
- priv->base_tx_qpn = 0;
- }
}
int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 35f726c17e48..7a4f20bb7fcb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -718,7 +718,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
}
#endif
static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
- int hwtstamp_rx_filter)
+ netdev_features_t dev_features)
{
__wsum hw_checksum = 0;
@@ -726,14 +726,8 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
hw_checksum = csum_unfold((__force __sum16)cqe->checksum);
- if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) &&
- hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) {
- /* next protocol non IPv4 or IPv6 */
- if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto
- != htons(ETH_P_IP) &&
- ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto
- != htons(ETH_P_IPV6))
- return -1;
+ if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) &&
+ !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) {
hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr);
hdr += sizeof(struct vlan_hdr);
}
@@ -896,7 +890,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
if (ip_summed == CHECKSUM_COMPLETE) {
void *va = skb_frag_address(skb_shinfo(gro_skb)->frags);
- if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) {
+ if (check_csum(cqe, gro_skb, va,
+ dev->features)) {
ip_summed = CHECKSUM_NONE;
ring->csum_none++;
ring->csum_complete--;
@@ -951,7 +946,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
}
if (ip_summed == CHECKSUM_COMPLETE) {
- if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) {
+ if (check_csum(cqe, skb, skb->data, dev->features)) {
ip_summed = CHECKSUM_NONE;
ring->csum_complete--;
ring->csum_none++;
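
[Editor's note: a sketch of why check_csum() must fold the VLAN header out of a CHECKSUM_COMPLETE value when the NIC did not strip the tag: the hardware sum covers the whole frame, so the tag bytes are subtracted in one's-complement arithmetic before the inner checksum is validated. csum16() is a simplified stand-in for the kernel's csum helpers.]

#include <stdint.h>
#include <stdio.h>

static uint16_t csum16(const uint8_t *p, int len, uint32_t sum)
{
	for (int i = 0; i + 1 < len; i += 2)
		sum += (p[i] << 8) | p[i + 1];
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint8_t frame[] = { 0x81, 0x00, 0x00, 0x05,	/* VLAN tag */
			    0x12, 0x34, 0x56, 0x78 };	/* payload */
	uint16_t full = csum16(frame, sizeof(frame), 0);
	uint16_t tag = csum16(frame, 4, 0);
	/* one's-complement subtraction of the tag's contribution */
	uint32_t fixed = full + (uint16_t)~tag;

	while (fixed >> 16)
		fixed = (fixed & 0xffff) + (fixed >> 16);
	printf("payload-only sum 0x%04x, fixed sum 0x%04x\n",
	       csum16(frame + 4, 4, 0), (uint16_t)fixed);	/* equal */
	return 0;
}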
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 7bed3a88579f..c10d98f6ad96 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -66,6 +66,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring->size = size;
ring->size_mask = size - 1;
ring->stride = stride;
+ ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
tmp = size * sizeof(struct mlx4_en_tx_info);
ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node);
@@ -180,6 +181,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
mlx4_bf_free(mdev->dev, &ring->bf);
mlx4_qp_remove(mdev->dev, &ring->qp);
mlx4_qp_free(mdev->dev, &ring->qp);
+ mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
mlx4_en_unmap_buffer(&ring->wqres.buf);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
kfree(ring->bounce_buf);
@@ -231,6 +233,11 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
}
+static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
+{
+ return ring->prod - ring->cons > ring->full_size;
+}
+
static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring, int index,
u8 owner)
@@ -473,11 +480,10 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
- /*
- * Wakeup Tx queue if this stopped, and at least 1 packet
- * was completed
+ /* Wake up the Tx queue if it was stopped and the ring is not full.
*/
- if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) {
+ if (netif_tx_queue_stopped(ring->tx_queue) &&
+ !mlx4_en_is_tx_ring_full(ring)) {
netif_tx_wake_queue(ring->tx_queue);
ring->wake_queue++;
}
@@ -921,8 +927,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
skb_tx_timestamp(skb);
/* Check available TXBBs And 2K spare for prefetch */
- stop_queue = (int)(ring->prod - ring_cons) >
- ring->size - HEADROOM - MAX_DESC_TXBBS;
+ stop_queue = mlx4_en_is_tx_ring_full(ring);
if (unlikely(stop_queue)) {
netif_tx_stop_queue(ring->tx_queue);
ring->queue_stopped++;
@@ -991,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
smp_rmb();
ring_cons = ACCESS_ONCE(ring->cons);
- if (unlikely(((int)(ring->prod - ring_cons)) <=
- ring->size - HEADROOM - MAX_DESC_TXBBS)) {
+ if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
netif_tx_wake_queue(ring->tx_queue);
ring->wake_queue++;
}
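
[Editor's note: a sketch of the mlx4_en_is_tx_ring_full() test above. With free-running 32-bit producer/consumer counters, "prod - cons" yields the number of outstanding units even after the counters wrap, so one unsigned subtraction replaces the old signed size arithmetic.]

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

struct ring { uint32_t prod, cons, full_size; };

static bool ring_full(const struct ring *r)
{
	return r->prod - r->cons > r->full_size;
}

int main(void)
{
	struct ring r = { .prod = 0x00000005, .cons = 0xfffffffb,
			  .full_size = 8 };

	/* 10 units outstanding across the 2^32 wrap: 0x5 - 0xfffffffb = 10 */
	printf("outstanding=%u full=%d\n", r.prod - r.cons, ring_full(&r));
	return 0;
}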
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index 6fce58718837..0d80aed59043 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -93,8 +93,14 @@ int mlx4_register_interface(struct mlx4_interface *intf)
mutex_lock(&intf_mutex);
list_add_tail(&intf->list, &intf_list);
- list_for_each_entry(priv, &dev_list, dev_list)
+ list_for_each_entry(priv, &dev_list, dev_list) {
+ if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) {
+ mlx4_dbg(&priv->dev,
+ "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol);
+ intf->flags &= ~MLX4_INTFF_BONDING;
+ }
mlx4_add_device(intf, priv);
+ }
mutex_unlock(&intf_mutex);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d5f9adb6a784..666d1669eb52 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -279,6 +279,7 @@ struct mlx4_en_tx_ring {
u32 size; /* number of TXBBs */
u32 size_mask;
u16 stride;
+ u32 full_size;
u16 cqn; /* index of port CQ associated with this ring */
u32 buf_size;
__be32 doorbell_qpn;
@@ -580,7 +581,6 @@ struct mlx4_en_priv {
int vids[128];
bool wol;
struct device *ddev;
- int base_tx_qpn;
struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE];
struct hwtstamp_config hwtstamp_config;
u32 counter_index;
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 2bae50292dcd..83651ac8ddb9 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -279,7 +279,7 @@ MODULE_FIRMWARE("myri10ge_eth_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");
-/* Careful: must be accessed under kparam_block_sysfs_write */
+/* Careful: must be accessed under kernel_param_lock() */
static char *myri10ge_fw_name = NULL;
module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");
@@ -3427,7 +3427,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
}
}
- kparam_block_sysfs_write(myri10ge_fw_name);
+ kernel_param_lock(THIS_MODULE);
if (myri10ge_fw_name != NULL) {
char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL);
if (fw_name) {
@@ -3435,7 +3435,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
set_fw_name(mgp, fw_name, true);
}
}
- kparam_unblock_sysfs_write(myri10ge_fw_name);
+ kernel_param_unlock(THIS_MODULE);
if (mgp->board_number < MYRI10GE_MAX_BOARDS &&
myri10ge_fw_names[mgp->board_number] != NULL &&
diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c
index 42656da50500..7a8ce920c49e 100644
--- a/drivers/net/ethernet/renesas/ravb_ptp.c
+++ b/drivers/net/ethernet/renesas/ravb_ptp.c
@@ -116,8 +116,10 @@ static int ravb_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
priv->ptp.current_addend = addend;
gccr = ravb_read(ndev, GCCR);
- if (gccr & GCCR_LTI)
+ if (gccr & GCCR_LTI) {
+ spin_unlock_irqrestore(&priv->lock, flags);
return -EBUSY;
+ }
ravb_write(ndev, addend & GTI_TIV, GTI);
ravb_write(ndev, gccr | GCCR_LTI, GCCR);
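
[Editor's note: a sketch of the lock leak fixed in ravb_ptp_adjfreq() above. An early "return -EBUSY" inside a locked region must drop the lock first, or funnel through a single unlock exit; a pthread mutex stands in for the driver's spinlock.]

#include <pthread.h>
#include <errno.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int adjust(int busy)
{
	int ret = 0;

	pthread_mutex_lock(&lock);
	if (busy) {
		ret = -EBUSY;	/* was: return -EBUSY with the lock held */
		goto unlock;
	}
	/* ... program the new addend here ... */
unlock:
	pthread_mutex_unlock(&lock);
	return ret;
}

int main(void)
{
	/* with the leak, the second call would deadlock */
	printf("%d %d\n", adjust(1), adjust(0));
	return 0;
}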
diff --git a/drivers/net/ethernet/sis/sis900.h b/drivers/net/ethernet/sis/sis900.h
index 1341f33e6084..7d430d322931 100644
--- a/drivers/net/ethernet/sis/sis900.h
+++ b/drivers/net/ethernet/sis/sis900.h
@@ -56,7 +56,7 @@ enum sis900_configuration_register_bits {
EDB_MASTER_EN = 0x00002000
};
-enum sis900_eeprom_access_reigster_bits {
+enum sis900_eeprom_access_register_bits {
MDC = 0x00000040, MDDIR = 0x00000020, MDIO = 0x00000010, /* 7016 specific */
EECS = 0x00000008, EECLK = 0x00000004, EEDO = 0x00000002,
EEDI = 0x00000001
@@ -73,7 +73,7 @@ enum sis900_interrupt_register_bits {
RxERR = 0x00000004, RxDESC = 0x00000002, RxOK = 0x00000001
};
-enum sis900_interrupt_enable_reigster_bits {
+enum sis900_interrupt_enable_register_bits {
IE = 0x00000001
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index 08c483bd2ec7..3f20bb1fe570 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -73,7 +73,7 @@
#define MMC_RX_OCTETCOUNT_G 0x00000188
#define MMC_RX_BROADCASTFRAME_G 0x0000018c
#define MMC_RX_MULTICASTFRAME_G 0x00000190
-#define MMC_RX_CRC_ERRROR 0x00000194
+#define MMC_RX_CRC_ERROR 0x00000194
#define MMC_RX_ALIGN_ERROR 0x00000198
#define MMC_RX_RUN_ERROR 0x0000019C
#define MMC_RX_JABBER_ERROR 0x000001A0
@@ -196,7 +196,7 @@ void dwmac_mmc_read(void __iomem *ioaddr, struct stmmac_counters *mmc)
mmc->mmc_rx_octetcount_g += readl(ioaddr + MMC_RX_OCTETCOUNT_G);
mmc->mmc_rx_broadcastframe_g += readl(ioaddr + MMC_RX_BROADCASTFRAME_G);
mmc->mmc_rx_multicastframe_g += readl(ioaddr + MMC_RX_MULTICASTFRAME_G);
- mmc->mmc_rx_crc_error += readl(ioaddr + MMC_RX_CRC_ERRROR);
+ mmc->mmc_rx_crc_error += readl(ioaddr + MMC_RX_CRC_ERROR);
mmc->mmc_rx_align_error += readl(ioaddr + MMC_RX_ALIGN_ERROR);
mmc->mmc_rx_run_error += readl(ioaddr + MMC_RX_RUN_ERROR);
mmc->mmc_rx_jabber_error += readl(ioaddr + MMC_RX_JABBER_ERROR);
diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig
index 8b0b1d6aca72..2f1264b882b9 100644
--- a/drivers/net/ethernet/via/Kconfig
+++ b/drivers/net/ethernet/via/Kconfig
@@ -18,6 +18,7 @@ if NET_VENDOR_VIA
config VIA_RHINE
tristate "VIA Rhine support"
depends on (PCI || OF_IRQ)
+ depends on HAS_DMA
select CRC32
select MII
---help---
@@ -42,6 +43,7 @@ config VIA_RHINE_MMIO
config VIA_VELOCITY
tristate "VIA Velocity support"
depends on (PCI || (OF_ADDRESS && OF_IRQ))
+ depends on HAS_DMA
select CRC32
select CRC_CCITT
select MII
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 4dea85bfc545..6b701b3ded74 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -246,6 +246,13 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev)
pr_info_once("%s: %s PHY revision: 0x%02x, patch: %d\n",
dev_name(&phydev->dev), phydev->drv->name, rev, patch);
+ /* Dummy read to a register to work around an issue upon reset where
+ * the internal inverter may not allow the first MDIO transaction to
+ * pass the MDIO management controller, making such reads return
+ * 0xffff.
+ */
+ phy_read(phydev, MII_BMSR);
+
switch (rev) {
case 0xb0:
ret = bcm7xxx_28nm_b0_afe_config_init(phydev);
diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c
index fc7abc50b4f1..6a52a7f0fa0d 100644
--- a/drivers/net/phy/mdio-bcm-unimac.c
+++ b/drivers/net/phy/mdio-bcm-unimac.c
@@ -120,6 +120,48 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id,
return 0;
}
+/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with
+ * their internal MDIO management controller making them fail to successfully
+ * be read from or written to for the first transaction. We insert a dummy
+ * BMSR read here to make sure that phy_get_device() and get_phy_id() can
+ * correctly read the PHY MII_PHYSID1/2 registers and successfully register a
+ * PHY device for this peripheral.
+ *
+ * Once the PHY driver is registered, we can work around subsequent
+ * reads from there (e.g. during system-wide power management).
+ *
+ * bus->reset is invoked before mdiobus_scan during mdiobus_register and is
+ * therefore the right location to stick that workaround. Since we do not want
+ * to read from non-existing PHYs, we either use bus->phy_mask or do a manual
+ * Device Tree scan to limit the search area.
+ */
+static int unimac_mdio_reset(struct mii_bus *bus)
+{
+ struct device_node *np = bus->dev.of_node;
+ struct device_node *child;
+ u32 read_mask = 0;
+ int addr;
+
+ if (!np) {
+ read_mask = ~bus->phy_mask;
+ } else {
+ for_each_available_child_of_node(np, child) {
+ addr = of_mdio_parse_addr(&bus->dev, child);
+ if (addr < 0)
+ continue;
+
+ read_mask |= 1 << addr;
+ }
+ }
+
+ for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+ if (read_mask & 1 << addr)
+ mdiobus_read(bus, addr, MII_BMSR);
+ }
+
+ return 0;
+}
+
static int unimac_mdio_probe(struct platform_device *pdev)
{
struct unimac_mdio_priv *priv;
@@ -155,6 +197,7 @@ static int unimac_mdio_probe(struct platform_device *pdev)
bus->parent = &pdev->dev;
bus->read = unimac_mdio_read;
bus->write = unimac_mdio_write;
+ bus->reset = unimac_mdio_reset;
snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
bus->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL);
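
[Editor's note: a sketch of the address-mask construction in unimac_mdio_reset() above. The PHY addresses known to exist are collected into a bitmask (here a fixed array stands in for the Device Tree children) and one dummy read is issued per set bit so the first real transaction is not lost.]

#include <stdio.h>

#define PHY_MAX_ADDR 32

static void dummy_read(int addr)
{
	printf("dummy BMSR read at addr %d\n", addr);
}

int main(void)
{
	int child_addrs[] = { 0, 1, 5 };	/* stand-in for the DT scan */
	unsigned int read_mask = 0;
	unsigned int i;

	for (i = 0; i < sizeof(child_addrs) / sizeof(child_addrs[0]); i++)
		read_mask |= 1u << child_addrs[i];

	for (int addr = 0; addr < PHY_MAX_ADDR; addr++)
		if (read_mask & (1u << addr))
			dummy_read(addr);
	return 0;
}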
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index bdfe51fc3a65..0302483de240 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -230,7 +230,7 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id,
for (i = 1;
i < num_ids && c45_ids->devices_in_package == 0;
i++) {
- reg_addr = MII_ADDR_C45 | i << 16 | MDIO_DEVS2;
+retry: reg_addr = MII_ADDR_C45 | i << 16 | MDIO_DEVS2;
phy_reg = mdiobus_read(bus, addr, reg_addr);
if (phy_reg < 0)
return -EIO;
@@ -242,12 +242,20 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id,
return -EIO;
c45_ids->devices_in_package |= (phy_reg & 0xffff);
- /* If mostly Fs, there is no device there,
- * let's get out of here.
- */
if ((c45_ids->devices_in_package & 0x1fffffff) == 0x1fffffff) {
- *phy_id = 0xffffffff;
- return 0;
+ if (i) {
+ /* Mostly Fs usually means there is no device, but
+ * some 10G PHYs report zero Devices In Package
+ * (e.g. the Cortina CS4315/CS4340), so rescan from
+ * device address 0 before giving up.
+ */
+ i = 0;
+ goto retry;
+ } else {
+ /* no device there, let's get out of here */
+ *phy_id = 0xffffffff;
+ return 0;
+ }
}
}
@@ -796,10 +804,11 @@ static int genphy_config_advert(struct phy_device *phydev)
if (phydev->supported & (SUPPORTED_1000baseT_Half |
SUPPORTED_1000baseT_Full)) {
adv |= ethtool_adv_to_mii_ctrl1000_t(advertise);
- if (adv != oldadv)
- changed = 1;
}
+ if (adv != oldadv)
+ changed = 1;
+
err = phy_write(phydev, MII_CTRL1000, adv);
if (err < 0)
return err;
diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index 76cad712ddb2..17cad185169d 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c
@@ -66,6 +66,7 @@
#define PHY_ID_VSC8244 0x000fc6c0
#define PHY_ID_VSC8514 0x00070670
#define PHY_ID_VSC8574 0x000704a0
+#define PHY_ID_VSC8641 0x00070431
#define PHY_ID_VSC8662 0x00070660
#define PHY_ID_VSC8221 0x000fc550
#define PHY_ID_VSC8211 0x000fc4b0
@@ -272,6 +273,18 @@ static struct phy_driver vsc82xx_driver[] = {
.config_intr = &vsc82xx_config_intr,
.driver = { .owner = THIS_MODULE,},
}, {
+ .phy_id = PHY_ID_VSC8641,
+ .name = "Vitesse VSC8641",
+ .phy_id_mask = 0x000ffff0,
+ .features = PHY_GBIT_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .config_init = &vsc824x_config_init,
+ .config_aneg = &vsc82x4_config_aneg,
+ .read_status = &genphy_read_status,
+ .ack_interrupt = &vsc824x_ack_interrupt,
+ .config_intr = &vsc82xx_config_intr,
+ .driver = { .owner = THIS_MODULE,},
+}, {
.phy_id = PHY_ID_VSC8662,
.name = "Vitesse VSC8662",
.phy_id_mask = 0x000ffff0,
@@ -318,6 +331,7 @@ static struct mdio_device_id __maybe_unused vitesse_tbl[] = {
{ PHY_ID_VSC8244, 0x000fffc0 },
{ PHY_ID_VSC8514, 0x000ffff0 },
{ PHY_ID_VSC8574, 0x000ffff0 },
+ { PHY_ID_VSC8641, 0x000ffff0 },
{ PHY_ID_VSC8662, 0x000ffff0 },
{ PHY_ID_VSC8221, 0x000ffff0 },
{ PHY_ID_VSC8211, 0x000ffff0 },
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index e9f1075f7d4c..2652245631d1 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.3.5.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.4.2.0-k"
/* a 32-bit int, each byte encodes a version number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM 0x01030500
+#define VMXNET3_DRIVER_VERSION_NUM 0x01040200
#if defined(CONFIG_PCI_MSI)
/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index b9febab89167..6ca6193ab8a6 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -62,7 +62,7 @@ static int mtu_max_set(const char *val, const struct kernel_param *kp)
return ret;
}
-static struct kernel_param_ops mtu_max_ops = {
+static const struct kernel_param_ops mtu_max_ops = {
.set = mtu_max_set,
.get = param_get_uint,
};
@@ -91,7 +91,7 @@ static int ring_order_set(const char *val, const struct kernel_param *kp)
return 0;
}
-static struct kernel_param_ops ring_order_ops = {
+static const struct kernel_param_ops ring_order_ops = {
.set = ring_order_set,
.get = param_get_uint,
};
diff --git a/drivers/net/wireless/libertas_tf/if_usb.c b/drivers/net/wireless/libertas_tf/if_usb.c
index 1a20cee5febe..799a2efe5793 100644
--- a/drivers/net/wireless/libertas_tf/if_usb.c
+++ b/drivers/net/wireless/libertas_tf/if_usb.c
@@ -821,15 +821,15 @@ static int if_usb_prog_firmware(struct if_usb_card *cardp)
lbtf_deb_enter(LBTF_DEB_USB);
- kparam_block_sysfs_write(fw_name);
+ kernel_param_lock(THIS_MODULE);
ret = request_firmware(&cardp->fw, lbtf_fw_name, &cardp->udev->dev);
if (ret < 0) {
pr_err("request_firmware() failed with %#x\n", ret);
pr_err("firmware %s not found\n", lbtf_fw_name);
- kparam_unblock_sysfs_write(fw_name);
+ kernel_param_unlock(THIS_MODULE);
goto done;
}
- kparam_unblock_sysfs_write(fw_name);
+ kernel_param_unlock(THIS_MODULE);
if (check_fwfile_format(cardp->fw->data, cardp->fw->size))
goto release_fw;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 5485f91294e7..880d0d63e872 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -44,9 +44,9 @@
#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>
+#include <xen/page.h>
#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
/* Provide an option to disable split event channels at load time as
* event channels are limited resource. Split event channels are
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 56d8afd11077..f948c46d5132 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -45,7 +45,6 @@
#include <linux/slab.h>
#include <net/ip.h>
-#include <asm/xen/page.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/events.h>
@@ -1245,10 +1244,6 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
np = netdev_priv(netdev);
np->xbdev = dev;
- /* No need to use rtnl_lock() before the call below as it
- * happens before register_netdev().
- */
- netif_set_real_num_tx_queues(netdev, 0);
np->queues = NULL;
err = -ENOMEM;
@@ -1900,9 +1895,6 @@ abort_transaction_no_dev_fatal:
xennet_disconnect_backend(info);
kfree(info->queues);
info->queues = NULL;
- rtnl_lock();
- netif_set_real_num_tx_queues(info->netdev, 0);
- rtnl_unlock();
out:
return err;
}
diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c
index b75d684aefcd..734da589cdfb 100644
--- a/drivers/pci/host/pci-keystone.c
+++ b/drivers/pci/host/pci-keystone.c
@@ -221,10 +221,9 @@ static void ks_pcie_setup_interrupts(struct keystone_pcie *ks_pcie)
/* MSI IRQ */
if (IS_ENABLED(CONFIG_PCI_MSI)) {
for (i = 0; i < ks_pcie->num_msi_host_irqs; i++) {
- irq_set_chained_handler(ks_pcie->msi_host_irqs[i],
- ks_pcie_msi_irq_handler);
- irq_set_handler_data(ks_pcie->msi_host_irqs[i],
- ks_pcie);
+ irq_set_chained_handler_and_data(ks_pcie->msi_host_irqs[i],
+ ks_pcie_msi_irq_handler,
+ ks_pcie);
}
}
}
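
[Editor's note: a sketch of why irq_set_chained_handler_and_data() replaces the two separate calls in the pci-keystone hunk above. Installing the handler before its data leaves a window where the handler can run with stale data, so both are set in one step; plain function pointers stand in for the genirq internals.]

#include <stdio.h>

struct irq_desc {
	void (*handler)(void *data);
	void *data;
};

static void set_handler_and_data(struct irq_desc *d,
				 void (*h)(void *), void *data)
{
	/* in the kernel this happens under the descriptor lock */
	d->data = data;
	d->handler = h;
}

static void demux(void *data)
{
	printf("demux for controller %s\n", (const char *)data);
}

int main(void)
{
	struct irq_desc desc = { 0 };

	set_handler_and_data(&desc, demux, "pcie-msi");
	desc.handler(desc.data);
	return 0;
}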
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 240f38872085..8b7a900cd28b 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -20,6 +20,7 @@
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/time.h>
+#include <linux/ktime.h>
#include <xen/platform_pci.h>
#include <asm/xen/swiotlb-xen.h>
@@ -115,7 +116,6 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
evtchn_port_t port = pdev->evtchn;
unsigned irq = pdev->irq;
s64 ns, ns_timeout;
- struct timeval tv;
spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
@@ -132,8 +132,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
* (in the latter case we end up continually re-executing poll() with a
* timeout in the past). 1s difference gives plenty of slack for error.
*/
- do_gettimeofday(&tv);
- ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
+ ns_timeout = ktime_get_ns() + 2 * (s64)NSEC_PER_SEC;
xen_clear_irq_pending(irq);
@@ -141,8 +140,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
(unsigned long *)&pdev->sh_info->flags)) {
xen_poll_irq_timeout(irq, jiffies + 3*HZ);
xen_clear_irq_pending(irq);
- do_gettimeofday(&tv);
- ns = timeval_to_ns(&tv);
+ ns = ktime_get_ns();
if (ns > ns_timeout) {
dev_err(&pdev->xdev->dev,
"pciback not responding!!!\n");
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index c4fc77aa766e..ad1ea1695b4a 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -1351,8 +1351,7 @@ int mtk_pctrl_init(struct platform_device *pdev,
set_irq_flags(virq, IRQF_VALID);
};
- irq_set_chained_handler(irq, mtk_eint_irq_handler);
- irq_set_handler_data(irq, pctl);
+ irq_set_chained_handler_and_data(irq, mtk_eint_irq_handler, pctl);
set_irq_flags(irq, IRQF_VALID);
return 0;
diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c
index 873433da0f2c..c3c3d2345fc6 100644
--- a/drivers/pinctrl/pinctrl-adi2.c
+++ b/drivers/pinctrl/pinctrl-adi2.c
@@ -865,8 +865,8 @@ static int adi_gpio_pint_probe(struct platform_device *pdev)
pint->pint_map_port = adi_pint_map_port;
platform_set_drvdata(pdev, pint);
- irq_set_chained_handler(pint->irq, adi_gpio_handle_pint_irq);
- irq_set_handler_data(pint->irq, pint);
+ irq_set_chained_handler_and_data(pint->irq, adi_gpio_handle_pint_irq,
+ pint);
list_add_tail(&pint->node, &adi_pint_list);
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c
index d34ac879af9e..c262e5f35c28 100644
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -1661,8 +1661,8 @@ static int st_pctl_probe_dt(struct platform_device *pdev,
if (IS_ERR(info->irqmux_base))
return PTR_ERR(info->irqmux_base);
- irq_set_chained_handler(irq, st_gpio_irqmux_handler);
- irq_set_handler_data(irq, info);
+ irq_set_chained_handler_and_data(irq, st_gpio_irqmux_handler,
+ info);
}
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c
index 0b7afa50121a..b18dabba03a4 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.c
@@ -563,8 +563,8 @@ static int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d)
return -ENOMEM;
}
- irq_set_chained_handler(irq, exynos_irq_demux_eint16_31);
- irq_set_handler_data(irq, muxed_data);
+ irq_set_chained_handler_and_data(irq, exynos_irq_demux_eint16_31,
+ muxed_data);
bank = d->pin_banks;
idx = 0;
diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
index f1993f42114c..01b43dbfb795 100644
--- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
+++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
@@ -514,8 +514,7 @@ static int s3c24xx_eint_init(struct samsung_pinctrl_drv_data *d)
}
eint_data->parents[i] = irq;
- irq_set_chained_handler(irq, handlers[i]);
- irq_set_handler_data(irq, eint_data);
+ irq_set_chained_handler_and_data(irq, handlers[i], eint_data);
}
bank = d->pin_banks;
diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
index 7756c1e9e763..ec8cc3b47621 100644
--- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
+++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
@@ -506,8 +506,7 @@ static int s3c64xx_eint_gpio_init(struct samsung_pinctrl_drv_data *d)
data->domains[nr_domains++] = bank->irq_domain;
}
- irq_set_chained_handler(d->irq, s3c64xx_eint_gpio_irq);
- irq_set_handler_data(d->irq, data);
+ irq_set_chained_handler_and_data(d->irq, s3c64xx_eint_gpio_irq, data);
return 0;
}
@@ -731,8 +730,9 @@ static int s3c64xx_eint_eint0_init(struct samsung_pinctrl_drv_data *d)
return -ENXIO;
}
- irq_set_chained_handler(irq, s3c64xx_eint0_handlers[i]);
- irq_set_handler_data(irq, data);
+ irq_set_chained_handler_and_data(irq,
+ s3c64xx_eint0_handlers[i],
+ data);
}
bank = d->pin_banks;
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index d7857c72e627..f09573e13203 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -1005,9 +1005,9 @@ int sunxi_pinctrl_init(struct platform_device *pdev,
writel(0xffffffff,
pctl->membase + sunxi_irq_status_reg_from_bank(i));
- irq_set_chained_handler(pctl->irq[i],
- sunxi_pinctrl_irq_handler);
- irq_set_handler_data(pctl->irq[i], pctl);
+ irq_set_chained_handler_and_data(pctl->irq[i],
+ sunxi_pinctrl_irq_handler,
+ pctl);
}
dev_info(&pdev->dev, "initialized sunXi PIO driver\n");
diff --git a/drivers/power/test_power.c b/drivers/power/test_power.c
index f986e0cca7ac..83c42ea88f2b 100644
--- a/drivers/power/test_power.c
+++ b/drivers/power/test_power.c
@@ -448,42 +448,42 @@ static int param_set_battery_voltage(const char *key,
#define param_get_battery_voltage param_get_int
-static struct kernel_param_ops param_ops_ac_online = {
+static const struct kernel_param_ops param_ops_ac_online = {
.set = param_set_ac_online,
.get = param_get_ac_online,
};
-static struct kernel_param_ops param_ops_usb_online = {
+static const struct kernel_param_ops param_ops_usb_online = {
.set = param_set_usb_online,
.get = param_get_usb_online,
};
-static struct kernel_param_ops param_ops_battery_status = {
+static const struct kernel_param_ops param_ops_battery_status = {
.set = param_set_battery_status,
.get = param_get_battery_status,
};
-static struct kernel_param_ops param_ops_battery_present = {
+static const struct kernel_param_ops param_ops_battery_present = {
.set = param_set_battery_present,
.get = param_get_battery_present,
};
-static struct kernel_param_ops param_ops_battery_technology = {
+static const struct kernel_param_ops param_ops_battery_technology = {
.set = param_set_battery_technology,
.get = param_get_battery_technology,
};
-static struct kernel_param_ops param_ops_battery_health = {
+static const struct kernel_param_ops param_ops_battery_health = {
.set = param_set_battery_health,
.get = param_get_battery_health,
};
-static struct kernel_param_ops param_ops_battery_capacity = {
+static const struct kernel_param_ops param_ops_battery_capacity = {
.set = param_set_battery_capacity,
.get = param_get_battery_capacity,
};
-static struct kernel_param_ops param_ops_battery_voltage = {
+static const struct kernel_param_ops param_ops_battery_voltage = {
.set = param_set_battery_voltage,
.get = param_get_battery_voltage,
};
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index f74c040d5c10..e9485fbbb373 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -92,8 +92,8 @@ struct read_cpu_info_sccb {
u8 reserved[4096 - 16];
} __attribute__((packed, aligned(PAGE_SIZE)));
-static void sclp_fill_cpu_info(struct sclp_cpu_info *info,
- struct read_cpu_info_sccb *sccb)
+static void sclp_fill_core_info(struct sclp_core_info *info,
+ struct read_cpu_info_sccb *sccb)
{
char *page = (char *) sccb;
@@ -101,12 +101,11 @@ static void sclp_fill_cpu_info(struct sclp_cpu_info *info,
info->configured = sccb->nr_configured;
info->standby = sccb->nr_standby;
info->combined = sccb->nr_configured + sccb->nr_standby;
- info->has_cpu_type = sclp.has_cpu_type;
- memcpy(&info->cpu, page + sccb->offset_configured,
- info->combined * sizeof(struct sclp_cpu_entry));
+ memcpy(&info->core, page + sccb->offset_configured,
+ info->combined * sizeof(struct sclp_core_entry));
}
-int sclp_get_cpu_info(struct sclp_cpu_info *info)
+int sclp_get_core_info(struct sclp_core_info *info)
{
int rc;
struct read_cpu_info_sccb *sccb;
@@ -127,7 +126,7 @@ int sclp_get_cpu_info(struct sclp_cpu_info *info)
rc = -EIO;
goto out;
}
- sclp_fill_cpu_info(info, sccb);
+ sclp_fill_core_info(info, sccb);
out:
free_page((unsigned long) sccb);
return rc;
@@ -137,7 +136,7 @@ struct cpu_configure_sccb {
struct sccb_header header;
} __attribute__((packed, aligned(8)));
-static int do_cpu_configure(sclp_cmdw_t cmd)
+static int do_core_configure(sclp_cmdw_t cmd)
{
struct cpu_configure_sccb *sccb;
int rc;
@@ -171,14 +170,14 @@ out:
return rc;
}
-int sclp_cpu_configure(u8 cpu)
+int sclp_core_configure(u8 core)
{
- return do_cpu_configure(SCLP_CMDW_CONFIGURE_CPU | cpu << 8);
+ return do_core_configure(SCLP_CMDW_CONFIGURE_CPU | core << 8);
}
-int sclp_cpu_deconfigure(u8 cpu)
+int sclp_core_deconfigure(u8 core)
{
- return do_cpu_configure(SCLP_CMDW_DECONFIGURE_CPU | cpu << 8);
+ return do_core_configure(SCLP_CMDW_DECONFIGURE_CPU | core << 8);
}
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index d7f696d95597..aeed7969fd79 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -98,7 +98,7 @@ static int __init sclp_read_info_early(struct read_info_sccb *sccb)
static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
{
- struct sclp_cpu_entry *cpue;
+ struct sclp_core_entry *cpue;
u16 boot_cpu_address, cpu;
if (sclp_read_info_early(sccb))
@@ -106,7 +106,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
sclp.facilities = sccb->facilities;
sclp.has_sprp = !!(sccb->fac84 & 0x02);
- sclp.has_cpu_type = !!(sccb->fac84 & 0x01);
+ sclp.has_core_type = !!(sccb->fac84 & 0x01);
if (sccb->fac85 & 0x02)
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
@@ -116,11 +116,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
if (!sccb->hcpua) {
if (MACHINE_IS_VM)
- sclp.max_cpu = 64;
+ sclp.max_cores = 64;
else
- sclp.max_cpu = sccb->ncpurl;
+ sclp.max_cores = sccb->ncpurl;
} else {
- sclp.max_cpu = sccb->hcpua + 1;
+ sclp.max_cores = sccb->hcpua + 1;
}
boot_cpu_address = stap();
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 9a3dd95029cc..823f41fc4bbd 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -154,7 +154,7 @@ static int __init init_cpu_info(enum arch_id arch)
/* get info for boot cpu from lowcore, stored in the HSA */
- sa_ext = dump_save_area_create(0);
+ sa_ext = dump_save_areas.areas[0];
if (!sa_ext)
return -ENOMEM;
if (memcpy_hsa_kernel(&sa_ext->sa, sys_info.sa_base,
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 3ba611419759..559a9dcdb15d 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -60,7 +60,7 @@ static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags);
static int ap_device_remove(struct device *dev);
static int ap_device_probe(struct device *dev);
static void ap_interrupt_handler(struct airq_struct *airq);
-static void ap_reset(struct ap_device *ap_dev);
+static void ap_reset(struct ap_device *ap_dev, unsigned long *flags);
static void ap_config_timeout(unsigned long ptr);
static int ap_select_domain(void);
static void ap_query_configuration(void);
@@ -310,35 +310,26 @@ static inline int __ap_query_configuration(struct ap_config_info *config)
static int ap_query_functions(ap_qid_t qid, unsigned int *functions)
{
struct ap_queue_status status;
- int i;
+
status = __ap_query_functions(qid, functions);
- for (i = 0; i < AP_MAX_RESET; i++) {
- if (ap_queue_status_invalid_test(&status))
- return -ENODEV;
+ if (ap_queue_status_invalid_test(&status))
+ return -ENODEV;
- switch (status.response_code) {
- case AP_RESPONSE_NORMAL:
- return 0;
- case AP_RESPONSE_RESET_IN_PROGRESS:
- case AP_RESPONSE_BUSY:
- break;
- case AP_RESPONSE_Q_NOT_AVAIL:
- case AP_RESPONSE_DECONFIGURED:
- case AP_RESPONSE_CHECKSTOPPED:
- case AP_RESPONSE_INVALID_ADDRESS:
- return -ENODEV;
- case AP_RESPONSE_OTHERWISE_CHANGED:
- break;
- default:
- break;
- }
- if (i < AP_MAX_RESET - 1) {
- udelay(5);
- status = __ap_query_functions(qid, functions);
- }
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ return 0;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_INVALID_ADDRESS:
+ return -ENODEV;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ default:
+ return -EBUSY;
}
- return -EBUSY;
}
/**
@@ -350,47 +341,25 @@ static int ap_query_functions(ap_qid_t qid, unsigned int *functions)
* on the return value it waits a while and tests the AP queue if interrupts
* have been switched on using ap_test_queue().
*/
-static int ap_queue_enable_interruption(ap_qid_t qid, void *ind)
+static int ap_queue_enable_interruption(struct ap_device *ap_dev, void *ind)
{
struct ap_queue_status status;
- int t_depth, t_device_type, rc, i;
- rc = -EBUSY;
- status = ap_queue_interruption_control(qid, ind);
-
- for (i = 0; i < AP_MAX_RESET; i++) {
- switch (status.response_code) {
- case AP_RESPONSE_NORMAL:
- if (status.int_enabled)
- return 0;
- break;
- case AP_RESPONSE_RESET_IN_PROGRESS:
- case AP_RESPONSE_BUSY:
- if (i < AP_MAX_RESET - 1) {
- udelay(5);
- status = ap_queue_interruption_control(qid,
- ind);
- continue;
- }
- break;
- case AP_RESPONSE_Q_NOT_AVAIL:
- case AP_RESPONSE_DECONFIGURED:
- case AP_RESPONSE_CHECKSTOPPED:
- case AP_RESPONSE_INVALID_ADDRESS:
- return -ENODEV;
- case AP_RESPONSE_OTHERWISE_CHANGED:
- if (status.int_enabled)
- return 0;
- break;
- default:
- break;
- }
- if (i < AP_MAX_RESET - 1) {
- udelay(5);
- status = ap_test_queue(qid, &t_depth, &t_device_type);
- }
+ status = ap_queue_interruption_control(ap_dev->qid, ind);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ return 0;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_INVALID_ADDRESS:
+ return -ENODEV;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ default:
+ return -EBUSY;
}
- return rc;
}
/**
@@ -511,109 +480,94 @@ int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
EXPORT_SYMBOL(ap_recv);
/**
+ * __ap_schedule_poll_timer(): Schedule poll timer.
+ *
+ * Set up the timer to run the poll tasklet
+ */
+static inline void __ap_schedule_poll_timer(void)
+{
+ ktime_t hr_time;
+
+ spin_lock_bh(&ap_poll_timer_lock);
+ if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) {
+ hr_time = ktime_set(0, poll_timeout);
+ hrtimer_forward_now(&ap_poll_timer, hr_time);
+ hrtimer_restart(&ap_poll_timer);
+ }
+ spin_unlock_bh(&ap_poll_timer_lock);
+}
+
+/**
+ * ap_schedule_poll_timer(): Schedule poll timer.
+ *
+ * Set up the timer to run the poll tasklet
+ */
+static inline void ap_schedule_poll_timer(void)
+{
+ if (ap_using_interrupts())
+ return;
+ __ap_schedule_poll_timer();
+}
+
+
+/**
* ap_query_queue(): Check if an AP queue is available.
* @qid: The AP queue number
* @queue_depth: Pointer to queue depth value
* @device_type: Pointer to device type value
- *
- * The test is repeated for AP_MAX_RESET times.
*/
static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type)
{
struct ap_queue_status status;
- int t_depth, t_device_type, rc, i;
+ int t_depth, t_device_type;
- rc = -EBUSY;
- for (i = 0; i < AP_MAX_RESET; i++) {
- status = ap_test_queue(qid, &t_depth, &t_device_type);
- switch (status.response_code) {
- case AP_RESPONSE_NORMAL:
- *queue_depth = t_depth + 1;
- *device_type = t_device_type;
- rc = 0;
- break;
- case AP_RESPONSE_Q_NOT_AVAIL:
- rc = -ENODEV;
- break;
- case AP_RESPONSE_RESET_IN_PROGRESS:
- break;
- case AP_RESPONSE_DECONFIGURED:
- rc = -ENODEV;
- break;
- case AP_RESPONSE_CHECKSTOPPED:
- rc = -ENODEV;
- break;
- case AP_RESPONSE_INVALID_ADDRESS:
- rc = -ENODEV;
- break;
- case AP_RESPONSE_OTHERWISE_CHANGED:
- break;
- case AP_RESPONSE_BUSY:
- break;
- default:
- BUG();
- }
- if (rc != -EBUSY)
- break;
- if (i < AP_MAX_RESET - 1)
- udelay(5);
+ status = ap_test_queue(qid, &t_depth, &t_device_type);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ *queue_depth = t_depth + 1;
+ *device_type = t_device_type;
+ return 0;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_INVALID_ADDRESS:
+ return -ENODEV;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ case AP_RESPONSE_BUSY:
+ return -EBUSY;
+ default:
+ BUG();
}
- return rc;
}
/**
* ap_init_queue(): Reset an AP queue.
* @qid: The AP queue number
*
- * Reset an AP queue and wait for it to become available again.
+ * Submit the Reset command to an AP queue.
+ * Since the reset is asynchronous, set the state to 'RESET_IN_PROGRESS'
+ * and check later via ap_poll_queue() if the reset is done.
*/
-static int ap_init_queue(ap_qid_t qid)
+static int ap_init_queue(struct ap_device *ap_dev)
{
struct ap_queue_status status;
- int rc, dummy, i;
- rc = -ENODEV;
- status = ap_reset_queue(qid);
- for (i = 0; i < AP_MAX_RESET; i++) {
- switch (status.response_code) {
- case AP_RESPONSE_NORMAL:
- if (status.queue_empty)
- rc = 0;
- break;
- case AP_RESPONSE_Q_NOT_AVAIL:
- case AP_RESPONSE_DECONFIGURED:
- case AP_RESPONSE_CHECKSTOPPED:
- i = AP_MAX_RESET; /* return with -ENODEV */
- break;
- case AP_RESPONSE_RESET_IN_PROGRESS:
- rc = -EBUSY;
- case AP_RESPONSE_BUSY:
- default:
- break;
- }
- if (rc != -ENODEV && rc != -EBUSY)
- break;
- if (i < AP_MAX_RESET - 1) {
- /* Time we are waiting until we give up (0.7sec * 90).
- * Since the actual request (in progress) will not
- * interrupted immediately for the reset command,
- * we have to be patient. In worst case we have to
- * wait 60sec + reset time (some msec).
- */
- schedule_timeout(AP_RESET_TIMEOUT);
- status = ap_test_queue(qid, &dummy, &dummy);
- }
- }
- if (rc == 0 && ap_using_interrupts()) {
- rc = ap_queue_enable_interruption(qid, ap_airq.lsi_ptr);
- /* If interruption mode is supported by the machine,
- * but an AP can not be enabled for interruption then
- * the AP will be discarded. */
- if (rc)
- pr_err("Registering adapter interrupts for "
- "AP %d failed\n", AP_QID_DEVICE(qid));
+ status = ap_reset_queue(ap_dev->qid);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ ap_dev->interrupt = AP_INTR_DISABLED;
+ ap_dev->reset = AP_RESET_IN_PROGRESS;
+ return 0;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ return -EBUSY;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ default:
+ return -ENODEV;
}
- return rc;
}
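The reset is now fire-and-forget: ap_init_queue() merely submits the reset and records AP_RESET_IN_PROGRESS; completion is detected later from the poll path. A minimal user-space sketch of this submit-then-poll state machine (the enum values, the fake latency counter and the output are illustrative stand-ins, not the ap_bus internals):

#include <stdio.h>

enum reset_state { RESET_IGNORE, RESET_IN_PROGRESS };

struct queue {
	enum reset_state reset;
	int busy_polls_left;	/* stands in for the device's reset latency */
};

/* Submit the reset and return immediately (cf. ap_init_queue()). */
static int submit_reset(struct queue *q)
{
	q->reset = RESET_IN_PROGRESS;
	q->busy_polls_left = 3;
	return 0;
}

/* One poll tick (cf. the AP_RESET_IN_PROGRESS branch in ap_poll_queue()). */
static void poll_queue(struct queue *q)
{
	if (q->reset != RESET_IN_PROGRESS)
		return;
	if (q->busy_polls_left-- > 0) {
		printf("reset still in progress, re-arming poll timer\n");
		return;
	}
	q->reset = RESET_IGNORE;
	printf("reset done, queue usable again\n");
}

int main(void)
{
	struct queue q;

	submit_reset(&q);
	while (q.reset == RESET_IN_PROGRESS)
		poll_queue(&q);
	return 0;
}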
/**
@@ -729,10 +683,63 @@ static ssize_t ap_pendingq_count_show(struct device *dev,
static DEVICE_ATTR(pendingq_count, 0444, ap_pendingq_count_show, NULL);
+static ssize_t ap_reset_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ap_device *ap_dev = to_ap_dev(dev);
+ int rc = 0;
+
+ spin_lock_bh(&ap_dev->lock);
+ switch (ap_dev->reset) {
+ case AP_RESET_IGNORE:
+ rc = snprintf(buf, PAGE_SIZE, "No Reset Timer set.\n");
+ break;
+ case AP_RESET_ARMED:
+ rc = snprintf(buf, PAGE_SIZE, "Reset Timer armed.\n");
+ break;
+ case AP_RESET_DO:
+ rc = snprintf(buf, PAGE_SIZE, "Reset Timer expired.\n");
+ break;
+ case AP_RESET_IN_PROGRESS:
+ rc = snprintf(buf, PAGE_SIZE, "Reset in progress.\n");
+ break;
+ default:
+ break;
+ }
+ spin_unlock_bh(&ap_dev->lock);
+ return rc;
+}
+
+static DEVICE_ATTR(reset, 0444, ap_reset_show, NULL);
+
+static ssize_t ap_interrupt_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ap_device *ap_dev = to_ap_dev(dev);
+ int rc = 0;
+
+ spin_lock_bh(&ap_dev->lock);
+ switch (ap_dev->interrupt) {
+ case AP_INTR_DISABLED:
+ rc = snprintf(buf, PAGE_SIZE, "Interrupts disabled.\n");
+ break;
+ case AP_INTR_ENABLED:
+ rc = snprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
+ break;
+ case AP_INTR_IN_PROGRESS:
+ rc = snprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n");
+ break;
+ }
+ spin_unlock_bh(&ap_dev->lock);
+ return rc;
+}
+
+static DEVICE_ATTR(interrupt, 0444, ap_interrupt_show, NULL);
+
static ssize_t ap_modalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "ap:t%02X", to_ap_dev(dev)->device_type);
+ return sprintf(buf, "ap:t%02X\n", to_ap_dev(dev)->device_type);
}
static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL);
@@ -753,6 +760,8 @@ static struct attribute *ap_dev_attrs[] = {
&dev_attr_request_count.attr,
&dev_attr_requestq_count.attr,
&dev_attr_pendingq_count.attr,
+ &dev_attr_reset.attr,
+ &dev_attr_interrupt.attr,
&dev_attr_modalias.attr,
&dev_attr_ap_functions.attr,
NULL
@@ -926,6 +935,10 @@ static int ap_device_probe(struct device *dev)
spin_lock_bh(&ap_device_list_lock);
list_del_init(&ap_dev->list);
spin_unlock_bh(&ap_device_list_lock);
+ } else {
+ if (ap_dev->reset == AP_RESET_IN_PROGRESS ||
+ ap_dev->interrupt == AP_INTR_IN_PROGRESS)
+ __ap_schedule_poll_timer();
}
return rc;
}
@@ -1411,7 +1424,7 @@ static void ap_scan_bus(struct work_struct *unused)
struct ap_device *ap_dev;
struct device *dev;
ap_qid_t qid;
- int queue_depth, device_type;
+ int queue_depth = 0, device_type = 0;
unsigned int device_functions;
int rc, i;
@@ -1429,15 +1442,9 @@ static void ap_scan_bus(struct work_struct *unused)
else
rc = -ENODEV;
if (dev) {
- if (rc == -EBUSY) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(AP_RESET_TIMEOUT);
- rc = ap_query_queue(qid, &queue_depth,
- &device_type);
- }
ap_dev = to_ap_dev(dev);
spin_lock_bh(&ap_dev->lock);
- if (rc || ap_dev->unregistered) {
+ if (rc == -ENODEV || ap_dev->unregistered) {
spin_unlock_bh(&ap_dev->lock);
if (ap_dev->unregistered)
i--;
@@ -1451,13 +1458,15 @@ static void ap_scan_bus(struct work_struct *unused)
}
if (rc)
continue;
- rc = ap_init_queue(qid);
- if (rc)
- continue;
ap_dev = kzalloc(sizeof(*ap_dev), GFP_KERNEL);
if (!ap_dev)
break;
ap_dev->qid = qid;
+ rc = ap_init_queue(ap_dev);
+ if ((rc != 0) && (rc != -EBUSY)) {
+ kfree(ap_dev);
+ continue;
+ }
ap_dev->queue_depth = queue_depth;
ap_dev->unregistered = 1;
spin_lock_init(&ap_dev->lock);
@@ -1520,36 +1529,6 @@ ap_config_timeout(unsigned long ptr)
}
/**
- * __ap_schedule_poll_timer(): Schedule poll timer.
- *
- * Set up the timer to run the poll tasklet
- */
-static inline void __ap_schedule_poll_timer(void)
-{
- ktime_t hr_time;
-
- spin_lock_bh(&ap_poll_timer_lock);
- if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) {
- hr_time = ktime_set(0, poll_timeout);
- hrtimer_forward_now(&ap_poll_timer, hr_time);
- hrtimer_restart(&ap_poll_timer);
- }
- spin_unlock_bh(&ap_poll_timer_lock);
-}
-
-/**
- * ap_schedule_poll_timer(): Schedule poll timer.
- *
- * Set up the timer to run the poll tasklet
- */
-static inline void ap_schedule_poll_timer(void)
-{
- if (ap_using_interrupts())
- return;
- __ap_schedule_poll_timer();
-}
-
-/**
* ap_poll_read(): Receive pending reply messages from an AP device.
* @ap_dev: pointer to the AP device
* @flags: pointer to control flags, bit 2^0 is set if another poll is
@@ -1568,6 +1547,7 @@ static int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags)
ap_dev->reply->message, ap_dev->reply->length);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
+ ap_dev->interrupt = status.int_enabled;
atomic_dec(&ap_poll_requests);
ap_decrease_queue_count(ap_dev);
list_for_each_entry(ap_msg, &ap_dev->pendingq, list) {
@@ -1582,6 +1562,7 @@ static int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags)
*flags |= 1;
break;
case AP_RESPONSE_NO_PENDING_REPLY:
+ ap_dev->interrupt = status.int_enabled;
if (status.queue_empty) {
/* The card shouldn't forget requests but who knows. */
atomic_sub(ap_dev->queue_count, &ap_poll_requests);
@@ -1612,7 +1593,8 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
struct ap_message *ap_msg;
if (ap_dev->requestq_count <= 0 ||
- ap_dev->queue_count >= ap_dev->queue_depth)
+ (ap_dev->queue_count >= ap_dev->queue_depth) ||
+ (ap_dev->reset == AP_RESET_IN_PROGRESS))
return 0;
/* Start the next request on the queue. */
ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list);
@@ -1646,6 +1628,8 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
/**
* ap_poll_queue(): Poll AP device for pending replies and send new messages.
+ * Check if the queue has a pending reset. If the reset has completed,
+ * re-enable interrupts; otherwise reschedule the poll timer for another
+ * attempt.
* @ap_dev: pointer to the bus device
* @flags: pointer to control flags, bit 2^0 is set if another poll is
* required, bit 2^1 is set if the poll timer needs to get armed
@@ -1656,7 +1640,51 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
*/
static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags)
{
- int rc;
+ int rc, depth, type;
+ struct ap_queue_status status;
+
+ if (ap_dev->reset == AP_RESET_IN_PROGRESS) {
+ status = ap_test_queue(ap_dev->qid, &depth, &type);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ ap_dev->reset = AP_RESET_IGNORE;
+ if (ap_using_interrupts()) {
+ rc = ap_queue_enable_interruption(
+ ap_dev, ap_airq.lsi_ptr);
+ if (!rc)
+ ap_dev->interrupt = AP_INTR_IN_PROGRESS;
+ else if (rc == -ENODEV) {
+ pr_err("Registering adapter interrupts for "
+ "AP %d failed\n", AP_QID_DEVICE(ap_dev->qid));
+ return rc;
+ }
+ }
+ /* fall through */
+ case AP_RESPONSE_BUSY:
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ *flags |= AP_POLL_AFTER_TIMEOUT;
+ break;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ return -ENODEV;
+ default:
+ break;
+ }
+ }
+
+ if ((ap_dev->reset != AP_RESET_IN_PROGRESS) &&
+ (ap_dev->interrupt == AP_INTR_IN_PROGRESS)) {
+ status = ap_test_queue(ap_dev->qid, &depth, &type);
+ if (ap_using_interrupts()) {
+ if (status.int_enabled == 1)
+ ap_dev->interrupt = AP_INTR_ENABLED;
+ else
+ *flags |= AP_POLL_AFTER_TIMEOUT;
+ } else
+ ap_dev->interrupt = AP_INTR_DISABLED;
+ }
rc = ap_poll_read(ap_dev, flags);
if (rc)
@@ -1676,7 +1704,8 @@ static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_ms
struct ap_queue_status status;
if (list_empty(&ap_dev->requestq) &&
- ap_dev->queue_count < ap_dev->queue_depth) {
+ (ap_dev->queue_count < ap_dev->queue_depth) &&
+ (ap_dev->reset != AP_RESET_IN_PROGRESS)) {
status = __ap_send(ap_dev->qid, ap_msg->psmid,
ap_msg->message, ap_msg->length,
ap_msg->special);
@@ -1789,21 +1818,20 @@ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused)
* Reset a not responding AP device and move all requests from the
* pending queue to the request queue.
*/
-static void ap_reset(struct ap_device *ap_dev)
+static void ap_reset(struct ap_device *ap_dev, unsigned long *flags)
{
int rc;
- ap_dev->reset = AP_RESET_IGNORE;
atomic_sub(ap_dev->queue_count, &ap_poll_requests);
ap_dev->queue_count = 0;
list_splice_init(&ap_dev->pendingq, &ap_dev->requestq);
ap_dev->requestq_count += ap_dev->pendingq_count;
ap_dev->pendingq_count = 0;
- rc = ap_init_queue(ap_dev->qid);
+ rc = ap_init_queue(ap_dev);
if (rc == -ENODEV)
ap_dev->unregistered = 1;
else
- __ap_schedule_poll_timer();
+ *flags |= AP_POLL_AFTER_TIMEOUT;
}
static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags)
@@ -1812,7 +1840,7 @@ static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags)
if (ap_poll_queue(ap_dev, flags))
ap_dev->unregistered = 1;
if (ap_dev->reset == AP_RESET_DO)
- ap_reset(ap_dev);
+ ap_reset(ap_dev, flags);
}
return 0;
}
@@ -1845,9 +1873,9 @@ static void ap_poll_all(unsigned long dummy)
spin_unlock(&ap_dev->lock);
}
spin_unlock(&ap_device_list_lock);
- } while (flags & 1);
- if (flags & 2)
- ap_schedule_poll_timer();
+ } while (flags & AP_POLL_IMMEDIATELY);
+ if (flags & AP_POLL_AFTER_TIMEOUT)
+ __ap_schedule_poll_timer();
}
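The bare 1 and 2 flag bits are replaced by the named AP_POLL_IMMEDIATELY and AP_POLL_AFTER_TIMEOUT (defined in ap_bus.h below). A toy model of how the tasklet loop consumes them; poll_all_devices() is a hypothetical stand-in for walking the device list:

#include <stdio.h>

#define AP_POLL_IMMEDIATELY	1
#define AP_POLL_AFTER_TIMEOUT	2

static unsigned long poll_all_devices(int round)
{
	/* Pretend the first round still has work queued. */
	return round == 0 ? AP_POLL_IMMEDIATELY : AP_POLL_AFTER_TIMEOUT;
}

int main(void)
{
	unsigned long flags;
	int round = 0;

	do {
		flags = poll_all_devices(round++);
	} while (flags & AP_POLL_IMMEDIATELY);

	if (flags & AP_POLL_AFTER_TIMEOUT)
		printf("re-arming high-resolution poll timer\n");
	return 0;
}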
/**
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 2737d261a324..00468c8d0781 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -32,11 +32,13 @@
#define AP_DEVICES 64 /* Number of AP devices. */
#define AP_DOMAINS 256 /* Number of AP domains. */
-#define AP_MAX_RESET 90 /* Maximum number of resets. */
#define AP_RESET_TIMEOUT (HZ*0.7) /* Time in ticks for reset timeouts. */
#define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */
#define AP_POLL_TIME 1 /* Time in ticks between receive polls. */
+#define AP_POLL_IMMEDIATELY 1 /* continue running poll tasklet */
+#define AP_POLL_AFTER_TIMEOUT 2 /* run poll tasklet again after timeout */
+
extern int ap_domain_index;
/**
@@ -135,6 +137,14 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
#define AP_RESET_IGNORE 0 /* request timeout will be ignored */
#define AP_RESET_ARMED 1 /* request timeout timer is active */
#define AP_RESET_DO 2 /* AP reset required */
+#define AP_RESET_IN_PROGRESS 3 /* AP reset in progress */
+
+/*
+ * AP interrupt states
+ */
+#define AP_INTR_DISABLED 0 /* AP interrupt disabled */
+#define AP_INTR_ENABLED 1 /* AP interrupt enabled */
+#define AP_INTR_IN_PROGRESS 3 /* AP interrupt in progress */
struct ap_device;
struct ap_message;
@@ -168,6 +178,7 @@ struct ap_device {
struct timer_list timeout; /* Timer for request timeouts. */
int reset; /* Reset required after req. timeout. */
+ int interrupt; /* indicate if interrupts are enabled */
int queue_count; /* # messages currently on AP queue. */
struct list_head pendingq; /* List of message sent to AP queue. */
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c
index 71e698b85772..bb3908818505 100644
--- a/drivers/s390/crypto/zcrypt_cex4.c
+++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -39,7 +39,7 @@
* But the maximum time limit managed by the stomper code is set to 60sec.
* Hence we have to wait at least that time period.
*/
-#define CEX4_CLEANUP_TIME (61*HZ)
+#define CEX4_CLEANUP_TIME (900*HZ)
static struct ap_device_id zcrypt_cex4_ids[] = {
{ AP_DEVICE(AP_DEVICE_TYPE_CEX4) },
diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c
index 81f22980b2de..156b790072b4 100644
--- a/drivers/sh/intc/core.c
+++ b/drivers/sh/intc/core.c
@@ -366,8 +366,9 @@ int __init register_intc_controller(struct intc_desc *desc)
/* redirect this interrupts to the first one */
irq_set_chip(irq2, &dummy_irq_chip);
- irq_set_chained_handler(irq2, intc_redirect_irq);
- irq_set_handler_data(irq2, (void *)irq);
+ irq_set_chained_handler_and_data(irq2,
+ intc_redirect_irq,
+ (void *)irq);
}
}
diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c
index f30ac9354ff2..f5f1b821241a 100644
--- a/drivers/sh/intc/virq.c
+++ b/drivers/sh/intc/virq.c
@@ -243,8 +243,9 @@ restart:
*/
irq_set_nothread(irq);
- irq_set_chained_handler(entry->pirq, intc_virq_handler);
+ /* Set handler data before installing the handler */
add_virq_to_pirq(entry->pirq, irq);
+ irq_set_chained_handler(entry->pirq, intc_virq_handler);
radix_tree_tag_clear(&d->tree, entry->enum_id,
INTC_TAG_VIRQ_NEEDS_ALLOC);
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index 2e6716104d3f..5820e8513927 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -119,7 +119,7 @@ exit:
return ret;
}
-static struct kernel_param_ops duration_ops = {
+static const struct kernel_param_ops duration_ops = {
.set = duration_set,
.get = param_get_int,
};
@@ -167,7 +167,7 @@ exit_win:
return ret;
}
-static struct kernel_param_ops window_size_ops = {
+static const struct kernel_param_ops window_size_ops = {
.set = window_size_set,
.get = param_get_int,
};
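Constifying the ops table lets the compiler place it in read-only memory, and module_param_cb() takes a const pointer anyway. The shape of the pattern with simplified stand-in types (not the kernel's kernel_param_ops):

#include <stdio.h>

struct param_ops {
	int (*set)(const char *val);
	int (*get)(char *buf);
};

static int duration_set(const char *val) { (void)val; return 0; }
static int duration_get(char *buf) { return sprintf(buf, "0"); }

/* const: the table lands in .rodata and cannot be modified at runtime. */
static const struct param_ops duration_ops = {
	.set = duration_set,
	.get = duration_get,
};

int main(void)
{
	char buf[16];

	duration_ops.get(buf);
	printf("%s\n", buf);
	return 0;
}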
diff --git a/drivers/tty/hvc/hvc_iucv.c b/drivers/tty/hvc/hvc_iucv.c
index f78a87b07872..bb809cf36617 100644
--- a/drivers/tty/hvc/hvc_iucv.c
+++ b/drivers/tty/hvc/hvc_iucv.c
@@ -1345,7 +1345,7 @@ static int param_get_vmidfilter(char *buffer, const struct kernel_param *kp)
#define param_check_vmidfilter(name, p) __param_check(name, p, void)
-static struct kernel_param_ops param_ops_vmidfilter = {
+static const struct kernel_param_ops param_ops_vmidfilter = {
.set = param_set_vmidfilter,
.get = param_get_vmidfilter,
};
diff --git a/drivers/tty/hvc/hvc_tile.c b/drivers/tty/hvc/hvc_tile.c
index 3f6cd3102db5..9da1e842bbe9 100644
--- a/drivers/tty/hvc/hvc_tile.c
+++ b/drivers/tty/hvc/hvc_tile.c
@@ -51,7 +51,8 @@ int tile_console_write(const char *buf, int count)
_SIM_CONTROL_OPERATOR_BITS));
return 0;
} else {
- return hv_console_write((HV_VirtAddr)buf, count);
+ /* Translate 0 bytes written to EAGAIN for hvc_console_print. */
+ return hv_console_write((HV_VirtAddr)buf, count) ?: -EAGAIN;
}
}
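The `?:` here is GNU C's two-operand conditional: `a ?: b` yields `a` if it is non-zero, otherwise `b`, evaluating `a` only once, so a zero-byte write is reported as -EAGAIN. A plain illustration of the operator, unrelated to the hypervisor API:

#include <stdio.h>

int main(void)
{
	int written = 0;
	int rc = written ?: -11;	/* -11 == -EAGAIN on Linux */

	printf("rc = %d\n", rc);	/* prints -11 */
	return 0;
}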
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 7a3d146a5f0e..a9d837f83ce8 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -302,7 +302,7 @@ static int xen_initial_domain_console_init(void)
static void xen_console_update_evtchn(struct xencons_info *info)
{
if (xen_hvm_domain()) {
- uint64_t v;
+ uint64_t v = 0;
int err;
err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v);
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 2847108cc8dd..b5b427888b24 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -988,7 +988,7 @@ static int sysrq_reset_seq_param_set(const char *buffer,
return 0;
}
-static struct kernel_param_ops param_ops_sysrq_reset_seq = {
+static const struct kernel_param_ops param_ops_sysrq_reset_seq = {
.get = param_get_ushort,
.set = sysrq_reset_seq_param_set,
};
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index 888998a7fe31..a2ae88dbda78 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -1599,7 +1599,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver)
char file_arr[] = "CMVxy.bin";
char *file;
- kparam_block_sysfs_write(cmv_file);
+ kernel_param_lock(THIS_MODULE);
/* set proper name corresponding modem version and line type */
if (cmv_file[sc->modem_index] == NULL) {
if (UEA_CHIP_VERSION(sc) == ADI930)
@@ -1618,7 +1618,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver)
strlcat(cmv_name, file, UEA_FW_NAME_MAX);
if (ver == 2)
strlcat(cmv_name, ".v2", UEA_FW_NAME_MAX);
- kparam_unblock_sysfs_write(cmv_file);
+ kernel_param_unlock(THIS_MODULE);
}
static int request_cmvs_old(struct uea_softc *sc,
diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c
index d32d1c4d1b99..178ae93b7ebd 100644
--- a/drivers/video/fbdev/uvesafb.c
+++ b/drivers/video/fbdev/uvesafb.c
@@ -1977,7 +1977,7 @@ static int param_set_scroll(const char *val, const struct kernel_param *kp)
return 0;
}
-static struct kernel_param_ops param_ops_scroll = {
+static const struct kernel_param_ops param_ops_scroll = {
.set = param_set_scroll,
};
#define param_check_scroll(name, p) __param_check(name, p, void)
diff --git a/drivers/video/fbdev/vt8623fb.c b/drivers/video/fbdev/vt8623fb.c
index ea7f056ed5fe..8bac309c24b9 100644
--- a/drivers/video/fbdev/vt8623fb.c
+++ b/drivers/video/fbdev/vt8623fb.c
@@ -754,9 +754,9 @@ static int vt8623_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
/* Prepare startup mode */
- kparam_block_sysfs_write(mode_option);
+ kernel_param_lock(THIS_MODULE);
rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8);
- kparam_unblock_sysfs_write(mode_option);
+ kernel_param_unlock(THIS_MODULE);
if (! ((rc == 1) || (rc == 2))) {
rc = -EINVAL;
dev_err(info->device, "mode %s not found\n", mode_option);
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 7a5e60dea6c5..10189b5b627f 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -691,7 +691,7 @@ static int vm_cmdline_get(char *buffer, const struct kernel_param *kp)
return strlen(buffer) + 1;
}
-static struct kernel_param_ops vm_cmdline_param_ops = {
+static const struct kernel_param_ops vm_cmdline_param_ops = {
.set = vm_cmdline_set,
.get = vm_cmdline_get,
};
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 38387950490e..96093ae369a5 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -39,8 +39,8 @@
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/io_apic.h>
-#include <asm/xen/page.h>
#include <asm/xen/pci.h>
+#include <xen/page.h>
#endif
#include <asm/sync_bitops.h>
#include <asm/xen/hypercall.h>
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index 417415d738d0..ed673e1acd61 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -44,13 +44,13 @@
#include <asm/sync_bitops.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
-#include <asm/xen/page.h>
#include <xen/xen.h>
#include <xen/xen-ops.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
+#include <xen/page.h>
#include "events_internal.h"
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 89274850741b..67b9163db718 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -41,9 +41,9 @@
#include <xen/balloon.h>
#include <xen/gntdev.h>
#include <xen/events.h>
+#include <xen/page.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index b1c7170e5c9e..62f591f8763c 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -138,7 +138,6 @@ static struct gnttab_free_callback *gnttab_free_callback_list;
static int gnttab_expand(unsigned int req_entries);
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
-#define SPP (PAGE_SIZE / sizeof(grant_status_t))
static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 9e6a85104a20..d10effee9b9e 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -19,10 +19,10 @@
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/hvc-console.h>
+#include <xen/page.h>
#include <xen/xen-ops.h>
#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypervisor.h>
enum shutdown_state {
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index d88f36754bf7..239738f944ba 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -17,8 +17,8 @@
#include <xen/xen.h>
#include <xen/interface/xen.h>
+#include <xen/page.h>
#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypervisor.h>
#include <xen/tmem.h>
@@ -389,7 +389,7 @@ static int __init xen_tmem_init(void)
}
#endif
#ifdef CONFIG_CLEANCACHE
- BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
+ BUILD_BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
if (tmem_enabled && cleancache) {
int err;
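BUILD_BUG_ON turns the size check into a compile-time failure, which is possible here because both sizeof operands are constants. A standalone C11 equivalent with made-up struct names:

#include <stdint.h>

struct key_a { uint64_t v[3]; };
struct key_b { uint64_t v[3]; };

_Static_assert(sizeof(struct key_a) == sizeof(struct key_b),
	       "key layouts must match");

int main(void)
{
	return 0;	/* compiles only if the assertion holds */
}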
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 96b2011d25f3..9ad327238ba9 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -37,7 +37,7 @@
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/xen/hypervisor.h>
-#include <asm/xen/page.h>
+#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
#include <xen/balloon.h>
@@ -379,16 +379,16 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
int i, j;
for (i = 0; i < nr_pages; i++) {
- unsigned long addr = (unsigned long)vaddr +
- (PAGE_SIZE * i);
err = gnttab_grant_foreign_access(dev->otherend_id,
- virt_to_mfn(addr), 0);
+ virt_to_mfn(vaddr), 0);
if (err < 0) {
xenbus_dev_fatal(dev, err,
"granting access to ring page");
goto fail;
}
grefs[i] = err;
+
+ vaddr = vaddr + PAGE_SIZE;
}
return 0;
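The reworked loop grants one page per iteration and advances the virtual address itself instead of recomputing an offset each time. A toy model of that shape; grant_page() stands in for gnttab_grant_foreign_access(virt_to_mfn(vaddr)) and the gref values are fake:

#include <stdio.h>

#define PAGE_SIZE 4096UL

static int grant_page(void *vaddr)
{
	/* Stand-in for granting the frame backing vaddr. */
	return (int)((unsigned long)vaddr / PAGE_SIZE);	/* fake gref */
}

int main(void)
{
	static char ring[3 * PAGE_SIZE];
	void *vaddr = ring;
	int grefs[3];

	for (int i = 0; i < 3; i++) {
		grefs[i] = grant_page(vaddr);
		vaddr = (char *)vaddr + PAGE_SIZE;
	}
	printf("granted %d pages, first gref %d\n", 3, grefs[0]);
	return 0;
}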
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 5390a674b5e3..4308fb3cf7c2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -742,7 +742,7 @@ static int xenbus_resume_cb(struct notifier_block *nb,
int err = 0;
if (xen_hvm_domain()) {
- uint64_t v;
+ uint64_t v = 0;
err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
if (!err && v)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b155d32db766..4fe10f93db8a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -43,7 +43,7 @@ static inline struct bdev_inode *BDEV_I(struct inode *inode)
return container_of(inode, struct bdev_inode, vfs_inode);
}
-inline struct block_device *I_BDEV(struct inode *inode)
+struct block_device *I_BDEV(struct inode *inode)
{
return &BDEV_I(inode)->bdev;
}
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index df9932b00d08..1ce06c849a86 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -85,6 +85,7 @@ BTRFS_WORK_HELPER(extent_refs_helper);
BTRFS_WORK_HELPER(scrub_helper);
BTRFS_WORK_HELPER(scrubwrc_helper);
BTRFS_WORK_HELPER(scrubnc_helper);
+BTRFS_WORK_HELPER(scrubparity_helper);
static struct __btrfs_workqueue *
__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index ec2ee477f8ba..b0b093b6afec 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -64,6 +64,8 @@ BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
BTRFS_WORK_HELPER_PROTO(scrub_helper);
BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
+BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
+
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
unsigned int flags,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 614aaa1969bd..802fabb30e15 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -250,8 +250,12 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
* the first item to check. But sometimes, we may enter it with
* slot==nritems. In that case, go to the next leaf before we continue.
*/
- if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
- ret = btrfs_next_old_leaf(root, path, time_seq);
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ if (time_seq == (u64)-1)
+ ret = btrfs_next_leaf(root, path);
+ else
+ ret = btrfs_next_old_leaf(root, path, time_seq);
+ }
while (!ret && count < total_refs) {
eb = path->nodes[0];
@@ -291,7 +295,10 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
eie = NULL;
}
next:
- ret = btrfs_next_old_item(root, path, time_seq);
+ if (time_seq == (u64)-1)
+ ret = btrfs_next_item(root, path);
+ else
+ ret = btrfs_next_old_item(root, path, time_seq);
}
if (ret > 0)
@@ -334,6 +341,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
if (path->search_commit_root)
root_level = btrfs_header_level(root->commit_root);
+ else if (time_seq == (u64)-1)
+ root_level = btrfs_header_level(root->node);
else
root_level = btrfs_old_root_level(root, time_seq);
@@ -343,7 +352,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
}
path->lowest_level = level;
- ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+ if (time_seq == (u64)-1)
+ ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
+ 0, 0);
+ else
+ ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
+ time_seq);
/* root node has been locked, we can release @subvol_srcu safely here */
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -491,7 +505,9 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
BUG_ON(!ref->wanted_disk_byte);
eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
0);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ return PTR_ERR(eb);
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
@@ -507,7 +523,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
}
/*
- * merge two lists of backrefs and adjust counts accordingly
+ * merge backrefs and adjust counts accordingly
*
* mode = 1: merge identical keys, if key is set
* FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
@@ -535,9 +551,9 @@ static void __merge_refs(struct list_head *head, int mode)
ref2 = list_entry(pos2, struct __prelim_ref, list);
+ if (!ref_for_same_block(ref1, ref2))
+ continue;
if (mode == 1) {
- if (!ref_for_same_block(ref1, ref2))
- continue;
if (!ref1->parent && ref2->parent) {
xchg = ref1;
ref1 = ref2;
@@ -572,8 +588,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct list_head *prefs, u64 *total_refs,
u64 inum)
{
+ struct btrfs_delayed_ref_node *node;
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
- struct rb_node *n = &head->node.rb_node;
struct btrfs_key key;
struct btrfs_key op_key = {0};
int sgn;
@@ -583,12 +599,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
spin_lock(&head->lock);
- n = rb_first(&head->ref_root);
- while (n) {
- struct btrfs_delayed_ref_node *node;
- node = rb_entry(n, struct btrfs_delayed_ref_node,
- rb_node);
- n = rb_next(n);
+ list_for_each_entry(node, &head->ref_list, list) {
if (node->seq > seq)
continue;
@@ -882,6 +893,11 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
*
* NOTE: This can return values > 0
*
+ * If time_seq is set to (u64)-1, delayed refs are not searched and the
+ * behaviour is much like the trans == NULL case; the only difference is
+ * that the commit root is not used.
+ * The special case is for qgroup to search roots in commit_transaction().
+ *
* FIXME some caching might speed things up
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
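A sketch of the time_seq dispatch the comment describes: (u64)-1 selects the current tree and skips delayed refs and the tree-mod log. The helper and output strings are hypothetical; the real branches live inline in backref.c:

#include <stdint.h>
#include <stdio.h>

#define SEQ_CURRENT ((uint64_t)-1)

static void search(uint64_t time_seq)
{
	if (time_seq == SEQ_CURRENT)
		printf("btrfs_search_slot(): current root, no delayed refs\n");
	else
		printf("btrfs_search_old_slot(): rewind to seq %llu\n",
		       (unsigned long long)time_seq);
}

int main(void)
{
	search(42);		/* historical lookup via the tree-mod log */
	search(SEQ_CURRENT);	/* qgroup commit-time lookup */
	return 0;
}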
@@ -920,6 +936,9 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
path->skip_locking = 1;
}
+ if (time_seq == (u64)-1)
+ path->skip_locking = 1;
+
/*
* grab both a lock on the path and a lock on the delayed ref head.
* We need both to get a consistent picture of how the refs look
@@ -934,9 +953,10 @@ again:
BUG_ON(ret == 0);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- if (trans && likely(trans->type != __TRANS_DUMMY)) {
+ if (trans && likely(trans->type != __TRANS_DUMMY) &&
+ time_seq != (u64)-1) {
#else
- if (trans) {
+ if (trans && time_seq != (u64)-1) {
#endif
/*
* look if there are updates for this ref queued and lock the
@@ -1034,7 +1054,10 @@ again:
eb = read_tree_block(fs_info->extent_root,
ref->parent, 0);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ ret = PTR_ERR(eb);
+ goto out;
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0f11ebc92f02..54114b4887dd 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1439,8 +1439,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
old = read_tree_block(root, logical, 0);
- if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
- free_extent_buffer(old);
+ if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
+ if (!IS_ERR(old))
+ free_extent_buffer(old);
btrfs_warn(root->fs_info,
"failed to read tree block %llu from get_old_root", logical);
} else {
@@ -1685,7 +1686,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
if (!cur || !uptodate) {
if (!cur) {
cur = read_tree_block(root, blocknr, gen);
- if (!cur || !extent_buffer_uptodate(cur)) {
+ if (IS_ERR(cur)) {
+ return PTR_ERR(cur);
+ } else if (!extent_buffer_uptodate(cur)) {
free_extent_buffer(cur);
return -EIO;
}
@@ -1864,8 +1867,9 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
btrfs_node_ptr_generation(parent, slot));
- if (eb && !extent_buffer_uptodate(eb)) {
- free_extent_buffer(eb);
+ if (IS_ERR(eb) || !extent_buffer_uptodate(eb)) {
+ if (!IS_ERR(eb))
+ free_extent_buffer(eb);
eb = NULL;
}
@@ -2494,7 +2498,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
ret = -EAGAIN;
tmp = read_tree_block(root, blocknr, 0);
- if (tmp) {
+ if (!IS_ERR(tmp)) {
/*
* If the read above didn't mark this buffer up to date,
* it will never end up being up to date. Set ret to EIO now
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f364e1d8d3d..80a9aefb0c46 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -174,7 +174,7 @@ struct btrfs_ordered_sum;
/* csum types */
#define BTRFS_CSUM_TYPE_CRC32 0
-static int btrfs_csum_sizes[] = { 4, 0 };
+static int btrfs_csum_sizes[] = { 4 };
/* four bytes for CRC32 */
#define BTRFS_EMPTY_DIR_SIZE 0
@@ -1619,10 +1619,7 @@ struct btrfs_fs_info {
struct task_struct *cleaner_kthread;
int thread_pool_size;
- struct kobject super_kobj;
struct kobject *space_info_kobj;
- struct kobject *device_dir_kobj;
- struct completion kobj_unregister;
int do_barriers;
int closing;
int log_root_recovering;
@@ -1698,6 +1695,7 @@ struct btrfs_fs_info {
struct btrfs_workqueue *scrub_workers;
struct btrfs_workqueue *scrub_wr_completion_workers;
struct btrfs_workqueue *scrub_nocow_workers;
+ struct btrfs_workqueue *scrub_parity_workers;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
u32 check_integrity_print_mask;
@@ -1735,7 +1733,7 @@ struct btrfs_fs_info {
/* list of dirty qgroups to be written at next commit */
struct list_head dirty_qgroups;
- /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
+ /* used by qgroup for an efficient tree traversal */
u64 qgroup_seq;
/* qgroup rescan items */
@@ -3458,6 +3456,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode);
void btrfs_orphan_release_metadata(struct inode *inode);
@@ -3515,6 +3514,9 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
int __get_raid_index(u64 flags);
int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void check_system_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const u64 type);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -4050,6 +4052,7 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
#ifdef CONFIG_BTRFS_ASSERT
+__cold
static inline void assfail(char *expr, char *file, int line)
{
pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
@@ -4065,10 +4068,12 @@ static inline void assfail(char *expr, char *file, int line)
#define btrfs_assert()
__printf(5, 6)
+__cold
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
+__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *function,
unsigned int line, int errno);
@@ -4111,11 +4116,17 @@ static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
* Call btrfs_abort_transaction as early as possible when an error condition is
* detected, that way the exact line number is reported.
*/
-
#define btrfs_abort_transaction(trans, root, errno) \
do { \
- __btrfs_abort_transaction(trans, root, __func__, \
- __LINE__, errno); \
+ /* Report first abort since mount */ \
+ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
+ &((root)->fs_info->fs_state))) { \
+ WARN(1, KERN_DEBUG \
+ "BTRFS: Transaction aborted (error %d)\n", \
+ (errno)); \
+ } \
+ __btrfs_abort_transaction((trans), (root), __func__, \
+ __LINE__, (errno)); \
} while (0)
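The test_and_set_bit() guard makes the WARN fire only for the first abort since mount. The warn-once pattern, modeled with a plain flag instead of the atomic fs_state bit (so this sketch assumes single-threaded use):

#include <stdio.h>

static int aborted;

static int test_and_set(int *flag)
{
	int old = *flag;

	*flag = 1;
	return old;
}

static void abort_transaction(int errno_)
{
	if (!test_and_set(&aborted))
		fprintf(stderr, "Transaction aborted (error %d)\n", errno_);
	/* ...then unwind as before... */
}

int main(void)
{
	abort_transaction(-5);
	abort_transaction(-5);	/* second call is silent */
	return 0;
}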
#define btrfs_std_error(fs_info, errno) \
@@ -4132,6 +4143,7 @@ do { \
} while (0)
__printf(5, 6)
+__cold
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 8f8ed7d20bac..ac3e81da6d4e 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -22,6 +22,7 @@
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
+#include "qgroup.h"
struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
@@ -84,87 +85,6 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
return 0;
}
-/*
- * entries in the rb tree are ordered by the byte number of the extent,
- * type of the delayed backrefs and content of delayed backrefs.
- */
-static int comp_entry(struct btrfs_delayed_ref_node *ref2,
- struct btrfs_delayed_ref_node *ref1,
- bool compare_seq)
-{
- if (ref1->bytenr < ref2->bytenr)
- return -1;
- if (ref1->bytenr > ref2->bytenr)
- return 1;
- if (ref1->is_head && ref2->is_head)
- return 0;
- if (ref2->is_head)
- return -1;
- if (ref1->is_head)
- return 1;
- if (ref1->type < ref2->type)
- return -1;
- if (ref1->type > ref2->type)
- return 1;
- if (ref1->no_quota > ref2->no_quota)
- return 1;
- if (ref1->no_quota < ref2->no_quota)
- return -1;
- /* merging of sequenced refs is not allowed */
- if (compare_seq) {
- if (ref1->seq < ref2->seq)
- return -1;
- if (ref1->seq > ref2->seq)
- return 1;
- }
- if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
- ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
- return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
- btrfs_delayed_node_to_tree_ref(ref1),
- ref1->type);
- } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
- ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
- return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
- btrfs_delayed_node_to_data_ref(ref1));
- }
- BUG();
- return 0;
-}
-
-/*
- * insert a new ref into the rbtree. This returns any existing refs
- * for the same (bytenr,parent) tuple, or NULL if the new node was properly
- * inserted.
- */
-static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent_node = NULL;
- struct btrfs_delayed_ref_node *entry;
- struct btrfs_delayed_ref_node *ins;
- int cmp;
-
- ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- while (*p) {
- parent_node = *p;
- entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
- rb_node);
-
- cmp = comp_entry(entry, ins, 1);
- if (cmp < 0)
- p = &(*p)->rb_left;
- else if (cmp > 0)
- p = &(*p)->rb_right;
- else
- return entry;
- }
-
- rb_link_node(node, parent_node, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
struct rb_node *node)
@@ -268,7 +188,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
rb_erase(&head->href_node, &delayed_refs->href_root);
} else {
assert_spin_locked(&head->lock);
- rb_erase(&ref->rb_node, &head->ref_root);
+ list_del(&ref->list);
}
ref->in_tree = 0;
btrfs_put_delayed_ref(ref);
@@ -277,99 +197,6 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
trans->delayed_ref_updates--;
}
-static int merge_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_delayed_ref_head *head,
- struct btrfs_delayed_ref_node *ref, u64 seq)
-{
- struct rb_node *node;
- int mod = 0;
- int done = 0;
-
- node = rb_next(&ref->rb_node);
- while (!done && node) {
- struct btrfs_delayed_ref_node *next;
-
- next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- node = rb_next(node);
- if (seq && next->seq >= seq)
- break;
- if (comp_entry(ref, next, 0))
- continue;
-
- if (ref->action == next->action) {
- mod = next->ref_mod;
- } else {
- if (ref->ref_mod < next->ref_mod) {
- struct btrfs_delayed_ref_node *tmp;
-
- tmp = ref;
- ref = next;
- next = tmp;
- done = 1;
- }
- mod = -next->ref_mod;
- }
-
- drop_delayed_ref(trans, delayed_refs, head, next);
- ref->ref_mod += mod;
- if (ref->ref_mod == 0) {
- drop_delayed_ref(trans, delayed_refs, head, ref);
- done = 1;
- } else {
- /*
- * You can't have multiples of the same ref on a tree
- * block.
- */
- WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
- ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
- }
- }
- return done;
-}
-
-void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_delayed_ref_head *head)
-{
- struct rb_node *node;
- u64 seq = 0;
-
- assert_spin_locked(&head->lock);
- /*
- * We don't have too much refs to merge in the case of delayed data
- * refs.
- */
- if (head->is_data)
- return;
-
- spin_lock(&fs_info->tree_mod_seq_lock);
- if (!list_empty(&fs_info->tree_mod_seq_list)) {
- struct seq_list *elem;
-
- elem = list_first_entry(&fs_info->tree_mod_seq_list,
- struct seq_list, list);
- seq = elem->seq;
- }
- spin_unlock(&fs_info->tree_mod_seq_lock);
-
- node = rb_first(&head->ref_root);
- while (node) {
- struct btrfs_delayed_ref_node *ref;
-
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
- /* We can't merge refs that are outside of our seq count */
- if (seq && ref->seq >= seq)
- break;
- if (merge_ref(trans, delayed_refs, head, ref, seq))
- node = rb_first(&head->ref_root);
- else
- node = rb_next(&ref->rb_node);
- }
-}
-
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
@@ -443,45 +270,71 @@ again:
}
/*
- * helper function to update an extent delayed ref in the
- * rbtree. existing and update must both have the same
- * bytenr and parent
+ * Helper to insert the ref_node to the tail or merge with tail.
*
- * This may free existing if the update cancels out whatever
- * operation it was doing.
+ * Return 0 for insert.
+ * Return >0 for merge.
*/
-static noinline void
-update_existing_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_delayed_ref_head *head,
- struct btrfs_delayed_ref_node *existing,
- struct btrfs_delayed_ref_node *update)
+static int
+add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *root,
+ struct btrfs_delayed_ref_head *href,
+ struct btrfs_delayed_ref_node *ref)
{
- if (update->action != existing->action) {
- /*
- * this is effectively undoing either an add or a
- * drop. We decrement the ref_mod, and if it goes
- * down to zero we just delete the entry without
- * every changing the extent allocation tree.
- */
- existing->ref_mod--;
- if (existing->ref_mod == 0)
- drop_delayed_ref(trans, delayed_refs, head, existing);
- else
- WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
- existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
+ struct btrfs_delayed_ref_node *exist;
+ int mod;
+ int ret = 0;
+
+ spin_lock(&href->lock);
+ /* Check whether we can merge the tail node with ref */
+ if (list_empty(&href->ref_list))
+ goto add_tail;
+ exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
+ list);
+ /* No need to compare bytenr nor is_head */
+ if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
+ exist->seq != ref->seq)
+ goto add_tail;
+
+ if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
+ comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
+ btrfs_delayed_node_to_tree_ref(ref),
+ ref->type))
+ goto add_tail;
+ if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
+ exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
+ comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
+ btrfs_delayed_node_to_data_ref(ref)))
+ goto add_tail;
+
+ /* Now we are sure we can merge */
+ ret = 1;
+ if (exist->action == ref->action) {
+ mod = ref->ref_mod;
} else {
- WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
- existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
- /*
- * the action on the existing ref matches
- * the action on the ref we're trying to add.
- * Bump the ref_mod by one so the backref that
- * is eventually added/removed has the correct
- * reference count
- */
- existing->ref_mod += update->ref_mod;
+ /* Need to change action */
+ if (exist->ref_mod < ref->ref_mod) {
+ exist->action = ref->action;
+ mod = -exist->ref_mod;
+ exist->ref_mod = ref->ref_mod;
+ } else
+ mod = -ref->ref_mod;
}
+ exist->ref_mod += mod;
+
+ /* remove existing tail if its ref_mod is zero */
+ if (exist->ref_mod == 0)
+ drop_delayed_ref(trans, root, href, exist);
+ spin_unlock(&href->lock);
+ return ret;
+
+add_tail:
+ list_add_tail(&ref->list, &href->ref_list);
+ atomic_inc(&root->num_entries);
+ trans->delayed_ref_updates++;
+ spin_unlock(&href->lock);
+ return ret;
}
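The merge rule above in one small model: same action accumulates ref_mod, opposite actions cancel with the larger count winning, and a node whose count reaches zero is dropped. Plain-C stand-ins, not the btrfs structures:

#include <stdio.h>

enum action { ADD, DROP };

struct node { enum action action; int ref_mod; };

/* Merge `new` into `tail`; return 1 if the tail survives, 0 if the
 * counts cancelled out and the tail must be dropped. */
static int merge(struct node *tail, const struct node *new)
{
	int mod;

	if (tail->action == new->action) {
		mod = new->ref_mod;
	} else if (tail->ref_mod < new->ref_mod) {
		mod = -tail->ref_mod;
		tail->action = new->action;
		tail->ref_mod = new->ref_mod;
	} else {
		mod = -new->ref_mod;
	}
	tail->ref_mod += mod;
	return tail->ref_mod != 0;
}

int main(void)
{
	struct node tail = { ADD, 2 };
	struct node drop = { DROP, 2 };

	/* ADD(2) merged with DROP(2) cancels out: kept=0 ref_mod=0. */
	printf("kept=%d ref_mod=%d\n", merge(&tail, &drop), tail.ref_mod);
	return 0;
}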
/*
@@ -568,12 +421,14 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref, u64 bytenr,
- u64 num_bytes, int action, int is_data)
+ struct btrfs_delayed_ref_node *ref,
+ struct btrfs_qgroup_extent_record *qrecord,
+ u64 bytenr, u64 num_bytes, int action, int is_data)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_qgroup_extent_record *qexisting;
int count_mod = 1;
int must_insert_reserved = 0;
@@ -618,10 +473,22 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
head_ref = btrfs_delayed_node_to_head(ref);
head_ref->must_insert_reserved = must_insert_reserved;
head_ref->is_data = is_data;
- head_ref->ref_root = RB_ROOT;
+ INIT_LIST_HEAD(&head_ref->ref_list);
head_ref->processing = 0;
head_ref->total_ref_mod = count_mod;
+ /* Record qgroup extent info if provided */
+ if (qrecord) {
+ qrecord->bytenr = bytenr;
+ qrecord->num_bytes = num_bytes;
+ qrecord->old_roots = NULL;
+
+ qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs,
+ qrecord);
+ if (qexisting)
+ kfree(qrecord);
+ }
+
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
@@ -659,10 +526,10 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
u64 num_bytes, u64 parent, u64 ref_root, int level,
int action, int no_quota)
{
- struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
+ int ret;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
@@ -693,21 +560,14 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
trace_add_delayed_tree_ref(ref, full_ref, action);
- spin_lock(&head_ref->lock);
- existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
- if (existing) {
- update_existing_ref(trans, delayed_refs, head_ref, existing,
- ref);
- /*
- * we've updated the existing ref, free the newly
- * allocated ref
- */
+ ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+ /*
+ * XXX: memory should be freed at the same level it was allocated.
+ * But bad practice is everywhere... follow it for now. Needs cleanup.
+ */
+ if (ret > 0)
kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
- } else {
- atomic_inc(&delayed_refs->num_entries);
- trans->delayed_ref_updates++;
- }
- spin_unlock(&head_ref->lock);
}
/*
@@ -721,10 +581,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
u64 offset, int action, int no_quota)
{
- struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
+ int ret;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
@@ -758,21 +618,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
trace_add_delayed_data_ref(ref, full_ref, action);
- spin_lock(&head_ref->lock);
- existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
- if (existing) {
- update_existing_ref(trans, delayed_refs, head_ref, existing,
- ref);
- /*
- * we've updated the existing ref, free the newly
- * allocated ref
- */
+ ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+ if (ret > 0)
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
- } else {
- atomic_inc(&delayed_refs->num_entries);
- trans->delayed_ref_updates++;
- }
- spin_unlock(&head_ref->lock);
}
/*
@@ -790,6 +639,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_qgroup_extent_record *record = NULL;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
@@ -800,9 +650,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
return -ENOMEM;
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
- if (!head_ref) {
- kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
- return -ENOMEM;
+ if (!head_ref)
+ goto free_ref;
+
+ if (fs_info->quota_enabled && is_fstree(ref_root)) {
+ record = kmalloc(sizeof(*record), GFP_NOFS);
+ if (!record)
+ goto free_head_ref;
}
head_ref->extent_op = extent_op;
@@ -814,7 +668,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, action, 0);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -823,6 +677,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
spin_unlock(&delayed_refs->lock);
return 0;
+
+free_head_ref:
+ kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+free_ref:
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+
+ return -ENOMEM;
}
/*
@@ -839,6 +700,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_qgroup_extent_record *record = NULL;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
@@ -854,6 +716,16 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
return -ENOMEM;
}
+ if (fs_info->quota_enabled && is_fstree(ref_root)) {
+ record = kmalloc(sizeof(*record), GFP_NOFS);
+ if (!record) {
+ kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+ kmem_cache_free(btrfs_delayed_ref_head_cachep,
+ head_ref);
+ return -ENOMEM;
+ }
+ }
+
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
@@ -863,7 +735,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, action, 1);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -891,9 +763,9 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
- num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data);
+ add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
+ num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+ extent_op->is_data);
spin_unlock(&delayed_refs->lock);
return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 5eb0892396d0..13fb5e6090fe 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -24,9 +24,25 @@
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
+/*
+ * XXX: Qu: I really hate the design that ref_head and tree/data ref share the
+ * same ref_node structure.
+ * Ref_head is at a higher logic level than tree/data refs, and the duplicated
+ * bytenr/num_bytes in ref_node are really a waste of memory; they should be
+ * referenced from ref_head.
+ * This gets more disgusting now that we use a list to store tree/data refs in
+ * ref_head. Must clean this mess up later.
+ */
struct btrfs_delayed_ref_node {
+ /*
+ * ref_heads use an rb tree, stored in ref_root->href,
+ * indexed by bytenr.
+ */
struct rb_node rb_node;
+ /* data/tree refs use a list, stored in ref_head->ref_list. */
+ struct list_head list;
+
/* the starting bytenr of the extent */
u64 bytenr;
@@ -83,7 +99,7 @@ struct btrfs_delayed_ref_head {
struct mutex mutex;
spinlock_t lock;
- struct rb_root ref_root;
+ struct list_head ref_list;
struct rb_node href_node;
@@ -132,6 +148,9 @@ struct btrfs_delayed_ref_root {
/* head ref rbtree */
struct rb_root href_root;
+ /* dirty extent records */
+ struct rb_root dirty_extent_root;
+
/* this spin lock protects the rbtree and the entries inside */
spinlock_t lock;
@@ -156,6 +175,14 @@ struct btrfs_delayed_ref_root {
int flushing;
u64 run_delayed_start;
+
+ /*
+ * To make qgroup skip the given root.
+ * This is for snapshots, as btrfs_qgroup_inherit() will manually
+ * modify the counters for the snapshot and its source; we should skip
+ * the snapshot in new_root/old_roots or it will be counted twice.
+ */
+ u64 qgroup_to_skip;
};
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 0573848c7333..862fbc206755 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -376,6 +376,10 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
WARN_ON(!tgt_device);
dev_replace->tgtdev = tgt_device;
+ ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device);
+ if (ret)
+ btrfs_error(root->fs_info, ret, "kobj add dev failed");
+
printk_in_rcu(KERN_INFO
"BTRFS: dev_replace from %s (devid %llu) to %s started\n",
src_device->missing ? "<missing disk>" :
@@ -583,8 +587,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&uuid_mutex);
/* replace the sysfs entry */
- btrfs_kobj_rm_device(fs_info, src_device);
- btrfs_kobj_add_device(fs_info, tgt_device);
+ btrfs_kobj_rm_device(fs_info->fs_devices, src_device);
btrfs_rm_dev_replace_free_srcdev(fs_info, src_device);
/* write back the superblocks */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0bccf18dc1dc..3f43bfea3684 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1149,12 +1149,12 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
- return NULL;
+ return ERR_PTR(-ENOMEM);
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
if (ret) {
free_extent_buffer(buf);
- return NULL;
+ return ERR_PTR(ret);
}
return buf;
@@ -1509,20 +1509,19 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
generation = btrfs_root_generation(&root->root_item);
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
generation);
- if (!root->node) {
- ret = -ENOMEM;
+ if (IS_ERR(root->node)) {
+ ret = PTR_ERR(root->node);
goto find_fail;
} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
ret = -EIO;
- goto read_fail;
+ free_extent_buffer(root->node);
+ goto find_fail;
}
root->commit_root = btrfs_root_node(root);
out:
btrfs_free_path(path);
return root;
-read_fail:
- free_extent_buffer(root->node);
find_fail:
kfree(root);
alloc_fail:
@@ -2320,8 +2319,12 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
log_tree_root->node = read_tree_block(tree_root, bytenr,
fs_info->generation + 1);
- if (!log_tree_root->node ||
- !extent_buffer_uptodate(log_tree_root->node)) {
+ if (IS_ERR(log_tree_root->node)) {
+ printk(KERN_ERR "BTRFS: failed to read log tree\n");
+ ret = PTR_ERR(log_tree_root->node);
+ kfree(log_tree_root);
+ return ret;
+ } else if (!extent_buffer_uptodate(log_tree_root->node)) {
printk(KERN_ERR "BTRFS: failed to read log tree\n");
free_extent_buffer(log_tree_root->node);
kfree(log_tree_root);
@@ -2494,7 +2497,6 @@ int open_ctree(struct super_block *sb,
seqlock_init(&fs_info->profiles_lock);
init_rwsem(&fs_info->delayed_iput_sem);
- init_completion(&fs_info->kobj_unregister);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
@@ -2797,8 +2799,8 @@ int open_ctree(struct super_block *sb,
chunk_root->node = read_tree_block(chunk_root,
btrfs_super_chunk_root(disk_super),
generation);
- if (!chunk_root->node ||
- !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+ if (IS_ERR(chunk_root->node) ||
+ !extent_buffer_uptodate(chunk_root->node)) {
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
goto fail_tree_roots;
@@ -2834,8 +2836,8 @@ retry_root_backup:
tree_root->node = read_tree_block(tree_root,
btrfs_super_root(disk_super),
generation);
- if (!tree_root->node ||
- !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+ if (IS_ERR(tree_root->node) ||
+ !extent_buffer_uptodate(tree_root->node)) {
printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
sb->s_id);
@@ -2874,10 +2876,22 @@ retry_root_backup:
btrfs_close_extra_devices(fs_devices, 1);
+ ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
+ if (ret) {
+ pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret);
+ goto fail_block_groups;
+ }
+
+ ret = btrfs_sysfs_add_device(fs_devices);
+ if (ret) {
+ pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret);
+ goto fail_fsdev_sysfs;
+ }
+
ret = btrfs_sysfs_add_one(fs_info);
if (ret) {
pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
- goto fail_block_groups;
+ goto fail_fsdev_sysfs;
}
ret = btrfs_init_space_info(fs_info);
@@ -3055,6 +3069,9 @@ fail_cleaner:
fail_sysfs:
btrfs_sysfs_remove_one(fs_info);
+fail_fsdev_sysfs:
+ btrfs_sysfs_remove_fsid(fs_info->fs_devices);
+
fail_block_groups:
btrfs_put_block_group_cache(fs_info);
btrfs_free_block_groups(fs_info);
@@ -3725,6 +3742,7 @@ void close_ctree(struct btrfs_root *root)
}
btrfs_sysfs_remove_one(fs_info);
+ btrfs_sysfs_remove_fsid(fs_info->fs_devices);
btrfs_free_fs_roots(fs_info);
@@ -4053,6 +4071,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
struct btrfs_delayed_ref_head *head;
+ struct btrfs_delayed_ref_node *tmp;
bool pin_bytes = false;
head = rb_entry(node, struct btrfs_delayed_ref_head,
@@ -4068,11 +4087,10 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
continue;
}
spin_lock(&head->lock);
- while ((node = rb_first(&head->ref_root)) != NULL) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
+ list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
+ list) {
ref->in_tree = 0;
- rb_erase(&ref->rb_node, &head->ref_root);
+ list_del(&ref->list);
atomic_dec(&delayed_refs->num_entries);
btrfs_put_delayed_ref(ref);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0ec3acd14cbf..38b76cc02f48 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -79,11 +79,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
+ struct btrfs_delayed_ref_node *node, u64 parent,
u64 root_objectid, u64 owner_objectid,
u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extra_op,
- int no_quota);
+ struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
struct extent_buffer *leaf,
struct btrfs_extent_item *ei);
@@ -1967,10 +1966,9 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes,
+ struct btrfs_delayed_ref_node *node,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add,
- int no_quota,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1978,9 +1976,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_extent_item *item;
struct btrfs_key key;
+ u64 bytenr = node->bytenr;
+ u64 num_bytes = node->num_bytes;
u64 refs;
int ret;
- enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
+ int no_quota = node->no_quota;
path = btrfs_alloc_path();
if (!path)
@@ -1996,26 +1996,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
bytenr, num_bytes, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
- if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
+ if ((ret < 0 && ret != -EAGAIN) || !ret)
goto out;
- /*
- * Ok we were able to insert an inline extent and it appears to be a new
- * reference, deal with the qgroup accounting.
- */
- if (!ret && !no_quota) {
- ASSERT(root->fs_info->quota_enabled);
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item);
- if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
- type = BTRFS_QGROUP_OPER_ADD_SHARED;
- btrfs_release_path(path);
-
- ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
- bytenr, num_bytes, type, 0);
- goto out;
- }
/*
* Ok we had -EAGAIN which means we didn't have space to insert and
@@ -2026,8 +2008,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, item);
- if (refs)
- type = BTRFS_QGROUP_OPER_ADD_SHARED;
btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, item);
@@ -2035,13 +2015,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- if (!no_quota) {
- ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
- bytenr, num_bytes, type, 0);
- if (ret)
- goto out;
- }
-
path->reada = 1;
path->leave_spinning = 1;
/* now insert the actual backref */
@@ -2087,17 +2060,15 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
ref->objectid, ref->offset,
&ins, node->ref_mod);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
- node->num_bytes, parent,
+ ret = __btrfs_inc_extent_ref(trans, root, node, parent,
ref_root, ref->objectid,
ref->offset, node->ref_mod,
- node->no_quota, extent_op);
+ extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, root, node->bytenr,
- node->num_bytes, parent,
+ ret = __btrfs_free_extent(trans, root, node, parent,
ref_root, ref->objectid,
ref->offset, node->ref_mod,
- extent_op, node->no_quota);
+ extent_op);
} else {
BUG();
}
@@ -2255,15 +2226,14 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
ref->level, &ins,
node->no_quota);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
- node->num_bytes, parent, ref_root,
- ref->level, 0, 1, node->no_quota,
+ ret = __btrfs_inc_extent_ref(trans, root, node,
+ parent, ref_root,
+ ref->level, 0, 1,
extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, root, node->bytenr,
- node->num_bytes, parent, ref_root,
- ref->level, 0, 1, extent_op,
- node->no_quota);
+ ret = __btrfs_free_extent(trans, root, node,
+ parent, ref_root,
+ ref->level, 0, 1, extent_op);
} else {
BUG();
}
@@ -2323,28 +2293,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
return ret;
}
-static noinline struct btrfs_delayed_ref_node *
+static inline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
- struct rb_node *node;
- struct btrfs_delayed_ref_node *ref, *last = NULL;;
+ if (list_empty(&head->ref_list))
+ return NULL;
- /*
- * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
- * this prevents ref count from going down to zero when
- * there still are pending delayed ref.
- */
- node = rb_first(&head->ref_root);
- while (node) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
- if (ref->action == BTRFS_ADD_DELAYED_REF)
- return ref;
- else if (last == NULL)
- last = ref;
- node = rb_next(node);
- }
- return last;
+ return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
+ list);
}
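/*
 * Illustrative sketch, not part of the patch: with the per-head rbtree of
 * delayed refs replaced by a plain list, "pick the next ref to run"
 * degenerates to taking the list head, which is why select_delayed_ref()
 * above becomes a trivial inline helper. An equivalent standalone form,
 * using only list.h primitives:
 */
static inline struct btrfs_delayed_ref_node *
first_delayed_ref_or_null(struct list_head *ref_list)
{
	if (list_empty(ref_list))
		return NULL;
	return list_first_entry(ref_list, struct btrfs_delayed_ref_node,
				list);
}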
/*
@@ -2396,16 +2352,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
}
}
- /*
- * We need to try and merge add/drops of the same ref since we
- * can run into issues with relocate dropping the implicit ref
- * and then it being added back again before the drop can
- * finish. If we merged anything we need to re-loop so we can
- * get a good ref.
- */
spin_lock(&locked_ref->lock);
- btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
- locked_ref);
/*
* locked_ref is the head node, so we have to go one
@@ -2482,7 +2429,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
spin_unlock(&locked_ref->lock);
spin_lock(&delayed_refs->lock);
spin_lock(&locked_ref->lock);
- if (rb_first(&locked_ref->ref_root) ||
+ if (!list_empty(&locked_ref->ref_list) ||
locked_ref->extent_op) {
spin_unlock(&locked_ref->lock);
spin_unlock(&delayed_refs->lock);
@@ -2496,7 +2443,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
} else {
actual_count++;
ref->in_tree = 0;
- rb_erase(&ref->rb_node, &locked_ref->ref_root);
+ list_del(&ref->list);
}
atomic_dec(&delayed_refs->num_entries);
@@ -2864,9 +2811,6 @@ again:
goto again;
}
out:
- ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
- if (ret)
- return ret;
assert_qgroups_uptodate(trans);
return 0;
}
@@ -2905,7 +2849,6 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_data_ref *data_ref;
struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *node;
int ret = 0;
delayed_refs = &trans->transaction->delayed_refs;
@@ -2934,11 +2877,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
spin_unlock(&delayed_refs->lock);
spin_lock(&head->lock);
- node = rb_first(&head->ref_root);
- while (node) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- node = rb_next(node);
-
+ list_for_each_entry(ref, &head->ref_list, list) {
/* If it's a shared ref we know a cross reference exists */
if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
ret = 1;
@@ -3693,7 +3632,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
- found->full = 0;
+ if (total_bytes > 0)
+ found->full = 0;
spin_unlock(&found->lock);
*space_info = found;
return 0;
@@ -3721,7 +3661,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_reserved = 0;
found->bytes_readonly = 0;
found->bytes_may_use = 0;
- found->full = 0;
+ if (total_bytes > 0)
+ found->full = 0;
+ else
+ found->full = 1;
found->force_alloc = CHUNK_ALLOC_NO_FORCE;
found->chunk_alloc = 0;
found->flush = 0;
@@ -3975,6 +3918,9 @@ commit_trans:
!atomic_read(&root->fs_info->open_ioctl_trans)) {
need_commit--;
+ if (need_commit > 0)
+ btrfs_wait_ordered_roots(fs_info, -1);
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -4088,7 +4034,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
return 1;
}
-static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
+static u64 get_profile_num_devs(struct btrfs_root *root, u64 type)
{
u64 num_dev;
@@ -4102,24 +4048,43 @@ static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
else
num_dev = 1; /* DUP or single */
- /* metadata for updaing devices and chunk tree */
- return btrfs_calc_trans_metadata_size(root, num_dev + 1);
+ return num_dev;
}
-static void check_system_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 type)
+/*
+ * If @is_allocation is true, reserve space in the system space info necessary
+ * for allocating a chunk, otherwise if it's false, reserve space necessary for
+ * removing a chunk.
+ */
+void check_system_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 type)
{
struct btrfs_space_info *info;
u64 left;
u64 thresh;
+ int ret = 0;
+ u64 num_devs;
+
+ /*
+	 * Needed because we can end up allocating a system chunk, and we need
+	 * the space reservation in the chunk block reserve to be atomic and
+	 * race free.
+ */
+ ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));
info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
left = info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved - info->bytes_readonly;
+ info->bytes_reserved - info->bytes_readonly -
+ info->bytes_may_use;
spin_unlock(&info->lock);
- thresh = get_system_chunk_thresh(root, type);
+ num_devs = get_profile_num_devs(root, type);
+
+ /* num_devs device items to update and 1 chunk item to add or remove */
+ thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
+ btrfs_calc_trans_metadata_size(root, 1);
+
if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
left, thresh, type);
@@ -4130,7 +4095,21 @@ static void check_system_chunk(struct btrfs_trans_handle *trans,
u64 flags;
flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
- btrfs_alloc_chunk(trans, root, flags);
+ /*
+ * Ignore failure to create system chunk. We might end up not
+ * needing it, as we might not need to COW all nodes/leafs from
+ * the paths we visit in the chunk tree (they were already COWed
+ * or created in the current transaction for example).
+ */
+ ret = btrfs_alloc_chunk(trans, root, flags);
+ }
+
+ if (!ret) {
+ ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
+ &root->fs_info->chunk_block_rsv,
+ thresh, BTRFS_RESERVE_NO_FLUSH);
+ if (!ret)
+ trans->chunk_bytes_reserved += thresh;
}
}
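/*
 * Illustrative usage sketch (hypothetical wrapper, using only helpers
 * visible in this patch): check_system_chunk() reserves `thresh` bytes in
 * fs_info->chunk_block_rsv and accounts them in
 * trans->chunk_bytes_reserved; btrfs_trans_release_chunk_metadata(), added
 * further below, returns them once the pending block groups have been
 * created.
 */
static void chunk_rsv_lifecycle_sketch(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root, u64 type)
{
	mutex_lock(&root->fs_info->chunk_mutex);
	check_system_chunk(trans, root, type);
	/* ... allocate or remove the chunk here ... */
	mutex_unlock(&root->fs_info->chunk_mutex);

	/* later, at commit time, after btrfs_create_pending_block_groups() */
	btrfs_trans_release_chunk_metadata(trans);
}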
@@ -5188,6 +5167,24 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
trans->bytes_reserved = 0;
}
+/*
+ * To be called after all the new block groups attached to the transaction
+ * handle have been created (btrfs_create_pending_block_groups()).
+ */
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_fs_info *fs_info = trans->root->fs_info;
+
+ if (!trans->chunk_bytes_reserved)
+ return;
+
+ WARN_ON_ONCE(!list_empty(&trans->new_bgs));
+
+ block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
+ trans->chunk_bytes_reserved);
+ trans->chunk_bytes_reserved = 0;
+}
+
/* Can only return 0 or -ENOSPC */
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode)
@@ -6092,11 +6089,10 @@ static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
+ struct btrfs_delayed_ref_node *node, u64 parent,
u64 root_objectid, u64 owner_objectid,
u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota)
+ struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_key key;
struct btrfs_path *path;
@@ -6110,10 +6106,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
int extent_slot = 0;
int found_extent = 0;
int num_to_del = 1;
+ int no_quota = node->no_quota;
u32 item_size;
u64 refs;
+ u64 bytenr = node->bytenr;
+ u64 num_bytes = node->num_bytes;
int last_ref = 0;
- enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
@@ -6294,7 +6292,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
refs -= refs_to_drop;
if (refs > 0) {
- type = BTRFS_QGROUP_OPER_SUB_SHARED;
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, ei);
/*
@@ -6356,18 +6353,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- /* Deal with the quota accounting */
- if (!ret && last_ref && !no_quota) {
- int mod_seq = 0;
-
- if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
- type == BTRFS_QGROUP_OPER_SUB_SHARED)
- mod_seq = 1;
-
- ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
- bytenr, num_bytes, type,
- mod_seq);
- }
out:
btrfs_free_path(path);
return ret;
@@ -6393,7 +6378,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
goto out_delayed_unlock;
spin_lock(&head->lock);
- if (rb_first(&head->ref_root))
+ if (!list_empty(&head->ref_list))
goto out;
if (head->extent_op) {
@@ -7303,13 +7288,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- /* Always set parent to 0 here since its exclusive anyway. */
- ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
- ins->objectid, ins->offset,
- BTRFS_QGROUP_OPER_ADD_EXCL, 0);
- if (ret)
- return ret;
-
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -7391,14 +7369,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
- if (!no_quota) {
- ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
- ins->objectid, num_bytes,
- BTRFS_QGROUP_OPER_ADD_EXCL, 0);
- if (ret)
- return ret;
- }
-
ret = update_block_group(trans, root, ins->objectid, root->nodesize,
1);
if (ret) { /* -ENOENT, logic error */
@@ -7755,12 +7725,18 @@ reada:
wc->reada_slot = slot;
}
+/*
+ * TODO: Modify the related functions to add each affected node/leaf to
+ * dirty_extent_root, for later qgroup accounting.
+ *
+ * Currently, this function does nothing.
+ */
static int account_leaf_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *eb)
{
int nr = btrfs_header_nritems(eb);
- int i, extent_type, ret;
+ int i, extent_type;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
u64 bytenr, num_bytes;
@@ -7783,13 +7759,6 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
continue;
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
-
- ret = btrfs_qgroup_record_ref(trans, root->fs_info,
- root->objectid,
- bytenr, num_bytes,
- BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
- if (ret)
- return ret;
}
return 0;
}
@@ -7858,6 +7827,8 @@ static int adjust_slots_upwards(struct btrfs_root *root,
/*
* root_eb is the subtree root and is locked before this function is called.
+ * TODO: Modify this function to mark all nodes (including completely
+ * shared nodes) in dirty_extent_root, so they get accounted in qgroup.
*/
static int account_shared_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -7920,7 +7891,11 @@ walk_down:
child_gen = btrfs_node_ptr_generation(eb, parent_slot);
eb = read_tree_block(root, child_bytenr, child_gen);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ ret = PTR_ERR(eb);
+ goto out;
+ } else if (!extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
ret = -EIO;
goto out;
}
@@ -7931,16 +7906,6 @@ walk_down:
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
-
- ret = btrfs_qgroup_record_ref(trans, root->fs_info,
- root->objectid,
- child_bytenr,
- root->nodesize,
- BTRFS_QGROUP_OPER_SUB_SUBTREE,
- 0);
- if (ret)
- goto out;
-
}
if (level == 0) {
@@ -8151,7 +8116,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
if (reada && level == 1)
reada_walk_down(trans, root, wc, path);
next = read_tree_block(root, bytenr, generation);
- if (!next || !extent_buffer_uptodate(next)) {
+ if (IS_ERR(next)) {
+ return PTR_ERR(next);
+ } else if (!extent_buffer_uptodate(next)) {
free_extent_buffer(next);
return -EIO;
}
@@ -8533,24 +8500,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
goto out_end_trans;
}
- /*
- * Qgroup update accounting is run from
- * delayed ref handling. This usually works
- * out because delayed refs are normally the
- * only way qgroup updates are added. However,
- * we may have added updates during our tree
- * walk so run qgroups here to make sure we
- * don't lose any updates.
- */
- ret = btrfs_delayed_qgroup_accounting(trans,
- root->fs_info);
- if (ret)
- printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
- "running qgroup updates "
- "during snapshot delete. "
- "Quota is out of sync, "
- "rescan required.\n", ret);
-
btrfs_end_transaction_throttle(trans, tree_root);
if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8604,14 +8553,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
root_dropped = true;
out_end_trans:
- ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
- if (ret)
- printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
- "running qgroup updates "
- "during snapshot delete. "
- "Quota is out of sync, "
- "rescan required.\n", ret);
-
btrfs_end_transaction_throttle(trans, tree_root);
out_free:
kfree(wc);
@@ -9562,6 +9503,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
free_excluded_extents(root, cache);
+ /*
+ * Call to ensure the corresponding space_info object is created and
+ * assigned to our block group, but don't update its counters just yet.
+ * We want our bg to be added to the rbtree with its ->space_info set.
+ */
+ ret = update_space_info(root->fs_info, cache->flags, 0, 0,
+ &cache->space_info);
+ if (ret) {
+ btrfs_remove_free_space_cache(cache);
+ btrfs_put_block_group(cache);
+ return ret;
+ }
+
ret = btrfs_add_block_group_cache(root->fs_info, cache);
if (ret) {
btrfs_remove_free_space_cache(cache);
@@ -9569,6 +9523,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
return ret;
}
+ /*
+ * Now that our block group has its ->space_info set and is inserted in
+ * the rbtree, update the space info's counters.
+ */
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
if (ret) {
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/fs/btrfs/extent-tree.h
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c374e1e71e5f..02d05817cbdf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1277,7 +1277,12 @@ int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask)
{
- return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
+ int wake = 0;
+
+ if (bits & EXTENT_LOCKED)
+ wake = 1;
+
+ return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
}
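/*
 * Why the new `wake` flag matters, sketched with a hypothetical caller: a
 * task blocked in lock_extent_bits() sleeps until the range is unlocked,
 * so clearing EXTENT_LOCKED without a wake-up could leave it sleeping
 * forever. With the change above,
 *
 *	clear_extent_bits(tree, start, end, EXTENT_LOCKED, GFP_NOFS);
 *
 * also wakes every waiter on [start, end], because clear_extent_bit() is
 * now invoked with wake == 1 whenever EXTENT_LOCKED is in `bits`.
 */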
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
@@ -4490,6 +4495,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ flags |= FIEMAP_EXTENT_UNWRITTEN;
free_extent_map(em);
em = NULL;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b072e17479aa..795d754327a7 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1868,6 +1868,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_log_ctx ctx;
int ret = 0;
bool full_sync = 0;
+ const u64 len = end - start + 1;
trace_btrfs_sync_file(file, datasync);
@@ -1896,7 +1897,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* all extents are persisted and the respective file extent
* items are in the fs/subvol btree.
*/
- ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
+ ret = btrfs_wait_ordered_range(inode, start, len);
} else {
/*
* Start any new ordered operations before starting to log the
@@ -1968,8 +1969,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
smp_mb();
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
- (full_sync && BTRFS_I(inode)->last_trans <=
- root->fs_info->last_trans_committed)) {
+ (BTRFS_I(inode)->last_trans <=
+ root->fs_info->last_trans_committed &&
+ (full_sync ||
+ !btrfs_have_ordered_extents_in_range(inode, start, len)))) {
/*
* We've had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 9dbe5b548fa6..fb5a6b1c62a6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -231,6 +231,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
{
int ret = 0;
struct btrfs_path *path = btrfs_alloc_path();
+ bool locked = false;
if (!path) {
ret = -ENOMEM;
@@ -238,6 +239,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
}
if (block_group) {
+ locked = true;
mutex_lock(&trans->transaction->cache_write_mutex);
if (!list_empty(&block_group->io_list)) {
list_del_init(&block_group->io_list);
@@ -269,18 +271,14 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY);
- if (ret) {
- mutex_unlock(&trans->transaction->cache_write_mutex);
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = btrfs_update_inode(trans, root, inode);
- if (block_group)
- mutex_unlock(&trans->transaction->cache_write_mutex);
-
fail:
+ if (locked)
+ mutex_unlock(&trans->transaction->cache_write_mutex);
if (ret)
btrfs_abort_transaction(trans, root, ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8bb013672aee..855935f6671a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4986,24 +4986,40 @@ static void evict_inode_truncate_pages(struct inode *inode)
}
write_unlock(&map_tree->lock);
+ /*
+ * Keep looping until we have no more ranges in the io tree.
+ * We can have ongoing bios started by readpages (called from readahead)
+	 * that didn't get their end io callbacks called yet or that are still
+	 * in progress (extent_io.c:end_bio_extent_readpage()). This means some
+ * ranges can still be locked and eviction started because before
+ * submitting those bios, which are executed by a separate task (work
+ * queue kthread), inode references (inode->i_count) were not taken
+ * (which would be dropped in the end io callback of each bio).
+ * Therefore here we effectively end up waiting for those bios and
+ * anyone else holding locked ranges without having bumped the inode's
+ * reference count - if we don't do it, when they access the inode's
+	 * io_tree to unlock a range, it may be too late, leading to a
+	 * use-after-free issue.
+ */
spin_lock(&io_tree->lock);
while (!RB_EMPTY_ROOT(&io_tree->state)) {
struct extent_state *state;
struct extent_state *cached_state = NULL;
+ u64 start;
+ u64 end;
node = rb_first(&io_tree->state);
state = rb_entry(node, struct extent_state, rb_node);
- atomic_inc(&state->refs);
+ start = state->start;
+ end = state->end;
spin_unlock(&io_tree->lock);
- lock_extent_bits(io_tree, state->start, state->end,
- 0, &cached_state);
- clear_extent_bit(io_tree, state->start, state->end,
+ lock_extent_bits(io_tree, start, end, 0, &cached_state);
+ clear_extent_bit(io_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY |
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 1,
&cached_state, GFP_NOFS);
- free_extent_state(state);
cond_resched();
spin_lock(&io_tree->lock);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1c22c6518504..c86b835da7a8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -553,8 +553,8 @@ static noinline int create_subvol(struct inode *dir,
key.offset = (u64)-1;
new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
if (IS_ERR(new_root)) {
- btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
ret = PTR_ERR(new_root);
+ btrfs_abort_transaction(trans, root, ret);
goto fail;
}
@@ -1318,7 +1318,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
i = range->start >> PAGE_CACHE_SHIFT;
}
if (!max_to_defrag)
- max_to_defrag = last_index + 1;
+ max_to_defrag = last_index - i + 1;
/*
* make writeback start from i, so the defrag range can be
@@ -1368,7 +1368,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ra_index = max(i, ra_index);
btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
cluster);
- ra_index += max_cluster;
+ ra_index += cluster;
}
mutex_lock(&inode->i_mutex);
@@ -2271,10 +2271,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
{
struct btrfs_ioctl_ino_lookup_args *args;
struct inode *inode;
- int ret;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ int ret = 0;
args = memdup_user(argp, sizeof(*args));
if (IS_ERR(args))
@@ -2282,13 +2279,28 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
inode = file_inode(file);
+ /*
+ * Unprivileged query to obtain the containing subvolume root id. The
+ * path is reset so it's consistent with btrfs_search_path_in_tree.
+ */
if (args->treeid == 0)
args->treeid = BTRFS_I(inode)->root->root_key.objectid;
+ if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
+ args->name[0] = 0;
+ goto out;
+ }
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out;
+ }
+
ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
args->treeid, args->objectid,
args->name);
+out:
if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
ret = -EFAULT;
@@ -2413,8 +2425,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto out_unlock_inode;
}
- d_invalidate(dentry);
-
down_write(&root->fs_info->subvol_sem);
err = may_destroy_subvol(dest);
@@ -2508,7 +2518,7 @@ out_up_write:
out_unlock_inode:
mutex_unlock(&inode->i_mutex);
if (!err) {
- shrink_dcache_sb(root->fs_info->sb);
+ d_invalidate(dentry);
btrfs_invalidate_inodes(dest);
d_delete(dentry);
ASSERT(dest->send_in_progress == 0);
@@ -2879,12 +2889,19 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
return ret;
}
-static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
+static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
+ u64 olen)
{
+ u64 len = *plen;
u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
- if (off + len > inode->i_size || off + len < off)
+ if (off + olen > inode->i_size || off + olen < off)
return -EINVAL;
+
+ /* if we extend to eof, continue to block boundary */
+ if (off + len == inode->i_size)
+ *plen = len = ALIGN(inode->i_size, bs) - off;
+
/* Check that we are block aligned - btrfs_clone() requires this */
if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
return -EINVAL;
@@ -2892,10 +2909,11 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
return 0;
}
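/*
 * Worked example of the new EOF handling above (illustrative numbers):
 * with a 4096 byte block size, i_size = 10000, off = 8192 and olen = 1808
 * (so off + olen == i_size), the length is extended to the block boundary:
 *
 *	len = ALIGN(10000, 4096) - 8192 = 12288 - 8192 = 4096
 *
 * which lets the final partial block be deduped, while a range ending
 * before EOF must still be block aligned or -EINVAL is returned.
 */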
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
struct inode *dst, u64 dst_loff)
{
int ret;
+ u64 len = olen;
/*
* btrfs_clone() can't handle extents in the same file
@@ -2910,11 +2928,11 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
btrfs_double_lock(src, loff, dst, dst_loff, len);
- ret = extent_same_check_offsets(src, loff, len);
+ ret = extent_same_check_offsets(src, loff, &len, olen);
if (ret)
goto out_unlock;
- ret = extent_same_check_offsets(dst, dst_loff, len);
+ ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
if (ret)
goto out_unlock;
@@ -2927,7 +2945,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
if (ret == 0)
- ret = btrfs_clone(src, dst, loff, len, len, dst_loff);
+ ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
out_unlock:
btrfs_double_unlock(src, loff, dst, dst_loff, len);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 760c4a5e096b..89656d799ff6 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -198,9 +198,6 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->file_offset = file_offset;
entry->start = start;
entry->len = len;
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
- !(type == BTRFS_ORDERED_NOCOW))
- entry->csum_bytes_left = disk_len;
entry->disk_len = disk_len;
entry->bytes_left = len;
entry->inode = igrab(inode);
@@ -286,10 +283,6 @@ void btrfs_add_ordered_sum(struct inode *inode,
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
list_add_tail(&sum->list, &entry->list);
- WARN_ON(entry->csum_bytes_left < sum->len);
- entry->csum_bytes_left -= sum->len;
- if (entry->csum_bytes_left == 0)
- wake_up(&entry->wait);
spin_unlock_irq(&tree->lock);
}
@@ -509,7 +502,21 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
&ordered->flags));
- list_add_tail(&ordered->trans_list, &trans->ordered);
+ /*
+ * If our ordered extent completed it means it updated the
+ * fs/subvol and csum trees already, so no need to make the
+ * current transaction's commit wait for it, as we end up
+ * holding memory unnecessarily and delaying the inode's iput
+ * until the transaction commit (we schedule an iput for the
+ * inode when the ordered extent's refcount drops to 0), which
+ * prevents it from being evictable until the transaction
+ * commits.
+ */
+ if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
+ btrfs_put_ordered_extent(ordered);
+ else
+ list_add_tail(&ordered->trans_list, &trans->ordered);
+
spin_lock_irq(&log->log_extents_lock[index]);
}
spin_unlock_irq(&log->log_extents_lock[index]);
@@ -844,6 +851,20 @@ out:
return entry;
}
+bool btrfs_have_ordered_extents_in_range(struct inode *inode,
+ u64 file_offset,
+ u64 len)
+{
+ struct btrfs_ordered_extent *oe;
+
+ oe = btrfs_lookup_ordered_range(inode, file_offset, len);
+ if (oe) {
+ btrfs_put_ordered_extent(oe);
+ return true;
+ }
+ return false;
+}
+
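/*
 * Note on the helper above: its user in this diff is the btrfs_sync_file()
 * change earlier in the series, which lets a fast fsync avoid the
 * transaction-commit fallback when no ordered extent overlaps the
 * [file_offset, file_offset + len) range being synced.
 */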
/*
* lookup and return any extent before 'file_offset'. NULL is returned
* if none is found
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index e96cd4ccd805..7176cc0fe43f 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -89,9 +89,6 @@ struct btrfs_ordered_extent {
/* number of bytes that still need writing */
u64 bytes_left;
- /* number of bytes that still need csumming */
- u64 csum_bytes_left;
-
/*
* the end of the ordered extent which is behind it but
* didn't update disk_i_size. Please see the comment of
@@ -191,6 +188,9 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
u64 file_offset,
u64 len);
+bool btrfs_have_ordered_extents_in_range(struct inode *inode,
+ u64 file_offset,
+ u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 3d6546581bb9..d5f1f033b7a0 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -34,6 +34,7 @@
#include "extent_io.h"
#include "qgroup.h"
+
/* TODO XXX FIXME
* - subvol delete -> delete when ref goes to 0? delete limits also?
* - reorganize keys
@@ -84,11 +85,42 @@ struct btrfs_qgroup {
/*
* temp variables for accounting operations
+ * Refer to qgroup_shared_accounting() for details.
*/
u64 old_refcnt;
u64 new_refcnt;
};
+static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
+ int mod)
+{
+ if (qg->old_refcnt < seq)
+ qg->old_refcnt = seq;
+ qg->old_refcnt += mod;
+}
+
+static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
+ int mod)
+{
+ if (qg->new_refcnt < seq)
+ qg->new_refcnt = seq;
+ qg->new_refcnt += mod;
+}
+
+static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+ if (qg->old_refcnt < seq)
+ return 0;
+ return qg->old_refcnt - seq;
+}
+
+static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+ if (qg->new_refcnt < seq)
+ return 0;
+ return qg->new_refcnt - seq;
+}
+
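/*
 * Worked example of the seq trick above (illustrative values): refcnts are
 * never reset to 0; the per-run `seq` acts as a floating zero point.
 * Suppose seq == 100 for this run and qg->old_refcnt == 57, stale from an
 * earlier run:
 *
 *	btrfs_qgroup_update_old_refcnt(qg, 100, 1)
 *		old_refcnt = max(57, 100) + 1 = 101
 *	btrfs_qgroup_get_old_refcnt(qg, 100)
 *		returns 101 - 100 = 1
 *
 * so any stale counter below seq reads back as 0 without walking and
 * resetting every qgroup between runs.
 */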
/*
* glue structure to represent the relations between qgroups.
*/
@@ -1115,14 +1147,14 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct ulist *tmp;
int ret = 0;
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- return -ENOMEM;
-
/* Check the level of src and dst first */
if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
return -EINVAL;
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ return -ENOMEM;
+
mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
if (!quota_root) {
@@ -1356,239 +1388,86 @@ out:
return ret;
}
-static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
- struct btrfs_qgroup_operation *oper2)
+int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
{
- /*
- * Ignore seq and type here, we're looking for any operation
- * at all related to this extent on that root.
- */
- if (oper1->bytenr < oper2->bytenr)
- return -1;
- if (oper1->bytenr > oper2->bytenr)
- return 1;
- if (oper1->ref_root < oper2->ref_root)
- return -1;
- if (oper1->ref_root > oper2->ref_root)
- return 1;
- return 0;
-}
+ struct btrfs_qgroup_extent_record *record;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct rb_node *node;
+ u64 qgroup_to_skip;
+ int ret = 0;
-static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct rb_node *n;
- struct btrfs_qgroup_operation *cur;
- int cmp;
+ delayed_refs = &trans->transaction->delayed_refs;
+ qgroup_to_skip = delayed_refs->qgroup_to_skip;
- spin_lock(&fs_info->qgroup_op_lock);
- n = fs_info->qgroup_op_tree.rb_node;
- while (n) {
- cur = rb_entry(n, struct btrfs_qgroup_operation, n);
- cmp = comp_oper_exist(cur, oper);
- if (cmp < 0) {
- n = n->rb_right;
- } else if (cmp) {
- n = n->rb_left;
- } else {
- spin_unlock(&fs_info->qgroup_op_lock);
- return -EEXIST;
- }
+ /*
+	 * No need to take the lock, since this function is only called from
+	 * btrfs_commit_transaction().
+ */
+ node = rb_first(&delayed_refs->dirty_extent_root);
+ while (node) {
+ record = rb_entry(node, struct btrfs_qgroup_extent_record,
+ node);
+ ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
+ &record->old_roots);
+ if (ret < 0)
+ break;
+ if (qgroup_to_skip)
+ ulist_del(record->old_roots, qgroup_to_skip, 0);
+ node = rb_next(node);
}
- spin_unlock(&fs_info->qgroup_op_lock);
- return 0;
-}
-
-static int comp_oper(struct btrfs_qgroup_operation *oper1,
- struct btrfs_qgroup_operation *oper2)
-{
- if (oper1->bytenr < oper2->bytenr)
- return -1;
- if (oper1->bytenr > oper2->bytenr)
- return 1;
- if (oper1->ref_root < oper2->ref_root)
- return -1;
- if (oper1->ref_root > oper2->ref_root)
- return 1;
- if (oper1->seq < oper2->seq)
- return -1;
- if (oper1->seq > oper2->seq)
- return 1;
- if (oper1->type < oper2->type)
- return -1;
- if (oper1->type > oper2->type)
- return 1;
- return 0;
+ return ret;
}
-static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
+struct btrfs_qgroup_extent_record
+*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record)
{
- struct rb_node **p;
- struct rb_node *parent = NULL;
- struct btrfs_qgroup_operation *cur;
- int cmp;
+ struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
+ struct rb_node *parent_node = NULL;
+ struct btrfs_qgroup_extent_record *entry;
+ u64 bytenr = record->bytenr;
- spin_lock(&fs_info->qgroup_op_lock);
- p = &fs_info->qgroup_op_tree.rb_node;
while (*p) {
- parent = *p;
- cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
- cmp = comp_oper(cur, oper);
- if (cmp < 0) {
- p = &(*p)->rb_right;
- } else if (cmp) {
+ parent_node = *p;
+ entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
+ node);
+ if (bytenr < entry->bytenr)
p = &(*p)->rb_left;
- } else {
- spin_unlock(&fs_info->qgroup_op_lock);
- return -EEXIST;
- }
- }
- rb_link_node(&oper->n, parent, p);
- rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
- spin_unlock(&fs_info->qgroup_op_lock);
- return 0;
-}
-
-/*
- * Record a quota operation for processing later on.
- * @trans: the transaction we are adding the delayed op to.
- * @fs_info: the fs_info for this fs.
- * @ref_root: the root of the reference we are acting on,
- * @bytenr: the bytenr we are acting on.
- * @num_bytes: the number of bytes in the reference.
- * @type: the type of operation this is.
- * @mod_seq: do we need to get a sequence number for looking up roots.
- *
- * We just add it to our trans qgroup_ref_list and carry on and process these
- * operations in order at some later point. If the reference root isn't a fs
- * root then we don't bother with doing anything.
- *
- * MUST BE HOLDING THE REF LOCK.
- */
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 ref_root,
- u64 bytenr, u64 num_bytes,
- enum btrfs_qgroup_operation_type type, int mod_seq)
-{
- struct btrfs_qgroup_operation *oper;
- int ret;
-
- if (!is_fstree(ref_root) || !fs_info->quota_enabled)
- return 0;
-
- oper = kmalloc(sizeof(*oper), GFP_NOFS);
- if (!oper)
- return -ENOMEM;
-
- oper->ref_root = ref_root;
- oper->bytenr = bytenr;
- oper->num_bytes = num_bytes;
- oper->type = type;
- oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
- INIT_LIST_HEAD(&oper->elem.list);
- oper->elem.seq = 0;
-
- trace_btrfs_qgroup_record_ref(oper);
-
- if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
- /*
- * If any operation for this bytenr/ref_root combo
- * exists, then we know it's not exclusively owned and
- * shouldn't be queued up.
- *
- * This also catches the case where we have a cloned
- * extent that gets queued up multiple times during
- * drop snapshot.
- */
- if (qgroup_oper_exists(fs_info, oper)) {
- kfree(oper);
- return 0;
- }
- }
-
- ret = insert_qgroup_oper(fs_info, oper);
- if (ret) {
- /* Shouldn't happen so have an assert for developers */
- ASSERT(0);
- kfree(oper);
- return ret;
+ else if (bytenr > entry->bytenr)
+ p = &(*p)->rb_right;
+ else
+ return entry;
}
- list_add_tail(&oper->list, &trans->qgroup_ref_list);
- if (mod_seq)
- btrfs_get_tree_mod_seq(fs_info, &oper->elem);
-
- return 0;
-}
-
-static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct ulist *tmp;
- int sign = 0;
- int ret = 0;
-
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- return -ENOMEM;
-
- spin_lock(&fs_info->qgroup_lock);
- if (!fs_info->quota_root)
- goto out;
-
- switch (oper->type) {
- case BTRFS_QGROUP_OPER_ADD_EXCL:
- sign = 1;
- break;
- case BTRFS_QGROUP_OPER_SUB_EXCL:
- sign = -1;
- break;
- default:
- ASSERT(0);
- }
- ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
- oper->num_bytes, sign);
-out:
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(tmp);
- return ret;
+ rb_link_node(&record->node, parent_node, p);
+ rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
+ return NULL;
}
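/*
 * Caller-side sketch (hypothetical variable names, assuming the caller
 * holds delayed_refs->lock): the function returns the existing entry when
 * the bytenr is already tracked, so the caller can drop its own allocation
 * and rely on the record already in dirty_extent_root.
 */
struct btrfs_qgroup_extent_record *existing;

existing = btrfs_qgroup_insert_dirty_extent(delayed_refs, record);
if (existing)
	kfree(record);	/* bytenr already tracked; keep using `existing` */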
+#define UPDATE_NEW 0
+#define UPDATE_OLD 1
/*
- * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
- * properly.
+ * Walk all of the roots that point to the bytenr and adjust their refcnts.
*/
-static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
- u64 root_to_skip, struct ulist *tmp,
- struct ulist *roots, struct ulist *qgroups,
- u64 seq, int *old_roots, int rescan)
+static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
+ struct ulist *roots, struct ulist *tmp,
+ struct ulist *qgroups, u64 seq, int update_old)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
struct ulist_node *tmp_unode;
struct ulist_iterator tmp_uiter;
struct btrfs_qgroup *qg;
- int ret;
+ int ret = 0;
+ if (!roots)
+ return 0;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(roots, &uiter))) {
- /* We don't count our current root here */
- if (unode->val == root_to_skip)
- continue;
qg = find_qgroup_rb(fs_info, unode->val);
if (!qg)
continue;
- /*
- * We could have a pending removal of this same ref so we may
- * not have actually found our ref root when doing
- * btrfs_find_all_roots, so we need to keep track of how many
- * old roots we find in case we removed ours and added a
- * different one at the same time. I don't think this could
- * happen in practice but that sort of thinking leads to pain
- * and suffering and to the dark side.
- */
- (*old_roots)++;
ulist_reinit(tmp);
ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
@@ -1603,29 +1482,10 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_list *glist;
qg = u64_to_ptr(tmp_unode->aux);
- /*
- * We use this sequence number to keep from having to
- * run the whole list and 0 out the refcnt every time.
- * We basically use sequnce as the known 0 count and
- * then add 1 everytime we see a qgroup. This is how we
- * get how many of the roots actually point up to the
- * upper level qgroups in order to determine exclusive
- * counts.
- *
- * For rescan we want to set old_refcnt to seq so our
- * exclusive calculations end up correct.
- */
- if (rescan)
- qg->old_refcnt = seq;
- else if (qg->old_refcnt < seq)
- qg->old_refcnt = seq + 1;
+ if (update_old)
+ btrfs_qgroup_update_old_refcnt(qg, seq, 1);
else
- qg->old_refcnt++;
-
- if (qg->new_refcnt < seq)
- qg->new_refcnt = seq + 1;
- else
- qg->new_refcnt++;
+ btrfs_qgroup_update_new_refcnt(qg, seq, 1);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(qgroups, glist->group->qgroupid,
ptr_to_u64(glist->group),
@@ -1644,161 +1504,46 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
}
/*
- * We need to walk forward in our operation tree and account for any roots that
- * were deleted after we made this operation.
- */
-static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper,
- struct ulist *tmp,
- struct ulist *qgroups, u64 seq,
- int *old_roots)
-{
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- struct btrfs_qgroup *qg;
- struct btrfs_qgroup_operation *tmp_oper;
- struct rb_node *n;
- int ret;
-
- ulist_reinit(tmp);
-
- /*
- * We only walk forward in the tree since we're only interested in
- * removals that happened _after_ our operation.
- */
- spin_lock(&fs_info->qgroup_op_lock);
- n = rb_next(&oper->n);
- spin_unlock(&fs_info->qgroup_op_lock);
- if (!n)
- return 0;
- tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
- while (tmp_oper->bytenr == oper->bytenr) {
- /*
- * If it's not a removal we don't care, additions work out
- * properly with our refcnt tracking.
- */
- if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
- tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
- goto next;
- qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
- if (!qg)
- goto next;
- ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
- GFP_ATOMIC);
- if (ret) {
- if (ret < 0)
- return ret;
- /*
- * We only want to increase old_roots if this qgroup is
- * not already in the list of qgroups. If it is already
- * there then that means it must have been re-added or
- * the delete will be discarded because we had an
- * existing ref that we haven't looked up yet. In this
- * case we don't want to increase old_roots. So if ret
- * == 1 then we know that this is the first time we've
- * seen this qgroup and we can bump the old_roots.
- */
- (*old_roots)++;
- ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
- GFP_ATOMIC);
- if (ret < 0)
- return ret;
- }
-next:
- spin_lock(&fs_info->qgroup_op_lock);
- n = rb_next(&tmp_oper->n);
- spin_unlock(&fs_info->qgroup_op_lock);
- if (!n)
- break;
- tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
- }
-
- /* Ok now process the qgroups we found */
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(tmp, &uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = u64_to_ptr(unode->aux);
- if (qg->old_refcnt < seq)
- qg->old_refcnt = seq + 1;
- else
- qg->old_refcnt++;
- if (qg->new_refcnt < seq)
- qg->new_refcnt = seq + 1;
- else
- qg->new_refcnt++;
- list_for_each_entry(glist, &qg->groups, next_group) {
- ret = ulist_add(qgroups, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ret = ulist_add(tmp, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
-}
-
-/* Add refcnt for the newly added reference. */
-static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper,
- struct btrfs_qgroup *qgroup,
- struct ulist *tmp, struct ulist *qgroups,
- u64 seq)
-{
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- struct btrfs_qgroup *qg;
- int ret;
-
- ulist_reinit(tmp);
- ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
- GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
- GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(tmp, &uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = u64_to_ptr(unode->aux);
- if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
- if (qg->new_refcnt < seq)
- qg->new_refcnt = seq + 1;
- else
- qg->new_refcnt++;
- } else {
- if (qg->old_refcnt < seq)
- qg->old_refcnt = seq + 1;
- else
- qg->old_refcnt++;
- }
- list_for_each_entry(glist, &qg->groups, next_group) {
- ret = ulist_add(tmp, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ret = ulist_add(qgroups, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * This adjusts the counters for all referenced qgroups if need be.
+ * Update qgroup rfer/excl counters.
+ * Rfer update is easy, the code explains itself.
+ *
+ * Excl update is tricky, so the update is split into 2 parts.
+ * Part 1: Possible exclusive <-> sharing detect:
+ * | A | !A |
+ * -------------------------------------
+ * B | * | - |
+ * -------------------------------------
+ * !B | + | ** |
+ * -------------------------------------
+ *
+ * Conditions:
+ * A: cur_old_roots < nr_old_roots (not exclusive before)
+ * !A: cur_old_roots == nr_old_roots (possibly exclusive before)
+ * B: cur_new_roots < nr_new_roots (not exclusive now)
+ * !B: cur_new_roots == nr_new_roots (possibly exclusive now)
+ *
+ * Results:
+ * +: Possible sharing -> exclusive -: Possible exclusive -> sharing
+ * *: Definitely not changed. **: Possibly unchanged.
+ *
+ * For the !A and !B conditions, the exception is the cur_old/new_roots == 0
+ * case.
+ *
+ * To make the logic clear, we first use conditions A and B to split the
+ * combinations into 4 results.
+ *
+ * Then, for results "+" and "-", check the old/new_roots == 0 case, as in
+ * them only one variable may be 0.
+ *
+ * Lastly, check result **; since there are 2 variables that may be 0, split
+ * them again (2x2). This time no other factors need to be considered, and
+ * the code and logic are easy to follow.
*/
-static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
- u64 root_to_skip, u64 num_bytes,
- struct ulist *qgroups, u64 seq,
- int old_roots, int new_roots, int rescan)
+static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
+ struct ulist *qgroups,
+ u64 nr_old_roots,
+ u64 nr_new_roots,
+ u64 num_bytes, u64 seq)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
@@ -1810,423 +1555,191 @@ static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
bool dirty = false;
qg = u64_to_ptr(unode->aux);
- /*
- * Wasn't referenced before but is now, add to the reference
- * counters.
- */
- if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
+ cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
+ cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
+
+ /* Rfer update part */
+ if (cur_old_count == 0 && cur_new_count > 0) {
qg->rfer += num_bytes;
qg->rfer_cmpr += num_bytes;
dirty = true;
}
-
- /*
- * Was referenced before but isn't now, subtract from the
- * reference counters.
- */
- if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
+ if (cur_old_count > 0 && cur_new_count == 0) {
qg->rfer -= num_bytes;
qg->rfer_cmpr -= num_bytes;
dirty = true;
}
- if (qg->old_refcnt < seq)
- cur_old_count = 0;
- else
- cur_old_count = qg->old_refcnt - seq;
- if (qg->new_refcnt < seq)
- cur_new_count = 0;
- else
- cur_new_count = qg->new_refcnt - seq;
-
- /*
- * If our refcount was the same as the roots previously but our
- * new count isn't the same as the number of roots now then we
- * went from having a exclusive reference on this range to not.
- */
- if (old_roots && cur_old_count == old_roots &&
- (cur_new_count != new_roots || new_roots == 0)) {
- WARN_ON(cur_new_count != new_roots && new_roots == 0);
- qg->excl -= num_bytes;
- qg->excl_cmpr -= num_bytes;
- dirty = true;
+ /* Excl update part */
+ /* Exclusive/none -> shared case */
+ if (cur_old_count == nr_old_roots &&
+ cur_new_count < nr_new_roots) {
+ /* Exclusive -> shared */
+ if (cur_old_count != 0) {
+ qg->excl -= num_bytes;
+ qg->excl_cmpr -= num_bytes;
+ dirty = true;
+ }
}
- /*
- * If we didn't reference all the roots before but now we do we
- * have an exclusive reference to this range.
- */
- if ((!old_roots || (old_roots && cur_old_count != old_roots))
- && cur_new_count == new_roots) {
- qg->excl += num_bytes;
- qg->excl_cmpr += num_bytes;
- dirty = true;
+ /* Shared -> exclusive/none case */
+ if (cur_old_count < nr_old_roots &&
+ cur_new_count == nr_new_roots) {
+ /* Shared->exclusive */
+ if (cur_new_count != 0) {
+ qg->excl += num_bytes;
+ qg->excl_cmpr += num_bytes;
+ dirty = true;
+ }
}
+ /* Exclusive/none -> exclusive/none case */
+ if (cur_old_count == nr_old_roots &&
+ cur_new_count == nr_new_roots) {
+ if (cur_old_count == 0) {
+ /* None -> exclusive/none */
+
+ if (cur_new_count != 0) {
+ /* None -> exclusive */
+ qg->excl += num_bytes;
+ qg->excl_cmpr += num_bytes;
+ dirty = true;
+ }
+ /* None -> none, nothing changed */
+ } else {
+ /* Exclusive -> exclusive/none */
+
+ if (cur_new_count == 0) {
+ /* Exclusive -> none */
+ qg->excl -= num_bytes;
+ qg->excl_cmpr -= num_bytes;
+ dirty = true;
+ }
+ /* Exclusive -> exclusive, nothing changed */
+ }
+ }
if (dirty)
qgroup_dirty(fs_info, qg);
}
return 0;
}
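/*
 * Worked example for the table above (illustrative numbers): an extent of
 * num_bytes = 16K goes from nr_old_roots = 2 to nr_new_roots = 1. For the
 * surviving qgroup, cur_old_count = 1 and cur_new_count = 1:
 *
 *	rfer: cur_old_count > 0 and cur_new_count > 0 -> unchanged
 *	excl: cur_old_count (1) <  nr_old_roots (2)	(condition A)
 *	      cur_new_count (1) == nr_new_roots (1)	(condition !B)
 *	      -> shared becomes exclusive: excl += 16K
 *
 * For the qgroup whose root dropped its reference, cur_new_count = 0, so
 * rfer -= 16K, while excl stays untouched since that qgroup never owned
 * the extent exclusively.
 */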
-/*
- * If we removed a data extent and there were other references for that bytenr
- * then we need to lookup all referenced roots to make sure we still don't
- * reference this bytenr. If we do then we can just discard this operation.
- */
-static int check_existing_refs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct ulist *roots = NULL;
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- int ret = 0;
-
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
- oper->elem.seq, &roots);
- if (ret < 0)
- return ret;
- ret = 0;
-
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(roots, &uiter))) {
- if (unode->val == oper->ref_root) {
- ret = 1;
- break;
- }
- }
- ulist_free(roots);
- btrfs_put_tree_mod_seq(fs_info, &oper->elem);
-
- return ret;
-}
-
-/*
- * If we share a reference across multiple roots then we may need to adjust
- * various qgroups referenced and exclusive counters. The basic premise is this
- *
- * 1) We have seq to represent a 0 count. Instead of looping through all of the
- * qgroups and resetting their refcount to 0 we just constantly bump this
- * sequence number to act as the base reference count. This means that if
- * anybody is equal to or below this sequence they were never referenced. We
- * jack this sequence up by the number of roots we found each time in order to
- * make sure we don't have any overlap.
- *
- * 2) We first search all the roots that reference the area _except_ the root
- * we're acting on currently. This makes up the old_refcnt of all the qgroups
- * before.
- *
- * 3) We walk all of the qgroups referenced by the root we are currently acting
- * on, and will either adjust old_refcnt in the case of a removal or the
- * new_refcnt in the case of an addition.
- *
- * 4) Finally we walk all the qgroups that are referenced by this range
- * including the root we are acting on currently. We will adjust the counters
- * based on the number of roots we had and will have after this operation.
- *
- * Take this example as an illustration
- *
- * [qgroup 1/0]
- * / | \
- * [qg 0/0] [qg 0/1] [qg 0/2]
- * \ | /
- * [ extent ]
- *
- * Say we are adding a reference that is covered by qg 0/0. The first step
- * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
- * old_roots being 2. Because it is adding new_roots will be 1. We then go
- * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
- * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we
- * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
- * reference and thus must add the size to the referenced bytes. Everything
- * else is the same so nothing else changes.
- */
-static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
+int
+btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ u64 bytenr, u64 num_bytes,
+ struct ulist *old_roots, struct ulist *new_roots)
{
- struct ulist *roots = NULL;
- struct ulist *qgroups, *tmp;
- struct btrfs_qgroup *qgroup;
- struct seq_list elem = SEQ_LIST_INIT(elem);
+ struct ulist *qgroups = NULL;
+ struct ulist *tmp = NULL;
u64 seq;
- int old_roots = 0;
- int new_roots = 0;
+ u64 nr_new_roots = 0;
+ u64 nr_old_roots = 0;
int ret = 0;
- if (oper->elem.seq) {
- ret = check_existing_refs(trans, fs_info, oper);
- if (ret < 0)
- return ret;
- if (ret)
- return 0;
- }
+ if (new_roots)
+ nr_new_roots = new_roots->nnodes;
+ if (old_roots)
+ nr_old_roots = old_roots->nnodes;
- qgroups = ulist_alloc(GFP_NOFS);
- if (!qgroups)
- return -ENOMEM;
+ if (!fs_info->quota_enabled)
+ goto out_free;
+ BUG_ON(!fs_info->quota_root);
+ qgroups = ulist_alloc(GFP_NOFS);
+ if (!qgroups) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
tmp = ulist_alloc(GFP_NOFS);
if (!tmp) {
- ulist_free(qgroups);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_free;
}
- btrfs_get_tree_mod_seq(fs_info, &elem);
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
- &roots);
- btrfs_put_tree_mod_seq(fs_info, &elem);
- if (ret < 0) {
- ulist_free(qgroups);
- ulist_free(tmp);
- return ret;
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ ret = 0;
+ goto out_free;
+ }
}
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
spin_lock(&fs_info->qgroup_lock);
- qgroup = find_qgroup_rb(fs_info, oper->ref_root);
- if (!qgroup)
- goto out;
seq = fs_info->qgroup_seq;
- /*
- * So roots is the list of all the roots currently pointing at the
- * bytenr, including the ref we are adding if we are adding, or not if
- * we are removing a ref. So we pass in the ref_root to skip that root
- * in our calculations. We set old_refnct and new_refcnt cause who the
- * hell knows what everything looked like before, and it doesn't matter
- * except...
- */
- ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
- seq, &old_roots, 0);
+ /* Update old refcnts using old_roots */
+ ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
+ UPDATE_OLD);
if (ret < 0)
goto out;
- /*
- * Now adjust the refcounts of the qgroups that care about this
- * reference, either the old_count in the case of removal or new_count
- * in the case of an addition.
- */
- ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
- seq);
+ /* Update new refcnts using new_roots */
+ ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
+ UPDATE_NEW);
if (ret < 0)
goto out;
- /*
- * ...in the case of removals. If we had a removal before we got around
- * to processing this operation then we need to find that guy and count
- * his references as if they really existed so we don't end up screwing
- * up the exclusive counts. Then whenever we go to process the delete
- * everything will be grand and we can account for whatever exclusive
- * changes need to be made there. We also have to pass in old_roots so
- * we have an accurate count of the roots as it pertains to this
- * operations view of the world.
- */
- ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
- &old_roots);
- if (ret < 0)
- goto out;
+ qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
+ num_bytes, seq);
/*
- * We are adding our root, need to adjust up the number of roots,
- * otherwise old_roots is the number of roots we want.
+ * Bump qgroup_seq to avoid seq overlap
*/
- if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
- new_roots = old_roots + 1;
- } else {
- new_roots = old_roots;
- old_roots++;
- }
- fs_info->qgroup_seq += old_roots + 1;
-
-
- /*
- * And now the magic happens, bless Arne for having a pretty elegant
- * solution for this.
- */
- qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
- qgroups, seq, old_roots, new_roots, 0);
+ fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
out:
spin_unlock(&fs_info->qgroup_lock);
- ulist_free(qgroups);
- ulist_free(roots);
+out_free:
ulist_free(tmp);
+ ulist_free(qgroups);
+ ulist_free(old_roots);
+ ulist_free(new_roots);
return ret;
}
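
The rewritten accounting reduces every case to one comparison per qgroup: tag each qgroup with an old and a new reference count under a fresh sequence number, then compare those counts against the total number of old and new roots. Below is a minimal userspace model of that decision; the names and the simplified rule table are illustrative, not the kernel's exact qgroup_update_counters() logic.

    #include <stdio.h>

    struct qg {
        const char *name;
        unsigned long old_refcnt;   /* refs seen while walking old_roots */
        unsigned long new_refcnt;   /* refs seen while walking new_roots */
    };

    static void account(struct qg *q, unsigned long nr_old,
                        unsigned long nr_new, unsigned long nbytes)
    {
        /* Referenced before but not after: drop referenced bytes */
        if (q->old_refcnt && !q->new_refcnt)
            printf("%s: rfer -= %lu\n", q->name, nbytes);
        /* Newly referenced: add referenced bytes */
        if (!q->old_refcnt && q->new_refcnt)
            printf("%s: rfer += %lu\n", q->name, nbytes);
        /* Exclusive only if this qgroup covers every root of the extent */
        if (q->old_refcnt < nr_old && q->new_refcnt == nr_new)
            printf("%s: excl += %lu\n", q->name, nbytes);
        if (q->old_refcnt == nr_old && q->new_refcnt < nr_new)
            printf("%s: excl -= %lu\n", q->name, nbytes);
    }

    int main(void)
    {
        /* A reflink adds a second root: the qgroup keeps its referenced
         * bytes but loses exclusivity (prints "excl -= 4096"). */
        struct qg a = { "qg 0/5", 1, 1 };
        account(&a, 1, 2, 4096);
        return 0;
    }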
-/*
- * Process a reference to a shared subtree. This type of operation is
- * queued during snapshot removal when we encounter extents which are
- * shared between more than one root.
- */
-static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct ulist *roots = NULL;
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- struct btrfs_qgroup_list *glist;
- struct ulist *parents;
- int ret = 0;
- int err;
- struct btrfs_qgroup *qg;
- u64 root_obj = 0;
- struct seq_list elem = SEQ_LIST_INIT(elem);
-
- parents = ulist_alloc(GFP_NOFS);
- if (!parents)
- return -ENOMEM;
-
- btrfs_get_tree_mod_seq(fs_info, &elem);
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
- elem.seq, &roots);
- btrfs_put_tree_mod_seq(fs_info, &elem);
- if (ret < 0)
- goto out;
-
- if (roots->nnodes != 1)
- goto out;
-
- ULIST_ITER_INIT(&uiter);
- unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
- /*
- * If we find our ref root then that means all refs
- * this extent has to the root have not yet been
- * deleted. In that case, we do nothing and let the
- * last ref for this bytenr drive our update.
- *
- * This can happen for example if an extent is
- * referenced multiple times in a snapshot (clone,
- * etc). If we are in the middle of snapshot removal,
- * queued updates for such an extent will find the
- * root if we have not yet finished removing the
- * snapshot.
- */
- if (unode->val == oper->ref_root)
- goto out;
-
- root_obj = unode->val;
- BUG_ON(!root_obj);
-
- spin_lock(&fs_info->qgroup_lock);
- qg = find_qgroup_rb(fs_info, root_obj);
- if (!qg)
- goto out_unlock;
-
- qg->excl += oper->num_bytes;
- qg->excl_cmpr += oper->num_bytes;
- qgroup_dirty(fs_info, qg);
-
- /*
- * Adjust counts for parent groups. First we find all
- * parents, then in the 2nd loop we do the adjustment
- * while adding parents of the parents to our ulist.
- */
- list_for_each_entry(glist, &qg->groups, next_group) {
- err = ulist_add(parents, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (err < 0) {
- ret = err;
- goto out_unlock;
- }
- }
-
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(parents, &uiter))) {
- qg = u64_to_ptr(unode->aux);
- qg->excl += oper->num_bytes;
- qg->excl_cmpr += oper->num_bytes;
- qgroup_dirty(fs_info, qg);
-
- /* Add any parents of the parents */
- list_for_each_entry(glist, &qg->groups, next_group) {
- err = ulist_add(parents, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (err < 0) {
- ret = err;
- goto out_unlock;
- }
- }
- }
-
-out_unlock:
- spin_unlock(&fs_info->qgroup_lock);
-
-out:
- ulist_free(roots);
- ulist_free(parents);
- return ret;
-}
-
-/*
- * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
- * from the fs. First, all roots referencing the extent are searched, and
- * then the space is accounted accordingly to the different roots. The
- * accounting algorithm works in 3 steps documented inline.
- */
-static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
{
+ struct btrfs_qgroup_extent_record *record;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct ulist *new_roots = NULL;
+ struct rb_node *node;
+ u64 qgroup_to_skip;
int ret = 0;
- if (!fs_info->quota_enabled)
- return 0;
-
- BUG_ON(!fs_info->quota_root);
+ delayed_refs = &trans->transaction->delayed_refs;
+ qgroup_to_skip = delayed_refs->qgroup_to_skip;
+ while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
+ record = rb_entry(node, struct btrfs_qgroup_extent_record,
+ node);
- mutex_lock(&fs_info->qgroup_rescan_lock);
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
- if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
- mutex_unlock(&fs_info->qgroup_rescan_lock);
- return 0;
+ if (!ret) {
+ /*
+			 * Use (u64)-1 as time_seq to do a special search, which
+			 * doesn't lock the tree or delayed_refs and searches the
+			 * current root. It's safe inside commit_transaction().
+ */
+ ret = btrfs_find_all_roots(trans, fs_info,
+ record->bytenr, (u64)-1, &new_roots);
+ if (ret < 0)
+ goto cleanup;
+ if (qgroup_to_skip)
+ ulist_del(new_roots, qgroup_to_skip, 0);
+ ret = btrfs_qgroup_account_extent(trans, fs_info,
+ record->bytenr, record->num_bytes,
+ record->old_roots, new_roots);
+ record->old_roots = NULL;
+ new_roots = NULL;
}
- }
- mutex_unlock(&fs_info->qgroup_rescan_lock);
+cleanup:
+ ulist_free(record->old_roots);
+ ulist_free(new_roots);
+ new_roots = NULL;
+ rb_erase(node, &delayed_refs->dirty_extent_root);
+ kfree(record);
- ASSERT(is_fstree(oper->ref_root));
-
- trace_btrfs_qgroup_account(oper);
-
- switch (oper->type) {
- case BTRFS_QGROUP_OPER_ADD_EXCL:
- case BTRFS_QGROUP_OPER_SUB_EXCL:
- ret = qgroup_excl_accounting(fs_info, oper);
- break;
- case BTRFS_QGROUP_OPER_ADD_SHARED:
- case BTRFS_QGROUP_OPER_SUB_SHARED:
- ret = qgroup_shared_accounting(trans, fs_info, oper);
- break;
- case BTRFS_QGROUP_OPER_SUB_SUBTREE:
- ret = qgroup_subtree_accounting(trans, fs_info, oper);
- break;
- default:
- ASSERT(0);
- }
- return ret;
-}
-
-/*
- * Needs to be called everytime we run delayed refs, even if there is an error
- * in order to cleanup outstanding operations.
- */
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct btrfs_qgroup_operation *oper;
- int ret = 0;
-
- while (!list_empty(&trans->qgroup_ref_list)) {
- oper = list_first_entry(&trans->qgroup_ref_list,
- struct btrfs_qgroup_operation, list);
- list_del_init(&oper->list);
- if (!ret || !trans->aborted)
- ret = btrfs_qgroup_account(trans, fs_info, oper);
- spin_lock(&fs_info->qgroup_op_lock);
- rb_erase(&oper->n, &fs_info->qgroup_op_tree);
- spin_unlock(&fs_info->qgroup_op_lock);
- btrfs_put_tree_mod_seq(fs_info, &oper->elem);
- kfree(oper);
}
return ret;
}
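
Taken together with the transaction.c hunks further down, the intended call order during commit is: record old_roots for every dirty extent while the old tree state is still visible, commit the fs roots, then resolve new_roots and apply the counter deltas. A sketch of that ordering, assuming only the two entry points declared in qgroup.h below, with error unwinding trimmed:

    /* Sketch only: the real call sites sit in btrfs_commit_transaction()
     * and create_pending_snapshot(), see the transaction.c hunks below. */
    static int qgroup_commit_phase(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info)
    {
        int ret;

        /* Phase 1: snapshot old_roots for each recorded dirty extent */
        ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
        if (ret)
            return ret;

        /* Phase 2: once fs roots are committed, resolve new_roots with
         * time_seq == (u64)-1 and update the counters */
        return btrfs_qgroup_account_extents(trans, fs_info);
    }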
@@ -2637,15 +2150,13 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
*/
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
- struct btrfs_trans_handle *trans, struct ulist *qgroups,
- struct ulist *tmp, struct extent_buffer *scratch_leaf)
+ struct btrfs_trans_handle *trans,
+ struct extent_buffer *scratch_leaf)
{
struct btrfs_key found;
struct ulist *roots = NULL;
struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
u64 num_bytes;
- u64 seq;
- int new_roots;
int slot;
int ret;
@@ -2695,33 +2206,15 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
else
num_bytes = found.offset;
- ulist_reinit(qgroups);
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
&roots);
if (ret < 0)
goto out;
- spin_lock(&fs_info->qgroup_lock);
- seq = fs_info->qgroup_seq;
- fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
-
- new_roots = 0;
- ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
- seq, &new_roots, 1);
- if (ret < 0) {
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(roots);
- goto out;
- }
-
- ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
- seq, 0, new_roots, 1);
- if (ret < 0) {
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(roots);
+ /* For rescan, just pass old_roots as NULL */
+ ret = btrfs_qgroup_account_extent(trans, fs_info,
+ found.objectid, num_bytes, NULL, roots);
+ if (ret < 0)
goto out;
- }
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(roots);
}
out:
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
@@ -2735,7 +2228,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
qgroup_rescan_work);
struct btrfs_path *path;
struct btrfs_trans_handle *trans = NULL;
- struct ulist *tmp = NULL, *qgroups = NULL;
struct extent_buffer *scratch_leaf = NULL;
int err = -ENOMEM;
int ret = 0;
@@ -2743,12 +2235,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
path = btrfs_alloc_path();
if (!path)
goto out;
- qgroups = ulist_alloc(GFP_NOFS);
- if (!qgroups)
- goto out;
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- goto out;
scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
if (!scratch_leaf)
goto out;
@@ -2764,7 +2250,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
err = -EINTR;
} else {
err = qgroup_rescan_leaf(fs_info, path, trans,
- qgroups, tmp, scratch_leaf);
+ scratch_leaf);
}
if (err > 0)
btrfs_commit_transaction(trans, fs_info->fs_root);
@@ -2774,8 +2260,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
out:
kfree(scratch_leaf);
- ulist_free(qgroups);
- ulist_free(tmp);
btrfs_free_path(path);
mutex_lock(&fs_info->qgroup_rescan_lock);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index c5242aa9a4b2..6387dcfa354c 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -19,43 +19,18 @@
#ifndef __BTRFS_QGROUP__
#define __BTRFS_QGROUP__
+#include "ulist.h"
+#include "delayed-ref.h"
+
/*
- * A description of the operations, all of these operations only happen when we
- * are adding the 1st reference for that subvolume in the case of adding space
- * or on the last reference delete in the case of subtraction. The only
- * exception is the last one, which is added for confusion.
- *
- * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
- * one pointing at the bytes we are adding. This is called on the first
- * allocation.
- *
- * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
- * shared between subvols. This is called on the creation of a ref that already
- * has refs from a different subvolume, so basically reflink.
- *
- * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
- * one referencing the range.
- *
- * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with
- * refs with other subvolumes.
+ * Record a dirty extent, and inform the qgroup code to update quota on it.
+ * TODO: Use kmem cache to alloc it.
*/
-enum btrfs_qgroup_operation_type {
- BTRFS_QGROUP_OPER_ADD_EXCL,
- BTRFS_QGROUP_OPER_ADD_SHARED,
- BTRFS_QGROUP_OPER_SUB_EXCL,
- BTRFS_QGROUP_OPER_SUB_SHARED,
- BTRFS_QGROUP_OPER_SUB_SUBTREE,
-};
-
-struct btrfs_qgroup_operation {
- u64 ref_root;
+struct btrfs_qgroup_extent_record {
+ struct rb_node node;
u64 bytenr;
u64 num_bytes;
- u64 seq;
- enum btrfs_qgroup_operation_type type;
- struct seq_list elem;
- struct rb_node n;
- struct list_head list;
+ struct ulist *old_roots;
};
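
A record is expected to be created when a delayed ref head is added for an extent; this header only declares the insert helper (just below), so the call-site sketch here is hypothetical, including the duplicate-handling convention that a non-NULL return means a record for that bytenr already exists.

    static int record_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
                                   u64 bytenr, u64 num_bytes)
    {
        struct btrfs_qgroup_extent_record *record;

        record = kmalloc(sizeof(*record), GFP_NOFS); /* per the TODO: kmem cache later */
        if (!record)
            return -ENOMEM;
        record->bytenr = bytenr;
        record->num_bytes = num_bytes;
        record->old_roots = NULL;   /* filled in by prepare_account_extents() */

        /* Assumed convention: insert keyed by bytenr; a non-NULL return
         * means a record already covers this bytenr, so ours is redundant. */
        if (btrfs_qgroup_insert_dirty_extent(delayed_refs, record))
            kfree(record);
        return 0;
    }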
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
@@ -79,16 +54,18 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 ref_root,
+int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
+struct btrfs_qgroup_extent_record
+*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record);
+int
+btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes,
- enum btrfs_qgroup_operation_type type,
- int mod_seq);
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper);
+ struct ulist *old_roots, struct ulist *new_roots);
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 74b24b01d574..827951fbf7fc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1847,8 +1847,10 @@ again:
}
eb = read_tree_block(dest, old_bytenr, old_ptr_gen);
- if (!eb || !extent_buffer_uptodate(eb)) {
- ret = (!eb) ? -ENOMEM : -EIO;
+ if (IS_ERR(eb)) {
+ ret = PTR_ERR(eb);
+ } else if (!extent_buffer_uptodate(eb)) {
+ ret = -EIO;
free_extent_buffer(eb);
break;
}
@@ -2002,7 +2004,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
bytenr = btrfs_node_blockptr(eb, path->slots[i]);
eb = read_tree_block(root, bytenr, ptr_gen);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ return PTR_ERR(eb);
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
@@ -2710,7 +2714,10 @@ static int do_relocation(struct btrfs_trans_handle *trans,
blocksize = root->nodesize;
generation = btrfs_node_ptr_generation(upper->eb, slot);
eb = read_tree_block(root, bytenr, generation);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ err = PTR_ERR(eb);
+ goto next;
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
err = -EIO;
goto next;
@@ -2873,7 +2880,9 @@ static int get_tree_block_key(struct reloc_control *rc,
BUG_ON(block->key_ready);
eb = read_tree_block(rc->extent_root, block->bytenr,
block->key.offset);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ return PTR_ERR(eb);
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ab5811545a98..9f2feabe99f2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2662,18 +2662,30 @@ static void scrub_free_parity(struct scrub_parity *sparity)
kfree(sparity);
}
+static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
+{
+ struct scrub_parity *sparity = container_of(work, struct scrub_parity,
+ work);
+ struct scrub_ctx *sctx = sparity->sctx;
+
+ scrub_free_parity(sparity);
+ scrub_pending_bio_dec(sctx);
+}
+
static void scrub_parity_bio_endio(struct bio *bio, int error)
{
struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
- struct scrub_ctx *sctx = sparity->sctx;
if (error)
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
- scrub_free_parity(sparity);
- scrub_pending_bio_dec(sctx);
bio_put(bio);
+
+ btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
+ scrub_parity_bio_endio_worker, NULL, NULL);
+ btrfs_queue_work(sparity->sctx->dev_root->fs_info->scrub_parity_workers,
+ &sparity->work);
}
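
The point of the change above is that bio end_io runs in a context that must not sleep, while freeing the parity context and decrementing the pending-bio count can ripple into sleeping paths; the work is therefore punted to a workqueue. The same pattern with the stock workqueue API, as an illustrative sketch rather than the btrfs wrappers used above:

    #include <linux/bio.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct defer_ctx {
        struct work_struct work;
        /* ... whatever state the completion handler needs ... */
    };

    static void defer_fn(struct work_struct *work)
    {
        struct defer_ctx *ctx = container_of(work, struct defer_ctx, work);

        /* Process context: sleeping and mutexes are fine here. */
        kfree(ctx);
    }

    static void my_bio_endio(struct bio *bio, int error)
    {
        struct defer_ctx *ctx = bio->bi_private;

        bio_put(bio);
        INIT_WORK(&ctx->work, defer_fn);
        schedule_work(&ctx->work);  /* endio context itself must not sleep */
    }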
static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
@@ -3589,6 +3601,13 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
ret = -ENOMEM;
goto out;
}
+ fs_info->scrub_parity_workers =
+ btrfs_alloc_workqueue("btrfs-scrubparity", flags,
+ max_active, 2);
+ if (!fs_info->scrub_parity_workers) {
+ ret = -ENOMEM;
+ goto out;
+ }
}
++fs_info->scrub_workers_refcnt;
out:
@@ -3601,6 +3620,7 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->scrub_workers);
btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+ btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
}
WARN_ON(fs_info->scrub_workers_refcnt < 0);
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a1216f9b4917..aa72bfd28f7d 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -243,6 +243,7 @@ struct waiting_dir_move {
* after this directory is moved, we can try to rmdir the ino rmdir_ino.
*/
u64 rmdir_ino;
+ bool orphanized;
};
struct orphan_dir_info {
@@ -1158,6 +1159,9 @@ struct backref_ctx {
/* may be truncated in case it's the last extent in a file */
u64 extent_len;
+ /* data offset in the file extent item */
+ u64 data_offset;
+
/* Just to check for bugs in backref resolving */
int found_itself;
};
@@ -1221,7 +1225,7 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
if (ret < 0)
return ret;
- if (offset + bctx->extent_len > i_size)
+ if (offset + bctx->data_offset + bctx->extent_len > i_size)
return 0;
/*
@@ -1363,6 +1367,19 @@ static int find_extent_clone(struct send_ctx *sctx,
backref_ctx->cur_offset = data_offset;
backref_ctx->found_itself = 0;
backref_ctx->extent_len = num_bytes;
+ /*
+ * For non-compressed extents iterate_extent_inodes() gives us extent
+ * offsets that already take into account the data offset, but not for
+ * compressed extents, since the offset is logical and not relative to
+ * the physical extent locations. We must take this into account to
+ * avoid sending clone offsets that go beyond the source file's size,
+ * which would result in the clone ioctl failing with -EINVAL on the
+ * receiving end.
+ */
+ if (compressed == BTRFS_COMPRESS_NONE)
+ backref_ctx->data_offset = 0;
+ else
+ backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);
/*
* The last extent of a file may be too large due to page alignment.
@@ -1900,8 +1917,13 @@ static int did_overwrite_ref(struct send_ctx *sctx,
goto out;
}
- /* we know that it is or will be overwritten. check this now */
- if (ow_inode < sctx->send_progress)
+ /*
+ * We know that it is or will be overwritten. Check this now.
+ * The current inode being processed might have been the one that caused
+ * inode 'ino' to be orphanized, therefore ow_inode can actually be the
+ * same as sctx->send_progress.
+ */
+ if (ow_inode <= sctx->send_progress)
ret = 1;
else
ret = 0;
@@ -2223,6 +2245,8 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
fs_path_reset(dest);
while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
+ struct waiting_dir_move *wdm;
+
fs_path_reset(name);
if (is_waiting_for_rm(sctx, ino)) {
@@ -2233,7 +2257,11 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
break;
}
- if (is_waiting_for_move(sctx, ino)) {
+ wdm = get_waiting_dir_move(sctx, ino);
+ if (wdm && wdm->orphanized) {
+ ret = gen_unique_name(sctx, ino, gen, name);
+ stop = 1;
+ } else if (wdm) {
ret = get_first_ref(sctx->parent_root, ino,
&parent_inode, &parent_gen, name);
} else {
@@ -2328,8 +2356,12 @@ static int send_subvol_begin(struct send_ctx *sctx)
TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
le64_to_cpu(sctx->send_root->root_item.ctransid));
if (parent_root) {
- TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
- sctx->parent_root->root_item.uuid);
+ if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+ parent_root->root_item.received_uuid);
+ else
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+ parent_root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
le64_to_cpu(sctx->parent_root->root_item.ctransid));
}
@@ -2923,7 +2955,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
return entry != NULL;
}
-static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
{
struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
struct rb_node *parent = NULL;
@@ -2934,6 +2966,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
return -ENOMEM;
dm->ino = ino;
dm->rmdir_ino = 0;
+ dm->orphanized = orphanized;
while (*p) {
parent = *p;
@@ -3030,7 +3063,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
goto out;
}
- ret = add_waiting_dir_move(sctx, pm->ino);
+ ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
if (ret)
goto out;
@@ -3353,8 +3386,40 @@ out:
return ret;
}
+/*
+ * Check if inode ino1 is an ancestor of inode ino2 in the given root.
+ * Return 1 if true, 0 if false and < 0 on error.
+ */
+static int is_ancestor(struct btrfs_root *root,
+ const u64 ino1,
+ const u64 ino1_gen,
+ const u64 ino2,
+ struct fs_path *fs_path)
+{
+ u64 ino = ino2;
+
+ while (ino > BTRFS_FIRST_FREE_OBJECTID) {
+ int ret;
+ u64 parent;
+ u64 parent_gen;
+
+ fs_path_reset(fs_path);
+ ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
+ if (ret < 0) {
+ if (ret == -ENOENT && ino == ino2)
+ ret = 0;
+ return ret;
+ }
+ if (parent == ino1)
+ return parent_gen == ino1_gen ? 1 : 0;
+ ino = parent;
+ }
+ return 0;
+}
+
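
A runnable userspace model of the walk is_ancestor() performs: follow each inode's first parent reference upward until ino1 is found or the subvolume root is reached. parent_of() stands in for get_first_ref(), and the generation check is omitted.

    #include <stdio.h>

    /* toy tree: 260 -> 258 -> 257 -> 256 (subvolume root) */
    static unsigned long long parent_of(unsigned long long ino)
    {
        switch (ino) {
        case 260: return 258;
        case 258: return 257;
        case 257: return 256;
        default:  return 0;
        }
    }

    static int toy_is_ancestor(unsigned long long ino1, unsigned long long ino2)
    {
        unsigned long long ino = ino2;

        while (ino > 256) {                /* BTRFS_FIRST_FREE_OBJECTID */
            unsigned long long parent = parent_of(ino);

            if (parent == ino1)
                return 1;
            ino = parent;
        }
        return 0;
    }

    int main(void)
    {
        /* prints "1 0": 257 is an ancestor of 260, 259 is not */
        printf("%d %d\n", toy_is_ancestor(257, 260), toy_is_ancestor(259, 260));
        return 0;
    }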
static int wait_for_parent_move(struct send_ctx *sctx,
- struct recorded_ref *parent_ref)
+ struct recorded_ref *parent_ref,
+ const bool is_orphan)
{
int ret = 0;
u64 ino = parent_ref->dir;
@@ -3374,11 +3439,24 @@ static int wait_for_parent_move(struct send_ctx *sctx,
* Our current directory inode may not yet be renamed/moved because some
* ancestor (immediate or not) has to be renamed/moved first. So find if
* such ancestor exists and make sure our own rename/move happens after
- * that ancestor is processed.
+	 * that ancestor is processed, to avoid infinite path-building
+	 * loops (done at get_cur_path()).
*/
while (ino > BTRFS_FIRST_FREE_OBJECTID) {
if (is_waiting_for_move(sctx, ino)) {
- ret = 1;
+ /*
+ * If the current inode is an ancestor of ino in the
+ * parent root, we need to delay the rename of the
+			 * current inode, otherwise don't delay the rename
+ * because we can end up with a circular dependency
+ * of renames, resulting in some directories never
+ * getting the respective rename operations issued in
+ * the send stream or getting into infinite path build
+ * loops.
+ */
+ ret = is_ancestor(sctx->parent_root,
+ sctx->cur_ino, sctx->cur_inode_gen,
+ ino, path_before);
break;
}
@@ -3420,7 +3498,7 @@ out:
ino,
&sctx->new_refs,
&sctx->deleted_refs,
- false);
+ is_orphan);
if (!ret)
ret = 1;
}
@@ -3589,6 +3667,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
}
}
+ if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
+ can_rename) {
+ ret = wait_for_parent_move(sctx, cur, is_orphan);
+ if (ret < 0)
+ goto out;
+ if (ret == 1) {
+ can_rename = false;
+ *pending_move = 1;
+ }
+ }
+
/*
* link/move the ref to the new place. If we have an orphan
* inode, move it and update valid_path. If not, link or move
@@ -3609,18 +3698,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
* dirs, we always have one new and one deleted
* ref. The deleted ref is ignored later.
*/
- ret = wait_for_parent_move(sctx, cur);
- if (ret < 0)
- goto out;
- if (ret) {
- *pending_move = 1;
- } else {
- ret = send_rename(sctx, valid_path,
- cur->full_path);
- if (!ret)
- ret = fs_path_copy(valid_path,
- cur->full_path);
- }
+ ret = send_rename(sctx, valid_path,
+ cur->full_path);
+ if (!ret)
+ ret = fs_path_copy(valid_path,
+ cur->full_path);
if (ret < 0)
goto out;
} else {
@@ -4508,8 +4590,21 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
if (ret < 0)
goto out;
- TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
- clone_root->root->root_item.uuid);
+ /*
+ * If the parent we're using has a received_uuid set then use that as
+ * our clone source as that is what we will look for when doing a
+ * receive.
+ *
+ * This covers the case that we create a snapshot off of a received
+ * subvolume and then use that as the parent and try to receive on a
+ * different host.
+ */
+ if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+ clone_root->root->root_item.received_uuid);
+ else
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+ clone_root->root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
le64_to_cpu(clone_root->root->root_item.ctransid));
TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
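
Both this hunk and the send_subvol_begin() hunk above make the same choice, so it could be factored into a helper along these lines (hypothetical; the patch open-codes it at both call sites):

    static const u8 *clone_source_uuid(struct btrfs_root *root)
    {
        /* Prefer received_uuid: a receiver matches clone sources against
         * the UUID it recorded at receive time, not the local one. */
        if (!btrfs_is_empty_uuid(root->root_item.received_uuid))
            return root->root_item.received_uuid;
        return root->root_item.uuid;
    }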
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9e66f5e724db..cd7ef34d2dce 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -135,6 +135,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
* __btrfs_std_error decodes expected errors from the caller and
 * invokes the appropriate error response.
*/
+__cold
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...)
{
@@ -247,18 +248,11 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
* We'll complete the cleanup in btrfs_end_transaction and
* btrfs_commit_transaction.
*/
+__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *function,
unsigned int line, int errno)
{
- /*
- * Report first abort since mount
- */
- if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
- &root->fs_info->fs_state)) {
- WARN(1, KERN_DEBUG "BTRFS: Transaction aborted (error %d)\n",
- errno);
- }
trans->aborted = errno;
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
@@ -281,6 +275,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
* __btrfs_panic decodes unexpected, fatal errors from the caller,
* issues an alert, and either panics or BUGs, depending on mount options.
*/
+__cold
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...)
{
@@ -841,33 +836,153 @@ out:
return error;
}
-static struct dentry *get_default_root(struct super_block *sb,
- u64 subvol_objectid)
+static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+ u64 subvol_objectid)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root = fs_info->tree_root;
- struct btrfs_root *new_root;
- struct btrfs_dir_item *di;
- struct btrfs_path *path;
- struct btrfs_key location;
- struct inode *inode;
- u64 dir_id;
- int new = 0;
+ struct btrfs_root *fs_root;
+ struct btrfs_root_ref *root_ref;
+ struct btrfs_inode_ref *inode_ref;
+ struct btrfs_key key;
+ struct btrfs_path *path = NULL;
+ char *name = NULL, *ptr;
+ u64 dirid;
+ int len;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ path->leave_spinning = 1;
+
+ name = kmalloc(PATH_MAX, GFP_NOFS);
+ if (!name) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ ptr = name + PATH_MAX - 1;
+ ptr[0] = '\0';
/*
- * We have a specific subvol we want to mount, just setup location and
- * go look up the root.
+ * Walk up the subvolume trees in the tree of tree roots by root
+ * backrefs until we hit the top-level subvolume.
*/
- if (subvol_objectid) {
- location.objectid = subvol_objectid;
- location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = (u64)-1;
- goto find_root;
+ while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
+ key.objectid = subvol_objectid;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto err;
+ } else if (ret > 0) {
+ ret = btrfs_previous_item(root, path, subvol_objectid,
+ BTRFS_ROOT_BACKREF_KEY);
+ if (ret < 0) {
+ goto err;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto err;
+ }
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ subvol_objectid = key.offset;
+
+ root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_root_ref);
+ len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
+ ptr -= len + 1;
+ if (ptr < name) {
+ ret = -ENAMETOOLONG;
+ goto err;
+ }
+ read_extent_buffer(path->nodes[0], ptr + 1,
+ (unsigned long)(root_ref + 1), len);
+ ptr[0] = '/';
+ dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
+ btrfs_release_path(path);
+
+ key.objectid = subvol_objectid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(fs_root)) {
+ ret = PTR_ERR(fs_root);
+ goto err;
+ }
+
+ /*
+ * Walk up the filesystem tree by inode refs until we hit the
+ * root directory.
+ */
+ while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
+ key.objectid = dirid;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto err;
+ } else if (ret > 0) {
+ ret = btrfs_previous_item(fs_root, path, dirid,
+ BTRFS_INODE_REF_KEY);
+ if (ret < 0) {
+ goto err;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto err;
+ }
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ dirid = key.offset;
+
+ inode_ref = btrfs_item_ptr(path->nodes[0],
+ path->slots[0],
+ struct btrfs_inode_ref);
+ len = btrfs_inode_ref_name_len(path->nodes[0],
+ inode_ref);
+ ptr -= len + 1;
+ if (ptr < name) {
+ ret = -ENAMETOOLONG;
+ goto err;
+ }
+ read_extent_buffer(path->nodes[0], ptr + 1,
+ (unsigned long)(inode_ref + 1), len);
+ ptr[0] = '/';
+ btrfs_release_path(path);
+ }
}
+ btrfs_free_path(path);
+ if (ptr == name + PATH_MAX - 1) {
+ name[0] = '/';
+ name[1] = '\0';
+ } else {
+ memmove(name, ptr, name + PATH_MAX - ptr);
+ }
+ return name;
+
+err:
+ btrfs_free_path(path);
+ kfree(name);
+ return ERR_PTR(ret);
+}
+
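
get_subvol_name_from_objectid() discovers path components leaf-to-root, so it writes them back-to-front into the tail of a PATH_MAX buffer and finally memmove()s the result to the front. The trick in isolation, as a runnable userspace model:

    #include <stdio.h>
    #include <string.h>

    #define PATH_MAX 4096

    int main(void)
    {
        static char name[PATH_MAX];
        const char *components[] = { "snap", "subvols" };  /* leaf first */
        char *ptr = name + PATH_MAX - 1;
        size_t i;

        *ptr = '\0';
        for (i = 0; i < 2; i++) {
            size_t len = strlen(components[i]);

            ptr -= len + 1;
            if (ptr < name)
                return 1;              /* -ENAMETOOLONG in the kernel */
            memcpy(ptr + 1, components[i], len);
            ptr[0] = '/';
        }
        memmove(name, ptr, name + PATH_MAX - ptr);
        printf("%s\n", name);          /* prints "/subvols/snap" */
        return 0;
    }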
+static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
+{
+ struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_dir_item *di;
+ struct btrfs_path *path;
+ struct btrfs_key location;
+ u64 dir_id;
+
path = btrfs_alloc_path();
if (!path)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
path->leave_spinning = 1;
/*
@@ -879,58 +994,23 @@ static struct dentry *get_default_root(struct super_block *sb,
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
if (IS_ERR(di)) {
btrfs_free_path(path);
- return ERR_CAST(di);
+ return PTR_ERR(di);
}
if (!di) {
/*
* Ok the default dir item isn't there. This is weird since
* it's always been there, but don't freak out, just try and
- * mount to root most subvolume.
+ * mount the top-level subvolume.
*/
btrfs_free_path(path);
- dir_id = BTRFS_FIRST_FREE_OBJECTID;
- new_root = fs_info->fs_root;
- goto setup_root;
+ *objectid = BTRFS_FS_TREE_OBJECTID;
+ return 0;
}
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
btrfs_free_path(path);
-
-find_root:
- new_root = btrfs_read_fs_root_no_name(fs_info, &location);
- if (IS_ERR(new_root))
- return ERR_CAST(new_root);
-
- if (!(sb->s_flags & MS_RDONLY)) {
- int ret;
- down_read(&fs_info->cleanup_work_sem);
- ret = btrfs_orphan_cleanup(new_root);
- up_read(&fs_info->cleanup_work_sem);
- if (ret)
- return ERR_PTR(ret);
- }
-
- dir_id = btrfs_root_dirid(&new_root->root_item);
-setup_root:
- location.objectid = dir_id;
- location.type = BTRFS_INODE_ITEM_KEY;
- location.offset = 0;
-
- inode = btrfs_iget(sb, &location, new_root, &new);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- /*
- * If we're just mounting the root most subvol put the inode and return
- * a reference to the dentry. We will have already gotten a reference
- * to the inode in btrfs_fill_super so we're good to go.
- */
- if (!new && d_inode(sb->s_root) == inode) {
- iput(inode);
- return dget(sb->s_root);
- }
-
- return d_obtain_root(inode);
+ *objectid = location.objectid;
+ return 0;
}
static int btrfs_fill_super(struct super_block *sb,
@@ -1108,6 +1188,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",fatal_errors=panic");
if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
seq_printf(seq, ",commit=%d", info->commit_interval);
+ seq_printf(seq, ",subvolid=%llu",
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid);
+ seq_puts(seq, ",subvol=");
+ seq_dentry(seq, dentry, " \t\n\\");
return 0;
}
@@ -1138,107 +1222,139 @@ static inline int is_subvolume_inode(struct inode *inode)
}
/*
- * This will strip out the subvol=%s argument for an argument string and add
- * subvolid=0 to make sure we get the actual tree root for path walking to the
- * subvol we want.
+ * This will add subvolid=0 to the argument string while removing any subvol=
+ * and subvolid= arguments to make sure we get the top-level root for path
+ * walking to the subvol we want.
*/
static char *setup_root_args(char *args)
{
- unsigned len = strlen(args) + 2 + 1;
- char *src, *dst, *buf;
+ char *buf, *dst, *sep;
- /*
- * We need the same args as before, but with this substitution:
- * s!subvol=[^,]+!subvolid=0!
- *
- * Since the replacement string is up to 2 bytes longer than the
- * original, allocate strlen(args) + 2 + 1 bytes.
- */
+ if (!args)
+ return kstrdup("subvolid=0", GFP_NOFS);
- src = strstr(args, "subvol=");
- /* This shouldn't happen, but just in case.. */
- if (!src)
- return NULL;
-
- buf = dst = kmalloc(len, GFP_NOFS);
+ /* The worst case is that we add ",subvolid=0" to the end. */
+ buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS);
if (!buf)
return NULL;
- /*
- * If the subvol= arg is not at the start of the string,
- * copy whatever precedes it into buf.
- */
- if (src != args) {
- *src++ = '\0';
- strcpy(buf, args);
- dst += strlen(args);
+ while (1) {
+ sep = strchrnul(args, ',');
+ if (!strstarts(args, "subvol=") &&
+ !strstarts(args, "subvolid=")) {
+ memcpy(dst, args, sep - args);
+ dst += sep - args;
+ *dst++ = ',';
+ }
+ if (*sep)
+ args = sep + 1;
+ else
+ break;
}
-
strcpy(dst, "subvolid=0");
- dst += strlen("subvolid=0");
-
- /*
- * If there is a "," after the original subvol=... string,
- * copy that suffix into our buffer. Otherwise, we're done.
- */
- src = strchr(src, ',');
- if (src)
- strcpy(dst, src);
return buf;
}
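
A userspace rendition of the new parser, to show the behaviour concretely; strstarts() and strchrnul() are kernel helpers, so stand-ins are used here:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int starts(const char *s, const char *prefix)
    {
        return strncmp(s, prefix, strlen(prefix)) == 0;
    }

    static char *setup_root_args(const char *args)
    {
        char *buf, *dst;
        const char *sep;

        buf = dst = malloc(strlen(args) + strlen(",subvolid=0") + 1);
        if (!buf)
            return NULL;
        for (;;) {
            sep = strchr(args, ',');
            if (!sep)
                sep = args + strlen(args);   /* strchrnul() equivalent */
            if (!starts(args, "subvol=") && !starts(args, "subvolid=")) {
                memcpy(dst, args, sep - args);
                dst += sep - args;
                *dst++ = ',';
            }
            if (*sep)
                args = sep + 1;
            else
                break;
        }
        strcpy(dst, "subvolid=0");
        return buf;
    }

    int main(void)
    {
        char *s = setup_root_args("subvol=/snaps/a,compress=lzo,subvolid=5");
        printf("%s\n", s);               /* prints "compress=lzo,subvolid=0" */
        free(s);
        return 0;
    }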
-static struct dentry *mount_subvol(const char *subvol_name, int flags,
- const char *device_name, char *data)
+static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
+ int flags, const char *device_name,
+ char *data)
{
struct dentry *root;
- struct vfsmount *mnt;
+ struct vfsmount *mnt = NULL;
char *newargs;
+ int ret;
newargs = setup_root_args(data);
- if (!newargs)
- return ERR_PTR(-ENOMEM);
- mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
- newargs);
+ if (!newargs) {
+ root = ERR_PTR(-ENOMEM);
+ goto out;
+ }
- if (PTR_RET(mnt) == -EBUSY) {
+ mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
+ if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
if (flags & MS_RDONLY) {
- mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name,
- newargs);
+ mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY,
+ device_name, newargs);
} else {
- int r;
- mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
- newargs);
+ mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY,
+ device_name, newargs);
if (IS_ERR(mnt)) {
- kfree(newargs);
- return ERR_CAST(mnt);
+ root = ERR_CAST(mnt);
+ mnt = NULL;
+ goto out;
}
- r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
- if (r < 0) {
- /* FIXME: release vfsmount mnt ??*/
- kfree(newargs);
- return ERR_PTR(r);
+ down_write(&mnt->mnt_sb->s_umount);
+ ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
+ up_write(&mnt->mnt_sb->s_umount);
+ if (ret < 0) {
+ root = ERR_PTR(ret);
+ goto out;
}
}
}
+ if (IS_ERR(mnt)) {
+ root = ERR_CAST(mnt);
+ mnt = NULL;
+ goto out;
+ }
- kfree(newargs);
+ if (!subvol_name) {
+ if (!subvol_objectid) {
+ ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
+ &subvol_objectid);
+ if (ret) {
+ root = ERR_PTR(ret);
+ goto out;
+ }
+ }
+ subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
+ subvol_objectid);
+ if (IS_ERR(subvol_name)) {
+ root = ERR_CAST(subvol_name);
+ subvol_name = NULL;
+ goto out;
+ }
- if (IS_ERR(mnt))
- return ERR_CAST(mnt);
+ }
root = mount_subtree(mnt, subvol_name);
+ /* mount_subtree() drops our reference on the vfsmount. */
+ mnt = NULL;
- if (!IS_ERR(root) && !is_subvolume_inode(d_inode(root))) {
+ if (!IS_ERR(root)) {
struct super_block *s = root->d_sb;
- dput(root);
- root = ERR_PTR(-EINVAL);
- deactivate_locked_super(s);
- printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n",
- subvol_name);
+ struct inode *root_inode = d_inode(root);
+ u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
+
+ ret = 0;
+ if (!is_subvolume_inode(root_inode)) {
+ pr_err("BTRFS: '%s' is not a valid subvolume\n",
+ subvol_name);
+ ret = -EINVAL;
+ }
+ if (subvol_objectid && root_objectid != subvol_objectid) {
+ /*
+ * This will also catch a race condition where a
+ * subvolume which was passed by ID is renamed and
+ * another subvolume is renamed over the old location.
+ */
+ pr_err("BTRFS: subvol '%s' does not match subvolid %llu\n",
+ subvol_name, subvol_objectid);
+ ret = -EINVAL;
+ }
+ if (ret) {
+ dput(root);
+ root = ERR_PTR(ret);
+ deactivate_locked_super(s);
+ }
}
+out:
+ mntput(mnt);
+ kfree(newargs);
+ kfree(subvol_name);
return root;
}
@@ -1303,7 +1419,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
{
struct block_device *bdev = NULL;
struct super_block *s;
- struct dentry *root;
struct btrfs_fs_devices *fs_devices = NULL;
struct btrfs_fs_info *fs_info = NULL;
struct security_mnt_opts new_sec_opts;
@@ -1323,10 +1438,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
return ERR_PTR(error);
}
- if (subvol_name) {
- root = mount_subvol(subvol_name, flags, device_name, data);
- kfree(subvol_name);
- return root;
+ if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
+ /* mount_subvol() will free subvol_name. */
+ return mount_subvol(subvol_name, subvol_objectid, flags,
+ device_name, data);
}
security_init_mnt_opts(&new_sec_opts);
@@ -1392,23 +1507,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
error = btrfs_fill_super(s, fs_devices, data,
flags & MS_SILENT ? 1 : 0);
}
-
- root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error);
- if (IS_ERR(root)) {
+ if (error) {
deactivate_locked_super(s);
- error = PTR_ERR(root);
goto error_sec_opts;
}
fs_info = btrfs_sb(s);
error = setup_security_options(fs_info, s, &new_sec_opts);
if (error) {
- dput(root);
deactivate_locked_super(s);
goto error_sec_opts;
}
- return root;
+ return dget(s->s_root);
error_close_devices:
btrfs_close_devices(fs_devices);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index e8a4c86d274d..603b0cc2b9bb 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -33,6 +33,7 @@
#include "volumes.h"
static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
+static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj);
static u64 get_features(struct btrfs_fs_info *fs_info,
enum btrfs_feature_set set)
@@ -428,7 +429,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show);
-static struct attribute *btrfs_attrs[] = {
+static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(label),
BTRFS_ATTR_PTR(nodesize),
BTRFS_ATTR_PTR(sectorsize),
@@ -438,21 +439,29 @@ static struct attribute *btrfs_attrs[] = {
static void btrfs_release_super_kobj(struct kobject *kobj)
{
- struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- complete(&fs_info->kobj_unregister);
+ struct btrfs_fs_devices *fs_devs = to_fs_devs(kobj);
+
+ memset(&fs_devs->super_kobj, 0, sizeof(struct kobject));
+ complete(&fs_devs->kobj_unregister);
}
static struct kobj_type btrfs_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = btrfs_release_super_kobj,
- .default_attrs = btrfs_attrs,
};
+static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj)
+{
+ if (kobj->ktype != &btrfs_ktype)
+ return NULL;
+ return container_of(kobj, struct btrfs_fs_devices, super_kobj);
+}
+
static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
{
if (kobj->ktype != &btrfs_ktype)
return NULL;
- return container_of(kobj, struct btrfs_fs_info, super_kobj);
+ return to_fs_devs(kobj)->fs_info;
}
#define NUM_FEATURE_BITS 64
@@ -493,12 +502,12 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
attrs[0] = &fa->kobj_attr.attr;
if (add) {
int ret;
- ret = sysfs_merge_group(&fs_info->super_kobj,
+ ret = sysfs_merge_group(&fs_info->fs_devices->super_kobj,
&agroup);
if (ret)
return ret;
} else
- sysfs_unmerge_group(&fs_info->super_kobj,
+ sysfs_unmerge_group(&fs_info->fs_devices->super_kobj,
&agroup);
}
@@ -506,25 +515,49 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
return 0;
}
-static void __btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
+static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
+{
+ if (fs_devs->device_dir_kobj) {
+ kobject_del(fs_devs->device_dir_kobj);
+ kobject_put(fs_devs->device_dir_kobj);
+ fs_devs->device_dir_kobj = NULL;
+ }
+
+ if (fs_devs->super_kobj.state_initialized) {
+ kobject_del(&fs_devs->super_kobj);
+ kobject_put(&fs_devs->super_kobj);
+ wait_for_completion(&fs_devs->kobj_unregister);
+ }
+}
+
+/* when fs_devs is NULL, remove all fsid kobjects */
+void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
{
- kobject_del(&fs_info->super_kobj);
- kobject_put(&fs_info->super_kobj);
- wait_for_completion(&fs_info->kobj_unregister);
+ struct list_head *fs_uuids = btrfs_get_fs_uuids();
+
+ if (fs_devs) {
+ __btrfs_sysfs_remove_fsid(fs_devs);
+ return;
+ }
+
+ list_for_each_entry(fs_devs, fs_uuids, list) {
+ __btrfs_sysfs_remove_fsid(fs_devs);
+ }
}
void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
{
+ btrfs_reset_fs_info_ptr(fs_info);
+
if (fs_info->space_info_kobj) {
sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs);
kobject_del(fs_info->space_info_kobj);
kobject_put(fs_info->space_info_kobj);
}
- kobject_del(fs_info->device_dir_kobj);
- kobject_put(fs_info->device_dir_kobj);
addrm_unknown_feature_attrs(fs_info, false);
- sysfs_remove_group(&fs_info->super_kobj, &btrfs_feature_attr_group);
- __btrfs_sysfs_remove_one(fs_info);
+ sysfs_remove_group(&fs_info->fs_devices->super_kobj, &btrfs_feature_attr_group);
+ sysfs_remove_files(&fs_info->fs_devices->super_kobj, btrfs_attrs);
+ btrfs_kobj_rm_device(fs_info->fs_devices, NULL);
}
const char * const btrfs_feature_set_names[3] = {
@@ -602,40 +635,60 @@ static void init_feature_attrs(void)
}
}
-int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+/* when one_device is NULL, it removes all device links */
+
+int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *one_device)
{
struct hd_struct *disk;
struct kobject *disk_kobj;
- if (!fs_info->device_dir_kobj)
+ if (!fs_devices->device_dir_kobj)
return -EINVAL;
if (one_device && one_device->bdev) {
disk = one_device->bdev->bd_part;
disk_kobj = &part_to_dev(disk)->kobj;
- sysfs_remove_link(fs_info->device_dir_kobj,
+ sysfs_remove_link(fs_devices->device_dir_kobj,
+ disk_kobj->name);
+ }
+
+ if (one_device)
+ return 0;
+
+ list_for_each_entry(one_device,
+ &fs_devices->devices, dev_list) {
+ if (!one_device->bdev)
+ continue;
+ disk = one_device->bdev->bd_part;
+ disk_kobj = &part_to_dev(disk)->kobj;
+
+ sysfs_remove_link(fs_devices->device_dir_kobj,
disk_kobj->name);
}
return 0;
}
-int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
- struct btrfs_device *one_device)
+int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs)
{
- int error = 0;
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
- struct btrfs_device *dev;
-
- if (!fs_info->device_dir_kobj)
- fs_info->device_dir_kobj = kobject_create_and_add("devices",
- &fs_info->super_kobj);
+ if (!fs_devs->device_dir_kobj)
+ fs_devs->device_dir_kobj = kobject_create_and_add("devices",
+ &fs_devs->super_kobj);
- if (!fs_info->device_dir_kobj)
+ if (!fs_devs->device_dir_kobj)
return -ENOMEM;
+ return 0;
+}
+
+int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices,
+ struct btrfs_device *one_device)
+{
+ int error = 0;
+ struct btrfs_device *dev;
+
list_for_each_entry(dev, &fs_devices->devices, dev_list) {
struct hd_struct *disk;
struct kobject *disk_kobj;
@@ -649,7 +702,7 @@ int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
disk = dev->bdev->bd_part;
disk_kobj = &part_to_dev(disk)->kobj;
- error = sysfs_create_link(fs_info->device_dir_kobj,
+ error = sysfs_create_link(fs_devices->device_dir_kobj,
disk_kobj, disk_kobj->name);
if (error)
break;
@@ -667,34 +720,51 @@ static struct dentry *btrfs_debugfs_root_dentry;
/* Debugging tunables and exported data */
u64 btrfs_debugfs_test;
+/*
+ * Can be called by the device discovery thread.
+ * A parent kobject can be specified for a seed device.
+ */
+int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
+ struct kobject *parent)
+{
+ int error;
+
+ init_completion(&fs_devs->kobj_unregister);
+ fs_devs->super_kobj.kset = btrfs_kset;
+ error = kobject_init_and_add(&fs_devs->super_kobj,
+ &btrfs_ktype, parent, "%pU", fs_devs->fsid);
+ return error;
+}
+
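
The completion exists because of the kobject lifetime contract: the ktype release callback (btrfs_release_super_kobj() above) only runs when the last reference is dropped, and the embedding btrfs_fs_devices must not be freed before that. An illustrative teardown sequence, restating what __btrfs_sysfs_remove_fsid() above does:

    static void embedded_kobject_teardown(struct btrfs_fs_devices *fs_devs)
    {
        kobject_del(&fs_devs->super_kobj);              /* unlink from sysfs */
        kobject_put(&fs_devs->super_kobj);              /* may drop the last ref */
        wait_for_completion(&fs_devs->kobj_unregister); /* release has run */
        /* Only now is it safe to free the containing fs_devs. */
    }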
int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
{
int error;
+ struct btrfs_fs_devices *fs_devs = fs_info->fs_devices;
+ struct kobject *super_kobj = &fs_devs->super_kobj;
+
+ btrfs_set_fs_info_ptr(fs_info);
- init_completion(&fs_info->kobj_unregister);
- fs_info->super_kobj.kset = btrfs_kset;
- error = kobject_init_and_add(&fs_info->super_kobj, &btrfs_ktype, NULL,
- "%pU", fs_info->fsid);
+ error = btrfs_kobj_add_device(fs_devs, NULL);
if (error)
return error;
- error = sysfs_create_group(&fs_info->super_kobj,
- &btrfs_feature_attr_group);
+ error = sysfs_create_files(super_kobj, btrfs_attrs);
if (error) {
- __btrfs_sysfs_remove_one(fs_info);
+ btrfs_kobj_rm_device(fs_devs, NULL);
return error;
}
- error = addrm_unknown_feature_attrs(fs_info, true);
+ error = sysfs_create_group(super_kobj,
+ &btrfs_feature_attr_group);
if (error)
goto failure;
- error = btrfs_kobj_add_device(fs_info, NULL);
+ error = addrm_unknown_feature_attrs(fs_info, true);
if (error)
goto failure;
fs_info->space_info_kobj = kobject_create_and_add("allocation",
- &fs_info->super_kobj);
+ super_kobj);
if (!fs_info->space_info_kobj) {
error = -ENOMEM;
goto failure;
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 3a4bbed723fd..6392527bcc15 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -82,8 +82,12 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
extern const char * const btrfs_feature_set_names[3];
extern struct kobj_type space_info_ktype;
extern struct kobj_type btrfs_raid_ktype;
-int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
+int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *one_device);
-int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *one_device);
+int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
+ struct kobject *parent);
+int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
#endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index c32a7ba76bca..846d277b1901 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -21,6 +21,7 @@
#include "../transaction.h"
#include "../disk-io.h"
#include "../qgroup.h"
+#include "../backref.h"
static void init_dummy_trans(struct btrfs_trans_handle *trans)
{
@@ -227,6 +228,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct ulist *old_roots = NULL;
+ struct ulist *new_roots = NULL;
int ret;
init_dummy_trans(&trans);
@@ -238,10 +241,15 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
- BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+ /*
+	 * Since the test trans doesn't have the complicated delayed refs,
+ * we can only call btrfs_qgroup_account_extent() directly to test
+ * quota.
+ */
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
if (ret) {
- test_msg("Couldn't add space to a qgroup %d\n", ret);
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
@@ -249,9 +257,18 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
if (ret)
return ret;
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+		test_msg("Couldn't find new roots: %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Delayed qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
@@ -259,21 +276,32 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
+ old_roots = NULL;
+ new_roots = NULL;
+
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
+ }
ret = remove_extent_item(root, 4096, 4096);
if (ret)
return -EINVAL;
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
- BTRFS_QGROUP_OPER_SUB_EXCL, 0);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
if (ret) {
- test_msg("Couldn't remove space from the qgroup %d\n", ret);
- return -EINVAL;
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+		test_msg("Couldn't find new roots: %d\n", ret);
+ return ret;
}
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return -EINVAL;
}
@@ -294,6 +322,8 @@ static int test_multiple_refs(struct btrfs_root *root)
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct ulist *old_roots = NULL;
+ struct ulist *new_roots = NULL;
int ret;
init_dummy_trans(&trans);
@@ -307,20 +337,29 @@ static int test_multiple_refs(struct btrfs_root *root)
return ret;
}
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
+ }
+
ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
if (ret)
return ret;
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
- BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
if (ret) {
- test_msg("Couldn't add space to a qgroup %d\n", ret);
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+		test_msg("Couldn't find new roots: %d\n", ret);
return ret;
}
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Delayed qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
@@ -329,20 +368,29 @@ static int test_multiple_refs(struct btrfs_root *root)
return -EINVAL;
}
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
+ }
+
ret = add_tree_ref(root, 4096, 4096, 0, 256);
if (ret)
return ret;
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
- BTRFS_QGROUP_OPER_ADD_SHARED, 0);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
if (ret) {
- test_msg("Qgroup record ref failed %d\n", ret);
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+		test_msg("Couldn't find new roots: %d\n", ret);
return ret;
}
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
@@ -356,20 +404,29 @@ static int test_multiple_refs(struct btrfs_root *root)
return -EINVAL;
}
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
+ }
+
ret = remove_extent_ref(root, 4096, 4096, 0, 256);
if (ret)
return ret;
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
- BTRFS_QGROUP_OPER_SUB_SHARED, 0);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
if (ret) {
- test_msg("Qgroup record ref failed %d\n", ret);
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+		test_msg("Couldn't find new roots: %d\n", ret);
return ret;
}
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5628e25250c0..c0f18e7266b6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -225,12 +225,14 @@ loop:
cur_trans->dirty_bg_run = 0;
cur_trans->delayed_refs.href_root = RB_ROOT;
+ cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
atomic_set(&cur_trans->delayed_refs.num_entries, 0);
cur_trans->delayed_refs.num_heads_ready = 0;
cur_trans->delayed_refs.pending_csums = 0;
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
+ cur_trans->delayed_refs.qgroup_to_skip = 0;
/*
* although the tree mod log is per file system and not per transaction,
@@ -509,6 +511,7 @@ again:
h->transaction = cur_trans;
h->blocks_used = 0;
h->bytes_reserved = 0;
+ h->chunk_bytes_reserved = 0;
h->root = root;
h->delayed_ref_updates = 0;
h->use_count = 1;
@@ -792,6 +795,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, root);
+ btrfs_trans_release_chunk_metadata(trans);
+
if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
should_end_transaction(trans, root) &&
ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
@@ -1290,6 +1295,12 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (pending->error)
goto no_free_objectid;
+ /*
+	 * Make qgroup skip the new snapshot's qgroupid, as it is
+	 * accounted for later by btrfs_qgroup_inherit().
+ */
+ btrfs_set_skip_qgroup(trans, objectid);
+
btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
if (to_reserve > 0) {
@@ -1298,7 +1309,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
to_reserve,
BTRFS_RESERVE_NO_FLUSH);
if (pending->error)
- goto no_free_objectid;
+ goto clear_skip_qgroup;
}
key.objectid = objectid;
@@ -1396,25 +1407,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, root, ret);
goto fail;
}
-
- /*
- * We need to flush delayed refs in order to make sure all of our quota
- * operations have been done before we call btrfs_qgroup_inherit.
- */
- ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
- }
-
- ret = btrfs_qgroup_inherit(trans, fs_info,
- root->root_key.objectid,
- objectid, pending->inherit);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
- }
-
/* see comments in should_cow_block() */
set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
smp_wmb();
@@ -1497,11 +1489,37 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto fail;
}
}
+
+ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+
+ /*
+	 * Account qgroup counters before btrfs_qgroup_inherit()
+ */
+ ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
+ if (ret)
+ goto fail;
+ ret = btrfs_qgroup_account_extents(trans, fs_info);
+ if (ret)
+ goto fail;
+ ret = btrfs_qgroup_inherit(trans, fs_info,
+ root->root_key.objectid,
+ objectid, pending->inherit);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+
fail:
pending->error = ret;
dir_item_existed:
trans->block_rsv = rsv;
trans->bytes_reserved = 0;
+clear_skip_qgroup:
+ btrfs_clear_skip_qgroup(trans);
no_free_objectid:
kfree(new_root_item);
root_item_alloc_fail:
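
For reference, the net effect of moving btrfs_qgroup_inherit() after the
delayed-ref flush and the new accounting calls is the ordering sketched
below. This is an illustrative sketch only, not code from the patch;
error handling and locking are elided.

/*
 * Sketch of the qgroup ordering create_pending_snapshot() now follows
 * (illustrative only; not part of the patch).
 */
static int snapshot_qgroup_flow(struct btrfs_trans_handle *trans,
				struct btrfs_fs_info *fs_info,
				struct btrfs_root *root, u64 objectid,
				struct btrfs_qgroup_inherit *inherit)
{
	int ret;

	/* 1. flush delayed refs so all extent modifications are recorded */
	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	if (ret)
		return ret;
	/* 2. snapshot the old roots of every dirty extent */
	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
	if (ret)
		return ret;
	/* 3. compute new roots and update the qgroup counters */
	ret = btrfs_qgroup_account_extents(trans, fs_info);
	if (ret)
		return ret;
	/* 4. only then inherit into the snapshot's qgroup */
	return btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
				    objectid, inherit);
}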
@@ -1963,6 +1981,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto scrub_continue;
}
+ /* Record old roots for later qgroup accounting */
+ ret = btrfs_qgroup_prepare_account_extents(trans, root->fs_info);
+ if (ret) {
+ mutex_unlock(&root->fs_info->reloc_mutex);
+ goto scrub_continue;
+ }
+
/*
* make sure none of the code above managed to slip in a
* delayed item
@@ -2004,6 +2029,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
*/
btrfs_free_log_root_tree(trans, root->fs_info);
+ /*
+ * Since the fs roots are all committed, we can now get quite accurate
+ * new_roots. So let's do the quota accounting.
+ */
+ ret = btrfs_qgroup_account_extents(trans, root->fs_info);
+ if (ret < 0) {
+ mutex_unlock(&root->fs_info->tree_log_mutex);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+ goto scrub_continue;
+ }
+
ret = commit_cowonly_roots(trans, root);
if (ret) {
mutex_unlock(&root->fs_info->tree_log_mutex);
@@ -2054,6 +2090,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
+ btrfs_trans_release_chunk_metadata(trans);
+
spin_lock(&root->fs_info->trans_lock);
cur_trans->state = TRANS_STATE_UNBLOCKED;
root->fs_info->running_transaction = NULL;
@@ -2123,6 +2161,7 @@ scrub_continue:
btrfs_scrub_continue(root);
cleanup_transaction:
btrfs_trans_release_metadata(trans, root);
+ btrfs_trans_release_chunk_metadata(trans);
trans->block_rsv = NULL;
if (trans->qgroup_reserved) {
btrfs_qgroup_free(root, trans->qgroup_reserved);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 0b24755596ba..eb09c2067fa8 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -102,6 +102,7 @@ struct btrfs_transaction {
struct btrfs_trans_handle {
u64 transid;
u64 bytes_reserved;
+ u64 chunk_bytes_reserved;
u64 qgroup_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
@@ -153,6 +154,29 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
spin_unlock(&BTRFS_I(inode)->lock);
}
+/*
+ * Make the qgroup code skip the given qgroupid, so that the old/new_roots
+ * computed for qgroup accounting won't contain that qgroupid.
+ */
+static inline void btrfs_set_skip_qgroup(struct btrfs_trans_handle *trans,
+ u64 qgroupid)
+{
+ struct btrfs_delayed_ref_root *delayed_refs;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ WARN_ON(delayed_refs->qgroup_to_skip);
+ delayed_refs->qgroup_to_skip = qgroupid;
+}
+
+static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_delayed_ref_root *delayed_refs;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ WARN_ON(!delayed_refs->qgroup_to_skip);
+ delayed_refs->qgroup_to_skip = 0;
+}
+
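
A minimal usage sketch of the pairing these helpers expect, mirroring
create_pending_snapshot() above (do_snapshot_work() is a hypothetical
placeholder):

	btrfs_set_skip_qgroup(trans, objectid);	/* WARNs if already set */

	ret = do_snapshot_work(trans, objectid);	/* hypothetical */

	/* cleared on both the success and the error path */
	btrfs_clear_skip_qgroup(trans);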
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index a63719cc9578..a4b9c8b2d35a 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -52,9 +52,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
goto out;
- if (btrfs_test_opt(root, SSD))
- goto out;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d04968374e9d..1ce80c1c4eb6 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3881,12 +3881,6 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
&ordered->flags))
continue;
- if (ordered->csum_bytes_left) {
- btrfs_start_ordered_extent(inode, ordered, 0);
- wait_event(ordered->wait,
- ordered->csum_bytes_left == 0);
- }
-
list_for_each_entry(sum, &ordered->list, list) {
ret = btrfs_csum_file_blocks(trans, log, sum);
if (ret)
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index 840a38b2778a..91feb2bdefee 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -132,6 +132,15 @@ static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
return NULL;
}
+static void ulist_rbtree_erase(struct ulist *ulist, struct ulist_node *node)
+{
+ rb_erase(&node->rb_node, &ulist->root);
+ list_del(&node->list);
+ kfree(node);
+ BUG_ON(ulist->nnodes == 0);
+ ulist->nnodes--;
+}
+
static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
{
struct rb_node **p = &ulist->root.rb_node;
@@ -197,9 +206,6 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
node->val = val;
node->aux = aux;
-#ifdef CONFIG_BTRFS_DEBUG
- node->seqnum = ulist->nnodes;
-#endif
ret = ulist_rbtree_insert(ulist, node);
ASSERT(!ret);
@@ -209,6 +215,33 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
return 1;
}
+/*
+ * ulist_del - delete one node from ulist
+ * @ulist: ulist to remove node from
+ * @val: value to delete
+ * @aux: aux to delete
+ *
+ * The deletion will only be done when *BOTH* val and aux match.
+ * Return 0 for successful delete.
+ * Return > 0 for not found.
+ */
+int ulist_del(struct ulist *ulist, u64 val, u64 aux)
+{
+ struct ulist_node *node;
+
+ node = ulist_rbtree_search(ulist, val);
+ /* Not found */
+ if (!node)
+ return 1;
+
+ if (node->aux != aux)
+ return 1;
+
+ /* Found and delete */
+ ulist_rbtree_erase(ulist, node);
+ return 0;
+}
+
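A short sketch of the add/delete contract: ulist_del() only removes an
entry when both val and aux match (illustrative values):

	struct ulist *ul = ulist_alloc(GFP_KERNEL);

	ulist_add(ul, 4096, 1, GFP_KERNEL);	/* val = 4096, aux = 1 */
	WARN_ON(ulist_del(ul, 4096, 2) == 0);	/* aux differs: returns 1 */
	WARN_ON(ulist_del(ul, 4096, 1) != 0);	/* both match: returns 0 */
	ulist_free(ul);
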
/**
* ulist_next - iterate ulist
* @ulist: ulist to iterate
@@ -237,15 +270,7 @@ struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter)
uiter->cur_list = uiter->cur_list->next;
} else {
uiter->cur_list = ulist->nodes.next;
-#ifdef CONFIG_BTRFS_DEBUG
- uiter->i = 0;
-#endif
}
node = list_entry(uiter->cur_list, struct ulist_node, list);
-#ifdef CONFIG_BTRFS_DEBUG
- ASSERT(node->seqnum == uiter->i);
- ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes);
- uiter->i++;
-#endif
return node;
}
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 4c29db604bbe..a01a2c45825f 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -57,6 +57,7 @@ void ulist_free(struct ulist *ulist);
int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
u64 *old_aux, gfp_t gfp_mask);
+int ulist_del(struct ulist *ulist, u64 val, u64 aux);
/* just like ulist_add_merge() but take a pointer for the aux data */
static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 53af23f2c087..4b438b4c8c91 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -52,6 +52,10 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
+struct list_head *btrfs_get_fs_uuids(void)
+{
+ return &fs_uuids;
+}
static struct btrfs_fs_devices *__alloc_fs_devices(void)
{
@@ -441,6 +445,61 @@ static void pending_bios_fn(struct btrfs_work *work)
run_scheduled_bios(device);
}
+
+void btrfs_free_stale_device(struct btrfs_device *cur_dev)
+{
+ struct btrfs_fs_devices *fs_devs;
+ struct btrfs_device *dev;
+
+ if (!cur_dev->name)
+ return;
+
+ list_for_each_entry(fs_devs, &fs_uuids, list) {
+ int del = 1;
+
+ if (fs_devs->opened)
+ continue;
+ if (fs_devs->seeding)
+ continue;
+
+ list_for_each_entry(dev, &fs_devs->devices, dev_list) {
+
+ if (dev == cur_dev)
+ continue;
+ if (!dev->name)
+ continue;
+
+ /*
+ * Todo: This won't be enough. What if the same device
+ * comes back (with a new uuid) via its mapper path?
+ * But for now this does help, as an admin will mostly
+ * use either the mapper or the non-mapper path throughout.
+ */
+ rcu_read_lock();
+ del = strcmp(rcu_str_deref(dev->name),
+ rcu_str_deref(cur_dev->name));
+ rcu_read_unlock();
+ if (!del)
+ break;
+ }
+
+ if (!del) {
+ /* delete the stale device */
+ if (fs_devs->num_devices == 1) {
+ btrfs_sysfs_remove_fsid(fs_devs);
+ list_del(&fs_devs->list);
+ free_fs_devices(fs_devs);
+ } else {
+ fs_devs->num_devices--;
+ list_del(&dev->dev_list);
+ rcu_string_free(dev->name);
+ kfree(dev);
+ }
+ break;
+ }
+ }
+}
+
/*
* Add new device to list of registered devices
*
@@ -556,6 +615,12 @@ static noinline int device_list_add(const char *path,
if (!fs_devices->opened)
device->generation = found_transid;
+ /*
+ * If a new btrfs filesystem shows up on an already registered
+ * device, remove the stale device entry.
+ */
+ btrfs_free_stale_device(device);
+
*fs_devices_ret = fs_devices;
return ret;
@@ -693,13 +758,13 @@ static void free_device(struct rcu_head *head)
static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
- struct btrfs_device *device;
+ struct btrfs_device *device, *tmp;
if (--fs_devices->opened > 0)
return 0;
mutex_lock(&fs_devices->device_list_mutex);
- list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
struct btrfs_device *new_device;
struct rcu_string *name;
@@ -1067,15 +1132,31 @@ again:
map = (struct map_lookup *)em->bdev;
for (i = 0; i < map->num_stripes; i++) {
+ u64 end;
+
if (map->stripes[i].dev != device)
continue;
if (map->stripes[i].physical >= physical_start + len ||
map->stripes[i].physical + em->orig_block_len <=
physical_start)
continue;
- *start = map->stripes[i].physical +
- em->orig_block_len;
- ret = 1;
+ /*
+ * Make sure that while processing the pinned list we do
+ * not override our *start with a lower value, because
+ * we can have pinned chunks that fall within this
+ * device hole and that have lower physical addresses
+ * than the pending chunks we processed before. If we
+ * do not take this special care we can end up getting
+ * 2 pending chunks that start at the same physical
+ * device offsets because the end offset of a pinned
+ * chunk can be equal to the start offset of some
+ * pending chunk.
+ */
+ end = map->stripes[i].physical + em->orig_block_len;
+ if (end > *start) {
+ *start = end;
+ ret = 1;
+ }
}
}
if (search_list == &trans->transaction->pending_chunks) {
@@ -1706,7 +1787,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
if (device->bdev) {
device->fs_devices->open_devices--;
/* remove sysfs entry */
- btrfs_kobj_rm_device(root->fs_info, device);
+ btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
}
call_rcu(&device->rcu, free_device);
@@ -1875,6 +1956,9 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
mutex_lock(&uuid_mutex);
WARN_ON(!tgtdev);
mutex_lock(&fs_info->fs_devices->device_list_mutex);
+
+ btrfs_kobj_rm_device(fs_info->fs_devices, tgtdev);
+
if (tgtdev->bdev) {
btrfs_scratch_superblock(tgtdev);
fs_info->fs_devices->open_devices--;
@@ -2211,7 +2295,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
tmp + 1);
/* add sysfs device entry */
- btrfs_kobj_add_device(root->fs_info, device);
+ btrfs_kobj_add_device(root->fs_info->fs_devices, device);
/*
* we've got more storage, clear any full flags on the space
@@ -2252,8 +2336,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
*/
snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
root->fs_info->fsid);
- if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
- goto error_trans;
+ if (kobject_rename(&root->fs_info->fs_devices->super_kobj,
+ fsid_buf))
+ pr_warn("BTRFS: sysfs: failed to create fsid for sprout\n");
}
root->fs_info->num_tolerated_disk_barrier_failures =
@@ -2289,7 +2374,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
error_trans:
btrfs_end_transaction(trans, root);
rcu_string_free(device->name);
- btrfs_kobj_rm_device(root->fs_info, device);
+ btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
kfree(device);
error:
blkdev_put(bdev, FMODE_EXCL);
@@ -2609,6 +2694,9 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
return -EINVAL;
}
map = (struct map_lookup *)em->bdev;
+ lock_chunks(root->fs_info->chunk_root);
+ check_system_chunk(trans, extent_root, map->type);
+ unlock_chunks(root->fs_info->chunk_root);
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev;
@@ -3908,9 +3996,9 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
uuid_root = btrfs_create_tree(trans, fs_info,
BTRFS_UUID_TREE_OBJECTID);
if (IS_ERR(uuid_root)) {
- btrfs_abort_transaction(trans, tree_root,
- PTR_ERR(uuid_root));
- return PTR_ERR(uuid_root);
+ ret = PTR_ERR(uuid_root);
+ btrfs_abort_transaction(trans, tree_root, ret);
+ return ret;
}
fs_info->uuid_root = uuid_root;
@@ -3965,6 +4053,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
int slot;
int failed = 0;
bool retried = false;
+ bool checked_pending_chunks = false;
struct extent_buffer *l;
struct btrfs_key key;
struct btrfs_super_block *super_copy = root->fs_info->super_copy;
@@ -4045,15 +4134,6 @@ again:
goto again;
} else if (failed && retried) {
ret = -ENOSPC;
- lock_chunks(root);
-
- btrfs_device_set_total_bytes(device, old_size);
- if (device->writeable)
- device->fs_devices->total_rw_bytes += diff;
- spin_lock(&root->fs_info->free_chunk_lock);
- root->fs_info->free_chunk_space += diff;
- spin_unlock(&root->fs_info->free_chunk_lock);
- unlock_chunks(root);
goto done;
}
@@ -4065,6 +4145,35 @@ again:
}
lock_chunks(root);
+
+ /*
+ * We checked in the above loop all device extents that were already in
+ * the device tree. However before we have updated the device's
+ * total_bytes to the new size, we might have had chunk allocations that
+ * have not completed yet (new block groups attached to transaction
+ * handles), and therefore their device extents were not yet in the
+ * device tree and we missed them in the loop above. So if we have any
+ * pending chunk using a device extent that overlaps the device range
+ * that we can not use anymore, commit the current transaction and
+ * repeat the search on the device tree - this way we guarantee we will
+ * not have chunks using device extents that end beyond 'new_size'.
+ */
+ if (!checked_pending_chunks) {
+ u64 start = new_size;
+ u64 len = old_size - new_size;
+
+ if (contains_pending_extent(trans, device, &start, len)) {
+ unlock_chunks(root);
+ checked_pending_chunks = true;
+ failed = 0;
+ retried = false;
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ goto done;
+ goto again;
+ }
+ }
+
btrfs_device_set_disk_total_bytes(device, new_size);
if (list_empty(&device->resized_list))
list_add_tail(&device->resized_list,
@@ -4079,6 +4188,16 @@ again:
btrfs_end_transaction(trans, root);
done:
btrfs_free_path(path);
+ if (ret) {
+ lock_chunks(root);
+ btrfs_device_set_total_bytes(device, old_size);
+ if (device->writeable)
+ device->fs_devices->total_rw_bytes += diff;
+ spin_lock(&root->fs_info->free_chunk_lock);
+ root->fs_info->free_chunk_space += diff;
+ spin_unlock(&root->fs_info->free_chunk_lock);
+ unlock_chunks(root);
+ }
return ret;
}
@@ -6072,6 +6191,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
free_extent_map(em);
return -EIO;
}
+ btrfs_warn(root->fs_info, "devid %llu uuid %pU is missing",
+ devid, uuid);
}
map->stripes[i].dev->in_fs_metadata = 1;
}
@@ -6191,10 +6312,11 @@ static int read_one_dev(struct btrfs_root *root,
if (!btrfs_test_opt(root, DEGRADED))
return -EIO;
- btrfs_warn(root->fs_info, "devid %llu missing", devid);
device = add_missing_dev(root, fs_devices, devid, dev_uuid);
if (!device)
return -ENOMEM;
+ btrfs_warn(root->fs_info, "devid %llu uuid %pU missing",
+ devid, dev_uuid);
} else {
if (!device->bdev && !btrfs_test_opt(root, DEGRADED))
return -EIO;
@@ -6722,3 +6844,21 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
}
unlock_chunks(root);
}
+
+void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ while (fs_devices) {
+ fs_devices->fs_info = fs_info;
+ fs_devices = fs_devices->seed;
+ }
+}
+
+void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ while (fs_devices) {
+ fs_devices->fs_info = NULL;
+ fs_devices = fs_devices->seed;
+ }
+}
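
These walkers are presumably meant to bracket a mount: set the
back-pointer once the seed chain is assembled, and clear it before
fs_info goes away so sysfs code never dereferences a stale pointer. A
sketch of the assumed call sites (the surrounding functions are not
part of this hunk):

	/* mount path, after the fs_devices seed chain is assembled */
	btrfs_set_fs_info_ptr(fs_info);

	/* ... fs_devices->fs_info usable by sysfs callbacks ... */

	/* unmount path, before fs_info is freed */
	btrfs_reset_fs_info_ptr(fs_info);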
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index cedae0356558..95842a909e7f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -253,6 +253,12 @@ struct btrfs_fs_devices {
* nonrot flag set
*/
int rotating;
+
+ struct btrfs_fs_info *fs_info;
+ /* sysfs kobjects */
+ struct kobject super_kobj;
+ struct kobject *device_dir_kobj;
+ struct completion kobj_unregister;
};
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
@@ -535,5 +541,8 @@ static inline void unlock_chunks(struct btrfs_root *root)
mutex_unlock(&root->fs_info->chunk_mutex);
}
+struct list_head *btrfs_get_fs_uuids(void);
+void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
+void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
#endif
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index a2172f3f69e3..e7b478b49985 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -192,6 +192,15 @@ config CIFS_SMB2
options are also slightly simpler (compared to CIFS) due
to protocol improvements.
+config CIFS_SMB311
+ bool "SMB3.1.1 network file system support (Experimental)"
+ depends on CIFS_SMB2 && INET
+
+ help
+ This enables experimental support for the newest dialect, SMB3.1.1.
+ This dialect includes improved security negotiation features.
+ If unsure, say N.
+
config CIFS_FSCACHE
bool "Provide CIFS client caching support"
depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 22b289a3b1c4..b406a32deb1f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -171,6 +171,10 @@ enum smb_version {
Smb_21,
Smb_30,
Smb_302,
+#ifdef CONFIG_CIFS_SMB311
+ Smb_311,
+#endif /* SMB311 */
+ Smb_version_err
};
struct mid_q_entry;
@@ -368,6 +372,8 @@ struct smb_version_operations {
void (*new_lease_key)(struct cifs_fid *);
int (*generate_signingkey)(struct cifs_ses *);
int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *);
+ int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
+ struct cifsFileInfo *src_file);
int (*query_mf_symlink)(unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const unsigned char *,
char *, unsigned int *);
@@ -386,6 +392,9 @@ struct smb_version_operations {
int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
struct cifsFileInfo *target_file, u64 src_off, u64 len,
u64 dest_off);
+ int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src,
+ struct cifsFileInfo *target_file, u64 src_off, u64 len,
+ u64 dest_off);
int (*validate_negotiate)(const unsigned int, struct cifs_tcon *);
ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *,
const unsigned char *, const unsigned char *, char *,
@@ -1617,4 +1626,8 @@ extern struct smb_version_values smb30_values;
#define SMB302_VERSION_STRING "3.02"
/*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */
extern struct smb_version_values smb302_values;
+#define SMB311_VERSION_STRING "3.1.1"
+#define ALT_SMB311_VERSION_STRING "3.11"
+extern struct smb_version_operations smb311_operations;
+extern struct smb_version_values smb311_values;
#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 5f9822ac0245..47b030da0781 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2255,6 +2255,8 @@ typedef struct {
/* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
+#define FILE_SUPPORTS_SPARSE_VDL 0x10000000 /* faster nonsparse extend */
+#define FILE_SUPPORTS_BLOCK_REFCOUNTING 0x08000000 /* allow ioctl dup extents */
#define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000
#define FILE_SUPPORTS_USN_JOURNAL 0x02000000
#define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000
@@ -2310,6 +2312,16 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */
char FileName[1];
} __attribute__((packed)) FILE_ALL_INFO; /* level 0x107 QPathInfo */
+typedef struct {
+ __le64 AllocationSize;
+ __le64 EndOfFile; /* size, i.e. offset to first free byte in file */
+ __le32 NumberOfLinks; /* hard links */
+ __u8 DeletePending;
+ __u8 Directory;
+ __u16 Pad;
+} __attribute__((packed)) FILE_STANDARD_INFO; /* level 0x102 QPathInfo */
+
+
/* defines for enumerating possible values of the Unix type field below */
#define UNIX_FILE 0
#define UNIX_DIR 1
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f26ffbfc64d8..672ef35c9f73 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -625,9 +625,8 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
server->negflavor = CIFS_NEGFLAVOR_UNENCAP;
memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
CIFS_CRYPTO_KEY_SIZE);
- } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC ||
- server->capabilities & CAP_EXTENDED_SECURITY) &&
- (pSMBr->EncryptionKeyLength == 0)) {
+ } else if (pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC ||
+ server->capabilities & CAP_EXTENDED_SECURITY) {
server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
rc = decode_ext_sec_blob(ses, pSMBr);
} else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8383d5ea4202..773f4dc77630 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -280,6 +280,11 @@ static const match_table_t cifs_smb_version_tokens = {
{ Smb_21, SMB21_VERSION_STRING },
{ Smb_30, SMB30_VERSION_STRING },
{ Smb_302, SMB302_VERSION_STRING },
+#ifdef CONFIG_CIFS_SMB311
+ { Smb_311, SMB311_VERSION_STRING },
+ { Smb_311, ALT_SMB311_VERSION_STRING },
+#endif /* SMB311 */
+ { Smb_version_err, NULL }
};
static int ip_connect(struct TCP_Server_Info *server);
@@ -1133,6 +1138,12 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
vol->ops = &smb30_operations; /* currently identical with 3.0 */
vol->vals = &smb302_values;
break;
+#ifdef CONFIG_CIFS_SMB311
+ case Smb_311:
+ vol->ops = &smb311_operations;
+ vol->vals = &smb311_values;
+ break;
+#endif /* SMB311 */
#endif
default:
cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
@@ -3461,6 +3472,8 @@ try_mount_again:
else if (ses)
cifs_put_smb_ses(ses);
+ cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
+
free_xid(xid);
}
#endif
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 8b7898b7670f..49b8b6e41a18 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -31,12 +31,15 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifsfs.h"
+#include <linux/btrfs.h>
#define CIFS_IOCTL_MAGIC 0xCF
#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
+#define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4)
static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
- unsigned long srcfd, u64 off, u64 len, u64 destoff)
+ unsigned long srcfd, u64 off, u64 len, u64 destoff,
+ bool dup_extents)
{
int rc;
struct cifsFileInfo *smb_file_target = dst_file->private_data;
@@ -109,9 +112,14 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
truncate_inode_pages_range(&target_inode->i_data, destoff,
PAGE_CACHE_ALIGN(destoff + len)-1);
- if (target_tcon->ses->server->ops->clone_range)
+ if (dup_extents && target_tcon->ses->server->ops->duplicate_extents)
+ rc = target_tcon->ses->server->ops->duplicate_extents(xid,
+ smb_file_src, smb_file_target, off, len, destoff);
+ else if (!dup_extents && target_tcon->ses->server->ops->clone_range)
rc = target_tcon->ses->server->ops->clone_range(xid,
smb_file_src, smb_file_target, off, len, destoff);
+ else
+ rc = -EOPNOTSUPP;
/* force revalidate of size and timestamps of target file now
that target is updated on the server */
@@ -205,7 +213,20 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
}
break;
case CIFS_IOC_COPYCHUNK_FILE:
- rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0);
+ rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false);
+ break;
+ case BTRFS_IOC_CLONE:
+ rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true);
+ break;
+ case CIFS_IOC_SET_INTEGRITY:
+ if (pSMBFile == NULL)
+ break;
+ tcon = tlink_tcon(pSMBFile->tlink);
+ if (tcon->ses->server->ops->set_integrity)
+ rc = tcon->ses->server->ops->set_integrity(xid,
+ tcon, pSMBFile);
+ else
+ rc = -EOPNOTSUPP;
break;
default:
cifs_dbg(FYI, "unsupported ioctl\n");
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 54daee5ad4c1..df91bcf56d67 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -806,6 +806,53 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
cfile->fid.volatile_fid, cfile->pid, &eof, false);
}
+#ifdef CONFIG_CIFS_SMB311
+static int
+smb2_duplicate_extents(const unsigned int xid,
+ struct cifsFileInfo *srcfile,
+ struct cifsFileInfo *trgtfile, u64 src_off,
+ u64 len, u64 dest_off)
+{
+ int rc;
+ unsigned int ret_data_len;
+ char *retbuf = NULL;
+ struct duplicate_extents_to_file dup_ext_buf;
+ struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink);
+
+ /* server filesystems advertise duplicate extent support with this flag */
+ if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) &
+ FILE_SUPPORTS_BLOCK_REFCOUNTING) == 0)
+ return -EOPNOTSUPP;
+
+ dup_ext_buf.VolatileFileHandle = srcfile->fid.volatile_fid;
+ dup_ext_buf.PersistentFileHandle = srcfile->fid.persistent_fid;
+ dup_ext_buf.SourceFileOffset = cpu_to_le64(src_off);
+ dup_ext_buf.TargetFileOffset = cpu_to_le64(dest_off);
+ dup_ext_buf.ByteCount = cpu_to_le64(len);
+ cifs_dbg(FYI, "duplicate extents: src off %lld dst off %lld len %lld",
+ src_off, dest_off, len);
+
+ rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false);
+ if (rc)
+ goto duplicate_extents_out;
+
+ rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
+ trgtfile->fid.volatile_fid,
+ FSCTL_DUPLICATE_EXTENTS_TO_FILE,
+ true /* is_fsctl */, (char *)&dup_ext_buf,
+ sizeof(struct duplicate_extents_to_file),
+ (char **)&retbuf,
+ &ret_data_len);
+
+ if (ret_data_len > 0)
+ cifs_dbg(FYI, "non-zero response length in duplicate extents");
+
+duplicate_extents_out:
+ return rc;
+}
+#endif /* CONFIG_CIFS_SMB311 */
+
+
static int
smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile)
@@ -815,6 +862,28 @@ smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
}
static int
+smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile)
+{
+ struct fsctl_set_integrity_information_req integr_info;
+ char *retbuf = NULL;
+ unsigned int ret_data_len;
+
+ integr_info.ChecksumAlgorithm = cpu_to_le16(CHECKSUM_TYPE_UNCHANGED);
+ integr_info.Flags = 0;
+ integr_info.Reserved = 0;
+
+ return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid,
+ FSCTL_SET_INTEGRITY_INFORMATION,
+ true /* is_fsctl */, (char *)&integr_info,
+ sizeof(struct fsctl_set_integrity_information_req),
+ (char **)&retbuf,
+ &ret_data_len);
+
+}
+
+static int
smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
const char *path, struct cifs_sb_info *cifs_sb,
struct cifs_fid *fid, __u16 search_flags,
@@ -1624,6 +1693,7 @@ struct smb_version_operations smb30_operations = {
.new_lease_key = smb2_new_lease_key,
.generate_signingkey = generate_smb3signingkey,
.calc_signature = smb3_calc_signature,
+ .set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
.set_oplock_level = smb3_set_oplock_level,
.create_lease_buf = smb3_create_lease_buf,
@@ -1635,6 +1705,94 @@ struct smb_version_operations smb30_operations = {
.fallocate = smb3_fallocate,
};
+#ifdef CONFIG_CIFS_SMB311
+struct smb_version_operations smb311_operations = {
+ .compare_fids = smb2_compare_fids,
+ .setup_request = smb2_setup_request,
+ .setup_async_request = smb2_setup_async_request,
+ .check_receive = smb2_check_receive,
+ .add_credits = smb2_add_credits,
+ .set_credits = smb2_set_credits,
+ .get_credits_field = smb2_get_credits_field,
+ .get_credits = smb2_get_credits,
+ .wait_mtu_credits = smb2_wait_mtu_credits,
+ .get_next_mid = smb2_get_next_mid,
+ .read_data_offset = smb2_read_data_offset,
+ .read_data_length = smb2_read_data_length,
+ .map_error = map_smb2_to_linux_error,
+ .find_mid = smb2_find_mid,
+ .check_message = smb2_check_message,
+ .dump_detail = smb2_dump_detail,
+ .clear_stats = smb2_clear_stats,
+ .print_stats = smb2_print_stats,
+ .dump_share_caps = smb2_dump_share_caps,
+ .is_oplock_break = smb2_is_valid_oplock_break,
+ .downgrade_oplock = smb2_downgrade_oplock,
+ .need_neg = smb2_need_neg,
+ .negotiate = smb2_negotiate,
+ .negotiate_wsize = smb2_negotiate_wsize,
+ .negotiate_rsize = smb2_negotiate_rsize,
+ .sess_setup = SMB2_sess_setup,
+ .logoff = SMB2_logoff,
+ .tree_connect = SMB2_tcon,
+ .tree_disconnect = SMB2_tdis,
+ .qfs_tcon = smb3_qfs_tcon,
+ .is_path_accessible = smb2_is_path_accessible,
+ .can_echo = smb2_can_echo,
+ .echo = SMB2_echo,
+ .query_path_info = smb2_query_path_info,
+ .get_srv_inum = smb2_get_srv_inum,
+ .query_file_info = smb2_query_file_info,
+ .set_path_size = smb2_set_path_size,
+ .set_file_size = smb2_set_file_size,
+ .set_file_info = smb2_set_file_info,
+ .set_compression = smb2_set_compression,
+ .mkdir = smb2_mkdir,
+ .mkdir_setinfo = smb2_mkdir_setinfo,
+ .rmdir = smb2_rmdir,
+ .unlink = smb2_unlink,
+ .rename = smb2_rename_path,
+ .create_hardlink = smb2_create_hardlink,
+ .query_symlink = smb2_query_symlink,
+ .query_mf_symlink = smb3_query_mf_symlink,
+ .create_mf_symlink = smb3_create_mf_symlink,
+ .open = smb2_open_file,
+ .set_fid = smb2_set_fid,
+ .close = smb2_close_file,
+ .flush = smb2_flush_file,
+ .async_readv = smb2_async_readv,
+ .async_writev = smb2_async_writev,
+ .sync_read = smb2_sync_read,
+ .sync_write = smb2_sync_write,
+ .query_dir_first = smb2_query_dir_first,
+ .query_dir_next = smb2_query_dir_next,
+ .close_dir = smb2_close_dir,
+ .calc_smb_size = smb2_calc_size,
+ .is_status_pending = smb2_is_status_pending,
+ .oplock_response = smb2_oplock_response,
+ .queryfs = smb2_queryfs,
+ .mand_lock = smb2_mand_lock,
+ .mand_unlock_range = smb2_unlock_range,
+ .push_mand_locks = smb2_push_mandatory_locks,
+ .get_lease_key = smb2_get_lease_key,
+ .set_lease_key = smb2_set_lease_key,
+ .new_lease_key = smb2_new_lease_key,
+ .generate_signingkey = generate_smb3signingkey,
+ .calc_signature = smb3_calc_signature,
+ .set_integrity = smb3_set_integrity,
+ .is_read_op = smb21_is_read_op,
+ .set_oplock_level = smb3_set_oplock_level,
+ .create_lease_buf = smb3_create_lease_buf,
+ .parse_lease_buf = smb3_parse_lease_buf,
+ .clone_range = smb2_clone_range,
+ .duplicate_extents = smb2_duplicate_extents,
+/* .validate_negotiate = smb3_validate_negotiate, */ /* not used in 3.11 */
+ .wp_retry_size = smb2_wp_retry_size,
+ .dir_needs_close = smb2_dir_needs_close,
+ .fallocate = smb3_fallocate,
+};
+#endif /* CIFS_SMB311 */
+
struct smb_version_values smb20_values = {
.version_string = SMB20_VERSION_STRING,
.protocol_id = SMB20_PROT_ID,
@@ -1714,3 +1872,25 @@ struct smb_version_values smb302_values = {
.signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
.create_lease_size = sizeof(struct create_lease_v2),
};
+
+#ifdef CONFIG_CIFS_SMB311
+struct smb_version_values smb311_values = {
+ .version_string = SMB311_VERSION_STRING,
+ .protocol_id = SMB311_PROT_ID,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .create_lease_size = sizeof(struct create_lease_v2),
+};
+#endif /* SMB311 */
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 54cbe19d9c08..b8b4f08ee094 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -304,6 +304,59 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
return rc;
}
+#ifdef CONFIG_CIFS_SMB311
+/* offset is sizeof smb2_negotiate_req - 4 but rounded up to 8 bytes */
+#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) - 4 */
+
+
+#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1)
+#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2)
+
+static void
+build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt)
+{
+ pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES;
+ pneg_ctxt->DataLength = cpu_to_le16(38);
+ pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1);
+ pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE);
+ get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE);
+ pneg_ctxt->HashAlgorithms = SMB2_PREAUTH_INTEGRITY_SHA512;
+}
+
+static void
+build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
+{
+ pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES;
+ pneg_ctxt->DataLength = cpu_to_le16(6);
+ pneg_ctxt->CipherCount = cpu_to_le16(2);
+ pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;
+ pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM;
+}
+
+static void
+assemble_neg_contexts(struct smb2_negotiate_req *req)
+{
+
+ /* +4 is to account for the RFC1001 len field */
+ char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT + 4;
+
+ build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
+ /* Add 2 to size to round to 8 byte boundary */
+ pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context);
+ build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
+ req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
+ req->NegotiateContextCount = cpu_to_le16(2);
+ inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2
+ + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
+}
+#else
+static void assemble_neg_contexts(struct smb2_negotiate_req *req)
+{
+ return;
+}
+#endif /* SMB311 */
+
+
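
As a cross-check of the DataLength values hard-coded in
build_preauth_ctxt() and build_encrypt_ctxt(), note that DataLength
excludes the 8-byte context header (ContextType + DataLength +
Reserved). The assertions below are a sketch, not part of the patch:

BUILD_BUG_ON(sizeof(struct smb2_preauth_neg_context) != 38 + 8);
BUILD_BUG_ON(sizeof(struct smb2_encryption_neg_context) != 6 + 8);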
/*
*
* SMB2 Worker functions follow:
@@ -363,10 +416,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
/* ClientGUID must be zero for SMB2.02 dialect */
if (ses->server->vals->protocol_id == SMB20_PROT_ID)
memset(req->ClientGUID, 0, SMB2_CLIENT_GUID_SIZE);
- else
+ else {
memcpy(req->ClientGUID, server->client_guid,
SMB2_CLIENT_GUID_SIZE);
-
+ if (ses->server->vals->protocol_id == SMB311_PROT_ID)
+ assemble_neg_contexts(req);
+ }
iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field */
iov[0].iov_len = get_rfc1002_length(req) + 4;
@@ -393,8 +448,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
cifs_dbg(FYI, "negotiated smb3.0 dialect\n");
else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID))
cifs_dbg(FYI, "negotiated smb3.02 dialect\n");
+#ifdef CONFIG_CIFS_SMB311
+ else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
+ cifs_dbg(FYI, "negotiated smb3.1.1 dialect\n");
+#endif /* SMB311 */
else {
- cifs_dbg(VFS, "Illegal dialect returned by server %d\n",
+ cifs_dbg(VFS, "Illegal dialect returned by server 0x%x\n",
le16_to_cpu(rsp->DialectRevision));
rc = -EIO;
goto neg_exit;
@@ -572,7 +631,7 @@ ssetup_ntlmssp_authenticate:
return rc;
req->hdr.SessionId = 0; /* First session, not a reauthenticate */
- req->VcNumber = 0; /* MBZ */
+ req->Flags = 0; /* MBZ */
/* to enable echos and oplocks */
req->hdr.CreditRequest = cpu_to_le16(3);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 70867d54fb8b..451108284a2f 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -136,9 +136,6 @@ struct smb2_transform_hdr {
__u64 SessionId;
} __packed;
-/* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
-
/*
* SMB2 flag definitions
*/
@@ -191,7 +188,10 @@ struct smb2_negotiate_req {
__le16 Reserved; /* MBZ */
__le32 Capabilities;
__u8 ClientGUID[SMB2_CLIENT_GUID_SIZE];
- __le64 ClientStartTime; /* MBZ */
+ /* In SMB3.02 and earlier next three were MBZ le64 ClientStartTime */
+ __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */
+ __le16 NegotiateContextCount; /* SMB3.1.1 only. MBZ earlier */
+ __le16 Reserved2;
__le16 Dialects[1]; /* One dialect (vers=) at a time for now */
} __packed;
@@ -200,6 +200,7 @@ struct smb2_negotiate_req {
#define SMB21_PROT_ID 0x0210
#define SMB30_PROT_ID 0x0300
#define SMB302_PROT_ID 0x0302
+#define SMB311_PROT_ID 0x0311
#define BAD_PROT_ID 0xFFFF
/* SecurityMode flags */
@@ -217,12 +218,38 @@ struct smb2_negotiate_req {
#define SMB2_NT_FIND 0x00100000
#define SMB2_LARGE_FILES 0x00200000
+#define SMB311_SALT_SIZE 32
+/* Hash Algorithm Types */
+#define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001)
+
+struct smb2_preauth_neg_context {
+ __le16 ContextType; /* 1 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 HashAlgorithmCount; /* 1 */
+ __le16 SaltLength;
+ __le16 HashAlgorithms; /* HashAlgorithms[0] since only one defined */
+ __u8 Salt[SMB311_SALT_SIZE];
+} __packed;
+
+/* Encryption Algorithms Ciphers */
+#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_GCM cpu_to_le16(0x0002)
+
+struct smb2_encryption_neg_context {
+ __le16 ContextType; /* 2 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 CipherCount; /* 2 = both ciphers below are offered */
+ __le16 Ciphers[2]; /* AES-128-GCM and AES-128-CCM */
+} __packed;
+
struct smb2_negotiate_rsp {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 65 */
__le16 SecurityMode;
__le16 DialectRevision;
- __le16 Reserved; /* MBZ */
+ __le16 NegotiateContextCount; /* Prior to SMB3.1.1 was Reserved & MBZ */
__u8 ServerGUID[16];
__le32 Capabilities;
__le32 MaxTransactSize;
@@ -232,14 +259,18 @@ struct smb2_negotiate_rsp {
__le64 ServerStartTime;
__le16 SecurityBufferOffset;
__le16 SecurityBufferLength;
- __le32 Reserved2; /* may be any value, ignore */
+ __le32 NegotiateContextOffset; /* Pre:SMB3.1.1 was reserved/ignored */
__u8 Buffer[1]; /* variable length GSS security buffer */
} __packed;
+/* Flags */
+#define SMB2_SESSION_REQ_FLAG_BINDING 0x01
+#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04
+
struct smb2_sess_setup_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 25 */
- __u8 VcNumber;
+ __u8 Flags;
__u8 SecurityMode;
__le32 Capabilities;
__le32 Channel;
@@ -274,10 +305,13 @@ struct smb2_logoff_rsp {
__le16 Reserved;
} __packed;
+/* Flags/Reserved for SMB3.1.1 */
+#define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001
+
struct smb2_tree_connect_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 9 */
- __le16 Reserved;
+ __le16 Reserved; /* Flags in SMB3.1.1 */
__le16 PathOffset;
__le16 PathLength;
__u8 Buffer[1]; /* variable length */
@@ -587,6 +621,29 @@ struct copychunk_ioctl_rsp {
__le32 TotalBytesWritten;
} __packed;
+struct fsctl_set_integrity_information_req {
+ __le16 ChecksumAlgorithm;
+ __le16 Reserved;
+ __le32 Flags;
+} __packed;
+
+struct fsctl_get_integrity_information_rsp {
+ __le16 ChecksumAlgorithm;
+ __le16 Reserved;
+ __le32 Flags;
+ __le32 ChecksumChunkSizeInBytes;
+ __le32 ClusterSizeInBytes;
+} __packed;
+
+/* Integrity ChecksumAlgorithm choices for above */
+#define CHECKSUM_TYPE_NONE 0x0000
+#define CHECKSUM_TYPE_CRC64 0x0002
+#define CHECKSUM_TYPE_UNCHANGED 0xFFFF /* set only */
+
+/* Integrity flags for above */
+#define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001
+
+
struct validate_negotiate_info_req {
__le32 Capabilities;
__u8 Guid[SMB2_CLIENT_GUID_SIZE];
@@ -620,6 +677,14 @@ struct compress_ioctl {
__le16 CompressionState; /* See cifspdu.h for possible flag values */
} __packed;
+struct duplicate_extents_to_file {
+ __u64 PersistentFileHandle; /* source file handle, opaque endianness */
+ __u64 VolatileFileHandle;
+ __le64 SourceFileOffset;
+ __le64 TargetFileOffset;
+ __le64 ByteCount; /* Bytes to be copied */
+} __packed;
+
struct smb2_ioctl_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 57 */
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index 83efa59535be..a639d0dab453 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -75,10 +75,13 @@
#define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */
#define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */
#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
+#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */
#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */
+#define FSCTL_DUPLICATE_EXTENTS_TO_FILE 0x00098344
#define FSCTL_SIS_LINK_FILES 0x0009C104
+#define FSCTL_SET_INTEGRITY_INFORMATION 0x0009C280
#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
#define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */
/* strange that the number for this op is not sequential with previous op */
diff --git a/fs/dax.c b/fs/dax.c
index 6f65f00e58ec..99b5fbc38992 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -309,14 +309,21 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
out:
i_mmap_unlock_read(mapping);
- if (bh->b_end_io)
- bh->b_end_io(bh, 1);
-
return error;
}
-static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- get_block_t get_block)
+/**
+ * __dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @get_block: The filesystem method used to translate file offsets to blocks
+ *
+ * When a page fault occurs, filesystems may call this helper in their
+ * fault handler for DAX files. __dax_fault() assumes the caller has done all
+ * the necessary locking for the page fault to proceed successfully.
+ */
+int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ get_block_t get_block, dax_iodone_t complete_unwritten)
{
struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
@@ -417,7 +424,19 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
page_cache_release(page);
}
+ /*
+ * If we successfully insert the new mapping over an unwritten extent,
+ * we need to ensure we convert the unwritten extent. If there is an
+ * error inserting the mapping, the filesystem needs to leave it as
+ * unwritten to prevent exposure of the stale underlying data to
+ * userspace, but we still need to call the completion function so
+ * the private resources on the mapping buffer can be released. We
+ * indicate what the callback should do via the uptodate variable, same
+ * as for normal BH based IO completions.
+ */
error = dax_insert_mapping(inode, &bh, vma, vmf);
+ if (buffer_unwritten(&bh))
+ complete_unwritten(&bh, !error);
out:
if (error == -ENOMEM)
@@ -434,6 +453,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
}
goto out;
}
+EXPORT_SYMBOL(__dax_fault);
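
__dax_fault() is exported precisely so a filesystem that manages its own
mmap locking can call it from inside that locking; a hedged sketch of
such a caller (all myfs_* names are hypothetical):

static int myfs_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	int ret;

	myfs_mmap_lock(inode);		/* hypothetical fs-private lock */
	ret = __dax_fault(vma, vmf, myfs_get_block, myfs_end_io_unwritten);
	myfs_mmap_unlock(inode);

	return ret;
}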
/**
* dax_fault - handle a page fault on a DAX file
@@ -445,7 +465,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
* fault handler for DAX files.
*/
int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- get_block_t get_block)
+ get_block_t get_block, dax_iodone_t complete_unwritten)
{
int result;
struct super_block *sb = file_inode(vma->vm_file)->i_sb;
@@ -454,7 +474,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
}
- result = do_dax_fault(vma, vmf, get_block);
+ result = __dax_fault(vma, vmf, get_block, complete_unwritten);
if (vmf->flags & FAULT_FLAG_WRITE)
sb_end_pagefault(sb);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 3a0a6c6406d0..3b57c9f83c9b 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -28,12 +28,12 @@
#ifdef CONFIG_FS_DAX
static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_fault(vma, vmf, ext2_get_block);
+ return dax_fault(vma, vmf, ext2_get_block, NULL);
}
static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_mkwrite(vma, vmf, ext2_get_block);
+ return dax_mkwrite(vma, vmf, ext2_get_block, NULL);
}
static const struct vm_operations_struct ext2_dax_vm_ops = {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac517f15741c..bc313ac5d3fa 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -192,15 +192,27 @@ out:
}
#ifdef CONFIG_FS_DAX
+static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
+{
+ struct inode *inode = bh->b_assoc_map->host;
+ /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
+ loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
+ int err;
+ if (!uptodate)
+ return;
+ WARN_ON(!buffer_unwritten(bh));
+ err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
+}
+
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_fault(vma, vmf, ext4_get_block);
+ return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
/* Is this the right get_block? */
}
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_mkwrite(vma, vmf, ext4_get_block);
+ return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
}
static const struct vm_operations_struct ext4_dax_vm_ops = {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f8a8d4ee7459..41f8e55afcd1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -656,18 +656,6 @@ has_zeroout:
return retval;
}
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
- struct inode *inode = bh->b_assoc_map->host;
- /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
- loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
- int err;
- if (!uptodate)
- return;
- WARN_ON(!buffer_unwritten(bh));
- err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
@@ -705,10 +693,15 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
- if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
+ if (IS_DAX(inode) && buffer_unwritten(bh)) {
+ /*
+ * dgc: I suspect unwritten conversion on ext4+DAX is
+ * fundamentally broken here when there are concurrent
+ * read/write in progress on this inode.
+ */
+ WARN_ON_ONCE(io_end);
bh->b_assoc_map = inode->i_mapping;
bh->b_private = (void *)(unsigned long)iblock;
- bh->b_end_io = ext4_end_io_unwritten;
}
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
set_buffer_defer_completion(bh);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f175b833b6ba..aa62004f1706 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2847,7 +2847,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp)
*((unsigned int *)kp->arg) = num;
return 0;
}
-static struct kernel_param_ops param_ops_portnr = {
+static const struct kernel_param_ops param_ops_portnr = {
.set = param_set_portnr,
.get = param_get_uint,
};
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 760e25dad985..1d9c1cbd4d0b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -541,6 +541,7 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc)
return res;
}
+EXPORT_SYMBOL(seq_dentry);
static void *single_start(struct seq_file *p, loff_t *pos)
{
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 516162be1398..f9e9ffe6fb46 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -149,13 +149,27 @@ xfs_alloc_compute_aligned(
{
xfs_agblock_t bno;
xfs_extlen_t len;
+ xfs_extlen_t diff;
/* Trim busy sections out of found extent */
xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+ /*
+ * If we have a largish extent that happens to start before min_agbno,
+ * see if we can shift it into range...
+ */
+ if (bno < args->min_agbno && bno + len > args->min_agbno) {
+ diff = args->min_agbno - bno;
+ if (len > diff) {
+ bno += diff;
+ len -= diff;
+ }
+ }
+
if (args->alignment > 1 && len >= args->minlen) {
xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
- xfs_extlen_t diff = aligned_bno - bno;
+
+ diff = aligned_bno - bno;
*resbno = aligned_bno;
*reslen = diff >= len ? 0 : len - diff;
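
A worked example of the shift-into-range logic above, with invented
numbers:

/*
 * foundbno = 90, foundlen = 20, args->min_agbno = 100:
 *   diff = 100 - 90 = 10, and len (20) > diff (10)
 *   => bno becomes 100, len becomes 10: the tail of the extent is used.
 * If len <= diff, the extent is left untouched here and later rejected
 * by the min_agbno/max_agbno checks in xfs_alloc_ag_vextent_near().
 */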
@@ -795,9 +809,13 @@ xfs_alloc_find_best_extent(
* The good extent is closer than this one.
*/
if (!dir) {
+ if (*sbnoa > args->max_agbno)
+ goto out_use_good;
if (*sbnoa >= args->agbno + gdiff)
goto out_use_good;
} else {
+ if (*sbnoa < args->min_agbno)
+ goto out_use_good;
if (*sbnoa <= args->agbno - gdiff)
goto out_use_good;
}
@@ -884,6 +902,17 @@ xfs_alloc_ag_vextent_near(
dofirst = prandom_u32() & 1;
#endif
+ /* handle an uninitialized agbno range so the caller doesn't have to */
+ if (!args->min_agbno && !args->max_agbno)
+ args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
+ ASSERT(args->min_agbno <= args->max_agbno);
+
+ /* clamp agbno to the range if it's outside */
+ if (args->agbno < args->min_agbno)
+ args->agbno = args->min_agbno;
+ if (args->agbno > args->max_agbno)
+ args->agbno = args->max_agbno;
+
restart:
bno_cur_lt = NULL;
bno_cur_gt = NULL;
@@ -976,6 +1005,8 @@ restart:
&ltbnoa, &ltlena);
if (ltlena < args->minlen)
continue;
+ if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
+ continue;
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
ASSERT(args->len >= args->minlen);
@@ -1096,11 +1127,11 @@ restart:
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
xfs_alloc_compute_aligned(args, ltbno, ltlen,
&ltbnoa, &ltlena);
- if (ltlena >= args->minlen)
+ if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
break;
if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
goto error0;
- if (!i) {
+ if (!i || ltbnoa < args->min_agbno) {
xfs_btree_del_cursor(bno_cur_lt,
XFS_BTREE_NOERROR);
bno_cur_lt = NULL;
@@ -1112,11 +1143,11 @@ restart:
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
xfs_alloc_compute_aligned(args, gtbno, gtlen,
&gtbnoa, &gtlena);
- if (gtlena >= args->minlen)
+ if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
break;
if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
- if (!i) {
+ if (!i || gtbnoa > args->max_agbno) {
xfs_btree_del_cursor(bno_cur_gt,
XFS_BTREE_NOERROR);
bno_cur_gt = NULL;
@@ -1216,6 +1247,7 @@ restart:
ASSERT(ltnew >= ltbno);
ASSERT(ltnew + rlen <= ltbnoa + ltlena);
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+ ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
args->agbno = ltnew;
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
@@ -1825,11 +1857,11 @@ xfs_alloc_compute_maxlevels(
xfs_extlen_t
xfs_alloc_longest_free_extent(
struct xfs_mount *mp,
- struct xfs_perag *pag)
+ struct xfs_perag *pag,
+ xfs_extlen_t need)
{
- xfs_extlen_t need, delta = 0;
+ xfs_extlen_t delta = 0;
- need = XFS_MIN_FREELIST_PAG(pag, mp);
if (need > pag->pagf_flcount)
delta = need - pag->pagf_flcount;
@@ -1838,131 +1870,150 @@ xfs_alloc_longest_free_extent(
return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
}
+unsigned int
+xfs_alloc_min_freelist(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag)
+{
+ unsigned int min_free;
+
+ /* space needed by-bno freespace btree */
+ min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
+ mp->m_ag_maxlevels);
+ /* space needed by-size freespace btree */
+ min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
+ mp->m_ag_maxlevels);
+
+ return min_free;
+}
+
+/*
+ * Check if the operation we are fixing up the freelist for should go ahead or
+ * not. If we are freeing blocks, we always allow it, otherwise the allocation
+ * is dependent on whether the size and shape of free space available will
+ * permit the requested allocation to take place.
+ */
+static bool
+xfs_alloc_space_available(
+ struct xfs_alloc_arg *args,
+ xfs_extlen_t min_free,
+ int flags)
+{
+ struct xfs_perag *pag = args->pag;
+ xfs_extlen_t longest;
+ int available;
+
+ if (flags & XFS_ALLOC_FLAG_FREEING)
+ return true;
+
+ /* do we have enough contiguous free space for the allocation? */
+ longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
+ if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
+ return false;
+
+ /* do we have enough free space remaining for the allocation? */
+ available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
+ min_free - args->total);
+ if (available < (int)args->minleft)
+ return false;
+
+ return true;
+}
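
A numeric sketch of how the two helpers above interact (all values
invented):

/*
 * Both free-space btrees at level 1, m_ag_maxlevels = 5:
 *   xfs_alloc_min_freelist() = (1 + 1) + (1 + 1) = 4 blocks
 * AG state: pagf_freeblks = 100, pagf_flcount = 4, pagf_longest = 50
 * Request: minlen = 8, alignment = 1, minalignslop = 0,
 *          total = 90, minleft = 16
 *   contiguity: 8 + 1 + 0 - 1 = 8 <= longest (50)    -> ok
 *   headroom:   100 + 4 - 4 - 90 = 10 < minleft (16) -> reject
 */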
+
/*
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
STATIC int /* error */
xfs_alloc_fix_freelist(
- xfs_alloc_arg_t *args, /* allocation argument structure */
- int flags) /* XFS_ALLOC_FLAG_... */
+ struct xfs_alloc_arg *args, /* allocation argument structure */
+ int flags) /* XFS_ALLOC_FLAG_... */
{
- xfs_buf_t *agbp; /* agf buffer pointer */
- xfs_agf_t *agf; /* a.g. freespace structure pointer */
- xfs_buf_t *agflbp;/* agfl buffer pointer */
- xfs_agblock_t bno; /* freelist block */
- xfs_extlen_t delta; /* new blocks needed in freelist */
- int error; /* error result code */
- xfs_extlen_t longest;/* longest extent in allocation group */
- xfs_mount_t *mp; /* file system mount point structure */
- xfs_extlen_t need; /* total blocks needed in freelist */
- xfs_perag_t *pag; /* per-ag information structure */
- xfs_alloc_arg_t targs; /* local allocation arguments */
- xfs_trans_t *tp; /* transaction pointer */
-
- mp = args->mp;
+ struct xfs_mount *mp = args->mp;
+ struct xfs_perag *pag = args->pag;
+ struct xfs_trans *tp = args->tp;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_buf *agflbp = NULL;
+ struct xfs_alloc_arg targs; /* local allocation arguments */
+ xfs_agblock_t bno; /* freelist block */
+ xfs_extlen_t need; /* total blocks needed in freelist */
+ int error;
- pag = args->pag;
- tp = args->tp;
if (!pag->pagf_init) {
- if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
- &agbp)))
- return error;
+ error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
+ if (error)
+ goto out_no_agbp;
if (!pag->pagf_init) {
ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
- args->agbp = NULL;
- return 0;
+ goto out_agbp_relse;
}
- } else
- agbp = NULL;
+ }
/*
- * If this is a metadata preferred pag and we are user data
- * then try somewhere else if we are not being asked to
- * try harder at this point
+ * If this is a metadata preferred pag and we are user data then try
+ * somewhere else if we are not being asked to try harder at this
+ * point
*/
if (pag->pagf_metadata && args->userdata &&
(flags & XFS_ALLOC_FLAG_TRYLOCK)) {
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
- args->agbp = NULL;
- return 0;
+ goto out_agbp_relse;
}
- if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
- /*
- * If it looks like there isn't a long enough extent, or enough
- * total blocks, reject it.
- */
- need = XFS_MIN_FREELIST_PAG(pag, mp);
- longest = xfs_alloc_longest_free_extent(mp, pag);
- if ((args->minlen + args->alignment + args->minalignslop - 1) >
- longest ||
- ((int)(pag->pagf_freeblks + pag->pagf_flcount -
- need - args->total) < (int)args->minleft)) {
- if (agbp)
- xfs_trans_brelse(tp, agbp);
- args->agbp = NULL;
- return 0;
- }
- }
+ need = xfs_alloc_min_freelist(mp, pag);
+ if (!xfs_alloc_space_available(args, need, flags))
+ goto out_agbp_relse;
/*
* Get the a.g. freespace buffer.
* Can fail if we're not blocking on locks, and it's held.
*/
- if (agbp == NULL) {
- if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
- &agbp)))
- return error;
- if (agbp == NULL) {
+ if (!agbp) {
+ error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
+ if (error)
+ goto out_no_agbp;
+ if (!agbp) {
ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
- args->agbp = NULL;
- return 0;
- }
- }
- /*
- * Figure out how many blocks we should have in the freelist.
- */
- agf = XFS_BUF_TO_AGF(agbp);
- need = XFS_MIN_FREELIST(agf, mp);
- /*
- * If there isn't enough total or single-extent, reject it.
- */
- if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
- delta = need > be32_to_cpu(agf->agf_flcount) ?
- (need - be32_to_cpu(agf->agf_flcount)) : 0;
- longest = be32_to_cpu(agf->agf_longest);
- longest = (longest > delta) ? (longest - delta) :
- (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
- if ((args->minlen + args->alignment + args->minalignslop - 1) >
- longest ||
- ((int)(be32_to_cpu(agf->agf_freeblks) +
- be32_to_cpu(agf->agf_flcount) - need - args->total) <
- (int)args->minleft)) {
- xfs_trans_brelse(tp, agbp);
- args->agbp = NULL;
- return 0;
+ goto out_no_agbp;
}
}
+
+ /* If there isn't enough total space or single-extent, reject it. */
+ need = xfs_alloc_min_freelist(mp, pag);
+ if (!xfs_alloc_space_available(args, need, flags))
+ goto out_agbp_relse;
+
/*
* Make the freelist shorter if it's too long.
+ *
+ * Note that from this point onwards, we will always release the agf and
+ * agfl buffers on error. This handles the case where we error out and
+ * the buffers are clean or may not have been joined to the transaction
+ * and hence need to be released manually. If they have been joined to
+ * the transaction, then xfs_trans_brelse() will handle them
+ * appropriately based on the recursion count and dirty state of the
+ * buffer.
+ *
+ * XXX (dgc): When we have lots of free space, does this buy us
+ * anything other than extra overhead when we need to put more blocks
+ * back on the free list? Maybe we should only do this when space is
+ * getting low or the AGFL is more than half full?
*/
- while (be32_to_cpu(agf->agf_flcount) > need) {
- xfs_buf_t *bp;
+ while (pag->pagf_flcount > need) {
+ struct xfs_buf *bp;
error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
if (error)
- return error;
- if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
- return error;
+ goto out_agbp_relse;
+ error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
+ if (error)
+ goto out_agbp_relse;
bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
xfs_trans_binval(tp, bp);
}
- /*
- * Initialize the args structure.
- */
+
memset(&targs, 0, sizeof(targs));
targs.tp = tp;
targs.mp = mp;
@@ -1971,21 +2022,20 @@ xfs_alloc_fix_freelist(
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
targs.type = XFS_ALLOCTYPE_THIS_AG;
targs.pag = pag;
- if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
- return error;
- /*
- * Make the freelist longer if it's too short.
- */
- while (be32_to_cpu(agf->agf_flcount) < need) {
+ error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
+ if (error)
+ goto out_agbp_relse;
+
+ /* Make the freelist longer if it's too short. */
+ while (pag->pagf_flcount < need) {
targs.agbno = 0;
- targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
- /*
- * Allocate as many blocks as possible at once.
- */
- if ((error = xfs_alloc_ag_vextent(&targs))) {
- xfs_trans_brelse(tp, agflbp);
- return error;
- }
+ targs.maxlen = need - pag->pagf_flcount;
+
+ /* Allocate as many blocks as possible at once. */
+ error = xfs_alloc_ag_vextent(&targs);
+ if (error)
+ goto out_agflbp_relse;
+
/*
* Stop if we run out. Won't happen if callers are obeying
* the restrictions correctly. Can happen for free calls
@@ -1994,9 +2044,7 @@ xfs_alloc_fix_freelist(
if (targs.agbno == NULLAGBLOCK) {
if (flags & XFS_ALLOC_FLAG_FREEING)
break;
- xfs_trans_brelse(tp, agflbp);
- args->agbp = NULL;
- return 0;
+ goto out_agflbp_relse;
}
/*
* Put each allocated block on the list.
@@ -2005,12 +2053,21 @@ xfs_alloc_fix_freelist(
error = xfs_alloc_put_freelist(tp, agbp,
agflbp, bno, 0);
if (error)
- return error;
+ goto out_agflbp_relse;
}
}
xfs_trans_brelse(tp, agflbp);
args->agbp = agbp;
return 0;
+
+out_agflbp_relse:
+ xfs_trans_brelse(tp, agflbp);
+out_agbp_relse:
+ if (agbp)
+ xfs_trans_brelse(tp, agbp);
+out_no_agbp:
+ args->agbp = NULL;
+ return error;
}
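The restructuring above is the stacked-label unwind idiom: every failure path jumps to the label that releases exactly the resources acquired before the failure point. A minimal userspace sketch of the same pattern (hypothetical resources, not the XFS API):

#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical two-resource operation using the stacked-label unwind
 * pattern from xfs_alloc_fix_freelist(): each label releases exactly
 * the resources acquired before the failure point.
 */
static int do_operation(int fail_at)
{
	char *bufa = NULL, *bufb = NULL;
	int error;

	bufa = malloc(64);
	if (!bufa) {
		error = -1;
		goto out_no_a;		/* nothing acquired yet */
	}
	if (fail_at == 1) {
		error = -2;
		goto out_a_relse;	/* release bufa only */
	}

	bufb = malloc(64);
	if (!bufb) {
		error = -3;
		goto out_a_relse;
	}
	if (fail_at == 2) {
		error = -4;
		goto out_b_relse;	/* release both */
	}

	free(bufb);
	free(bufa);
	return 0;

out_b_relse:
	free(bufb);
out_a_relse:
	free(bufa);
out_no_a:
	return error;
}

int main(void)
{
	printf("ok=%d fail1=%d fail2=%d\n",
	       do_operation(0), do_operation(1), do_operation(2));
	return 0;
}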
/*
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index d1b4b6a5c894..ca1c8168373a 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -112,6 +112,8 @@ typedef struct xfs_alloc_arg {
xfs_extlen_t total; /* total blocks needed in xaction */
xfs_extlen_t alignment; /* align answer to multiple of this */
xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */
+ xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */
+ xfs_agblock_t max_agbno; /* ... */
xfs_extlen_t len; /* output: actual size of extent */
xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */
xfs_alloctype_t otype; /* original allocation type */
@@ -128,11 +130,9 @@ typedef struct xfs_alloc_arg {
#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/
#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */
-/*
- * Find the length of the longest extent in an AG.
- */
-xfs_extlen_t
-xfs_alloc_longest_free_extent(struct xfs_mount *mp,
+xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
+ struct xfs_perag *pag, xfs_extlen_t need);
+unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
struct xfs_perag *pag);
/*
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 0a472fbe06d4..3349c9a1e845 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -266,7 +266,7 @@ xfs_attr_set(
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
if (error) {
- xfs_trans_cancel(args.trans, 0);
+ xfs_trans_cancel(args.trans);
return error;
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
@@ -276,7 +276,7 @@ xfs_attr_set(
XFS_QMOPT_RES_REGBLKS);
if (error) {
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_trans_cancel(args.trans);
return error;
}
@@ -320,8 +320,7 @@ xfs_attr_set(
xfs_trans_ichgtime(args.trans, dp,
XFS_ICHGTIME_CHG);
}
- err2 = xfs_trans_commit(args.trans,
- XFS_TRANS_RELEASE_LOG_RES);
+ err2 = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error ? error : err2;
@@ -383,16 +382,14 @@ xfs_attr_set(
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
- error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
out:
- if (args.trans) {
- xfs_trans_cancel(args.trans,
- XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
- }
+ if (args.trans)
+ xfs_trans_cancel(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}
@@ -462,7 +459,7 @@ xfs_attr_remove(
error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
XFS_ATTRRM_SPACE_RES(mp), 0);
if (error) {
- xfs_trans_cancel(args.trans, 0);
+ xfs_trans_cancel(args.trans);
return error;
}
@@ -501,16 +498,14 @@ xfs_attr_remove(
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
- error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
out:
- if (args.trans) {
- xfs_trans_cancel(args.trans,
- XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
- }
+ if (args.trans)
+ xfs_trans_cancel(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f1026e86dabc..63e05b663380 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1112,7 +1112,6 @@ xfs_bmap_add_attrfork(
int committed; /* xaction was committed */
int logflags; /* logging flags */
int error; /* error return value */
- int cancel_flags = 0;
ASSERT(XFS_IFORK_Q(ip) == 0);
@@ -1124,17 +1123,15 @@ xfs_bmap_add_attrfork(
tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
if (error)
goto trans_cancel;
- cancel_flags |= XFS_TRANS_ABORT;
if (XFS_IFORK_Q(ip))
goto trans_cancel;
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
@@ -1218,14 +1215,14 @@ xfs_bmap_add_attrfork(
error = xfs_bmap_finish(&tp, &flist, &committed);
if (error)
goto bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
bmap_cancel:
xfs_bmap_cancel(&flist);
trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
@@ -3521,7 +3518,8 @@ xfs_bmap_longest_free_extent(
}
}
- longest = xfs_alloc_longest_free_extent(mp, pag);
+ longest = xfs_alloc_longest_free_extent(mp, pag,
+ xfs_alloc_min_freelist(mp, pag));
if (*blen < longest)
*blen = longest;
@@ -4424,7 +4422,15 @@ xfs_bmapi_convert_unwritten(
error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
&bma->cur, mval, bma->firstblock, bma->flist,
&tmp_logflags);
- bma->logflags |= tmp_logflags;
+ /*
+ * Log the inode core unconditionally in the unwritten extent conversion
+ * path because the conversion might not have done so (e.g., if the
+ * extent count hasn't changed). We need to make sure the inode is dirty
+ * in the transaction for the sake of fsync(), even if nothing has
+ * changed, because fsync() will not force the log for this transaction
+ * unless it sees the inode pinned.
+ */
+ bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
if (error)
return error;
@@ -5918,7 +5924,7 @@ xfs_bmap_split_extent(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -5936,10 +5942,9 @@ xfs_bmap_split_extent(
if (error)
goto out;
- return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
+ return xfs_trans_commit(tp);
out:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 4daaa662337b..a0ae572051de 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -170,7 +170,7 @@ typedef struct xfs_sb {
__uint32_t sb_features_log_incompat;
__uint32_t sb_crc; /* superblock crc */
- __uint32_t sb_pad;
+ xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */
xfs_ino_t sb_pquotino; /* project quota inode */
xfs_lsn_t sb_lsn; /* last write sequence */
@@ -256,7 +256,7 @@ typedef struct xfs_dsb {
__be32 sb_features_log_incompat;
__le32 sb_crc; /* superblock crc */
- __be32 sb_pad;
+ __be32 sb_spino_align; /* sparse inode chunk alignment */
__be64 sb_pquotino; /* project quota inode */
__be64 sb_lsn; /* last write sequence */
@@ -457,8 +457,10 @@ xfs_sb_has_ro_compat_feature(
}
#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
+#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
#define XFS_SB_FEAT_INCOMPAT_ALL \
- (XFS_SB_FEAT_INCOMPAT_FTYPE)
+ (XFS_SB_FEAT_INCOMPAT_FTYPE| \
+ XFS_SB_FEAT_INCOMPAT_SPINODES)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
@@ -506,6 +508,12 @@ static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
}
+static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
+{
+ return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+ xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES);
+}
+
/*
* end of superblock version macros
*/
@@ -758,19 +766,6 @@ typedef struct xfs_agfl {
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
-
-#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
-#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
- (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
-#define XFS_MIN_FREELIST(a,mp) \
- (XFS_MIN_FREELIST_RAW( \
- be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \
- be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
-#define XFS_MIN_FREELIST_PAG(pag,mp) \
- (XFS_MIN_FREELIST_RAW( \
- (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
- (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
-
#define XFS_AGB_TO_FSB(mp,agno,agbno) \
(((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
#define XFS_FSB_TO_AGNO(mp,fsbno) \
@@ -1216,26 +1211,54 @@ typedef __uint64_t xfs_inofree_t;
#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
+#define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */
+#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t))
+#define XFS_INODES_PER_HOLEMASK_BIT \
+ (XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
+
static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
{
return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
}
/*
- * Data record structure
+ * The on-disk inode record structure has two formats. The original "full"
+ * format uses a 4-byte freecount. The "sparse" format uses a 1-byte freecount
+ * and replaces the 3 high-order freecount bytes with the holemask and inode
+ * count.
+ *
+ * The holemask of the sparse record format allows an inode chunk to have holes
+ * that refer to blocks not owned by the inode record. This facilitates inode
+ * allocation in the event of severe free space fragmentation.
*/
typedef struct xfs_inobt_rec {
__be32 ir_startino; /* starting inode number */
- __be32 ir_freecount; /* count of free inodes (set bits) */
+ union {
+ struct {
+ __be32 ir_freecount; /* count of free inodes */
+ } f;
+ struct {
+ __be16 ir_holemask;/* hole mask for sparse chunks */
+ __u8 ir_count; /* total inode count */
+ __u8 ir_freecount; /* count of free inodes */
+ } sp;
+ } ir_u;
__be64 ir_free; /* free inode mask */
} xfs_inobt_rec_t;
typedef struct xfs_inobt_rec_incore {
xfs_agino_t ir_startino; /* starting inode number */
- __int32_t ir_freecount; /* count of free inodes (set bits) */
+ __uint16_t ir_holemask; /* hole mask for sparse chunks */
+ __uint8_t ir_count; /* total inode count */
+ __uint8_t ir_freecount; /* count of free inodes (set bits) */
xfs_inofree_t ir_free; /* free inode mask */
} xfs_inobt_rec_incore_t;
+static inline bool xfs_inobt_issparse(uint16_t holemask)
+{
+ /* non-zero holemask represents a sparse rec. */
+ return holemask;
+}
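One subtlety worth spelling out: the two union arms overlay the same four bytes, so a big-endian full-format freecount misreads under the sparse interpretation as holemask 0 and count 0, which is one reason sparse chunks sit behind an incompat feature bit. A standalone sketch of the overlay (userspace types, illustrative only, assuming the compiler packs the 2+1+1 byte struct without padding as the on-disk layout requires):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htonl/ntohs for big-endian conversion */

/*
 * Overlay mirroring the xfs_inobt_rec union: the same 4 bytes hold
 * either a be32 freecount (full format) or holemask/count/freecount
 * (sparse format).
 */
union ir_u {
	struct { uint32_t ir_freecount; } f;	/* __be32 */
	struct {
		uint16_t ir_holemask;		/* __be16 */
		uint8_t  ir_count;
		uint8_t  ir_freecount;
	} sp;
};

int main(void)
{
	union ir_u u;

	/* Full-format record: 3 inodes free out of a full chunk. */
	u.f.ir_freecount = htonl(3);

	/*
	 * Misread with the sparse interpretation: holemask and the low
	 * freecount byte look plausible, but count reads as 0.
	 */
	printf("holemask=0x%04x count=%u freecount=%u\n",
	       ntohs(u.sp.ir_holemask), u.sp.ir_count, u.sp.ir_freecount);
	return 0;
}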
/*
* Key structure
@@ -1453,8 +1476,8 @@ struct xfs_acl {
sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
/* On-disk XFS extended attribute names */
-#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
-#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT"
+#define SGI_ACL_FILE "SGI_ACL_FILE"
+#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 18dc721ca19f..89689c6a43e2 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
+#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */
/*
* Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 1c9e75521250..66efc702452a 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -65,6 +65,8 @@ xfs_inobt_lookup(
int *stat) /* success/failure */
{
cur->bc_rec.i.ir_startino = ino;
+ cur->bc_rec.i.ir_holemask = 0;
+ cur->bc_rec.i.ir_count = 0;
cur->bc_rec.i.ir_freecount = 0;
cur->bc_rec.i.ir_free = 0;
return xfs_btree_lookup(cur, dir, stat);
@@ -82,7 +84,14 @@ xfs_inobt_update(
union xfs_btree_rec rec;
rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
- rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
+ rec.inobt.ir_u.sp.ir_count = irec->ir_count;
+ rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
+ } else {
+ /* ir_holemask/ir_count not supported on-disk */
+ rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
+ }
rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
return xfs_btree_update(cur, &rec);
}
@@ -100,12 +109,27 @@ xfs_inobt_get_rec(
int error;
error = xfs_btree_get_rec(cur, &rec, stat);
- if (!error && *stat == 1) {
- irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
- irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
- irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+ if (error || *stat == 0)
+ return error;
+
+ irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
+ irec->ir_count = rec->inobt.ir_u.sp.ir_count;
+ irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
+ } else {
+ /*
+ * ir_holemask/ir_count not supported on-disk. Fill in hardcoded
+ * values for full inode chunks.
+ */
+ irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
+ irec->ir_count = XFS_INODES_PER_CHUNK;
+ irec->ir_freecount =
+ be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
}
- return error;
+ irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+
+ return 0;
}
/*
@@ -114,10 +138,14 @@ xfs_inobt_get_rec(
STATIC int
xfs_inobt_insert_rec(
struct xfs_btree_cur *cur,
+ __uint16_t holemask,
+ __uint8_t count,
__int32_t freecount,
xfs_inofree_t free,
int *stat)
{
+ cur->bc_rec.i.ir_holemask = holemask;
+ cur->bc_rec.i.ir_count = count;
cur->bc_rec.i.ir_freecount = freecount;
cur->bc_rec.i.ir_free = free;
return xfs_btree_insert(cur, stat);
@@ -154,7 +182,9 @@ xfs_inobt_insert(
}
ASSERT(i == 0);
- error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
+ error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
+ XFS_INODES_PER_CHUNK,
+ XFS_INODES_PER_CHUNK,
XFS_INOBT_ALL_FREE, &i);
if (error) {
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -220,6 +250,7 @@ xfs_ialloc_inode_init(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct list_head *buffer_list,
+ int icount,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_agblock_t length,
@@ -275,7 +306,7 @@ xfs_ialloc_inode_init(
* they track in the AIL as if they were physically logged.
*/
if (tp)
- xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,
+ xfs_icreate_log(tp, agno, agbno, icount,
mp->m_sb.sb_inodesize, length, gen);
} else
version = 2;
@@ -347,6 +378,214 @@ xfs_ialloc_inode_init(
}
/*
+ * Align startino and allocmask for a recently allocated sparse chunk such that
+ * they are fit for insertion (or merge) into the on-disk inode btrees.
+ *
+ * Background:
+ *
+ * When enabled, sparse inode support increases the inode alignment from cluster
+ * size to inode chunk size. This means that the minimum range between two
+ * non-adjacent inode records in the inobt is large enough for a full inode
+ * record. This allows for cluster sized, cluster aligned block allocation
+ * without need to worry about whether the resulting inode record overlaps with
+ * another record in the tree. Without this basic rule, we would have to deal
+ * with the consequences of overlap by potentially undoing recent allocations in
+ * the inode allocation codepath.
+ *
+ * Because of this alignment rule (which is enforced on mount), there are two
+ * inobt possibilities for newly allocated sparse chunks. One is that the
+ * aligned inode record for the chunk covers a range of inodes not already
+ * covered in the inobt (i.e., it is safe to insert a new sparse record). The
+ * other is that a record already exists at the aligned startino that considers
+ * the newly allocated range as sparse. In the latter case, record content is
+ * merged in the hope that sparse inode chunks fill to full chunks over time.
+ */
+STATIC void
+xfs_align_sparse_ino(
+ struct xfs_mount *mp,
+ xfs_agino_t *startino,
+ uint16_t *allocmask)
+{
+ xfs_agblock_t agbno;
+ xfs_agblock_t mod;
+ int offset;
+
+ agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
+ mod = agbno % mp->m_sb.sb_inoalignmt;
+ if (!mod)
+ return;
+
+ /* calculate the inode offset and align startino */
+ offset = mod << mp->m_sb.sb_inopblog;
+ *startino -= offset;
+
+ /*
+ * Since startino has been aligned down, left shift allocmask such that
+ * it continues to represent the same physical inodes relative to the
+ * new startino.
+ */
+ *allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
+}
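A worked instance of the arithmetic above, under assumed geometry (8 inodes per block, i.e. inopblog = 3, and an 8-block chunk alignment); all values are illustrative:

#include <stdio.h>
#include <stdint.h>

#define XFS_INODES_PER_CHUNK		64
#define XFS_INOBT_HOLEMASK_BITS		16
#define XFS_INODES_PER_HOLEMASK_BIT	\
	(XFS_INODES_PER_CHUNK / XFS_INOBT_HOLEMASK_BITS)	/* 4 */

int main(void)
{
	/* Assumed geometry: 8 inodes per block, 8-block alignment. */
	uint32_t sb_inoalignmt = 8, sb_inopblog = 3;

	/*
	 * Sparse allocation of 2 blocks (16 inodes) landing at agbno 20,
	 * i.e. startino 160 in this AG.
	 */
	uint32_t startino = 20 << sb_inopblog;
	uint16_t allocmask = (1 << (16 / XFS_INODES_PER_HOLEMASK_BIT)) - 1;

	uint32_t agbno = startino >> sb_inopblog;
	uint32_t mod = agbno % sb_inoalignmt;		/* 4 blocks over */
	if (mod) {
		int offset = mod << sb_inopblog;	/* 32 inodes */
		startino -= offset;			/* align down to 128 */
		allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
	}
	printf("startino=%u allocmask=0x%04x\n", startino, allocmask);
	/*
	 * -> startino=128 allocmask=0x0f00: the 16 allocated inodes now
	 * sit 32 inodes (8 holemask bits) into the aligned chunk.
	 */
	return 0;
}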
+
+/*
+ * Determine whether the source inode record can merge into the target. Both
+ * records must be sparse, the inode ranges must match and there must be no
+ * allocation overlap between the records.
+ */
+STATIC bool
+__xfs_inobt_can_merge(
+ struct xfs_inobt_rec_incore *trec, /* tgt record */
+ struct xfs_inobt_rec_incore *srec) /* src record */
+{
+ uint64_t talloc;
+ uint64_t salloc;
+
+ /* records must cover the same inode range */
+ if (trec->ir_startino != srec->ir_startino)
+ return false;
+
+ /* both records must be sparse */
+ if (!xfs_inobt_issparse(trec->ir_holemask) ||
+ !xfs_inobt_issparse(srec->ir_holemask))
+ return false;
+
+ /* both records must track some inodes */
+ if (!trec->ir_count || !srec->ir_count)
+ return false;
+
+ /* can't exceed capacity of a full record */
+ if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)
+ return false;
+
+ /* verify there is no allocation overlap */
+ talloc = xfs_inobt_irec_to_allocmask(trec);
+ salloc = xfs_inobt_irec_to_allocmask(srec);
+ if (talloc & salloc)
+ return false;
+
+ return true;
+}
+
+/*
+ * Merge the source inode record into the target. The caller must call
+ * __xfs_inobt_can_merge() to ensure the merge is valid.
+ */
+STATIC void
+__xfs_inobt_rec_merge(
+ struct xfs_inobt_rec_incore *trec, /* target */
+ struct xfs_inobt_rec_incore *srec) /* src */
+{
+ ASSERT(trec->ir_startino == srec->ir_startino);
+
+ /* combine the counts */
+ trec->ir_count += srec->ir_count;
+ trec->ir_freecount += srec->ir_freecount;
+
+ /*
+ * Merge the holemask and free mask. For both fields, 0 bits refer to
+ * allocated inodes. We combine the allocated ranges with bitwise AND.
+ */
+ trec->ir_holemask &= srec->ir_holemask;
+ trec->ir_free &= srec->ir_free;
+}
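For example, two sparse records covering disjoint halves of one chunk merge into a full record. A self-contained sketch with illustrative masks (userspace types, not the kernel structures):

#include <stdio.h>
#include <stdint.h>

struct irec {
	uint16_t ir_holemask;	/* 1 bits = holes */
	uint8_t  ir_count;
	uint8_t  ir_freecount;
	uint64_t ir_free;	/* 1 bits = free inodes */
};

/*
 * Merge srec into trec, as in __xfs_inobt_rec_merge(): counts add,
 * hole and free masks AND together (0 bits mean "allocated").
 */
static void rec_merge(struct irec *trec, const struct irec *srec)
{
	trec->ir_count += srec->ir_count;
	trec->ir_freecount += srec->ir_freecount;
	trec->ir_holemask &= srec->ir_holemask;
	trec->ir_free &= srec->ir_free;
}

int main(void)
{
	/* Two freshly allocated sparse halves of one 64-inode chunk. */
	struct irec t = { 0xff00, 32, 32, ~0ULL };	/* low 32 allocated */
	struct irec s = { 0x00ff, 32, 32, ~0ULL };	/* high 32 allocated */

	rec_merge(&t, &s);
	printf("holemask=0x%04x count=%u freecount=%u\n",
	       t.ir_holemask, t.ir_count, t.ir_freecount);
	/* -> holemask=0x0000 count=64 freecount=64: a full chunk. */
	return 0;
}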
+
+/*
+ * Insert a new sparse inode chunk into the associated inode btree. The inode
+ * record for the sparse chunk is pre-aligned to a startino that should match
+ * any pre-existing sparse inode record in the tree. This allows sparse chunks
+ * to fill over time.
+ *
+ * This function supports two modes of handling preexisting records depending on
+ * the merge flag. If merge is true, the provided record is merged with the
+ * existing record and updated in place. The merged record is returned in nrec.
+ * If merge is false, an existing record is replaced with the provided record.
+ * If no preexisting record exists, the provided record is always inserted.
+ *
+ * It is considered corruption if a merge is requested and not possible. Given
+ * the sparse inode alignment constraints, this should never happen.
+ */
+STATIC int
+xfs_inobt_insert_sprec(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ int btnum,
+ struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */
+ bool merge) /* merge or replace */
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
+ int error;
+ int i;
+ struct xfs_inobt_rec_incore rec;
+
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
+
+ /* the new record is pre-aligned so we know where to look */
+ error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
+ if (error)
+ goto error;
+ /* if nothing there, insert a new record and return */
+ if (i == 0) {
+ error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
+ nrec->ir_count, nrec->ir_freecount,
+ nrec->ir_free, &i);
+ if (error)
+ goto error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
+
+ goto out;
+ }
+
+ /*
+ * A record exists at this startino. Merge or replace the record
+ * depending on what we've been asked to do.
+ */
+ if (merge) {
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (error)
+ goto error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
+ XFS_WANT_CORRUPTED_GOTO(mp,
+ rec.ir_startino == nrec->ir_startino,
+ error);
+
+ /*
+ * This should never fail. If we have coexisting records that
+ * cannot merge, something is seriously wrong.
+ */
+ XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec),
+ error);
+
+ trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino,
+ rec.ir_holemask, nrec->ir_startino,
+ nrec->ir_holemask);
+
+ /* merge to nrec to output the updated record */
+ __xfs_inobt_rec_merge(nrec, &rec);
+
+ trace_xfs_irec_merge_post(mp, agno, nrec->ir_startino,
+ nrec->ir_holemask);
+
+ error = xfs_inobt_rec_check_count(mp, nrec);
+ if (error)
+ goto error;
+ }
+
+ error = xfs_inobt_update(cur, nrec);
+ if (error)
+ goto error;
+
+out:
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ return 0;
+error:
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/*
* Allocate new inodes in the allocation group specified by agbp.
* Return 0 for success, else error code.
*/
@@ -364,11 +603,22 @@ xfs_ialloc_ag_alloc(
xfs_agino_t newlen; /* new number of inodes */
int isaligned = 0; /* inode allocation at stripe unit */
/* boundary */
+ uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */
+ struct xfs_inobt_rec_incore rec;
struct xfs_perag *pag;
+ int do_sparse = 0;
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = tp->t_mountp;
+ args.fsbno = NULLFSBLOCK;
+
+#ifdef DEBUG
+ /* randomly do sparse inode allocations */
+ if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
+ args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
+ do_sparse = prandom_u32() & 1;
+#endif
/*
* Locking will ensure that we don't have two callers in here
@@ -390,6 +640,8 @@ xfs_ialloc_ag_alloc(
agno = be32_to_cpu(agi->agi_seqno);
args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
args.mp->m_ialloc_blks;
+ if (do_sparse)
+ goto sparse_alloc;
if (likely(newino != NULLAGINO &&
(args.agbno < be32_to_cpu(agi->agi_length)))) {
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
@@ -428,8 +680,7 @@ xfs_ialloc_ag_alloc(
* subsequent requests.
*/
args.minalignslop = 0;
- } else
- args.fsbno = NULLFSBLOCK;
+ }
if (unlikely(args.fsbno == NULLFSBLOCK)) {
/*
@@ -480,6 +731,47 @@ xfs_ialloc_ag_alloc(
return error;
}
+ /*
+ * Finally, try a sparse allocation if the filesystem supports it and
+ * the sparse allocation length is smaller than a full chunk.
+ */
+ if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
+ args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
+ args.fsbno == NULLFSBLOCK) {
+sparse_alloc:
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ args.agbno = be32_to_cpu(agi->agi_root);
+ args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
+ args.alignment = args.mp->m_sb.sb_spino_align;
+ args.prod = 1;
+
+ args.minlen = args.mp->m_ialloc_min_blks;
+ args.maxlen = args.minlen;
+
+ /*
+ * The inode record will be aligned to full chunk size. We must
+ * prevent sparse allocation from AG boundaries that result in
+ * invalid inode records, such as records that start at agbno 0
+ * or extend beyond the AG.
+ *
+ * Set min agbno to the first aligned, non-zero agbno and max to
+ * the last aligned agbno that is at least one full chunk from
+ * the end of the AG.
+ */
+ args.min_agbno = args.mp->m_sb.sb_inoalignmt;
+ args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
+ args.mp->m_sb.sb_inoalignmt) -
+ args.mp->m_ialloc_blks;
+
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ return error;
+
+ newlen = args.len << args.mp->m_sb.sb_inopblog;
+ ASSERT(newlen <= XFS_INODES_PER_CHUNK);
+ allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
+ }
+
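To make the min_agbno/max_agbno clamping above concrete, here is the same computation on assumed geometry (8-block chunks and alignment, a 1003-block AG); values are illustrative:

#include <stdio.h>
#include <stdint.h>

#define round_down(x, y)	(((x) / (y)) * (y))

int main(void)
{
	/* Assumed geometry: 8-block chunks/alignment, 1003-block AG. */
	uint32_t sb_inoalignmt = 8, sb_agblocks = 1003, ialloc_blks = 8;

	/* First aligned, non-zero agbno... */
	uint32_t min_agbno = sb_inoalignmt;
	/*
	 * ...and the last aligned agbno at least one full chunk from
	 * the end of the AG.
	 */
	uint32_t max_agbno = round_down(sb_agblocks, sb_inoalignmt) -
			     ialloc_blks;

	printf("min_agbno=%u max_agbno=%u\n", min_agbno, max_agbno);
	/*
	 * -> min_agbno=8 max_agbno=992: aligned records can never start
	 * at agbno 0 or extend past block 1000.
	 */
	return 0;
}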
if (args.fsbno == NULLFSBLOCK) {
*alloc = 0;
return 0;
@@ -495,8 +787,8 @@ xfs_ialloc_ag_alloc(
* rather than a linear progression to prevent the next generation
* number from being easily guessable.
*/
- error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
- args.len, prandom_u32());
+ error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, agno,
+ args.agbno, args.len, prandom_u32());
if (error)
return error;
@@ -504,6 +796,73 @@ xfs_ialloc_ag_alloc(
* Convert the results.
*/
newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
+
+ if (xfs_inobt_issparse(~allocmask)) {
+ /*
+ * We've allocated a sparse chunk. Align the startino and mask.
+ */
+ xfs_align_sparse_ino(args.mp, &newino, &allocmask);
+
+ rec.ir_startino = newino;
+ rec.ir_holemask = ~allocmask;
+ rec.ir_count = newlen;
+ rec.ir_freecount = newlen;
+ rec.ir_free = XFS_INOBT_ALL_FREE;
+
+ /*
+ * Insert the sparse record into the inobt and allow for a merge
+ * if necessary. If a merge does occur, rec is updated to the
+ * merged record.
+ */
+ error = xfs_inobt_insert_sprec(args.mp, tp, agbp, XFS_BTNUM_INO,
+ &rec, true);
+ if (error == -EFSCORRUPTED) {
+ xfs_alert(args.mp,
+ "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
+ XFS_AGINO_TO_INO(args.mp, agno,
+ rec.ir_startino),
+ rec.ir_holemask, rec.ir_count);
+ xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
+ }
+ if (error)
+ return error;
+
+ /*
+ * Unlike the inobt, we can't merge the part we've just allocated
+ * into an existing finobt record, due to finobt semantics: the
+ * original record may or may not exist independently of whether
+ * physical inodes exist in this sparse chunk.
+ *
+ * We must update the finobt record based on the inobt record.
+ * rec contains the fully merged and up to date inobt record
+ * from the previous call. Set merge false to replace any
+ * existing record with this one.
+ */
+ if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+ error = xfs_inobt_insert_sprec(args.mp, tp, agbp,
+ XFS_BTNUM_FINO, &rec,
+ false);
+ if (error)
+ return error;
+ }
+ } else {
+ /* full chunk - insert new records to both btrees */
+ error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
+ XFS_BTNUM_INO);
+ if (error)
+ return error;
+
+ if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+ error = xfs_inobt_insert(args.mp, tp, agbp, newino,
+ newlen, XFS_BTNUM_FINO);
+ if (error)
+ return error;
+ }
+ }
+
+ /*
+ * Update AGI counts and newino.
+ */
be32_add_cpu(&agi->agi_count, newlen);
be32_add_cpu(&agi->agi_freecount, newlen);
pag = xfs_perag_get(args.mp, agno);
@@ -512,20 +871,6 @@ xfs_ialloc_ag_alloc(
agi->agi_newino = cpu_to_be32(newino);
/*
- * Insert records describing the new inode chunk into the btrees.
- */
- error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
- XFS_BTNUM_INO);
- if (error)
- return error;
-
- if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
- error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
- XFS_BTNUM_FINO);
- if (error)
- return error;
- }
- /*
* Log allocation group header fields
*/
xfs_ialloc_log_agi(tp, agbp,
@@ -645,7 +990,7 @@ xfs_ialloc_ag_select(
* if we fail allocation due to alignment issues then it is most
* likely a real ENOSPC condition.
*/
- ineed = mp->m_ialloc_blks;
+ ineed = mp->m_ialloc_min_blks;
if (flags && ineed > 1)
ineed += xfs_ialloc_cluster_alignment(mp);
longest = pag->pagf_longest;
@@ -732,6 +1077,27 @@ xfs_ialloc_get_rec(
}
/*
+ * Return the offset of the first free inode in the record. If the inode chunk
+ * is sparsely allocated, we convert the record holemask to inode granularity
+ * and mask off the unallocated regions from the inode free mask.
+ */
+STATIC int
+xfs_inobt_first_free_inode(
+ struct xfs_inobt_rec_incore *rec)
+{
+ xfs_inofree_t realfree;
+
+ /* if there are no holes, return the first available offset */
+ if (!xfs_inobt_issparse(rec->ir_holemask))
+ return xfs_lowbit64(rec->ir_free);
+
+ realfree = xfs_inobt_irec_to_allocmask(rec);
+ realfree &= rec->ir_free;
+
+ return xfs_lowbit64(realfree);
+}
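A standalone sketch of the masking above, using an assumed holemask where the low half of the chunk is a hole (illustrative values; GCC/Clang builtins stand in for xfs_lowbit64):

#include <stdio.h>
#include <stdint.h>

/*
 * Expand a 16-bit holemask (1 = hole) into a 64-bit per-inode
 * allocation bitmap (1 = physically allocated), 4 inodes per bit.
 */
static uint64_t irec_to_allocmask(uint16_t holemask)
{
	uint64_t bitmap = 0;
	for (int bit = 0; bit < 16; bit++)
		if (!(holemask & (1 << bit)))
			bitmap |= 0xfULL << (bit * 4);
	return bitmap;
}

int main(void)
{
	uint16_t ir_holemask = 0x00ff;	/* low 32 inodes are a hole */
	uint64_t ir_free = ~0ULL;	/* all inodes marked free */

	/* Mask off free bits that fall inside holes before picking. */
	uint64_t realfree = irec_to_allocmask(ir_holemask) & ir_free;
	printf("first free offset = %d\n", (int)__builtin_ctzll(realfree));
	/* -> 32: the first physically allocated inode, not offset 0. */
	return 0;
}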
+
+/*
* Allocate an inode using the inobt-only algorithm.
*/
STATIC int
@@ -961,7 +1327,7 @@ newino:
}
alloc_inode:
- offset = xfs_lowbit64(rec.ir_free);
+ offset = xfs_inobt_first_free_inode(&rec);
ASSERT(offset >= 0);
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@@ -1210,7 +1576,7 @@ xfs_dialloc_ag(
if (error)
goto error_cur;
- offset = xfs_lowbit64(rec.ir_free);
+ offset = xfs_inobt_first_free_inode(&rec);
ASSERT(offset >= 0);
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@@ -1439,6 +1805,83 @@ out_error:
return error;
}
+/*
+ * Free the blocks of an inode chunk. We must consider that the inode chunk
+ * might be sparse and only free the regions that are allocated as part of the
+ * chunk.
+ */
+STATIC void
+xfs_difree_inode_chunk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *rec,
+ struct xfs_bmap_free *flist)
+{
+ xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
+ int startidx, endidx;
+ int nextbit;
+ xfs_agblock_t agbno;
+ int contigblk;
+ DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
+
+ if (!xfs_inobt_issparse(rec->ir_holemask)) {
+ /* not sparse, calculate extent info directly */
+ xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
+ XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),
+ mp->m_ialloc_blks, flist, mp);
+ return;
+ }
+
+ /* holemask is only 16 bits (fits in an unsigned long) */
+ ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
+ holemask[0] = rec->ir_holemask;
+
+ /*
+ * Find contiguous ranges of zeroes (i.e., allocated regions) in the
+ * holemask and convert the start/end index of each range to an extent.
+ * We start with the start and end index both pointing at the first 0 in
+ * the mask.
+ */
+ startidx = endidx = find_first_zero_bit(holemask,
+ XFS_INOBT_HOLEMASK_BITS);
+ nextbit = startidx + 1;
+ while (startidx < XFS_INOBT_HOLEMASK_BITS) {
+ nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
+ nextbit);
+ /*
+ * If the next zero bit is contiguous, update the end index of
+ * the current range and continue.
+ */
+ if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
+ nextbit == endidx + 1) {
+ endidx = nextbit;
+ goto next;
+ }
+
+ /*
+ * nextbit is not contiguous with the current end index. Convert
+ * the current start/end to an extent and add it to the free
+ * list.
+ */
+ agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
+ mp->m_sb.sb_inopblock;
+ contigblk = ((endidx - startidx + 1) *
+ XFS_INODES_PER_HOLEMASK_BIT) /
+ mp->m_sb.sb_inopblock;
+
+ ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
+ ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
+ xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
+ flist, mp);
+
+ /* reset range to current bit and carry on... */
+ startidx = endidx = nextbit;
+
+next:
+ nextbit++;
+ }
+}
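The zero-run walk above can be exercised standalone. A sketch under assumed geometry (8 inodes per block, so each holemask bit covers half a block) with an illustrative holemask:

#include <stdio.h>
#include <stdint.h>

#define HOLEMASK_BITS		16
#define INODES_PER_HM_BIT	4	/* 64-inode chunk / 16 bits */
#define INODES_PER_BLOCK	8	/* assumed: 4k blocks, 512b inodes */

int main(void)
{
	uint16_t holemask = 0x0ff0;	/* middle 32 inodes are holes */
	uint32_t sagbno = 128;		/* chunk's starting agbno */
	int startidx = -1;

	/*
	 * Walk runs of zero bits (allocated regions) and emit one
	 * extent per run, as xfs_difree_inode_chunk() does; bit 16
	 * acts as a sentinel hole to flush the final run.
	 */
	for (int bit = 0; bit <= HOLEMASK_BITS; bit++) {
		int hole = (bit == HOLEMASK_BITS) ||
			   (holemask & (1 << bit));
		if (!hole && startidx < 0)
			startidx = bit;		/* run begins */
		if (hole && startidx >= 0) {	/* run ends */
			uint32_t agbno = sagbno +
				(startidx * INODES_PER_HM_BIT) /
				INODES_PER_BLOCK;
			uint32_t len = ((bit - startidx) *
				INODES_PER_HM_BIT) / INODES_PER_BLOCK;
			printf("free extent: agbno=%u len=%u\n",
			       agbno, len);
			startidx = -1;
		}
	}
	/* -> agbno=128 len=2 and agbno=134 len=2 for this holemask. */
	return 0;
}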
+
STATIC int
xfs_difree_inobt(
struct xfs_mount *mp,
@@ -1446,8 +1889,7 @@ xfs_difree_inobt(
struct xfs_buf *agbp,
xfs_agino_t agino,
struct xfs_bmap_free *flist,
- int *deleted,
- xfs_ino_t *first_ino,
+ struct xfs_icluster *xic,
struct xfs_inobt_rec_incore *orec)
{
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
@@ -1501,20 +1943,23 @@ xfs_difree_inobt(
rec.ir_freecount++;
/*
- * When an inode cluster is free, it becomes eligible for removal
+ * When an inode chunk is free, it becomes eligible for removal. Don't
+ * remove the chunk if the block size is large enough for multiple inode
+ * chunks (that might not be free).
*/
if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
- (rec.ir_freecount == mp->m_ialloc_inos)) {
-
- *deleted = 1;
- *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
+ rec.ir_free == XFS_INOBT_ALL_FREE &&
+ mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
+ xic->deleted = 1;
+ xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
+ xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
/*
* Remove the inode cluster from the AGI B+Tree, adjust the
* AGI and Superblock inode counts, and mark the disk space
* to be freed when the transaction is committed.
*/
- ilen = mp->m_ialloc_inos;
+ ilen = rec.ir_freecount;
be32_add_cpu(&agi->agi_count, -ilen);
be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
@@ -1530,11 +1975,9 @@ xfs_difree_inobt(
goto error0;
}
- xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
- XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
- mp->m_ialloc_blks, flist, mp);
+ xfs_difree_inode_chunk(mp, agno, &rec, flist);
} else {
- *deleted = 0;
+ xic->deleted = 0;
error = xfs_inobt_update(cur, &rec);
if (error) {
@@ -1599,7 +2042,9 @@ xfs_difree_finobt(
*/
XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
- error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
+ error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
+ ibtrec->ir_count,
+ ibtrec->ir_freecount,
ibtrec->ir_free, &i);
if (error)
goto error;
@@ -1634,8 +2079,13 @@ xfs_difree_finobt(
* free inode. Hence, if all of the inodes are free and we aren't
* keeping inode chunks permanently on disk, remove the record.
* Otherwise, update the record with the new information.
+ *
+ * Note that we currently can't free chunks when the block size is large
+ * enough for multiple chunks. Leave the finobt record in place so it
+ * remains in sync with the inobt.
*/
- if (rec.ir_freecount == mp->m_ialloc_inos &&
+ if (rec.ir_free == XFS_INOBT_ALL_FREE &&
+ mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK &&
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
error = xfs_btree_delete(cur, &i);
if (error)
@@ -1671,8 +2121,7 @@ xfs_difree(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
- int *deleted,/* set if inode cluster was deleted */
- xfs_ino_t *first_ino)/* first inode in deleted cluster */
+ struct xfs_icluster *xic) /* cluster info if deleted */
{
/* REFERENCED */
xfs_agblock_t agbno; /* block number containing inode */
@@ -1723,8 +2172,7 @@ xfs_difree(
/*
* Fix up the inode allocation btree.
*/
- error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
- &rec);
+ error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec);
if (error)
goto error0;
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 100007d56449..6e450df2979b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -28,6 +28,13 @@ struct xfs_btree_cur;
/* Move inodes in clusters of this size */
#define XFS_INODE_BIG_CLUSTER_SIZE 8192
+struct xfs_icluster {
+ bool deleted; /* record is deleted */
+ xfs_ino_t first_ino; /* first inode number */
+ uint64_t alloc; /* inode phys. allocation bitmap for
+ * sparse chunks */
+};
+
/* Calculate and return the number of filesystem blocks per inode cluster */
static inline int
xfs_icluster_size_fsb(
@@ -44,8 +51,7 @@ xfs_icluster_size_fsb(
static inline struct xfs_dinode *
xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
{
- return (struct xfs_dinode *)
- (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
+ return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog);
}
/*
@@ -90,8 +96,7 @@ xfs_difree(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
- int *deleted, /* set if inode cluster was deleted */
- xfs_ino_t *first_ino); /* first inode in deleted cluster */
+ struct xfs_icluster *ifree); /* cluster info if deleted */
/*
* Return the location of the inode in imap, for mapping it into a buffer.
@@ -156,7 +161,7 @@ int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
* Inode chunk initialisation routine
*/
int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
- struct list_head *buffer_list,
+ struct list_head *buffer_list, int icount,
xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_agblock_t length, unsigned int gen);
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 964c465ca69c..674ad8f760be 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -167,7 +167,16 @@ xfs_inobt_init_rec_from_cur(
union xfs_btree_rec *rec)
{
rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
- rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ rec->inobt.ir_u.sp.ir_holemask =
+ cpu_to_be16(cur->bc_rec.i.ir_holemask);
+ rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count;
+ rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount;
+ } else {
+ /* ir_holemask/ir_count not supported on-disk */
+ rec->inobt.ir_u.f.ir_freecount =
+ cpu_to_be32(cur->bc_rec.i.ir_freecount);
+ }
rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
}
@@ -418,3 +427,85 @@ xfs_inobt_maxrecs(
return blocklen / sizeof(xfs_inobt_rec_t);
return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
}
+
+/*
+ * Convert the inode record holemask to an inode allocation bitmap. The inode
+ * allocation bitmap is inode granularity and specifies whether an inode is
+ * physically allocated on disk (not whether the inode is considered allocated
+ * or free by the fs).
+ *
+ * A bit value of 1 means the inode is physically allocated; a value of 0
+ * means it is not.
+ */
+uint64_t
+xfs_inobt_irec_to_allocmask(
+ struct xfs_inobt_rec_incore *rec)
+{
+ uint64_t bitmap = 0;
+ uint64_t inodespbit;
+ int nextbit;
+ uint allocbitmap;
+
+ /*
+ * The holemask has 16 bits for a 64-inode record. Therefore each
+ * holemask bit represents multiple inodes. Create a mask of bits to set
+ * in the allocmask for each holemask bit.
+ */
+ inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
+
+ /*
+ * Allocated inodes are represented by 0 bits in holemask. Invert the 0
+ * bits to 1 and convert to a uint so we can use xfs_next_bit(). Mask
+ * anything beyond the 16 holemask bits since this casts to a larger
+ * type.
+ */
+ allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1);
+
+ /*
+ * allocbitmap is the inverted holemask so every set bit represents
+ * allocated inodes. To expand from 16-bit holemask granularity to
+ * 64-bit (e.g., bit-per-inode), set inodespbit bits in the target
+ * bitmap for every holemask bit.
+ */
+ nextbit = xfs_next_bit(&allocbitmap, 1, 0);
+ while (nextbit != -1) {
+ ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY));
+
+ bitmap |= (inodespbit <<
+ (nextbit * XFS_INODES_PER_HOLEMASK_BIT));
+
+ nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1);
+ }
+
+ return bitmap;
+}
+
+#if defined(DEBUG) || defined(XFS_WARN)
+/*
+ * Verify that an in-core inode record has a valid inode count.
+ */
+int
+xfs_inobt_rec_check_count(
+ struct xfs_mount *mp,
+ struct xfs_inobt_rec_incore *rec)
+{
+ int inocount = 0;
+ int nextbit = 0;
+ uint64_t allocbmap;
+ int wordsz;
+
+ wordsz = sizeof(allocbmap) / sizeof(unsigned int);
+ allocbmap = xfs_inobt_irec_to_allocmask(rec);
+
+ nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit);
+ while (nextbit != -1) {
+ inocount++;
+ nextbit = xfs_next_bit((uint *) &allocbmap, wordsz,
+ nextbit + 1);
+ }
+
+ if (inocount != rec->ir_count)
+ return -EFSCORRUPTED;
+
+ return 0;
+}
+#endif /* DEBUG || XFS_WARN */
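A worked example tying the conversion and the DEBUG-only count check together (illustrative holemask, userspace sketch using compiler builtins in place of xfs_next_bit):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/*
	 * holemask 0xff3c: holes over bits 2-5 and 8-15, so bits
	 * 0,1,6,7 are allocated -> 4 bits * 4 inodes = 16 inodes.
	 */
	uint16_t ir_holemask = 0xff3c;
	uint8_t  ir_count = 16;

	uint64_t bitmap = 0;
	uint32_t allocbitmap = (uint16_t)~ir_holemask;	/* invert holes */

	/* Expand each set holemask-granularity bit to 4 inode bits. */
	for (int bit = 0; bit < 16; bit++)
		if (allocbitmap & (1u << bit))
			bitmap |= 0xfULL << (bit * 4);

	printf("allocmask=0x%016llx popcount=%d (ir_count=%u)\n",
	       (unsigned long long)bitmap,
	       __builtin_popcountll(bitmap), ir_count);
	/*
	 * xfs_inobt_rec_check_count() flags the record as corrupt when
	 * this popcount and ir_count disagree.
	 */
	return 0;
}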
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index d7ebea72c2d0..bd88453217ce 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -62,4 +62,14 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
xfs_btnum_t);
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
+/* ir_holemask to inode allocation bitmap conversion */
+uint64_t xfs_inobt_irec_to_allocmask(struct xfs_inobt_rec_incore *);
+
+#if defined(DEBUG) || defined(XFS_WARN)
+int xfs_inobt_rec_check_count(struct xfs_mount *,
+ struct xfs_inobt_rec_incore *);
+#else
+#define xfs_inobt_rec_check_count(mp, rec) 0
+#endif /* DEBUG || XFS_WARN */
+
#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 002b6b3a1988..6526e7696184 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -46,8 +46,7 @@ xfs_inobp_check(
j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
for (i = 0; i < j; i++) {
- dip = (xfs_dinode_t *)xfs_buf_offset(bp,
- i * mp->m_sb.sb_inodesize);
+ dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
if (!dip->di_next_unlinked) {
xfs_alert(mp,
"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
@@ -86,8 +85,7 @@ xfs_inode_buf_verify(
int di_ok;
xfs_dinode_t *dip;
- dip = (struct xfs_dinode *)xfs_buf_offset(bp,
- (i << mp->m_sb.sb_inodelog));
+ dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
XFS_DINODE_GOOD_VERSION(dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
@@ -186,7 +184,7 @@ xfs_imap_to_bp(
}
*bpp = bp;
- *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
+ *dipp = xfs_buf_offset(bp, imap->im_boffset);
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index dc4bfc5d88fc..df9851c46b5c 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -174,6 +174,27 @@ xfs_mount_validate_sb(
return -EFSCORRUPTED;
}
+ /*
+ * Full inode chunks must be aligned to inode chunk size when
+ * sparse inodes are enabled to support the sparse chunk
+ * allocation algorithm and prevent overlapping inode records.
+ */
+ if (xfs_sb_version_hassparseinodes(sbp)) {
+ uint32_t align;
+
+ xfs_alert(mp,
+ "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
+
+ align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
+ >> sbp->sb_blocklog;
+ if (sbp->sb_inoalignmt != align) {
+ xfs_warn(mp,
+"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.",
+ sbp->sb_inoalignmt, align);
+ return -EINVAL;
+ }
+ }
+
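For the common 4k-block, 512-byte-inode geometry (assumed values), the required alignment works out as follows:

#include <stdio.h>
#include <stdint.h>

#define XFS_INODES_PER_CHUNK	64

int main(void)
{
	/* Assumed geometry: 512-byte inodes, 4k blocks (blocklog 12). */
	uint32_t sb_inodesize = 512, sb_blocklog = 12;
	uint32_t sb_inoalignmt = 8;	/* from the superblock */

	uint32_t align = XFS_INODES_PER_CHUNK * sb_inodesize
			>> sb_blocklog;
	printf("required align=%u: %s\n", align,
	       sb_inoalignmt == align ? "mount ok" : "reject");
	/*
	 * -> required align=8: full chunks must be chunk-aligned so
	 * sparse records can never overlap.
	 */
	return 0;
}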
if (unlikely(
sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
xfs_warn(mp,
@@ -374,7 +395,7 @@ __xfs_sb_from_disk(
be32_to_cpu(from->sb_features_log_incompat);
/* crc is only used on disk, not in memory; just init to 0 here. */
to->sb_crc = 0;
- to->sb_pad = 0;
+ to->sb_spino_align = be32_to_cpu(from->sb_spino_align);
to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
to->sb_lsn = be64_to_cpu(from->sb_lsn);
/* Convert on-disk flags to in-memory flags? */
@@ -516,7 +537,7 @@ xfs_sb_to_disk(
cpu_to_be32(from->sb_features_incompat);
to->sb_features_log_incompat =
cpu_to_be32(from->sb_features_log_incompat);
- to->sb_pad = 0;
+ to->sb_spino_align = cpu_to_be32(from->sb_spino_align);
to->sb_lsn = cpu_to_be64(from->sb_lsn);
}
}
@@ -689,6 +710,11 @@ xfs_sb_mount_common(
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock);
mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
+
+ if (sbp->sb_spino_align)
+ mp->m_ialloc_min_blks = sbp->sb_spino_align;
+ else
+ mp->m_ialloc_min_blks = mp->m_ialloc_blks;
}
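The fallback above means m_ialloc_min_blks degenerates to a full chunk on non-sparse filesystems. A tiny sketch with assumed numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumed geometry: 64-inode chunks, 8 inodes per block. */
	uint32_t m_ialloc_blks = 64 / 8;	/* 8 blocks per chunk */
	uint32_t sb_spino_align = 2;		/* 0 if !sparse */

	uint32_t m_ialloc_min_blks =
		sb_spino_align ? sb_spino_align : m_ialloc_blks;
	printf("min alloc = %u of %u blocks\n",
	       m_ialloc_min_blks, m_ialloc_blks);
	/*
	 * Sparse filesystems may allocate as little as the sparse
	 * alignment (here 2 blocks = 16 inodes) per attempt.
	 */
	return 0;
}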
/*
@@ -792,12 +818,12 @@ xfs_sync_sb(
tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
xfs_log_sb(tp);
if (wait)
xfs_trans_set_sync(tp);
- return xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp);
}
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 8dda4b321343..5be529707903 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -182,12 +182,6 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
count in superblock */
/*
- * Values for call flags parameter.
- */
-#define XFS_TRANS_RELEASE_LOG_RES 0x4
-#define XFS_TRANS_ABORT 0x8
-
-/*
* Field values for xfs_trans_mod_sb.
*/
#define XFS_TRANS_SB_ICOUNT 0x00000001
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 2d5bdfce6d8f..797815012c0e 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -73,9 +73,9 @@ struct xfs_trans_resv {
* 2 trees * (2 blocks/level * max depth - 1) * block size
*/
#define XFS_ALLOCFREE_LOG_RES(mp,nx) \
- ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1)))
+ ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
- ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
+ ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
/*
* Per-directory log reservation for any directory change.
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index bf9c4579334d..41e0428d8175 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -67,7 +67,7 @@
#define XFS_DIOSTRAT_SPACE_RES(mp, v) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
#define XFS_GROWFS_SPACE_RES(mp) \
- (2 * XFS_AG_MAXLEVELS(mp))
+ (2 * (mp)->m_ag_maxlevels)
#define XFS_GROWFSRT_SPACE_RES(mp,b) \
((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
#define XFS_LINK_SPACE_RES(mp,nl) \
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e5099f268032..3859f5e27a4d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -109,7 +109,7 @@ xfs_setfilesize_trans_alloc(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -145,7 +145,7 @@ xfs_setfilesize(
isize = xfs_new_eof(ip, offset + size);
if (!isize) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return 0;
}
@@ -155,7 +155,7 @@ xfs_setfilesize(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- return xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp);
}
STATIC int
@@ -1348,7 +1348,7 @@ __xfs_get_blocks(
sector_t iblock,
struct buffer_head *bh_result,
int create,
- int direct)
+ bool direct)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -1413,6 +1413,7 @@ __xfs_get_blocks(
if (error)
return error;
new = 1;
+
} else {
/*
* Delalloc reservations do not require a transaction,
@@ -1507,49 +1508,29 @@ xfs_get_blocks(
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, false);
}
-STATIC int
+int
xfs_get_blocks_direct(
struct inode *inode,
sector_t iblock,
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, true);
}
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
- struct kiocb *iocb,
+static void
+__xfs_end_io_direct_write(
+ struct inode *inode,
+ struct xfs_ioend *ioend,
loff_t offset,
- ssize_t size,
- void *private)
+ ssize_t size)
{
- struct inode *inode = file_inode(iocb->ki_filp);
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_ioend *ioend = private;
-
- trace_xfs_gbmap_direct_endio(ip, offset, size,
- ioend ? ioend->io_type : 0, NULL);
+ struct xfs_mount *mp = XFS_I(inode)->i_mount;
- if (!ioend) {
- ASSERT(offset + size <= i_size_read(inode));
- return;
- }
-
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
goto out_end_io;
/*
@@ -1586,10 +1567,10 @@ xfs_end_io_direct_write(
* here can result in EOF moving backwards and Bad Things Happen when
* that occurs.
*/
- spin_lock(&ip->i_flags_lock);
+ spin_lock(&XFS_I(inode)->i_flags_lock);
if (offset + size > i_size_read(inode))
i_size_write(inode, offset + size);
- spin_unlock(&ip->i_flags_lock);
+ spin_unlock(&XFS_I(inode)->i_flags_lock);
/*
* If we are doing an append IO that needs to update the EOF on disk,
@@ -1606,6 +1587,98 @@ out_end_io:
return;
}
+/*
+ * Complete a direct I/O write request.
+ *
+ * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
+ * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
+ * wholly within the EOF and so there is nothing for us to do. Note that in this
+ * case the completion can be called in interrupt context, whereas if we have an
+ * ioend we will always be called in task context (i.e. from a workqueue).
+ */
+STATIC void
+xfs_end_io_direct_write(
+ struct kiocb *iocb,
+ loff_t offset,
+ ssize_t size,
+ void *private)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct xfs_ioend *ioend = private;
+
+ trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
+ ioend ? ioend->io_type : 0, NULL);
+
+ if (!ioend) {
+ ASSERT(offset + size <= i_size_read(inode));
+ return;
+ }
+
+ __xfs_end_io_direct_write(inode, ioend, offset, size);
+}
+
+/*
+ * For DAX we need a mapping buffer callback for unwritten extent conversion
+ * when page faults allocate blocks and then zero them. Note that in this
+ * case the mapping indicated by the ioend may extend beyond EOF. We most
+ * definitely do not want to extend EOF here, so we trim back the ioend size to
+ * EOF.
+ */
+#ifdef CONFIG_FS_DAX
+void
+xfs_end_io_dax_write(
+ struct buffer_head *bh,
+ int uptodate)
+{
+ struct xfs_ioend *ioend = bh->b_private;
+ struct inode *inode = ioend->io_inode;
+ ssize_t size = ioend->io_size;
+
+ ASSERT(IS_DAX(ioend->io_inode));
+
+ /* if there was an error zeroing, then don't convert it */
+ if (!uptodate)
+ ioend->io_error = -EIO;
+
+ /*
+ * Trim update to EOF, so we don't extend EOF during unwritten extent
+ * conversion of partial EOF blocks.
+ */
+ spin_lock(&XFS_I(inode)->i_flags_lock);
+ if (ioend->io_offset + size > i_size_read(inode))
+ size = i_size_read(inode) - ioend->io_offset;
+ spin_unlock(&XFS_I(inode)->i_flags_lock);
+
+ __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
+}
+#else
+void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
+#endif
+
+static inline ssize_t
+xfs_vm_do_dio(
+ struct inode *inode,
+ struct kiocb *iocb,
+ struct iov_iter *iter,
+ loff_t offset,
+ void (*endio)(struct kiocb *iocb,
+ loff_t offset,
+ ssize_t size,
+ void *private),
+ int flags)
+{
+ struct block_device *bdev;
+
+ if (IS_DAX(inode))
+ return dax_do_io(iocb, inode, iter, offset,
+ xfs_get_blocks_direct, endio, 0);
+
+ bdev = xfs_find_bdev_for_inode(inode);
+ return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
+ xfs_get_blocks_direct, endio, NULL, flags);
+}
+
STATIC ssize_t
xfs_vm_direct_IO(
struct kiocb *iocb,
@@ -1613,16 +1686,11 @@ xfs_vm_direct_IO(
loff_t offset)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
- struct block_device *bdev = xfs_find_bdev_for_inode(inode);
- if (iov_iter_rw(iter) == WRITE) {
- return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
- xfs_get_blocks_direct,
- xfs_end_io_direct_write, NULL,
- DIO_ASYNC_EXTEND);
- }
- return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
- xfs_get_blocks_direct, NULL, NULL, 0);
+ if (iov_iter_rw(iter) == WRITE)
+ return xfs_vm_do_dio(inode, iocb, iter, offset,
+ xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
+ return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
}
/*
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index ac644e0137a4..86afd1ac7895 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -53,7 +53,12 @@ typedef struct xfs_ioend {
} xfs_ioend_t;
extern const struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+
+int xfs_get_blocks(struct inode *inode, sector_t offset,
+ struct buffer_head *map_bh, int create);
+int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
+ struct buffer_head *map_bh, int create);
+void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate);
extern void xfs_count_page_state(struct page *, int *, int *);
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 3fbf167cfb4c..2bb959ada45b 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -394,7 +394,6 @@ xfs_attr_inactive(
{
struct xfs_trans *trans;
struct xfs_mount *mp;
- int cancel_flags = 0;
int lock_mode = XFS_ILOCK_SHARED;
int error = 0;
@@ -423,7 +422,6 @@ xfs_attr_inactive(
goto out_cancel;
lock_mode = XFS_ILOCK_EXCL;
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
xfs_ilock(dp, lock_mode);
if (!XFS_IFORK_Q(dp))
@@ -435,8 +433,14 @@ xfs_attr_inactive(
*/
xfs_trans_ijoin(trans, dp, 0);
- /* invalidate and truncate the attribute fork extents */
- if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+ /*
+ * Invalidate and truncate the attribute fork extents. Make sure the
+ * fork actually has attributes as otherwise the invalidation has no
+ * blocks to read and returns an error. In this case, just do the fork
+ * removal below.
+ */
+ if (xfs_inode_hasattr(dp) &&
+ dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
error = xfs_attr3_root_inactive(&trans, dp);
if (error)
goto out_cancel;
@@ -449,12 +453,12 @@ xfs_attr_inactive(
/* Reset the attribute fork - this also destroys the in-core fork */
xfs_attr_fork_remove(dp, trans);
- error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(trans);
xfs_iunlock(dp, lock_mode);
return error;
out_cancel:
- xfs_trans_cancel(trans, cancel_flags);
+ xfs_trans_cancel(trans);
out_destroy_fork:
/* kill the in-core attr fork before we drop the inode lock */
if (dp->i_afp)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a52bbd3abc7d..0f34886cf726 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -75,28 +75,20 @@ xfs_bmap_finish(
xfs_efi_log_item_t *efi; /* extent free intention */
int error; /* error return value */
xfs_bmap_free_item_t *free; /* free extent item */
- struct xfs_trans_res tres; /* new log reservation */
xfs_mount_t *mp; /* filesystem mount structure */
xfs_bmap_free_item_t *next; /* next item on free list */
- xfs_trans_t *ntp; /* new transaction pointer */
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
if (flist->xbf_count == 0) {
*committed = 0;
return 0;
}
- ntp = *tp;
- efi = xfs_trans_get_efi(ntp, flist->xbf_count);
+ efi = xfs_trans_get_efi(*tp, flist->xbf_count);
for (free = flist->xbf_first; free; free = free->xbfi_next)
- xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
+ xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
free->xbfi_blockcount);
- tres.tr_logres = ntp->t_log_res;
- tres.tr_logcount = ntp->t_log_count;
- tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
- ntp = xfs_trans_dup(*tp);
- error = xfs_trans_commit(*tp, 0);
- *tp = ntp;
+ error = xfs_trans_roll(tp, NULL);
*committed = 1;
/*
* We have a new transaction, so we should return committed=1,
@@ -105,19 +97,10 @@ xfs_bmap_finish(
if (error)
return error;
- /*
- * transaction commit worked ok so we can drop the extra ticket
- * reference that we gained in xfs_trans_dup()
- */
- xfs_log_ticket_put(ntp->t_ticket);
-
- error = xfs_trans_reserve(ntp, &tres, 0, 0);
- if (error)
- return error;
- efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
+ efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
for (free = flist->xbf_first; free != NULL; free = next) {
next = free->xbfi_next;
- if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
+ if ((error = xfs_free_extent(*tp, free->xbfi_startblock,
free->xbfi_blockcount))) {
/*
* The bmap free list will be cleaned up at a
@@ -127,7 +110,7 @@ xfs_bmap_finish(
* happens, since this transaction may not be
* dirty yet.
*/
- mp = ntp->t_mountp;
+ mp = (*tp)->t_mountp;
if (!XFS_FORCED_SHUTDOWN(mp))
xfs_force_shutdown(mp,
(error == -EFSCORRUPTED) ?
@@ -135,7 +118,7 @@ xfs_bmap_finish(
SHUTDOWN_META_IO_ERROR);
return error;
}
- xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
+ xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock,
free->xbfi_blockcount);
xfs_bmap_del_free(flist, NULL, free);
}
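The lines removed above are the open-coded transaction roll that xfs_trans_roll() now hides. A sketch of what the helper folds together, reconstructed from that removed sequence (illustrative only; the real helper in fs/xfs/xfs_trans.c keeps the log ticket alive across the commit via the regrant path introduced later in this diff):

static int example_trans_roll(struct xfs_trans **tpp, struct xfs_inode *ip)
{
	struct xfs_trans *trans = *tpp;
	struct xfs_trans_res tres;
	int error;

	/* carry the permanent log reservation over to the new transaction */
	tres.tr_logres = trans->t_log_res;
	tres.tr_logcount = trans->t_log_count;
	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;

	/* duplicate, commit the old half, continue in the new one */
	*tpp = xfs_trans_dup(trans);
	error = xfs_trans_commit(trans);
	if (error)
		return error;

	trans = *tpp;
	if (ip)
		xfs_trans_ijoin(trans, ip, 0);

	/* re-reserve log space in the duplicated transaction */
	return xfs_trans_reserve(trans, &tres, 0, 0);
}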
@@ -878,7 +861,7 @@ xfs_free_eofblocks(
if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return -EAGAIN;
}
}
@@ -886,7 +869,7 @@ xfs_free_eofblocks(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
if (need_iolock)
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
@@ -908,12 +891,9 @@ xfs_free_eofblocks(
* If we get an error at this point we simply don't
* bother truncating the file.
*/
- xfs_trans_cancel(tp,
- (XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT));
+ xfs_trans_cancel(tp);
} else {
- error = xfs_trans_commit(tp,
- XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (!error)
xfs_inode_clear_eofblocks_tag(ip);
}
@@ -1026,7 +1006,7 @@ xfs_alloc_file_space(
* Free the transaction structure.
*/
ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1053,7 +1033,7 @@ xfs_alloc_file_space(
goto error0;
}
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error) {
break;
@@ -1077,7 +1057,7 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
error1: /* Just cancel transaction */
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
@@ -1133,14 +1113,29 @@ xfs_zero_remaining_bytes(
break;
ASSERT(imap.br_blockcount >= 1);
ASSERT(imap.br_startoff == offset_fsb);
+ ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+
+ if (imap.br_startblock == HOLESTARTBLOCK ||
+ imap.br_state == XFS_EXT_UNWRITTEN) {
+ /* skip the entire extent */
+ lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff +
+ imap.br_blockcount) - 1;
+ continue;
+ }
+
lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
if (lastoffset > endoff)
lastoffset = endoff;
- if (imap.br_startblock == HOLESTARTBLOCK)
- continue;
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
- if (imap.br_state == XFS_EXT_UNWRITTEN)
+
+ /* DAX can just zero the backing device directly */
+ if (IS_DAX(VFS_I(ip))) {
+ error = dax_zero_page_range(VFS_I(ip), offset,
+ lastoffset - offset + 1,
+ xfs_get_blocks_direct);
+ if (error)
+ return error;
continue;
+ }
error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp,
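For reference, the DAX helper used above (and again in the xfs_file.c and xfs_iops.c hunks below) has this shape in fs/dax.c at this point in the tree: it zeroes "length" bytes at "from" by storing straight to the persistent-memory backing, resolving blocks through the filesystem's get_block callback instead of a buffer read-modify-write. A sketch of the signature and the call pattern:

int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
			get_block_t get_block);

/* typical call, mirroring the hunk above */
error = dax_zero_page_range(VFS_I(ip), offset, lastoffset - offset + 1,
			    xfs_get_blocks_direct);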
@@ -1289,7 +1284,7 @@ xfs_free_file_space(
* Free the transaction structure.
*/
ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1320,7 +1315,7 @@ xfs_free_file_space(
goto error0;
}
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
@@ -1330,7 +1325,7 @@ xfs_free_file_space(
error0:
xfs_bmap_cancel(&free_list);
error1:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
goto out;
}
@@ -1462,7 +1457,7 @@ xfs_shift_file_space(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
break;
}
@@ -1492,13 +1487,13 @@ xfs_shift_file_space(
if (error)
goto out;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
}
return error;
out:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
@@ -1718,7 +1713,7 @@ xfs_swap_extents(
tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out_unlock;
}
@@ -1901,7 +1896,7 @@ xfs_swap_extents(
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
trace_xfs_swap_extent_after(ip, 0);
trace_xfs_swap_extent_after(tip, 1);
@@ -1915,6 +1910,6 @@ out_unlock:
goto out;
out_trans_cancel:
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1790b00bea7a..a4b7d92e946c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1419,9 +1419,9 @@ xfs_buf_submit_wait(
return error;
}
-xfs_caddr_t
+void *
xfs_buf_offset(
- xfs_buf_t *bp,
+ struct xfs_buf *bp,
size_t offset)
{
struct page *page;
@@ -1431,7 +1431,7 @@ xfs_buf_offset(
offset += bp->b_offset;
page = bp->b_pages[offset >> PAGE_SHIFT];
- return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
+ return page_address(page) + (offset & (PAGE_SIZE-1));
}
/*
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 75ff5d5a7d2e..331c1ccf8264 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -299,7 +299,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
/* Buffer Utility Routines */
-extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
+extern void *xfs_buf_offset(struct xfs_buf *, size_t);
/* Delayed Write Buffer Routines */
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
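With xfs_buf_offset() returning void *, the conversion to the target pointer type is implicit and the per-call-site casts disappear; the later hunks in this diff follow exactly this pattern (types taken from those hunks):

/* before: dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); */
struct xfs_dinode *dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);

/* before: buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, next_unlinked_offset); */
xfs_agino_t *buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);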
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 02c01bbbc789..4143dc75dca4 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -568,8 +568,6 @@ xfs_qm_dqread(
struct xfs_buf *bp;
struct xfs_trans *tp = NULL;
int error;
- int cancelflags = 0;
-
dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
@@ -617,7 +615,6 @@ xfs_qm_dqread(
XFS_QM_DQALLOC_SPACE_RES(mp), 0);
if (error)
goto error1;
- cancelflags = XFS_TRANS_RELEASE_LOG_RES;
}
/*
@@ -632,7 +629,6 @@ xfs_qm_dqread(
* allocate (ENOENT).
*/
trace_xfs_dqread_fail(dqp);
- cancelflags |= XFS_TRANS_ABORT;
goto error1;
}
@@ -670,7 +666,7 @@ xfs_qm_dqread(
xfs_trans_brelse(tp, bp);
if (tp) {
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto error0;
}
@@ -680,7 +676,7 @@ xfs_qm_dqread(
error1:
if (tp)
- xfs_trans_cancel(tp, cancelflags);
+ xfs_trans_cancel(tp);
error0:
xfs_qm_dqdestroy(dqp);
*O_dqpp = NULL;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 338e50bbfd1e..74d0e5966ebc 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -127,7 +127,7 @@ xfs_error_report(
struct xfs_mount *mp,
const char *filename,
int linenum,
- inst_t *ra)
+ void *ra)
{
if (level <= xfs_error_level) {
xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
@@ -146,7 +146,7 @@ xfs_corruption_error(
void *p,
const char *filename,
int linenum,
- inst_t *ra)
+ void *ra)
{
if (level <= xfs_error_level)
xfs_hex_dump(p, 64);
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c0394ed126fc..4ed3042a0f16 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -21,10 +21,10 @@
struct xfs_mount;
extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
- const char *filename, int linenum, inst_t *ra);
+ const char *filename, int linenum, void *ra);
extern void xfs_corruption_error(const char *tag, int level,
struct xfs_mount *mp, void *p, const char *filename,
- int linenum, inst_t *ra);
+ int linenum, void *ra);
extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_ERROR_REPORT(e, lvl, mp) \
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index cb7fe64cdbfa..adc8f8fdd145 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -239,7 +239,7 @@ xfs_efi_init(
xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
efip->efi_format.efi_nextents = nextents;
- efip->efi_format.efi_id = (__psint_t)(void*)efip;
+ efip->efi_format.efi_id = (uintptr_t)(void *)efip;
atomic_set(&efip->efi_next_extent, 0);
atomic_set(&efip->efi_refcount, 2);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7c62fca53e2f..874507de3485 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -80,14 +80,15 @@ xfs_rw_ilock_demote(
}
/*
- * xfs_iozero
+ * xfs_iozero clears the specified range supplied via the page cache (except in
+ * the DAX case). Writes through the page cache will allocate blocks over holes,
+ * though the callers usually map the holes first and avoid them. If a block is
+ * not completely zeroed, then it will be read from disk before being partially
+ * zeroed.
*
- * xfs_iozero clears the specified range of buffer supplied,
- * and marks all the affected blocks as valid and modified. If
- * an affected block is not allocated, it will be allocated. If
- * an affected block is not completely overwritten, and is not
- * valid before the operation, it will be read from disk before
- * being partially zeroed.
+ * In the DAX case, we can just directly write to the underlying pages. This
+ * will not allocate blocks, but will avoid holes and unwritten extents and so
+ * not do unnecessary work.
*/
int
xfs_iozero(
@@ -97,7 +98,8 @@ xfs_iozero(
{
struct page *page;
struct address_space *mapping;
- int status;
+ int status = 0;
+
mapping = VFS_I(ip)->i_mapping;
do {
@@ -109,20 +111,27 @@ xfs_iozero(
if (bytes > count)
bytes = count;
- status = pagecache_write_begin(NULL, mapping, pos, bytes,
- AOP_FLAG_UNINTERRUPTIBLE,
- &page, &fsdata);
- if (status)
- break;
+ if (IS_DAX(VFS_I(ip))) {
+ status = dax_zero_page_range(VFS_I(ip), pos, bytes,
+ xfs_get_blocks_direct);
+ if (status)
+ break;
+ } else {
+ status = pagecache_write_begin(NULL, mapping, pos, bytes,
+ AOP_FLAG_UNINTERRUPTIBLE,
+ &page, &fsdata);
+ if (status)
+ break;
- zero_user(page, offset, bytes);
+ zero_user(page, offset, bytes);
- status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
- page, fsdata);
- WARN_ON(status <= 0); /* can't return less than zero! */
+ status = pagecache_write_end(NULL, mapping, pos, bytes,
+ bytes, page, fsdata);
+ WARN_ON(status <= 0); /* can't return less than zero! */
+ status = 0;
+ }
pos += bytes;
count -= bytes;
- status = 0;
} while (count);
return status;
@@ -139,7 +148,7 @@ xfs_update_prealloc_flags(
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -161,7 +170,7 @@ xfs_update_prealloc_flags(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (flags & XFS_PREALLOC_SYNC)
xfs_trans_set_sync(tp);
- return xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp);
}
/*
@@ -285,7 +294,7 @@ xfs_file_read_iter(
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= XFS_IO_INVIS;
- if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
+ if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
@@ -379,7 +388,11 @@ xfs_file_splice_read(
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
- ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+ /* for dax, we need to avoid the page cache */
+ if (IS_DAX(VFS_I(ip)))
+ ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
+ else
+ ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
@@ -673,7 +686,7 @@ xfs_file_dio_aio_write(
mp->m_rtdev_targp : mp->m_ddev_targp;
/* DIO must be aligned to device logical sector size */
- if ((pos | count) & target->bt_logical_sectormask)
+ if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask))
return -EINVAL;
/* "unaligned" here means not aligned to a filesystem block */
@@ -759,8 +772,11 @@ xfs_file_dio_aio_write(
out:
xfs_rw_iunlock(ip, iolock);
- /* No fallback to buffered IO on errors for XFS. */
- ASSERT(ret < 0 || ret == count);
+ /*
+ * No fallback to buffered IO on errors for XFS. DAX can result in
+ * partial writes, but direct IO will either complete fully or fail.
+ */
+ ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
return ret;
}
@@ -843,7 +859,7 @@ xfs_file_write_iter(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- if (unlikely(iocb->ki_flags & IOCB_DIRECT))
+ if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
ret = xfs_file_dio_aio_write(iocb, from);
else
ret = xfs_file_buffered_aio_write(iocb, from);
@@ -1064,17 +1080,6 @@ xfs_file_readdir(
return xfs_readdir(ip, ctx, bufsize);
}
-STATIC int
-xfs_file_mmap(
- struct file *filp,
- struct vm_area_struct *vma)
-{
- vma->vm_ops = &xfs_file_vm_ops;
-
- file_accessed(filp);
- return 0;
-}
-
/*
* This type is designed to indicate the type of offset we would like
* to search from page cache for xfs_seek_hole_data().
@@ -1455,48 +1460,83 @@ xfs_file_llseek(
* ordering of:
*
* mmap_sem (MM)
- * i_mmap_lock (XFS - truncate serialisation)
- * page_lock (MM)
- * i_lock (XFS - extent map serialisation)
+ * sb_start_pagefault(vfs, freeze)
+ * i_mmap_lock (XFS - truncate serialisation)
+ * page_lock (MM)
+ * i_lock (XFS - extent map serialisation)
+ */
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
*/
STATIC int
-xfs_filemap_fault(
+xfs_filemap_page_mkwrite(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
- int error;
+ struct inode *inode = file_inode(vma->vm_file);
+ int ret;
- trace_xfs_filemap_fault(ip);
+ trace_xfs_filemap_page_mkwrite(XFS_I(inode));
- xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
- error = filemap_fault(vma, vmf);
- xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vma->vm_file);
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- return error;
+ if (IS_DAX(inode)) {
+ ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
+ xfs_end_io_dax_write);
+ } else {
+ ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ ret = block_page_mkwrite_return(ret);
+ }
+
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ sb_end_pagefault(inode->i_sb);
+
+ return ret;
}
-/*
- * mmap()d file has taken write protection fault and is being made writable. We
- * can set the page state up correctly for a writable page, which means we can
- * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
- * mapping.
- */
STATIC int
-xfs_filemap_page_mkwrite(
+xfs_filemap_fault(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
- int error;
+ struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file));
+ int ret;
+
+ trace_xfs_filemap_fault(ip);
- trace_xfs_filemap_page_mkwrite(ip);
+ /* DAX can shortcut the normal fault path on write faults! */
+ if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
+ return xfs_filemap_page_mkwrite(vma, vmf);
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
- error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ ret = filemap_fault(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
- return error;
+ return ret;
+}
+
+static const struct vm_operations_struct xfs_file_vm_ops = {
+ .fault = xfs_filemap_fault,
+ .map_pages = filemap_map_pages,
+ .page_mkwrite = xfs_filemap_page_mkwrite,
+};
+
+STATIC int
+xfs_file_mmap(
+ struct file *filp,
+ struct vm_area_struct *vma)
+{
+ file_accessed(filp);
+ vma->vm_ops = &xfs_file_vm_ops;
+ if (IS_DAX(file_inode(filp)))
+ vma->vm_flags |= VM_MIXEDMAP;
+ return 0;
}
const struct file_operations xfs_file_operations = {
@@ -1527,9 +1567,3 @@ const struct file_operations xfs_dir_file_operations = {
#endif
.fsync = xfs_dir_fsync,
};
-
-static const struct vm_operations_struct xfs_file_vm_ops = {
- .fault = xfs_filemap_fault,
- .map_pages = filemap_map_pages,
- .page_mkwrite = xfs_filemap_page_mkwrite,
-};
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index da82f1cb4b9b..c4c130f9bfb6 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -196,7 +196,8 @@ xfs_filestream_pick_ag(
goto next_ag;
}
- longest = xfs_alloc_longest_free_extent(mp, pag);
+ longest = xfs_alloc_longest_free_extent(mp, pag,
+ xfs_alloc_min_freelist(mp, pag));
if (((minlen && longest >= minlen) ||
(!minlen && pag->pagf_freeblks >= minfree)) &&
(!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cb7e8a29dfb6..9b3438a7680f 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -101,7 +101,9 @@ xfs_fs_geometry(
(xfs_sb_version_hasftype(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
(xfs_sb_version_hasfinobt(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_FINOBT : 0);
+ XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
+ (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
+ XFS_FSOP_GEOM_FLAGS_SPINODES : 0);
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
mp->m_sb.sb_logsectsize : BBSIZE;
geo->rtsectsize = mp->m_sb.sb_blocksize;
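The new geometry flag is how userspace discovers sparse inode chunk support without parsing the superblock. A hypothetical probe via the existing geometry ioctl (header and struct names from the public XFS ABI; error handling trimmed):

#include <sys/ioctl.h>
#include <xfs/xfs.h>	/* XFS_IOC_FSGEOMETRY, struct xfs_fsop_geom */

static int fs_has_sparse_inodes(int fd)
{
	struct xfs_fsop_geom geo;

	if (ioctl(fd, XFS_IOC_FSGEOMETRY, &geo) < 0)
		return -1;
	return (geo.flags & XFS_FSOP_GEOM_FLAGS_SPINODES) != 0;
}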
@@ -201,7 +203,7 @@ xfs_growfs_data_private(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
XFS_GROWFS_SPACE_RES(mp), 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -489,7 +491,7 @@ xfs_growfs_data_private(
if (dpct)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
return error;
@@ -557,7 +559,7 @@ xfs_growfs_data_private(
return saved_error ? saved_error : error;
error0:
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 539a85fddbc2..3da9f4da4f3d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -905,7 +905,6 @@ xfs_dir_ialloc(
{
xfs_trans_t *tp;
- xfs_trans_t *ntp;
xfs_inode_t *ip;
xfs_buf_t *ialloc_context = NULL;
int code;
@@ -954,8 +953,6 @@ xfs_dir_ialloc(
* to succeed the second time.
*/
if (ialloc_context) {
- struct xfs_trans_res tres;
-
/*
* Normally, xfs_trans_commit releases all the locks.
* We call bhold to hang on to the ialloc_context across
@@ -964,12 +961,6 @@ xfs_dir_ialloc(
* allocation group.
*/
xfs_trans_bhold(tp, ialloc_context);
- /*
- * Save the log reservation so we can use
- * them in the next transaction.
- */
- tres.tr_logres = xfs_trans_get_log_res(tp);
- tres.tr_logcount = xfs_trans_get_log_count(tp);
/*
* We want the quota changes to be associated with the next
@@ -985,35 +976,9 @@ xfs_dir_ialloc(
tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
}
- ntp = xfs_trans_dup(tp);
- code = xfs_trans_commit(tp, 0);
- tp = ntp;
- if (committed != NULL) {
+ code = xfs_trans_roll(&tp, 0);
+ if (committed != NULL)
*committed = 1;
- }
- /*
- * If we get an error during the commit processing,
- * release the buffer that is still held and return
- * to the caller.
- */
- if (code) {
- xfs_buf_relse(ialloc_context);
- if (dqinfo) {
- tp->t_dqinfo = dqinfo;
- xfs_trans_free_dqinfo(tp);
- }
- *tpp = ntp;
- *ipp = NULL;
- return code;
- }
-
- /*
- * transaction commit worked ok so we can drop the extra ticket
- * reference that we gained in xfs_trans_dup()
- */
- xfs_log_ticket_put(tp->t_ticket);
- tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
- code = xfs_trans_reserve(tp, &tres, 0, 0);
/*
* Re-attach the quota info that we detached from prev trx.
@@ -1025,7 +990,7 @@ xfs_dir_ialloc(
if (code) {
xfs_buf_relse(ialloc_context);
- *tpp = ntp;
+ *tpp = tp;
*ipp = NULL;
return code;
}
@@ -1127,7 +1092,6 @@ xfs_create(
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
- uint cancel_flags;
int committed;
prid_t prid;
struct xfs_dquot *udqp = NULL;
@@ -1164,8 +1128,6 @@ xfs_create(
tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
}
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-
/*
* Initially assume that the file does not exist and
* reserve the resources for that case. If that is not
@@ -1183,10 +1145,9 @@ xfs_create(
resblks = 0;
error = xfs_trans_reserve(tp, tres, 0, 0);
}
- if (error) {
- cancel_flags = 0;
+ if (error)
goto out_trans_cancel;
- }
+
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
@@ -1217,7 +1178,7 @@ xfs_create(
if (error) {
if (error == -ENOSPC)
goto out_trans_cancel;
- goto out_trans_abort;
+ goto out_trans_cancel;
}
/*
@@ -1235,7 +1196,7 @@ xfs_create(
resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
if (error) {
ASSERT(error != -ENOSPC);
- goto out_trans_abort;
+ goto out_trans_cancel;
}
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
@@ -1269,7 +1230,7 @@ xfs_create(
if (error)
goto out_bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto out_release_inode;
@@ -1282,10 +1243,8 @@ xfs_create(
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
- out_trans_abort:
- cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
out_release_inode:
/*
* Wait until after the current transaction is aborted to finish the
@@ -1317,7 +1276,6 @@ xfs_create_tmpfile(
struct xfs_inode *ip = NULL;
struct xfs_trans *tp = NULL;
int error;
- uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
prid_t prid;
struct xfs_dquot *udqp = NULL;
struct xfs_dquot *gdqp = NULL;
@@ -1350,10 +1308,8 @@ xfs_create_tmpfile(
resblks = 0;
error = xfs_trans_reserve(tp, tres, 0, 0);
}
- if (error) {
- cancel_flags = 0;
+ if (error)
goto out_trans_cancel;
- }
error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
pdqp, resblks, 1, 0);
@@ -1365,7 +1321,7 @@ xfs_create_tmpfile(
if (error) {
if (error == -ENOSPC)
goto out_trans_cancel;
- goto out_trans_abort;
+ goto out_trans_cancel;
}
if (mp->m_flags & XFS_MOUNT_WSYNC)
@@ -1381,9 +1337,9 @@ xfs_create_tmpfile(
ip->i_d.di_nlink--;
error = xfs_iunlink(tp, ip);
if (error)
- goto out_trans_abort;
+ goto out_trans_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto out_release_inode;
@@ -1394,10 +1350,8 @@ xfs_create_tmpfile(
*ipp = ip;
return 0;
- out_trans_abort:
- cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
out_release_inode:
/*
* Wait until after the current transaction is aborted to finish the
@@ -1427,7 +1381,6 @@ xfs_link(
int error;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int cancel_flags;
int committed;
int resblks;
@@ -1447,17 +1400,14 @@ xfs_link(
goto std_return;
tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
if (error == -ENOSPC) {
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
}
- if (error) {
- cancel_flags = 0;
+ if (error)
goto error_return;
- }
xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
@@ -1486,19 +1436,19 @@ xfs_link(
if (sip->i_d.di_nlink == 0) {
error = xfs_iunlink_remove(tp, sip);
if (error)
- goto abort_return;
+ goto error_return;
}
error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
&first_block, &free_list, resblks);
if (error)
- goto abort_return;
+ goto error_return;
xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
error = xfs_bumplink(tp, sip);
if (error)
- goto abort_return;
+ goto error_return;
/*
* If this is a synchronous mount, make sure that the
@@ -1512,15 +1462,13 @@ xfs_link(
error = xfs_bmap_finish (&tp, &free_list, &committed);
if (error) {
xfs_bmap_cancel(&free_list);
- goto abort_return;
+ goto error_return;
}
- return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ return xfs_trans_commit(tp);
- abort_return:
- cancel_flags |= XFS_TRANS_ABORT;
error_return:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
std_return:
return error;
}
@@ -1555,7 +1503,6 @@ xfs_itruncate_extents(
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp = *tpp;
- struct xfs_trans *ntp;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
xfs_fileoff_t first_unmap_block;
@@ -1613,29 +1560,7 @@ xfs_itruncate_extents(
if (error)
goto out_bmap_cancel;
- if (committed) {
- /*
- * Mark the inode dirty so it will be logged and
- * moved forward in the log as part of every commit.
- */
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- }
-
- ntp = xfs_trans_dup(tp);
- error = xfs_trans_commit(tp, 0);
- tp = ntp;
-
- xfs_trans_ijoin(tp, ip, 0);
-
- if (error)
- goto out;
-
- /*
- * Transaction commit worked ok so we can drop the extra ticket
- * reference that we gained in xfs_trans_dup()
- */
- xfs_log_ticket_put(tp->t_ticket);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+ error = xfs_trans_roll(&tp, ip);
if (error)
goto out;
}
@@ -1756,7 +1681,7 @@ xfs_inactive_truncate(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -1777,7 +1702,7 @@ xfs_inactive_truncate(
ASSERT(ip->i_d.di_nextents == 0);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto error_unlock;
@@ -1785,7 +1710,7 @@ xfs_inactive_truncate(
return 0;
error_trans_cancel:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
error_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
@@ -1835,7 +1760,7 @@ xfs_inactive_ifree(
} else {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
}
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_trans_cancel(tp);
return error;
}
@@ -1855,7 +1780,7 @@ xfs_inactive_ifree(
__func__, error);
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
}
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
@@ -1874,7 +1799,7 @@ xfs_inactive_ifree(
if (error)
xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
__func__, error);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
__func__, error);
@@ -2235,28 +2160,42 @@ xfs_iunlink_remove(
*/
STATIC int
xfs_ifree_cluster(
- xfs_inode_t *free_ip,
- xfs_trans_t *tp,
- xfs_ino_t inum)
+ xfs_inode_t *free_ip,
+ xfs_trans_t *tp,
+ struct xfs_icluster *xic)
{
xfs_mount_t *mp = free_ip->i_mount;
int blks_per_cluster;
int inodes_per_cluster;
int nbufs;
int i, j;
+ int ioffset;
xfs_daddr_t blkno;
xfs_buf_t *bp;
xfs_inode_t *ip;
xfs_inode_log_item_t *iip;
xfs_log_item_t *lip;
struct xfs_perag *pag;
+ xfs_ino_t inum;
+ inum = xic->first_ino;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
blks_per_cluster = xfs_icluster_size_fsb(mp);
inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
nbufs = mp->m_ialloc_blks / blks_per_cluster;
for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
+ /*
+ * The allocation bitmap tells us which inodes of the chunk were
+ * physically allocated. Skip the cluster if an inode falls into
+ * a sparse region.
+ */
+ ioffset = inum - xic->first_ino;
+ if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
+ ASSERT(do_mod(ioffset, inodes_per_cluster) == 0);
+ continue;
+ }
+
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
@@ -2414,8 +2353,7 @@ xfs_ifree(
xfs_bmap_free_t *flist)
{
int error;
- int delete;
- xfs_ino_t first_ino;
+ struct xfs_icluster xic = { 0 };
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_d.di_nlink == 0);
@@ -2431,7 +2369,7 @@ xfs_ifree(
if (error)
return error;
- error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
+ error = xfs_difree(tp, ip->i_ino, flist, &xic);
if (error)
return error;
@@ -2448,8 +2386,8 @@ xfs_ifree(
ip->i_d.di_gen++;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (delete)
- error = xfs_ifree_cluster(ip, tp, first_ino);
+ if (xic.deleted)
+ error = xfs_ifree_cluster(ip, tp, &xic);
return error;
}
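The old delete/first_ino out-parameter pair collapses into one context structure threaded from xfs_difree() down to xfs_ifree_cluster(). Its shape, as implied by the fields the hunks above use (declared in libxfs; exact layout assumed):

struct xfs_icluster {
	bool		deleted;	/* record deleted: clusters to free */
	xfs_ino_t	first_ino;	/* first inode number of the chunk */
	uint64_t	alloc;		/* bitmap of physically allocated
					 * inodes, for sparse chunks */
};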
@@ -2536,7 +2474,6 @@ xfs_remove(
int error = 0;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int cancel_flags;
int committed;
uint resblks;
@@ -2557,7 +2494,6 @@ xfs_remove(
tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
else
tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/*
* We try to get the real space reservation first,
@@ -2576,7 +2512,6 @@ xfs_remove(
}
if (error) {
ASSERT(error != -ENOSPC);
- cancel_flags = 0;
goto out_trans_cancel;
}
@@ -2588,7 +2523,6 @@ xfs_remove(
/*
* If we're removing a directory perform some additional validation.
*/
- cancel_flags |= XFS_TRANS_ABORT;
if (is_dir) {
ASSERT(ip->i_d.di_nlink >= 2);
if (ip->i_d.di_nlink != 2) {
@@ -2644,7 +2578,7 @@ xfs_remove(
if (error)
goto out_bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto std_return;
@@ -2656,7 +2590,7 @@ xfs_remove(
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
std_return:
return error;
}
@@ -2730,11 +2664,11 @@ xfs_finish_rename(
error = xfs_bmap_finish(&tp, free_list, &committed);
if (error) {
xfs_bmap_cancel(free_list);
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
- return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ return xfs_trans_commit(tp);
}
/*
@@ -2855,7 +2789,7 @@ xfs_cross_rename(
out_trans_abort:
xfs_bmap_cancel(free_list);
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
@@ -2915,7 +2849,6 @@ xfs_rename(
int num_inodes = __XFS_SORT_INODES;
bool new_parent = (src_dp != target_dp);
bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
- int cancel_flags = 0;
int spaceres;
int error;
@@ -2951,7 +2884,6 @@ xfs_rename(
}
if (error)
goto out_trans_cancel;
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/*
* Attach the dquots to the inodes
@@ -3022,10 +2954,8 @@ xfs_rename(
error = xfs_dir_createname(tp, target_dp, target_name,
src_ip->i_ino, &first_block,
&free_list, spaceres);
- if (error == -ENOSPC)
- goto out_bmap_cancel;
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
xfs_trans_ichgtime(tp, target_dp,
XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3033,7 +2963,7 @@ xfs_rename(
if (new_parent && src_is_directory) {
error = xfs_bumplink(tp, target_dp);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
}
} else { /* target_ip != NULL */
/*
@@ -3065,7 +2995,7 @@ xfs_rename(
src_ip->i_ino,
&first_block, &free_list, spaceres);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
xfs_trans_ichgtime(tp, target_dp,
XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3076,7 +3006,7 @@ xfs_rename(
*/
error = xfs_droplink(tp, target_ip);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
if (src_is_directory) {
/*
@@ -3084,7 +3014,7 @@ xfs_rename(
*/
error = xfs_droplink(tp, target_ip);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
}
} /* target_ip != NULL */
@@ -3101,7 +3031,7 @@ xfs_rename(
&first_block, &free_list, spaceres);
ASSERT(error != -EEXIST);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
}
/*
@@ -3127,7 +3057,7 @@ xfs_rename(
*/
error = xfs_droplink(tp, src_dp);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
}
/*
@@ -3142,7 +3072,7 @@ xfs_rename(
error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
&first_block, &free_list, spaceres);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
/*
* For whiteouts, we need to bump the link count on the whiteout inode.
@@ -3156,10 +3086,10 @@ xfs_rename(
ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
error = xfs_bumplink(tp, wip);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
error = xfs_iunlink_remove(tp, wip);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
/*
@@ -3180,12 +3110,10 @@ xfs_rename(
IRELE(wip);
return error;
-out_trans_abort:
- cancel_flags |= XFS_TRANS_ABORT;
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
if (wip)
IRELE(wip);
return error;
@@ -3464,7 +3392,7 @@ xfs_iflush_int(
ASSERT(ip->i_d.di_version > 1);
/* set *dip = inode's place in the buffer */
- dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
+ dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 87f67c6b654c..ea7d85af5310 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -336,7 +336,7 @@ xfs_set_dmattrs(
tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -346,7 +346,7 @@ xfs_set_dmattrs(
ip->i_d.di_dmstate = state;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
return error;
}
@@ -1076,7 +1076,7 @@ xfs_ioctl_setattr_get_trans(
return tp;
out_cancel:
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return ERR_PTR(error);
}
@@ -1253,7 +1253,7 @@ xfs_ioctl_setattr(
else
ip->i_d.di_extsize = 0;
- code = xfs_trans_commit(tp, 0);
+ code = xfs_trans_commit(tp);
/*
* Release any dquot(s) the inode had kept before chown.
@@ -1265,7 +1265,7 @@ xfs_ioctl_setattr(
return code;
error_trans_cancel:
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
error_free_dquots:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(pdqp);
@@ -1338,11 +1338,11 @@ xfs_ioc_setxflags(
error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out_drop_write;
}
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
out_drop_write:
mnt_drop_write_file(filp);
return error;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 38e633bad8c2..1f86033171c8 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -183,7 +183,7 @@ xfs_iomap_write_direct(
* Check for running out of space, note: need lock to return
*/
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -213,7 +213,7 @@ xfs_iomap_write_direct(
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error)
goto out_bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto out_unlock;
@@ -236,7 +236,7 @@ out_bmap_cancel:
xfs_bmap_cancel(&free_list);
xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
out_trans_cancel:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
goto out_unlock;
}
@@ -690,7 +690,7 @@ xfs_iomap_write_allocate(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
nres, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -760,7 +760,7 @@ xfs_iomap_write_allocate(
if (error)
goto trans_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto error0;
@@ -791,7 +791,7 @@ xfs_iomap_write_allocate(
trans_cancel:
xfs_bmap_cancel(&free_list);
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
error0:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
@@ -853,7 +853,7 @@ xfs_iomap_write_unwritten(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
resblks, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -890,7 +890,7 @@ xfs_iomap_write_unwritten(
if (error)
goto error_on_bmapi_transaction;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
return error;
@@ -914,7 +914,7 @@ xfs_iomap_write_unwritten(
error_on_bmapi_transaction:
xfs_bmap_cancel(&free_list);
- xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 7f51f39f8acc..766b23f86ce9 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -699,7 +699,7 @@ xfs_setattr_nonsize(
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -730,7 +730,7 @@ xfs_setattr_nonsize(
return 0;
out_trans_cancel:
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_dqrele:
xfs_qm_dqrele(udqp);
@@ -752,7 +752,6 @@ xfs_setattr_size(
struct xfs_trans *tp;
int error;
uint lock_flags = 0;
- uint commit_flags = 0;
bool did_zeroing = false;
trace_xfs_setattr(ip);
@@ -848,7 +847,11 @@ xfs_setattr_size(
* to hope that the caller sees ENOMEM and retries the truncate
* operation.
*/
- error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
+ if (IS_DAX(inode))
+ error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
+ else
+ error = block_truncate_page(inode->i_mapping, newsize,
+ xfs_get_blocks);
if (error)
return error;
truncate_setsize(inode, newsize);
@@ -858,7 +861,6 @@ xfs_setattr_size(
if (error)
goto out_trans_cancel;
- commit_flags = XFS_TRANS_RELEASE_LOG_RES;
lock_flags |= XFS_ILOCK_EXCL;
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
@@ -898,7 +900,7 @@ xfs_setattr_size(
if (newsize <= oldsize) {
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
if (error)
- goto out_trans_abort;
+ goto out_trans_cancel;
/*
* Truncated "down", so we're removing references to old data
@@ -925,16 +927,14 @@ xfs_setattr_size(
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
out_unlock:
if (lock_flags)
xfs_iunlock(ip, lock_flags);
return error;
-out_trans_abort:
- commit_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
- xfs_trans_cancel(tp, commit_flags);
+ xfs_trans_cancel(tp);
goto out_unlock;
}
@@ -981,7 +981,7 @@ xfs_vn_update_time(
tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -1003,7 +1003,7 @@ xfs_vn_update_time(
}
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
- return xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp);
}
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -1188,22 +1188,22 @@ xfs_diflags_to_iflags(
struct inode *inode,
struct xfs_inode *ip)
{
- if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+ uint16_t flags = ip->i_d.di_flags;
+
+ inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
+ S_NOATIME | S_DAX);
+
+ if (flags & XFS_DIFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+ if (flags & XFS_DIFLAG_APPEND)
inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+ if (flags & XFS_DIFLAG_SYNC)
inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+ if (flags & XFS_DIFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
+ /* XXX: Also needs an on-disk per inode flag! */
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+ inode->i_flags |= S_DAX;
}
/*
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 80429891dc9b..f41b0c3fddab 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -252,7 +252,7 @@ xfs_bulkstat_grab_ichunk(
}
irec->ir_free |= xfs_inobt_maskn(0, idx);
- *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount;
+ *icount = irec->ir_count - irec->ir_freecount;
}
return 0;
@@ -415,6 +415,8 @@ xfs_bulkstat(
goto del_cursor;
if (icount) {
irbp->ir_startino = r.ir_startino;
+ irbp->ir_holemask = r.ir_holemask;
+ irbp->ir_count = r.ir_count;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
irbp++;
@@ -447,13 +449,15 @@ xfs_bulkstat(
* If this chunk has any allocated inodes, save it.
* Also start read-ahead now for this chunk.
*/
- if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
+ if (r.ir_freecount < r.ir_count) {
xfs_bulkstat_ichunk_ra(mp, agno, &r);
irbp->ir_startino = r.ir_startino;
+ irbp->ir_holemask = r.ir_holemask;
+ irbp->ir_count = r.ir_count;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
irbp++;
- icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
+ icount += r.ir_count - r.ir_freecount;
}
error = xfs_btree_increment(cur, 0, &stat);
if (error || stat == 0) {
@@ -599,8 +603,7 @@ xfs_inumbers(
agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
buffer[bufidx].xi_startino =
XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
- buffer[bufidx].xi_alloccount =
- XFS_INODES_PER_CHUNK - r.ir_freecount;
+ buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
buffer[bufidx].xi_allocmask = ~r.ir_free;
if (++bufidx == bcount) {
long written;
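Every "allocated = total - free" computation in bulkstat and inumbers switches from the fixed XFS_INODES_PER_CHUNK (64) to the per-record ir_count, since a sparse record may hold fewer physical inodes. The recurring expression, as a sketch:

static inline unsigned int
irec_allocated_count(const struct xfs_inobt_rec_incore *irec)
{
	/* was: XFS_INODES_PER_CHUNK - irec->ir_freecount */
	return irec->ir_count - irec->ir_freecount;
}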
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 7c7842c85a08..85f883dd6207 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -32,26 +32,12 @@ typedef unsigned int __uint32_t;
typedef signed long long int __int64_t;
typedef unsigned long long int __uint64_t;
-typedef __uint32_t inst_t; /* an instruction */
-
typedef __s64 xfs_off_t; /* <file offset> type */
typedef unsigned long long xfs_ino_t; /* <inode> type */
typedef __s64 xfs_daddr_t; /* <disk address> type */
-typedef char * xfs_caddr_t; /* <core address> type */
typedef __u32 xfs_dev_t;
typedef __u32 xfs_nlink_t;
-/* __psint_t is the same size as a pointer */
-#if (BITS_PER_LONG == 32)
-typedef __int32_t __psint_t;
-typedef __uint32_t __psunsigned_t;
-#elif (BITS_PER_LONG == 64)
-typedef __int64_t __psint_t;
-typedef __uint64_t __psunsigned_t;
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-
#include "xfs_types.h"
#include "kmem.h"
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index bcc7cfabb787..08d4fe46f0fa 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -109,7 +109,7 @@ xlog_ungrant_log_space(
STATIC void
xlog_verify_dest_ptr(
struct xlog *log,
- char *ptr);
+ void *ptr);
STATIC void
xlog_verify_grant_tail(
struct xlog *log);
@@ -513,7 +513,7 @@ xfs_log_done(
struct xfs_mount *mp,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
- uint flags)
+ bool regrant)
{
struct xlog *log = mp->m_log;
xfs_lsn_t lsn = 0;
@@ -526,14 +526,11 @@ xfs_log_done(
(((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
(xlog_commit_record(log, ticket, iclog, &lsn)))) {
lsn = (xfs_lsn_t) -1;
- if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
- flags |= XFS_LOG_REL_PERM_RESERV;
- }
+ regrant = false;
}
- if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
- (flags & XFS_LOG_REL_PERM_RESERV)) {
+ if (!regrant) {
trace_xfs_log_done_nonperm(log, ticket);
/*
@@ -541,7 +538,6 @@ xfs_log_done(
* request has been made to release a permanent reservation.
*/
xlog_ungrant_log_space(log, ticket);
- xfs_log_ticket_put(ticket);
} else {
trace_xfs_log_done_perm(log, ticket);
@@ -553,6 +549,7 @@ xfs_log_done(
ticket->t_flags |= XLOG_TIC_INITED;
}
+ xfs_log_ticket_put(ticket);
return lsn;
}
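Condensed view of the new contract (a sketch inferred from this hunk, not the full function): the ticket reference is now dropped unconditionally, and the boolean only decides whether a permanent reservation is kept for the next transaction roll.

static xfs_lsn_t example_log_done(struct xlog *log,
				  struct xlog_ticket *ticket, bool regrant)
{
	if (!regrant)
		xlog_ungrant_log_space(log, ticket);	/* give space back */
	else
		ticket->t_flags |= XLOG_TIC_INITED;	/* reuse on next roll */

	xfs_log_ticket_put(ticket);	/* reference dropped either way */
	return 0;
}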
@@ -1447,7 +1444,7 @@ xlog_alloc_log(
iclog->ic_bp = bp;
iclog->ic_data = bp->b_addr;
#ifdef DEBUG
- log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
+ log->l_iclog_bak[i] = &iclog->ic_header;
#endif
head = &iclog->ic_header;
memset(head, 0, sizeof(xlog_rec_header_t));
@@ -1602,7 +1599,7 @@ xlog_pack_data(
int i, j, k;
int size = iclog->ic_offset + roundoff;
__be32 cycle_lsn;
- xfs_caddr_t dp;
+ char *dp;
cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
@@ -3664,7 +3661,7 @@ xlog_ticket_alloc(
void
xlog_verify_dest_ptr(
struct xlog *log,
- char *ptr)
+ void *ptr)
{
int i;
int good_ptr = 0;
@@ -3767,9 +3764,8 @@ xlog_verify_iclog(
xlog_op_header_t *ophead;
xlog_in_core_t *icptr;
xlog_in_core_2_t *xhdr;
- xfs_caddr_t ptr;
- xfs_caddr_t base_ptr;
- __psint_t field_offset;
+ void *base_ptr, *ptr, *p;
+ ptrdiff_t field_offset;
__uint8_t clientid;
int len, i, j, k, op_len;
int idx;
@@ -3788,9 +3784,9 @@ xlog_verify_iclog(
if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
- ptr = (xfs_caddr_t) &iclog->ic_header;
- for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
- ptr += BBSIZE) {
+ base_ptr = ptr = &iclog->ic_header;
+ p = &iclog->ic_header;
+ for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) {
if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: unexpected magic num",
__func__);
@@ -3798,20 +3794,19 @@ xlog_verify_iclog(
/* check fields */
len = be32_to_cpu(iclog->ic_header.h_num_logops);
- ptr = iclog->ic_datap;
- base_ptr = ptr;
- ophead = (xlog_op_header_t *)ptr;
+ base_ptr = ptr = iclog->ic_datap;
+ ophead = ptr;
xhdr = iclog->ic_data;
for (i = 0; i < len; i++) {
- ophead = (xlog_op_header_t *)ptr;
+ ophead = ptr;
/* clientid is only 1 byte */
- field_offset = (__psint_t)
- ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
+ p = &ophead->oh_clientid;
+ field_offset = p - base_ptr;
if (!syncing || (field_offset & 0x1ff)) {
clientid = ophead->oh_clientid;
} else {
- idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
+ idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3829,13 +3824,13 @@ xlog_verify_iclog(
(unsigned long)field_offset);
/* check length */
- field_offset = (__psint_t)
- ((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
+ p = &ophead->oh_len;
+ field_offset = p - base_ptr;
if (!syncing || (field_offset & 0x1ff)) {
op_len = be32_to_cpu(ophead->oh_len);
} else {
- idx = BTOBBT((__psint_t)&ophead->oh_len -
- (__psint_t)iclog->ic_datap);
+ idx = BTOBBT((uintptr_t)&ophead->oh_len -
+ (uintptr_t)iclog->ic_datap);
if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 84e0deb95abd..fa27aaec72cb 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -111,15 +111,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
#define XFS_LSN_CMP(x,y) _lsn_cmp(x,y)
/*
- * Macros, structures, prototypes for interface to the log manager.
- */
-
-/*
- * Flags to xfs_log_done()
- */
-#define XFS_LOG_REL_PERM_RESERV 0x1
-
-/*
* Flags to xfs_log_force()
*
* XFS_LOG_SYNC: Synchronous force in-core log to disk
@@ -138,7 +129,7 @@ struct xfs_log_callback;
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
- uint flags);
+ bool regrant);
int _xfs_log_force(struct xfs_mount *mp,
uint flags,
int *log_forced);
@@ -183,7 +174,7 @@ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket);
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_lsn_t *commit_lsn, int flags);
+ xfs_lsn_t *commit_lsn, bool regrant);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
void xfs_log_work_queue(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 45cc0ce18adf..abc2ccbff739 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -624,7 +624,7 @@ restart:
spin_unlock(&cil->xc_push_lock);
/* xfs_log_done always frees the ticket on error. */
- commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
+ commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false);
if (commit_lsn == -1)
goto out_abort;
@@ -773,14 +773,10 @@ xfs_log_commit_cil(
struct xfs_mount *mp,
struct xfs_trans *tp,
xfs_lsn_t *commit_lsn,
- int flags)
+ bool regrant)
{
struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp;
- int log_flags = 0;
-
- if (flags & XFS_TRANS_RELEASE_LOG_RES)
- log_flags = XFS_LOG_REL_PERM_RESERV;
/* lock out background commit */
down_read(&cil->xc_ctx_lock);
@@ -795,7 +791,7 @@ xfs_log_commit_cil(
if (commit_lsn)
*commit_lsn = tp->t_commit_lsn;
- xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+ xfs_log_done(mp, tp->t_ticket, NULL, regrant);
xfs_trans_unreserve_and_mod_sb(tp);
/*
@@ -809,7 +805,7 @@ xfs_log_commit_cil(
* the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs.
*/
- xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
+ xfs_trans_free_items(tp, tp->t_commit_lsn, false);
xlog_cil_push_background(log);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index db7cbdeb2b42..1c87c8abfbed 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -409,7 +409,7 @@ struct xlog {
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
- char *l_iclog_bak[XLOG_MAX_ICLOGS];
+ void *l_iclog_bak[XLOG_MAX_ICLOGS];
#endif
};
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 4f5784f85a5b..01dd228ca05e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -147,7 +147,7 @@ xlog_put_bp(
* Return the address of the start of the given block number's data
* in a log buffer. The buffer covers a log sector-aligned region.
*/
-STATIC xfs_caddr_t
+STATIC char *
xlog_align(
struct xlog *log,
xfs_daddr_t blk_no,
@@ -203,7 +203,7 @@ xlog_bread(
xfs_daddr_t blk_no,
int nbblks,
struct xfs_buf *bp,
- xfs_caddr_t *offset)
+ char **offset)
{
int error;
@@ -225,9 +225,9 @@ xlog_bread_offset(
xfs_daddr_t blk_no, /* block to read from */
int nbblks, /* blocks to read */
struct xfs_buf *bp,
- xfs_caddr_t offset)
+ char *offset)
{
- xfs_caddr_t orig_offset = bp->b_addr;
+ char *orig_offset = bp->b_addr;
int orig_len = BBTOB(bp->b_length);
int error, error2;
@@ -396,7 +396,7 @@ xlog_find_cycle_start(
xfs_daddr_t *last_blk,
uint cycle)
{
- xfs_caddr_t offset;
+ char *offset;
xfs_daddr_t mid_blk;
xfs_daddr_t end_blk;
uint mid_cycle;
@@ -443,7 +443,7 @@ xlog_find_verify_cycle(
uint cycle;
xfs_buf_t *bp;
xfs_daddr_t bufblks;
- xfs_caddr_t buf = NULL;
+ char *buf = NULL;
int error = 0;
/*
@@ -509,7 +509,7 @@ xlog_find_verify_log_record(
{
xfs_daddr_t i;
xfs_buf_t *bp;
- xfs_caddr_t offset = NULL;
+ char *offset = NULL;
xlog_rec_header_t *head = NULL;
int error = 0;
int smallmem = 0;
@@ -616,7 +616,7 @@ xlog_find_head(
xfs_daddr_t *return_head_blk)
{
xfs_buf_t *bp;
- xfs_caddr_t offset;
+ char *offset;
xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
int num_scan_bblks;
uint first_half_cycle, last_half_cycle;
@@ -891,7 +891,7 @@ xlog_find_tail(
{
xlog_rec_header_t *rhead;
xlog_op_header_t *op_head;
- xfs_caddr_t offset = NULL;
+ char *offset = NULL;
xfs_buf_t *bp;
int error, i, found;
xfs_daddr_t umount_data_blk;
@@ -1099,7 +1099,7 @@ xlog_find_zeroed(
xfs_daddr_t *blk_no)
{
xfs_buf_t *bp;
- xfs_caddr_t offset;
+ char *offset;
uint first_cycle, last_cycle;
xfs_daddr_t new_blk, last_blk, start_blk;
xfs_daddr_t num_scan_bblks;
@@ -1199,7 +1199,7 @@ bp_err:
STATIC void
xlog_add_record(
struct xlog *log,
- xfs_caddr_t buf,
+ char *buf,
int cycle,
int block,
int tail_cycle,
@@ -1227,7 +1227,7 @@ xlog_write_log_records(
int tail_cycle,
int tail_block)
{
- xfs_caddr_t offset;
+ char *offset;
xfs_buf_t *bp;
int balign, ealign;
int sectbb = log->l_sectBBsize;
@@ -1789,8 +1789,7 @@ xlog_recover_do_inode_buffer(
return -EFSCORRUPTED;
}
- buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
- next_unlinked_offset);
+ buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
*buffer_nextp = *logged_nextp;
/*
@@ -1798,7 +1797,7 @@ xlog_recover_do_inode_buffer(
* have to leave the inode in a consistent state for whoever
* reads it next....
*/
- xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
+ xfs_dinode_calc_crc(mp,
xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
}
@@ -2503,8 +2502,8 @@ xlog_recover_inode_pass2(
xfs_buf_t *bp;
xfs_dinode_t *dip;
int len;
- xfs_caddr_t src;
- xfs_caddr_t dest;
+ char *src;
+ char *dest;
int error;
int attr_index;
uint fields;
@@ -2546,7 +2545,7 @@ xlog_recover_inode_pass2(
goto out_release;
}
ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
- dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
+ dip = xfs_buf_offset(bp, in_f->ilf_boffset);
/*
* Make sure the place we're flushing out to really looks
@@ -2885,7 +2884,7 @@ xlog_recover_dquot_pass2(
return error;
ASSERT(bp);
- ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
+ ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
/*
* If the dquot has an LSN in it, recover the dquot only if it's less
@@ -3068,12 +3067,22 @@ xlog_recover_do_icreate_pass2(
return -EINVAL;
}
- /* existing allocation is fixed value */
- ASSERT(count == mp->m_ialloc_inos);
- ASSERT(length == mp->m_ialloc_blks);
- if (count != mp->m_ialloc_inos ||
- length != mp->m_ialloc_blks) {
- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
+ /*
+ * The inode chunk is either full or sparse and we only support
+ * m_ialloc_min_blks sized sparse allocations at this time.
+ */
+ if (length != mp->m_ialloc_blks &&
+ length != mp->m_ialloc_min_blks) {
+ xfs_warn(log->l_mp,
+ "%s: unsupported chunk length", __FUNCTION__);
+ return -EINVAL;
+ }
+
+ /* verify inode count is consistent with extent length */
+ if ((count >> mp->m_sb.sb_inopblog) != length) {
+ xfs_warn(log->l_mp,
+ "%s: inconsistent inode count and chunk length",
+ __FUNCTION__);
return -EINVAL;
}
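A worked example of the consistency check just added (geometry assumed): with 4096-byte blocks and 512-byte inodes, sb_inopblog is 3 (8 inodes per block), so a full 64-inode icreate record must describe 64 >> 3 = 8 blocks, and a sparse 32-inode record exactly 4.

static int icreate_geometry_ok(unsigned int count, unsigned int length,
			       unsigned int sb_inopblog)
{
	return (count >> sb_inopblog) == length;
}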
@@ -3091,8 +3100,8 @@ xlog_recover_do_icreate_pass2(
XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
return 0;
- xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length,
- be32_to_cpu(icl->icl_gen));
+ xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length,
+ be32_to_cpu(icl->icl_gen));
return 0;
}
@@ -3364,17 +3373,17 @@ STATIC int
xlog_recover_add_to_cont_trans(
struct xlog *log,
struct xlog_recover *trans,
- xfs_caddr_t dp,
+ char *dp,
int len)
{
xlog_recover_item_t *item;
- xfs_caddr_t ptr, old_ptr;
+ char *ptr, *old_ptr;
int old_len;
if (list_empty(&trans->r_itemq)) {
/* finish copying rest of trans header */
xlog_recover_add_item(&trans->r_itemq);
- ptr = (xfs_caddr_t) &trans->r_theader +
+ ptr = (char *)&trans->r_theader +
sizeof(xfs_trans_header_t) - len;
memcpy(ptr, dp, len);
return 0;
@@ -3410,12 +3419,12 @@ STATIC int
xlog_recover_add_to_trans(
struct xlog *log,
struct xlog_recover *trans,
- xfs_caddr_t dp,
+ char *dp,
int len)
{
xfs_inode_log_format_t *in_f; /* any will do */
xlog_recover_item_t *item;
- xfs_caddr_t ptr;
+ char *ptr;
if (!len)
return 0;
@@ -3504,7 +3513,7 @@ STATIC int
xlog_recovery_process_trans(
struct xlog *log,
struct xlog_recover *trans,
- xfs_caddr_t dp,
+ char *dp,
unsigned int len,
unsigned int flags,
int pass)
@@ -3611,8 +3620,8 @@ xlog_recover_process_ophdr(
struct hlist_head rhash[],
struct xlog_rec_header *rhead,
struct xlog_op_header *ohead,
- xfs_caddr_t dp,
- xfs_caddr_t end,
+ char *dp,
+ char *end,
int pass)
{
struct xlog_recover *trans;
@@ -3661,11 +3670,11 @@ xlog_recover_process_data(
struct xlog *log,
struct hlist_head rhash[],
struct xlog_rec_header *rhead,
- xfs_caddr_t dp,
+ char *dp,
int pass)
{
struct xlog_op_header *ohead;
- xfs_caddr_t end;
+ char *end;
int num_logops;
int error;
@@ -3751,11 +3760,11 @@ xlog_recover_process_efi(
}
set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
return error;
abort_error:
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
@@ -3857,13 +3866,13 @@ xlog_recover_clear_agi_bucket(
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
goto out_error;
return;
out_abort:
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
out_error:
xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
return;
@@ -4010,7 +4019,7 @@ xlog_recover_process_iunlinks(
STATIC int
xlog_unpack_data_crc(
struct xlog_rec_header *rhead,
- xfs_caddr_t dp,
+ char *dp,
struct xlog *log)
{
__le32 crc;
@@ -4040,7 +4049,7 @@ xlog_unpack_data_crc(
STATIC int
xlog_unpack_data(
struct xlog_rec_header *rhead,
- xfs_caddr_t dp,
+ char *dp,
struct xlog *log)
{
int i, j, k;
@@ -4122,7 +4131,7 @@ xlog_do_recovery_pass(
{
xlog_rec_header_t *rhead;
xfs_daddr_t blk_no;
- xfs_caddr_t offset;
+ char *offset;
xfs_buf_t *hbp, *dbp;
int error = 0, h_size;
int bblks, split_bblks;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6f23fbdfb365..461e791efad7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -725,6 +725,22 @@ xfs_mountfs(
}
/*
+ * If enabled, sparse inode chunk alignment is expected to match the
+ * cluster size. Full inode chunk alignment must match the chunk size,
+ * but that is checked on sb read verification...
+ */
+ if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
+ mp->m_sb.sb_spino_align !=
+ XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+ xfs_warn(mp,
+ "Sparse inode block alignment (%u) must match cluster size (%llu).",
+ mp->m_sb.sb_spino_align,
+ XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+ error = -EINVAL;
+ goto out_remove_uuid;
+ }
+
+ /*
* Set inode alignment fields
*/
xfs_set_inoalignment(mp);
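
For illustration, the alignment test above is a plain bytes-to-blocks conversion; XFS_B_TO_FSBT() shifts the byte count right by the block-size log. A small sketch with made-up numbers:

#include <stdio.h>

/* Hypothetical: 4096-byte blocks (log2 = 12), 8192-byte inode clusters. */
#define SB_BLOCKLOG	12

static unsigned long b_to_fsbt(unsigned long bytes)
{
	return bytes >> SB_BLOCKLOG;		/* what XFS_B_TO_FSBT() does */
}

int main(void)
{
	unsigned int spino_align = 2;		/* sb_spino_align from the superblock */
	unsigned long cluster_size = 8192;	/* m_inode_cluster_size, in bytes */

	if (spino_align != b_to_fsbt(cluster_size))
		printf("mount fails with -EINVAL\n");
	else
		printf("sparse inode alignment accepted\n");	/* 2 == 8192 >> 12 */
	return 0;
}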
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8c995a2ccb6f..7999e91cd49a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -101,6 +101,8 @@ typedef struct xfs_mount {
__uint64_t m_flags; /* global mount flags */
int m_ialloc_inos; /* inodes in inode allocation */
int m_ialloc_blks; /* blocks in inode allocation */
+ int m_ialloc_min_blks;/* min blocks in sparse inode
+ * allocation */
int m_inoalign_mask;/* mask sb_inoalignmt if used */
uint m_qflags; /* quota status flags */
struct xfs_trans_resv m_resv; /* precomputed res values */
@@ -179,6 +181,8 @@ typedef struct xfs_mount {
allocator */
#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
+#define XFS_MOUNT_DAX (1ULL << 62) /* TEST ONLY! */
+
/*
* Default minimum read and write sizes.
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 981a657eca39..ab4a6066f7ca 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -306,7 +306,7 @@ xfs_fs_commit_blocks(
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out_drop_iolock;
}
@@ -321,7 +321,7 @@ xfs_fs_commit_blocks(
}
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
out_drop_iolock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5538468c7f63..eac9549efd52 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -756,7 +756,7 @@ xfs_qm_qino_alloc(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create,
XFS_QM_QINOCREATE_SPACE_RES(mp), 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -764,8 +764,7 @@ xfs_qm_qino_alloc(
error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
&committed);
if (error) {
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
}
@@ -796,7 +795,7 @@ xfs_qm_qino_alloc(
spin_unlock(&mp->m_sb_lock);
xfs_log_sb(tp);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_alert(mp, "%s failed (error %d)!", __func__, error);
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 9a25c9275fb3..3640c6e896af 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -239,7 +239,7 @@ xfs_qm_scall_trunc_qfile(
tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
goto out_put;
}
@@ -252,15 +252,14 @@ xfs_qm_scall_trunc_qfile(
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
if (error) {
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
goto out_unlock;
}
ASSERT(ip->i_d.di_nextents == 0);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -437,7 +436,7 @@ xfs_qm_scall_setqlim(
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out_rele;
}
@@ -548,7 +547,7 @@ xfs_qm_scall_setqlim(
dqp->dq_flags |= XFS_DQ_DIRTY;
xfs_trans_log_dquot(tp, dqp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
out_rele:
xfs_qm_dqrele(dqp);
@@ -571,7 +570,7 @@ xfs_qm_log_quotaoff_end(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -585,8 +584,7 @@ xfs_qm_log_quotaoff_end(
* We don't care about quotaoff's performance.
*/
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
- return error;
+ return xfs_trans_commit(tp);
}
@@ -605,7 +603,7 @@ xfs_qm_log_quotaoff(
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out;
}
@@ -624,7 +622,7 @@ xfs_qm_log_quotaoff(
* We don't care about quotaoff's performance.
*/
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
goto out;
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 5376dd406ba2..ce6506adab7b 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -55,7 +55,6 @@ struct xfs_trans;
typedef struct xfs_dqtrx {
struct xfs_dquot *qt_dquot; /* the dquot this refers to */
ulong qt_blk_res; /* blks reserved on a dquot */
- ulong qt_blk_res_used; /* blks used from the reservation */
ulong qt_ino_res; /* inode reserved on a dquot */
ulong qt_ino_res_used; /* inodes used from the reservation */
long qt_bcount_delta; /* dquot blk count changes */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index f2079b6911cc..f4e8c06eee26 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -780,7 +780,6 @@ xfs_growfs_rt_alloc(
* Allocate space to the file, as necessary.
*/
while (oblocks < nblocks) {
- int cancelflags = 0;
xfs_trans_t *tp;
tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
@@ -792,7 +791,6 @@ xfs_growfs_rt_alloc(
resblks, 0);
if (error)
goto error_cancel;
- cancelflags = XFS_TRANS_RELEASE_LOG_RES;
/*
* Lock the inode.
*/
@@ -804,7 +802,6 @@ xfs_growfs_rt_alloc(
* Allocate blocks to the bitmap file.
*/
nmap = 1;
- cancelflags |= XFS_TRANS_ABORT;
error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
XFS_BMAPI_METADATA, &firstblock,
resblks, &map, &nmap, &flist);
@@ -818,14 +815,13 @@ xfs_growfs_rt_alloc(
error = xfs_bmap_finish(&tp, &flist, &committed);
if (error)
goto error_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto error;
/*
* Now we need to clear the allocated blocks.
* Do this one block per transaction, to keep it simple.
*/
- cancelflags = 0;
for (bno = map.br_startoff, fsbno = map.br_startblock;
bno < map.br_startoff + map.br_blockcount;
bno++, fsbno++) {
@@ -851,7 +847,7 @@ xfs_growfs_rt_alloc(
if (bp == NULL) {
error = -EIO;
error_cancel:
- xfs_trans_cancel(tp, cancelflags);
+ xfs_trans_cancel(tp);
goto error;
}
memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
@@ -859,7 +855,7 @@ error_cancel:
/*
* Commit the transaction.
*/
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
goto error;
}
@@ -973,7 +969,6 @@ xfs_growfs_rt(
bmbno < nrbmblocks;
bmbno++) {
xfs_trans_t *tp;
- int cancelflags = 0;
*nmp = *mp;
nsbp = &nmp->m_sb;
@@ -1015,7 +1010,6 @@ xfs_growfs_rt(
mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize;
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
- cancelflags |= XFS_TRANS_ABORT;
/*
* Get the summary inode into the transaction.
*/
@@ -1062,7 +1056,7 @@ xfs_growfs_rt(
nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
if (error) {
error_cancel:
- xfs_trans_cancel(tp, cancelflags);
+ xfs_trans_cancel(tp);
break;
}
/*
@@ -1076,7 +1070,7 @@ error_cancel:
mp->m_rsumlevels = nrsumlevels;
mp->m_rsumsize = nrsumsize;
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
break;
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 858e1e62bbaa..1fb16562c159 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -112,6 +112,8 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
#define MNTOPT_DISCARD "discard" /* Discard unused blocks */
#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
+#define MNTOPT_DAX "dax" /* Enable direct access to bdev pages */
+
/*
* Table driven mount option parser.
*
@@ -363,6 +365,10 @@ xfs_parseargs(
mp->m_flags |= XFS_MOUNT_DISCARD;
} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
mp->m_flags &= ~XFS_MOUNT_DISCARD;
+#ifdef CONFIG_FS_DAX
+ } else if (!strcmp(this_char, MNTOPT_DAX)) {
+ mp->m_flags |= XFS_MOUNT_DAX;
+#endif
} else {
xfs_warn(mp, "unknown mount option [%s].", this_char);
return -EINVAL;
@@ -452,8 +458,8 @@ done:
}
struct proc_xfs_info {
- int flag;
- char *str;
+ uint64_t flag;
+ char *str;
};
STATIC int
@@ -474,6 +480,7 @@ xfs_showargs(
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
{ XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
{ XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
+ { XFS_MOUNT_DAX, "," MNTOPT_DAX },
{ 0, NULL }
};
static struct proc_xfs_info xfs_info_unset[] = {
@@ -1507,6 +1514,20 @@ xfs_fs_fill_super(
if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
sb->s_flags |= MS_I_VERSION;
+ if (mp->m_flags & XFS_MOUNT_DAX) {
+ xfs_warn(mp,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ if (sb->s_blocksize != PAGE_SIZE) {
+ xfs_alert(mp,
+ "Filesystem block size invalid for DAX Turning DAX off.");
+ mp->m_flags &= ~XFS_MOUNT_DAX;
+ } else if (!sb->s_bdev->bd_disk->fops->direct_access) {
+ xfs_alert(mp,
+ "Block device does not support DAX Turning DAX off.");
+ mp->m_flags &= ~XFS_MOUNT_DAX;
+ }
+ }
+
error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
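
A condensed sketch of the gating added above, not the kernel code itself: DAX stays enabled only when both conditions hold, otherwise the mount proceeds with the flag cleared again.

/* Sketch; the PAGE_SIZE value and the helper name are illustrative only. */
static bool dax_usable(unsigned long fs_blocksize, bool has_direct_access)
{
	/* DAX maps whole pages straight to the device; sub-page blocks can't work. */
	if (fs_blocksize != 4096 /* PAGE_SIZE on the assumed arch */)
		return false;
	/* The block driver must implement ->direct_access(). */
	return has_direct_access;
}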
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 3df411eadb86..4be27b0210af 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -104,7 +104,7 @@ xfs_readlink_bmap(
cur_chunk += sizeof(struct xfs_dsymlink_hdr);
}
- memcpy(link + offset, bp->b_addr, byte_cnt);
+ memcpy(link + offset, cur_chunk, byte_cnt);
pathlen -= byte_cnt;
offset += byte_cnt;
@@ -178,7 +178,6 @@ xfs_symlink(
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
- uint cancel_flags;
int committed;
xfs_fileoff_t first_fsb;
xfs_filblks_t fs_blocks;
@@ -224,7 +223,6 @@ xfs_symlink(
return error;
tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/*
* The symlink will fit into the inode data fork?
* There can't be any attributes so we get the whole variable part.
@@ -239,10 +237,8 @@ xfs_symlink(
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
}
- if (error) {
- cancel_flags = 0;
+ if (error)
goto out_trans_cancel;
- }
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
@@ -394,7 +390,7 @@ xfs_symlink(
if (error)
goto out_bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto out_release_inode;
@@ -407,9 +403,8 @@ xfs_symlink(
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
- cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
out_release_inode:
/*
* Wait until after the current transaction is aborted to finish the
@@ -464,7 +459,7 @@ xfs_inactive_symlink_rmt(
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
@@ -533,7 +528,7 @@ xfs_inactive_symlink_rmt(
/*
* Commit the transaction containing extent freeing and EFDs.
*/
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
goto error_unlock;
@@ -552,7 +547,7 @@ xfs_inactive_symlink_rmt(
error_bmap_cancel:
xfs_bmap_cancel(&free_list);
error_trans_cancel:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
error_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 615781bf4ee5..8d916d33d93d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -738,6 +738,53 @@ TRACE_EVENT(xfs_iomap_prealloc_size,
__entry->blocks, __entry->shift, __entry->writeio_blocks)
)
+TRACE_EVENT(xfs_irec_merge_pre,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
+ uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask),
+ TP_ARGS(mp, agno, agino, holemask, nagino, nholemask),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, agino)
+ __field(uint16_t, holemask)
+ __field(xfs_agino_t, nagino)
+ __field(uint16_t, nholemask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agino = agino;
+ __entry->holemask = holemask;
+ __entry->nagino = nagino;
+ __entry->nholemask = nholemask;
+ ),
+ TP_printk("dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x)",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->agino, __entry->holemask, __entry->nagino,
+ __entry->nholemask)
+)
+
+TRACE_EVENT(xfs_irec_merge_post,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
+ uint16_t holemask),
+ TP_ARGS(mp, agno, agino, holemask),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, agino)
+ __field(uint16_t, holemask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agino = agino;
+ __entry->holemask = holemask;
+ ),
+ TP_printk("dev %d:%d agno %d inobt (%u:0x%x)", MAJOR(__entry->dev),
+ MINOR(__entry->dev), __entry->agno, __entry->agino,
+ __entry->holemask)
+)
+
#define DEFINE_IREF_EVENT(name) \
DEFINE_EVENT(xfs_iref_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 220ef2c906b2..0582a27107d4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -113,7 +113,7 @@ xfs_trans_free(
* blocks. Locks and log items, however, are not inherited. They must
* be added to the new transaction explicitly.
*/
-xfs_trans_t *
+STATIC xfs_trans_t *
xfs_trans_dup(
xfs_trans_t *tp)
{
@@ -251,14 +251,7 @@ xfs_trans_reserve(
*/
undo_log:
if (resp->tr_logres > 0) {
- int log_flags;
-
- if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
- log_flags = XFS_LOG_REL_PERM_RESERV;
- } else {
- log_flags = 0;
- }
- xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
+ xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, false);
tp->t_ticket = NULL;
tp->t_log_res = 0;
tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
@@ -744,7 +737,7 @@ void
xfs_trans_free_items(
struct xfs_trans *tp,
xfs_lsn_t commit_lsn,
- int flags)
+ bool abort)
{
struct xfs_log_item_desc *lidp, *next;
@@ -755,7 +748,7 @@ xfs_trans_free_items(
if (commit_lsn != NULLCOMMITLSN)
lip->li_ops->iop_committing(lip, commit_lsn);
- if (flags & XFS_TRANS_ABORT)
+ if (abort)
lip->li_flags |= XFS_LI_ABORTED;
lip->li_ops->iop_unlock(lip);
@@ -892,27 +885,17 @@ xfs_trans_committed_bulk(
* have already been unlocked as if the commit had succeeded.
* Do not reference the transaction structure after this call.
*/
-int
-xfs_trans_commit(
+static int
+__xfs_trans_commit(
struct xfs_trans *tp,
- uint flags)
+ bool regrant)
{
struct xfs_mount *mp = tp->t_mountp;
xfs_lsn_t commit_lsn = -1;
int error = 0;
- int log_flags = 0;
int sync = tp->t_flags & XFS_TRANS_SYNC;
/*
- * Determine whether this commit is releasing a permanent
- * log reservation or not.
- */
- if (flags & XFS_TRANS_RELEASE_LOG_RES) {
- ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
- log_flags = XFS_LOG_REL_PERM_RESERV;
- }
-
- /*
* If there is nothing to be logged by the transaction,
* then unlock all of the items associated with the
* transaction and free the transaction structure.
@@ -936,7 +919,7 @@ xfs_trans_commit(
xfs_trans_apply_sb_deltas(tp);
xfs_trans_apply_dquot_deltas(tp);
- xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
+ xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free(tp);
@@ -964,18 +947,25 @@ out_unreserve:
*/
xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) {
- commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+ commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant);
if (commit_lsn == -1 && !error)
error = -EIO;
}
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
- xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0);
+ xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
xfs_trans_free(tp);
XFS_STATS_INC(xs_trans_empty);
return error;
}
+int
+xfs_trans_commit(
+ struct xfs_trans *tp)
+{
+ return __xfs_trans_commit(tp, false);
+}
+
/*
* Unlock all of the transaction's items and free the transaction.
* The transaction must not have modified any of its items, because
@@ -986,29 +976,22 @@ out_unreserve:
*/
void
xfs_trans_cancel(
- xfs_trans_t *tp,
- int flags)
+ struct xfs_trans *tp)
{
- int log_flags;
- xfs_mount_t *mp = tp->t_mountp;
+ struct xfs_mount *mp = tp->t_mountp;
+ bool dirty = (tp->t_flags & XFS_TRANS_DIRTY);
/*
- * See if the caller is being too lazy to figure out if
- * the transaction really needs an abort.
- */
- if ((flags & XFS_TRANS_ABORT) && !(tp->t_flags & XFS_TRANS_DIRTY))
- flags &= ~XFS_TRANS_ABORT;
- /*
* See if the caller is relying on us to shut down the
* filesystem. This happens in paths where we detect
* corruption and decide to give up.
*/
- if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
+ if (dirty && !XFS_FORCED_SHUTDOWN(mp)) {
XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
}
#ifdef DEBUG
- if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) {
+ if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) {
struct xfs_log_item_desc *lidp;
list_for_each_entry(lidp, &tp->t_items, lid_trans)
@@ -1018,27 +1001,20 @@ xfs_trans_cancel(
xfs_trans_unreserve_and_mod_sb(tp);
xfs_trans_unreserve_and_mod_dquots(tp);
- if (tp->t_ticket) {
- if (flags & XFS_TRANS_RELEASE_LOG_RES) {
- ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
- log_flags = XFS_LOG_REL_PERM_RESERV;
- } else {
- log_flags = 0;
- }
- xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
- }
+ if (tp->t_ticket)
+ xfs_log_done(mp, tp->t_ticket, NULL, false);
/* mark this thread as no longer being in a transaction */
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
- xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
+ xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
xfs_trans_free(tp);
}
/*
* Roll from one trans in the sequence of PERMANENT transactions to
* the next: permanent transactions are only flushed out when
- * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon
+ * committed with xfs_trans_commit(), but we still want to let chunks
* of it go to the log as soon as possible. So we commit the
* chunk we've been working on and get a new transaction to continue.
*/
@@ -1055,7 +1031,8 @@ xfs_trans_roll(
* Ensure that the inode is always logged.
*/
trans = *tpp;
- xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
+ if (dp)
+ xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
/*
* Copy the critical parameters from one trans to the next.
@@ -1071,20 +1048,13 @@ xfs_trans_roll(
* is in progress. The caller takes the responsibility to cancel
* the duplicate transaction that gets returned.
*/
- error = xfs_trans_commit(trans, 0);
+ error = __xfs_trans_commit(trans, true);
if (error)
return error;
trans = *tpp;
/*
- * transaction commit worked ok so we can drop the extra ticket
- * reference that we gained in xfs_trans_dup()
- */
- xfs_log_ticket_put(trans->t_ticket);
-
-
- /*
* Reserve space in the log for the next transaction.
* This also pushes items in the "AIL", the list of logged items,
* out to disk if they are taking up space at the tail of the log
@@ -1100,6 +1070,7 @@ xfs_trans_roll(
if (error)
return error;
- xfs_trans_ijoin(trans, dp, 0);
+ if (dp)
+ xfs_trans_ijoin(trans, dp, 0);
return 0;
}
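
With the flag arguments gone, a typical transaction now reads like the sketch below (a hypothetical helper; error paths trimmed, signatures as used elsewhere in this diff). Dirtiness is tracked in tp->t_flags, so xfs_trans_cancel() no longer needs the caller to pass XFS_TRANS_ABORT, and xfs_trans_commit() releases a permanent log reservation by itself.

/* Sketch of the simplified life cycle; not a complete kernel function. */
static int example_trans(struct xfs_mount *mp, struct xfs_inode *ip)
{
	struct xfs_trans *tp;
	int error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error) {
		xfs_trans_cancel(tp);	/* no flags: clean vs. dirty is inferred */
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);	/* commit drops the ilock */
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	return xfs_trans_commit(tp);	/* log reservation released internally */
}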
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index b5bc1ab3c4da..3b21b4e5e467 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -133,8 +133,6 @@ typedef struct xfs_trans {
* XFS transaction mechanism exported interfaces that are
* actually macros.
*/
-#define xfs_trans_get_log_res(tp) ((tp)->t_log_res)
-#define xfs_trans_get_log_count(tp) ((tp)->t_log_count)
#define xfs_trans_get_block_res(tp) ((tp)->t_blk_res)
#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC)
@@ -153,7 +151,6 @@ typedef struct xfs_trans {
*/
xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t);
-xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
int xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *,
uint, uint);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
@@ -228,9 +225,9 @@ void xfs_trans_log_efd_extent(xfs_trans_t *,
struct xfs_efd_log_item *,
xfs_fsblock_t,
xfs_extlen_t);
-int xfs_trans_commit(xfs_trans_t *, uint flags);
+int xfs_trans_commit(struct xfs_trans *);
int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
-void xfs_trans_cancel(xfs_trans_t *, int);
+void xfs_trans_cancel(xfs_trans_t *);
int xfs_trans_ail_init(struct xfs_mount *);
void xfs_trans_ail_destroy(struct xfs_mount *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 573aefb5a573..1098cf490189 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -159,7 +159,7 @@ xfs_trans_ail_cursor_next(
{
struct xfs_log_item *lip = cur->item;
- if ((__psint_t)lip & 1)
+ if ((uintptr_t)lip & 1)
lip = xfs_ail_min(ailp);
if (lip)
cur->item = xfs_ail_next(ailp, lip);
@@ -196,7 +196,7 @@ xfs_trans_ail_cursor_clear(
list_for_each_entry(cur, &ailp->xa_cursors, list) {
if (cur->item == lip)
cur->item = (struct xfs_log_item *)
- ((__psint_t)cur->item | 1);
+ ((uintptr_t)cur->item | 1);
}
}
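
The invalidation scheme above hides a flag in bit 0 of the cursor's item pointer, which is free because log items are at least word aligned. A tiny standalone illustration of the tagging trick (struct name invented):

#include <stdint.h>
#include <stdio.h>

struct item { long payload; };	/* word aligned, so bit 0 of its address is free */

int main(void)
{
	static struct item it;
	struct item *cur = &it;

	cur = (struct item *)((uintptr_t)cur | 1);		/* mark stale */
	if ((uintptr_t)cur & 1)					/* detect the mark */
		cur = (struct item *)((uintptr_t)cur & ~(uintptr_t)1);
	printf("restored: %d\n", cur == &it);			/* prints 1 */
	return 0;
}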
@@ -287,7 +287,7 @@ xfs_ail_splice(
* find the place in the AIL where the items belong.
*/
lip = cur ? cur->item : NULL;
- if (!lip || (__psint_t) lip & 1)
+ if (!lip || (uintptr_t)lip & 1)
lip = __xfs_trans_ail_cursor_last(ailp, lsn);
/*
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 76a16df55ef7..ce78534a047e 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -90,8 +90,9 @@ xfs_trans_dup_dqinfo(
xfs_trans_t *ntp)
{
xfs_dqtrx_t *oq, *nq;
- int i,j;
+ int i, j;
xfs_dqtrx_t *oqa, *nqa;
+ ulong blk_res_used;
if (!otp->t_dqinfo)
return;
@@ -102,18 +103,23 @@ xfs_trans_dup_dqinfo(
* Because the quota blk reservation is carried forward,
* it is also necessary to carry forward the DQ_DIRTY flag.
*/
- if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
+ if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
oqa = otp->t_dqinfo->dqs[j];
nqa = ntp->t_dqinfo->dqs[j];
for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+ blk_res_used = 0;
+
if (oqa[i].qt_dquot == NULL)
break;
oq = &oqa[i];
nq = &nqa[i];
+ if (oq->qt_blk_res && oq->qt_bcount_delta > 0)
+ blk_res_used = oq->qt_bcount_delta;
+
nq->qt_dquot = oq->qt_dquot;
nq->qt_bcount_delta = nq->qt_icount_delta = 0;
nq->qt_rtbcount_delta = 0;
@@ -121,8 +127,8 @@ xfs_trans_dup_dqinfo(
/*
* Transfer whatever is left of the reservations.
*/
- nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
- oq->qt_blk_res = oq->qt_blk_res_used;
+ nq->qt_blk_res = oq->qt_blk_res - blk_res_used;
+ oq->qt_blk_res = blk_res_used;
nq->qt_rtblk_res = oq->qt_rtblk_res -
oq->qt_rtblk_res_used;
@@ -239,10 +245,6 @@ xfs_trans_mod_dquot(
* disk blocks used.
*/
case XFS_TRANS_DQ_BCOUNT:
- if (qtrx->qt_blk_res && delta > 0) {
- qtrx->qt_blk_res_used += (ulong)delta;
- ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
- }
qtrx->qt_bcount_delta += delta;
break;
@@ -423,15 +425,19 @@ xfs_trans_apply_dquot_deltas(
* reservation that a transaction structure knows of.
*/
if (qtrx->qt_blk_res != 0) {
- if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
- if (qtrx->qt_blk_res >
- qtrx->qt_blk_res_used)
+ ulong blk_res_used = 0;
+
+ if (qtrx->qt_bcount_delta > 0)
+ blk_res_used = qtrx->qt_bcount_delta;
+
+ if (qtrx->qt_blk_res != blk_res_used) {
+ if (qtrx->qt_blk_res > blk_res_used)
dqp->q_res_bcount -= (xfs_qcnt_t)
(qtrx->qt_blk_res -
- qtrx->qt_blk_res_used);
+ blk_res_used);
else
dqp->q_res_bcount -= (xfs_qcnt_t)
- (qtrx->qt_blk_res_used -
+ (blk_res_used -
qtrx->qt_blk_res);
}
} else {
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index bd1281862ad7..1b736294558a 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -30,7 +30,7 @@ void xfs_trans_init(struct xfs_mount *);
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
- int flags);
+ bool abort);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index b43276f339ef..83061cac719b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -420,7 +420,7 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode)
return fwnode && fwnode->type == FWNODE_ACPI;
}
-static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode)
+static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode)
{
return is_acpi_node(fwnode) ?
container_of(fwnode, struct acpi_device, fwnode) : NULL;
diff --git a/include/acpi/video.h b/include/acpi/video.h
index a7d7f1043e9c..e840b294c6f5 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -43,7 +43,7 @@ static inline enum acpi_backlight_type acpi_video_get_backlight_type(void)
{
return acpi_backlight_vendor;
}
-static void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type)
+static inline void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type)
{
}
#endif
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index e6a83d712ef6..55e3abc2d027 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -55,17 +55,43 @@
#endif
#ifdef CONFIG_SMP
+
+#ifndef smp_mb
#define smp_mb() mb()
+#endif
+
+#ifndef smp_rmb
#define smp_rmb() rmb()
+#endif
+
+#ifndef smp_wmb
#define smp_wmb() wmb()
+#endif
+
+#ifndef smp_read_barrier_depends
#define smp_read_barrier_depends() read_barrier_depends()
-#else
+#endif
+
+#else /* !CONFIG_SMP */
+
+#ifndef smp_mb
#define smp_mb() barrier()
+#endif
+
+#ifndef smp_rmb
#define smp_rmb() barrier()
+#endif
+
+#ifndef smp_wmb
#define smp_wmb() barrier()
+#endif
+
+#ifndef smp_read_barrier_depends
#define smp_read_barrier_depends() do { } while (0)
#endif
+#endif /* CONFIG_SMP */
+
#ifndef smp_store_mb
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#endif
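
The #ifndef guards let an architecture override just the barriers it cares about and inherit the generic fallbacks for the rest. A hypothetical arch header (arch name and asm string invented) would look like:

/* arch/foo/include/asm/barrier.h -- hypothetical example */
#ifndef _ASM_FOO_BARRIER_H
#define _ASM_FOO_BARRIER_H

/* Arch-specific full barrier; smp_rmb()/smp_wmb() are left to the generic header. */
#define smp_mb()	__asm__ __volatile__("fence" ::: "memory")

#include <asm-generic/barrier.h>

#endif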
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1618cdfb38c7..c471dfc93b71 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -53,7 +53,7 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
return adev ? adev->handle : NULL;
}
-#define ACPI_COMPANION(dev) acpi_node((dev)->fwnode)
+#define ACPI_COMPANION(dev) to_acpi_node((dev)->fwnode)
#define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \
acpi_fwnode_handle(adev) : NULL)
#define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev))
@@ -454,7 +454,7 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode)
return false;
}
-static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode)
+static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode)
{
return NULL;
}
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 26fc8bc77f85..7f8ad9593da7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -475,6 +475,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
(volatile typeof(x) *)&(x); })
#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
+/**
+ * lockless_dereference() - safely load a pointer for later dereference
+ * @p: The pointer to load
+ *
+ * Similar to rcu_dereference(), but for situations where the pointed-to
+ * object's lifetime is managed by something other than RCU. That
+ * "something other" might be reference counting or simple immortality.
+ */
+#define lockless_dereference(p) \
+({ \
+ typeof(p) _________p1 = READ_ONCE(p); \
+ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
+ (_________p1); \
+})
+
/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
#ifdef CONFIG_KPROBES
# define __kprobes __attribute__((__section__(".kprobes.text")))
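
Usage is unchanged by the move from rcupdate.h; a minimal sketch of the intended reader-side pattern (names invented, writer assumed to publish with rcu_assign_pointer() or equivalent):

struct conf {
	int threshold;
};

static struct conf *global_conf;	/* published by a writer elsewhere */

static int read_threshold(void)
{
	/* One careful load, with dependency ordering against later derefs. */
	struct conf *c = lockless_dereference(global_conf);

	return c ? c->threshold : -1;
}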
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e351da4a934f..3f1a84635da8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -70,6 +70,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
ssize_t bytes, void *private);
+typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
#define MAY_EXEC 0x00000001
#define MAY_WRITE 0x00000002
@@ -2655,9 +2656,13 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
int dax_clear_blocks(struct inode *, sector_t block, long size);
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
-int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
+int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
+ dax_iodone_t);
+int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
+ dax_iodone_t);
int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
-#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb)
+#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod)
+#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod)
#ifdef CONFIG_BLOCK
typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 812149160d3b..92188b0225bb 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -407,7 +407,6 @@ enum {
IRQCHIP_EOI_THREADED = (1 << 6),
};
-/* This include will go away once we isolated irq_desc usage to core code */
#include <linux/irqdesc.h>
/*
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c52d1480f272..624a668e61f1 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -3,9 +3,6 @@
/*
* Core internal functions to deal with irq descriptors
- *
- * This include will move to kernel/irq once we cleaned up the tree.
- * For now it's included from <linux/irq.h>
*/
struct irq_affinity_notify;
@@ -103,6 +100,11 @@ static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
#endif
}
+static inline unsigned int irq_desc_get_irq(struct irq_desc *desc)
+{
+ return desc->irq_data.irq;
+}
+
static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
{
return &desc->irq_data;
@@ -188,6 +190,47 @@ __irq_set_chip_handler_name_locked(unsigned int irq, struct irq_chip *chip,
desc->name = name;
}
+/**
+ * irq_set_handler_locked - Set irq handler from a locked region
+ * @data: Pointer to the irq_data structure which identifies the irq
+ * @handler: Flow control handler function for this interrupt
+ *
+ * Sets the handler in the irq descriptor associated to @data.
+ *
+ * Must be called with irq_desc locked and valid parameters. Typical
+ * call site is the irq_set_type() callback.
+ */
+static inline void irq_set_handler_locked(struct irq_data *data,
+ irq_flow_handler_t handler)
+{
+ struct irq_desc *desc = irq_data_to_desc(data);
+
+ desc->handle_irq = handler;
+}
+
+/**
+ * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region
+ * @data: Pointer to the irq_data structure for which the chip is set
+ * @chip: Pointer to the new irq chip
+ * @handler: Flow control handler function for this interrupt
+ * @name: Name of the interrupt
+ *
+ * Replace the irq chip at the proper hierarchy level in @data and
+ * sets the handler and name in the associated irq descriptor.
+ *
+ * Must be called with irq_desc locked and valid parameters.
+ */
+static inline void
+irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip,
+ irq_flow_handler_t handler, const char *name)
+{
+ struct irq_desc *desc = irq_data_to_desc(data);
+
+ desc->handle_irq = handler;
+ desc->name = name;
+ data->chip = chip;
+}
+
static inline int irq_balancing_disabled(unsigned int irq)
{
struct irq_desc *desc;
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index fdd5cc16c9c4..9669bf9d4f48 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -23,12 +23,6 @@ unsigned int irq_get_next_irq(unsigned int offset);
; \
else
-#ifdef CONFIG_SMP
-#define irq_node(irq) (irq_get_irq_data(irq)->node)
-#else
-#define irq_node(irq) 0
-#endif
-
# define for_each_active_irq(irq) \
for (irq = irq_get_next_irq(0); irq < nr_irqs; \
irq = irq_get_next_irq(irq + 1))
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 0dfa4e31563d..5f0be58640ea 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -816,13 +816,15 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#endif
/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */
-#define VERIFY_OCTAL_PERMISSIONS(perms) \
- (BUILD_BUG_ON_ZERO((perms) < 0) + \
- BUILD_BUG_ON_ZERO((perms) > 0777) + \
- /* User perms >= group perms >= other perms */ \
- BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \
- BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \
- /* Other writable? Generally considered a bad idea. */ \
- BUILD_BUG_ON_ZERO((perms) & 2) + \
+#define VERIFY_OCTAL_PERMISSIONS(perms) \
+ (BUILD_BUG_ON_ZERO((perms) < 0) + \
+ BUILD_BUG_ON_ZERO((perms) > 0777) + \
+ /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \
+ BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \
+ BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \
+ /* USER_WRITABLE >= GROUP_WRITABLE */ \
+ BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \
+ /* OTHER_WRITABLE? Generally considered a bad idea. */ \
+ BUILD_BUG_ON_ZERO((perms) & 2) + \
(perms))
#endif
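
Worked examples of what the reworked checks accept, assuming the macro above; each rejected line would trip a BUILD_BUG_ON at compile time:

VERIFY_OCTAL_PERMISSIONS(0644);	/* ok: rw-r--r-- */
VERIFY_OCTAL_PERMISSIONS(0444);	/* ok: world-readable, nobody writable */
VERIFY_OCTAL_PERMISSIONS(0446);	/* build error: other-writable */
VERIFY_OCTAL_PERMISSIONS(0044);	/* build error: group-readable but not user-readable */
VERIFY_OCTAL_PERMISSIONS(0624);	/* build error: other-readable but not group-readable */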
diff --git a/include/linux/module.h b/include/linux/module.h
index 7ffe0851d244..d67b1932cc59 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -17,6 +17,7 @@
#include <linux/moduleparam.h>
#include <linux/jump_label.h>
#include <linux/export.h>
+#include <linux/rbtree_latch.h>
#include <linux/percpu.h>
#include <asm/module.h>
@@ -210,6 +211,13 @@ enum module_state {
MODULE_STATE_UNFORMED, /* Still setting it up. */
};
+struct module;
+
+struct mod_tree_node {
+ struct module *mod;
+ struct latch_tree_node node;
+};
+
struct module {
enum module_state state;
@@ -232,6 +240,9 @@ struct module {
unsigned int num_syms;
/* Kernel parameters. */
+#ifdef CONFIG_SYSFS
+ struct mutex param_lock;
+#endif
struct kernel_param *kp;
unsigned int num_kp;
@@ -271,8 +282,15 @@ struct module {
/* Startup function. */
int (*init)(void);
- /* If this is non-NULL, vfree after init() returns */
- void *module_init;
+ /*
+ * If this is non-NULL, vfree() after init() returns.
+ *
+ * Cacheline align here, such that:
+ * module_init, module_core, init_size, core_size,
+ * init_text_size, core_text_size and mtn_core::{mod,node[0]}
+ * are on the same cacheline.
+ */
+ void *module_init ____cacheline_aligned;
/* Here is the actual code + data, vfree'd on unload. */
void *module_core;
@@ -283,6 +301,16 @@ struct module {
/* The size of the executable code in each section. */
unsigned int init_text_size, core_text_size;
+#ifdef CONFIG_MODULES_TREE_LOOKUP
+ /*
+ * We want mtn_core::{mod,node[0]} to be in the same cacheline as the
+ * above entries such that a regular lookup will only touch one
+ * cacheline.
+ */
+ struct mod_tree_node mtn_core;
+ struct mod_tree_node mtn_init;
+#endif
+
/* Size of RO sections of the module (text+rodata) */
unsigned int init_ro_size, core_ro_size;
@@ -369,7 +397,7 @@ struct module {
ctor_fn_t *ctors;
unsigned int num_ctors;
#endif
-};
+} ____cacheline_aligned;
#ifndef MODULE_ARCH_INIT
#define MODULE_ARCH_INIT {}
#endif
@@ -423,14 +451,22 @@ struct symsearch {
bool unused;
};
-/* Search for an exported symbol by name. */
+/*
+ * Search for an exported symbol by name.
+ *
+ * Must be called with module_mutex held or preemption disabled.
+ */
const struct kernel_symbol *find_symbol(const char *name,
struct module **owner,
const unsigned long **crc,
bool gplok,
bool warn);
-/* Walk the exported symbol table */
+/*
+ * Walk the exported symbol table
+ *
+ * Must be called with module_mutex held or preemption disabled.
+ */
bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
struct module *owner,
void *data), void *data);
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 6480dcaca275..c12f2147c350 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -67,8 +67,9 @@ enum {
struct kernel_param {
const char *name;
+ struct module *mod;
const struct kernel_param_ops *ops;
- u16 perm;
+ const u16 perm;
s8 level;
u8 flags;
union {
@@ -108,7 +109,7 @@ struct kparam_array
*
* @perm is 0 if the variable is not to appear in sysfs, or 0444
* for world-readable, 0644 for root-writable, etc. Note that if it
- * is writable, you may need to use kparam_block_sysfs_write() around
+ * is writable, you may need to use kernel_param_lock() around
* accesses (esp. charp, which can be kfreed when it changes).
*
* The @type is simply pasted to refer to a param_ops_##type and a
@@ -216,16 +217,16 @@ struct kparam_array
parameters. */
#define __module_param_call(prefix, name, ops, arg, perm, level, flags) \
/* Default value instead of permissions? */ \
- static const char __param_str_##name[] = prefix #name; \
+ static const char __param_str_##name[] = prefix #name; \
static struct kernel_param __moduleparam_const __param_##name \
__used \
__attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
- = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \
- level, flags, { arg } }
+ = { __param_str_##name, THIS_MODULE, ops, \
+ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } }
/* Obsolete - use module_param_cb() */
#define module_param_call(name, set, get, arg, perm) \
- static struct kernel_param_ops __param_ops_##name = \
+ static const struct kernel_param_ops __param_ops_##name = \
{ .flags = 0, (void *)set, (void *)get }; \
__module_param_call(MODULE_PARAM_PREFIX, \
name, &__param_ops_##name, arg, \
@@ -238,58 +239,14 @@ __check_old_set_param(int (*oldset)(const char *, struct kernel_param *))
return 0;
}
-/**
- * kparam_block_sysfs_write - make sure a parameter isn't written via sysfs.
- * @name: the name of the parameter
- *
- * There's no point blocking write on a paramter that isn't writable via sysfs!
- */
-#define kparam_block_sysfs_write(name) \
- do { \
- BUG_ON(!(__param_##name.perm & 0222)); \
- __kernel_param_lock(); \
- } while (0)
-
-/**
- * kparam_unblock_sysfs_write - allows sysfs to write to a parameter again.
- * @name: the name of the parameter
- */
-#define kparam_unblock_sysfs_write(name) \
- do { \
- BUG_ON(!(__param_##name.perm & 0222)); \
- __kernel_param_unlock(); \
- } while (0)
-
-/**
- * kparam_block_sysfs_read - make sure a parameter isn't read via sysfs.
- * @name: the name of the parameter
- *
- * This also blocks sysfs writes.
- */
-#define kparam_block_sysfs_read(name) \
- do { \
- BUG_ON(!(__param_##name.perm & 0444)); \
- __kernel_param_lock(); \
- } while (0)
-
-/**
- * kparam_unblock_sysfs_read - allows sysfs to read a parameter again.
- * @name: the name of the parameter
- */
-#define kparam_unblock_sysfs_read(name) \
- do { \
- BUG_ON(!(__param_##name.perm & 0444)); \
- __kernel_param_unlock(); \
- } while (0)
-
#ifdef CONFIG_SYSFS
-extern void __kernel_param_lock(void);
-extern void __kernel_param_unlock(void);
+extern void kernel_param_lock(struct module *mod);
+extern void kernel_param_unlock(struct module *mod);
#else
-static inline void __kernel_param_lock(void)
+static inline void kernel_param_lock(struct module *mod)
{
}
-static inline void __kernel_param_unlock(void)
+static inline void kernel_param_unlock(struct module *mod)
{
}
#endif
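
The per-module lock replaces the old global one, so callers now name the owning module. A sketch of the updated pattern around a writable charp parameter (parameter name invented):

static char *mode = "fast";
module_param(mode, charp, 0644);

static void show_mode(void)
{
	/* charp values can be kfreed on a sysfs write; hold the lock to read. */
	kernel_param_lock(THIS_MODULE);
	pr_info("mode=%s\n", mode);
	kernel_param_unlock(THIS_MODULE);
}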
@@ -386,64 +343,70 @@ static inline void destroy_params(const struct kernel_param *params,
#define __param_check(name, p, type) \
static inline type __always_unused *__check_##name(void) { return(p); }
-extern struct kernel_param_ops param_ops_byte;
+extern const struct kernel_param_ops param_ops_byte;
extern int param_set_byte(const char *val, const struct kernel_param *kp);
extern int param_get_byte(char *buffer, const struct kernel_param *kp);
#define param_check_byte(name, p) __param_check(name, p, unsigned char)
-extern struct kernel_param_ops param_ops_short;
+extern const struct kernel_param_ops param_ops_short;
extern int param_set_short(const char *val, const struct kernel_param *kp);
extern int param_get_short(char *buffer, const struct kernel_param *kp);
#define param_check_short(name, p) __param_check(name, p, short)
-extern struct kernel_param_ops param_ops_ushort;
+extern const struct kernel_param_ops param_ops_ushort;
extern int param_set_ushort(const char *val, const struct kernel_param *kp);
extern int param_get_ushort(char *buffer, const struct kernel_param *kp);
#define param_check_ushort(name, p) __param_check(name, p, unsigned short)
-extern struct kernel_param_ops param_ops_int;
+extern const struct kernel_param_ops param_ops_int;
extern int param_set_int(const char *val, const struct kernel_param *kp);
extern int param_get_int(char *buffer, const struct kernel_param *kp);
#define param_check_int(name, p) __param_check(name, p, int)
-extern struct kernel_param_ops param_ops_uint;
+extern const struct kernel_param_ops param_ops_uint;
extern int param_set_uint(const char *val, const struct kernel_param *kp);
extern int param_get_uint(char *buffer, const struct kernel_param *kp);
#define param_check_uint(name, p) __param_check(name, p, unsigned int)
-extern struct kernel_param_ops param_ops_long;
+extern const struct kernel_param_ops param_ops_long;
extern int param_set_long(const char *val, const struct kernel_param *kp);
extern int param_get_long(char *buffer, const struct kernel_param *kp);
#define param_check_long(name, p) __param_check(name, p, long)
-extern struct kernel_param_ops param_ops_ulong;
+extern const struct kernel_param_ops param_ops_ulong;
extern int param_set_ulong(const char *val, const struct kernel_param *kp);
extern int param_get_ulong(char *buffer, const struct kernel_param *kp);
#define param_check_ulong(name, p) __param_check(name, p, unsigned long)
-extern struct kernel_param_ops param_ops_ullong;
+extern const struct kernel_param_ops param_ops_ullong;
extern int param_set_ullong(const char *val, const struct kernel_param *kp);
extern int param_get_ullong(char *buffer, const struct kernel_param *kp);
#define param_check_ullong(name, p) __param_check(name, p, unsigned long long)
-extern struct kernel_param_ops param_ops_charp;
+extern const struct kernel_param_ops param_ops_charp;
extern int param_set_charp(const char *val, const struct kernel_param *kp);
extern int param_get_charp(char *buffer, const struct kernel_param *kp);
#define param_check_charp(name, p) __param_check(name, p, char *)
/* We used to allow int as well as bool. We're taking that away! */
-extern struct kernel_param_ops param_ops_bool;
+extern const struct kernel_param_ops param_ops_bool;
extern int param_set_bool(const char *val, const struct kernel_param *kp);
extern int param_get_bool(char *buffer, const struct kernel_param *kp);
#define param_check_bool(name, p) __param_check(name, p, bool)
-extern struct kernel_param_ops param_ops_invbool;
+extern const struct kernel_param_ops param_ops_bool_enable_only;
+extern int param_set_bool_enable_only(const char *val,
+ const struct kernel_param *kp);
+/* getter is the same as for the regular bool */
+#define param_check_bool_enable_only param_check_bool
+
+extern const struct kernel_param_ops param_ops_invbool;
extern int param_set_invbool(const char *val, const struct kernel_param *kp);
extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
#define param_check_invbool(name, p) __param_check(name, p, bool)
/* An int, which can only be set like a bool (though it shows as an int). */
-extern struct kernel_param_ops param_ops_bint;
+extern const struct kernel_param_ops param_ops_bint;
extern int param_set_bint(const char *val, const struct kernel_param *kp);
#define param_get_bint param_get_int
#define param_check_bint param_check_int
@@ -487,9 +450,9 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp);
perm, -1, 0); \
__MODULE_PARM_TYPE(name, "array of " #type)
-extern struct kernel_param_ops param_array_ops;
+extern const struct kernel_param_ops param_array_ops;
-extern struct kernel_param_ops param_ops_string;
+extern const struct kernel_param_ops param_ops_string;
extern int param_set_copystring(const char *val, const struct kernel_param *);
extern int param_get_string(char *buffer, const struct kernel_param *kp);
diff --git a/include/linux/of.h b/include/linux/of.h
index b871ff9d81d7..f05fdcea4e66 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -128,7 +128,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode)
return fwnode && fwnode->type == FWNODE_OF;
}
-static inline struct device_node *of_node(struct fwnode_handle *fwnode)
+static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
{
return fwnode ? container_of(fwnode, struct device_node, fwnode) : NULL;
}
@@ -387,7 +387,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode)
return false;
}
-static inline struct device_node *of_node(struct fwnode_handle *fwnode)
+static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
{
return NULL;
}
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index fb31765e935a..830c4992088d 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/stddef.h>
+#include <linux/rcupdate.h>
struct rb_node {
unsigned long __rb_parent_color;
@@ -73,11 +74,11 @@ extern struct rb_node *rb_first_postorder(const struct rb_root *);
extern struct rb_node *rb_next_postorder(const struct rb_node *);
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
-extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
-static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
- struct rb_node ** rb_link)
+static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
+ struct rb_node **rb_link)
{
node->__rb_parent_color = (unsigned long)parent;
node->rb_left = node->rb_right = NULL;
@@ -85,6 +86,15 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
*rb_link = node;
}
+static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
+ struct rb_node **rb_link)
+{
+ node->__rb_parent_color = (unsigned long)parent;
+ node->rb_left = node->rb_right = NULL;
+
+ rcu_assign_pointer(*rb_link, node);
+}
+
#define rb_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
____ptr ? rb_entry(____ptr, type, member) : NULL; \
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 378c5ee75f78..14d7b831b63a 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -123,11 +123,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
{
if (parent) {
if (parent->rb_left == old)
- parent->rb_left = new;
+ WRITE_ONCE(parent->rb_left, new);
else
- parent->rb_right = new;
+ WRITE_ONCE(parent->rb_right, new);
} else
- root->rb_node = new;
+ WRITE_ONCE(root->rb_node, new);
}
extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
@@ -137,7 +137,8 @@ static __always_inline struct rb_node *
__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
- struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *child = node->rb_right;
+ struct rb_node *tmp = node->rb_left;
struct rb_node *parent, *rebalance;
unsigned long pc;
@@ -167,6 +168,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
tmp = parent;
} else {
struct rb_node *successor = child, *child2;
+
tmp = child->rb_left;
if (!tmp) {
/*
@@ -180,6 +182,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
*/
parent = successor;
child2 = successor->rb_right;
+
augment->copy(node, successor);
} else {
/*
@@ -201,19 +204,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
successor = tmp;
tmp = tmp->rb_left;
} while (tmp);
- parent->rb_left = child2 = successor->rb_right;
- successor->rb_right = child;
+ child2 = successor->rb_right;
+ WRITE_ONCE(parent->rb_left, child2);
+ WRITE_ONCE(successor->rb_right, child);
rb_set_parent(child, successor);
+
augment->copy(node, successor);
augment->propagate(parent, successor);
}
- successor->rb_left = tmp = node->rb_left;
+ tmp = node->rb_left;
+ WRITE_ONCE(successor->rb_left, tmp);
rb_set_parent(tmp, successor);
pc = node->__rb_parent_color;
tmp = __rb_parent(pc);
__rb_change_child(node, successor, tmp, root);
+
if (child2) {
successor->__rb_parent_color = pc;
rb_set_parent_color(child2, parent, RB_BLACK);
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
new file mode 100644
index 000000000000..4f3432c61d12
--- /dev/null
+++ b/include/linux/rbtree_latch.h
@@ -0,0 +1,212 @@
+/*
+ * Latched RB-trees
+ *
+ * Copyright (C) 2015 Intel Corp., Peter Zijlstra <peterz@infradead.org>
+ *
+ * Since RB-trees have non-atomic modifications they're not immediately suited
+ * for RCU/lockless queries. Even though we made RB-tree lookups non-fatal for
+ * lockless lookups, we cannot guarantee they return a correct result.
+ *
+ * The simplest solution is a seqlock + RB-tree; this allows lockless
+ * lookups, but has the constraint (inherent to the seqlock) that read sides
+ * cannot nest in write sides.
+ *
+ * If we need to allow unconditional lookups (say as required for NMI context
+ * usage) we need a more complex setup; this data structure provides this by
+ * employing the latch technique -- see @raw_write_seqcount_latch -- to
+ * implement a latched RB-tree which does allow for unconditional lookups by
+ * virtue of always having (at least) one stable copy of the tree.
+ *
+ * However, while we have the guarantee that there is at all times one stable
+ * copy, this does not guarantee an iteration will not observe modifications.
+ * What might have been a stable copy at the start of the iteration need not
+ * remain so for the duration of the iteration.
+ *
+ * Therefore, this does require a lockless RB-tree iteration to be non-fatal;
+ * see the comment in lib/rbtree.c. Note however that we only require the first
+ * condition -- not seeing partial stores -- because the latch thing isolates
+ * us from loops. If we were to interrupt a modification the lookup would be
+ * pointed at the stable tree and complete while the modification was halted.
+ */
+
+#ifndef RB_TREE_LATCH_H
+#define RB_TREE_LATCH_H
+
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+
+struct latch_tree_node {
+ struct rb_node node[2];
+};
+
+struct latch_tree_root {
+ seqcount_t seq;
+ struct rb_root tree[2];
+};
+
+/**
+ * latch_tree_ops - operators to define the tree order
+ * @less: used for insertion; provides the (partial) order between two elements.
+ * @comp: used for lookups; provides the order between the search key and an element.
+ *
+ * The operators are related like:
+ *
+ * comp(a->key,b) < 0 := less(a,b)
+ * comp(a->key,b) > 0 := less(b,a)
+ * comp(a->key,b) == 0 := !less(a,b) && !less(b,a)
+ *
+ * If these operators define a partial order on the elements we make no
+ * guarantee on which of the elements matching the key is found. See
+ * latch_tree_find().
+ */
+struct latch_tree_ops {
+ bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b);
+ int (*comp)(void *key, struct latch_tree_node *b);
+};
+
+static __always_inline struct latch_tree_node *
+__lt_from_rb(struct rb_node *node, int idx)
+{
+ return container_of(node, struct latch_tree_node, node[idx]);
+}
+
+static __always_inline void
+__lt_insert(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx,
+ bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b))
+{
+ struct rb_root *root = &ltr->tree[idx];
+ struct rb_node **link = &root->rb_node;
+ struct rb_node *node = &ltn->node[idx];
+ struct rb_node *parent = NULL;
+ struct latch_tree_node *ltp;
+
+ while (*link) {
+ parent = *link;
+ ltp = __lt_from_rb(parent, idx);
+
+ if (less(ltn, ltp))
+ link = &parent->rb_left;
+ else
+ link = &parent->rb_right;
+ }
+
+ rb_link_node_rcu(node, parent, link);
+ rb_insert_color(node, root);
+}
+
+static __always_inline void
+__lt_erase(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx)
+{
+ rb_erase(&ltn->node[idx], &ltr->tree[idx]);
+}
+
+static __always_inline struct latch_tree_node *
+__lt_find(void *key, struct latch_tree_root *ltr, int idx,
+ int (*comp)(void *key, struct latch_tree_node *node))
+{
+ struct rb_node *node = rcu_dereference_raw(ltr->tree[idx].rb_node);
+ struct latch_tree_node *ltn;
+ int c;
+
+ while (node) {
+ ltn = __lt_from_rb(node, idx);
+ c = comp(key, ltn);
+
+ if (c < 0)
+ node = rcu_dereference_raw(node->rb_left);
+ else if (c > 0)
+ node = rcu_dereference_raw(node->rb_right);
+ else
+ return ltn;
+ }
+
+ return NULL;
+}
+
+/**
+ * latch_tree_insert() - insert @node into the trees @root
+ * @node: nodes to insert
+ * @root: trees to insert @node into
+ * @ops: operators defining the node order
+ *
+ * It inserts @node into @root in an ordered fashion such that we can always
+ * observe one complete tree. See the comment for raw_write_seqcount_latch().
+ *
+ * The inserts use rcu_assign_pointer() to publish the element such that the
+ * tree structure is stored before we can observe the new @node.
+ *
+ * All modifications (latch_tree_insert, latch_tree_erase) are assumed to be
+ * serialized.
+ */
+static __always_inline void
+latch_tree_insert(struct latch_tree_node *node,
+ struct latch_tree_root *root,
+ const struct latch_tree_ops *ops)
+{
+ raw_write_seqcount_latch(&root->seq);
+ __lt_insert(node, root, 0, ops->less);
+ raw_write_seqcount_latch(&root->seq);
+ __lt_insert(node, root, 1, ops->less);
+}
+
+/**
+ * latch_tree_erase() - remove @node from the trees @root
+ * @node: node to remove
+ * @root: trees to remove @node from
+ * @ops: operators defining the node order
+ *
+ * Removes @node from the trees @root in an ordered fashion such that we can
+ * always observe one complete tree. See the comment for
+ * raw_write_seqcount_latch().
+ *
+ * It is assumed that @node will observe one RCU quiescent state before being
+ * reused or freed.
+ *
+ * All modifications (latch_tree_insert, latch_tree_erase) are assumed to be
+ * serialized.
+ */
+static __always_inline void
+latch_tree_erase(struct latch_tree_node *node,
+ struct latch_tree_root *root,
+ const struct latch_tree_ops *ops)
+{
+ raw_write_seqcount_latch(&root->seq);
+ __lt_erase(node, root, 0);
+ raw_write_seqcount_latch(&root->seq);
+ __lt_erase(node, root, 1);
+}
+
+/**
+ * latch_tree_find() - find the node matching @key in the trees @root
+ * @key: search key
+ * @root: trees to search for @key
+ * @ops: operators defining the node order
+ *
+ * Does a lockless lookup in the trees @root for the node matching @key.
+ *
+ * It is assumed that this is called while holding the appropriate RCU read
+ * side lock.
+ *
+ * If the operators define a partial order on the elements (i.e. multiple
+ * elements can share the same key value) it is undefined which of these
+ * elements will be found. Nor is it possible to iterate the tree to find
+ * further elements with the same key value.
+ *
+ * Returns: a pointer to the node matching @key or NULL.
+ */
+static __always_inline struct latch_tree_node *
+latch_tree_find(void *key, struct latch_tree_root *root,
+ const struct latch_tree_ops *ops)
+{
+ struct latch_tree_node *node;
+ unsigned int seq;
+
+ do {
+ seq = raw_read_seqcount_latch(&root->seq);
+ node = __lt_find(key, root, seq & 1, ops->comp);
+ } while (read_seqcount_retry(&root->seq, seq));
+
+ return node;
+}
+
+#endif /* RB_TREE_LATCH_H */
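As a concrete illustration of the rbtree_latch.h API above, a minimal sketch
follows; the element type and all names (ex_node, ex_less, ex_comp, ex_insert,
ex_lookup) are invented for this example, seqcount initialisation is elided,
insertions are assumed externally serialized, and lookups assume an RCU
read-side section:

	struct ex_node {
		struct latch_tree_node ltn;
		unsigned long key;
	};

	static bool ex_less(struct latch_tree_node *a, struct latch_tree_node *b)
	{
		return container_of(a, struct ex_node, ltn)->key <
		       container_of(b, struct ex_node, ltn)->key;
	}

	static int ex_comp(void *key, struct latch_tree_node *n)
	{
		unsigned long k = (unsigned long)key;
		unsigned long nk = container_of(n, struct ex_node, ltn)->key;

		if (k < nk)
			return -1;
		if (k > nk)
			return 1;
		return 0;
	}

	static const struct latch_tree_ops ex_ops = {
		.less = ex_less,
		.comp = ex_comp,
	};
	static struct latch_tree_root ex_root;

	static void ex_insert(struct ex_node *n)	/* caller holds a lock */
	{
		latch_tree_insert(&n->ltn, &ex_root, &ex_ops);
	}

	static struct ex_node *ex_lookup(unsigned long key)	/* caller in RCU */
	{
		struct latch_tree_node *ltn;

		ltn = latch_tree_find((void *)key, &ex_root, &ex_ops);
		return ltn ? container_of(ltn, struct ex_node, ltn) : NULL;
	}

Note how ex_less and ex_comp satisfy the relation documented in
latch_tree_ops: ex_comp((void *)a->key, &b->ltn) < 0 exactly when
ex_less(&a->ltn, &b->ltn).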
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 33a056bb886f..4cf5f51b4c9c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -633,21 +633,6 @@ static inline void rcu_preempt_sleep_check(void)
#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
/**
- * lockless_dereference() - safely load a pointer for later dereference
- * @p: The pointer to load
- *
- * Similar to rcu_dereference(), but for situations where the pointed-to
- * object's lifetime is managed by something other than RCU. That
- * "something other" might be reference counting or simple immortality.
- */
-#define lockless_dereference(p) \
-({ \
- typeof(p) _________p1 = READ_ONCE(p); \
- smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
- (_________p1); \
-})
-
-/**
* rcu_assign_pointer() - assign to RCU-protected pointer
* @p: pointer to assign to
* @v: value to assign (publish)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 486e685a226a..e0582106ef4f 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -35,6 +35,7 @@
#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/lockdep.h>
+#include <linux/compiler.h>
#include <asm/processor.h>
/*
@@ -274,9 +275,87 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
s->sequence++;
}
-/*
+static inline int raw_read_seqcount_latch(seqcount_t *s)
+{
+ return lockless_dereference(s->sequence);
+}
+
+/**
* raw_write_seqcount_latch - redirect readers to even/odd copy
* @s: pointer to seqcount_t
+ *
+ * The latch technique is a multiversion concurrency control method that allows
+ * queries during non-atomic modifications. If you can guarantee queries never
+ * interrupt the modification -- e.g. the concurrency is strictly between CPUs
+ * -- you most likely do not need this.
+ *
+ * Where the traditional RCU/lockless data structures rely on atomic
+ * modifications to ensure queries observe either the old or the new state the
+ * latch allows the same for non-atomic updates. The trade-off is doubling the
+ * cost of storage; we have to maintain two copies of the entire data
+ * structure.
+ *
+ * Very simply put: we first modify one copy and then the other. This ensures
+ * there is always one copy in a stable state, ready to give us an answer.
+ *
+ * The basic form is a data structure like:
+ *
+ * struct latch_struct {
+ * seqcount_t seq;
+ * struct data_struct data[2];
+ * };
+ *
+ * Where a modification, which is assumed to be externally serialized, does the
+ * following:
+ *
+ * void latch_modify(struct latch_struct *latch, ...)
+ * {
+ * smp_wmb(); <- Ensure that the last data[1] update is visible
+ * latch->seq++;
+ * smp_wmb(); <- Ensure that the seqcount update is visible
+ *
+ * modify(latch->data[0], ...);
+ *
+ * smp_wmb(); <- Ensure that the data[0] update is visible
+ * latch->seq++;
+ * smp_wmb(); <- Ensure that the seqcount update is visible
+ *
+ * modify(latch->data[1], ...);
+ * }
+ *
+ * The query will have a form like:
+ *
+ * struct entry *latch_query(struct latch_struct *latch, ...)
+ * {
+ * struct entry *entry;
+ * unsigned seq, idx;
+ *
+ * do {
+ * seq = lockless_dereference(latch->seq);
+ *
+ * idx = seq & 0x01;
+ * entry = data_query(latch->data[idx], ...);
+ *
+ * smp_rmb();
+ * } while (seq != latch->seq);
+ *
+ * return entry;
+ * }
+ *
+ * So during the modification, queries are first redirected to data[1]. Then we
+ * modify data[0]. When that is complete, we redirect queries back to data[0]
+ * and we can modify data[1].
+ *
+ * NOTE: The non-requirement for atomic modifications does _NOT_ include
+ * the publishing of new entries in the case where data is a dynamic
+ * data structure.
+ *
+ * An iteration might start in data[0] and get suspended long enough
+ * to miss an entire modification sequence; once it resumes it might
+ * observe the new entry.
+ *
+ * NOTE: When data is a dynamic data structure, one should use regular RCU
+ * patterns to manage the lifetimes of the objects within.
*/
static inline void raw_write_seqcount_latch(seqcount_t *s)
{
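Tying the modify/query forms documented above to the helpers this hunk adds,
here is a minimal sketch of a two-copy value readable from any context (the
names latched_u64, latched_set and latched_get are invented; writers are
assumed externally serialized):

	struct latched_u64 {
		seqcount_t seq;
		u64 data[2];
	};

	static void latched_set(struct latched_u64 *lv, u64 val)
	{
		raw_write_seqcount_latch(&lv->seq);	/* readers -> data[1] */
		lv->data[0] = val;
		raw_write_seqcount_latch(&lv->seq);	/* readers -> data[0] */
		lv->data[1] = val;
	}

	static u64 latched_get(struct latched_u64 *lv)	/* NMI-safe reader */
	{
		unsigned int seq;
		u64 val;

		do {
			seq = raw_read_seqcount_latch(&lv->seq);
			val = lv->data[seq & 1];
		} while (read_seqcount_retry(&lv->seq, seq));

		return val;
	}

This is exactly the shape __ktime_get_fast_ns() takes in the timekeeping hunk
further down.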
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 16a923a3a43a..e602f8177ebf 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/atomic.h>
#include <net/neighbour.h>
+#include <net/sock.h>
#define AX25_T1CLAMPLO 1
#define AX25_T1CLAMPHI (30 * HZ)
@@ -246,7 +247,20 @@ typedef struct ax25_cb {
atomic_t refcount;
} ax25_cb;
-#define ax25_sk(__sk) ((ax25_cb *)(__sk)->sk_protinfo)
+struct ax25_sock {
+ struct sock sk;
+ struct ax25_cb *cb;
+};
+
+static inline struct ax25_sock *ax25_sk(const struct sock *sk)
+{
+ return (struct ax25_sock *) sk;
+}
+
+static inline struct ax25_cb *sk_to_ax25(const struct sock *sk)
+{
+ return ax25_sk(sk)->cb;
+}
#define ax25_for_each(__ax25, list) \
hlist_for_each_entry(__ax25, list, ax25_node)
diff --git a/include/net/sock.h b/include/net/sock.h
index 14d539c040d7..05a8c1aea251 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -277,7 +277,6 @@ struct cg_proto;
* @sk_incoming_cpu: record cpu processing incoming packets
* @sk_txhash: computed flow hash for use on transmit
* @sk_filter: socket filtering instructions
- * @sk_protinfo: private area, net family specific, when not using slab
* @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received
* @sk_tsflags: SO_TIMESTAMPING socket options
@@ -416,7 +415,6 @@ struct sock {
const struct cred *sk_peer_cred;
long sk_rcvtimeo;
long sk_sndtimeo;
- void *sk_protinfo;
struct timer_list sk_timer;
ktime_t sk_stamp;
u16 sk_tsflags;
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 7f79cf459591..0b73af9be12f 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1117,61 +1117,6 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy,
TP_ARGS(wq)
);
-#define show_oper_type(type) \
- __print_symbolic(type, \
- { BTRFS_QGROUP_OPER_ADD_EXCL, "OPER_ADD_EXCL" }, \
- { BTRFS_QGROUP_OPER_ADD_SHARED, "OPER_ADD_SHARED" }, \
- { BTRFS_QGROUP_OPER_SUB_EXCL, "OPER_SUB_EXCL" }, \
- { BTRFS_QGROUP_OPER_SUB_SHARED, "OPER_SUB_SHARED" })
-
-DECLARE_EVENT_CLASS(btrfs_qgroup_oper,
-
- TP_PROTO(struct btrfs_qgroup_operation *oper),
-
- TP_ARGS(oper),
-
- TP_STRUCT__entry(
- __field( u64, ref_root )
- __field( u64, bytenr )
- __field( u64, num_bytes )
- __field( u64, seq )
- __field( int, type )
- __field( u64, elem_seq )
- ),
-
- TP_fast_assign(
- __entry->ref_root = oper->ref_root;
- __entry->bytenr = oper->bytenr,
- __entry->num_bytes = oper->num_bytes;
- __entry->seq = oper->seq;
- __entry->type = oper->type;
- __entry->elem_seq = oper->elem.seq;
- ),
-
- TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, "
- "seq = %llu, elem.seq = %llu, type = %s",
- (unsigned long long)__entry->ref_root,
- (unsigned long long)__entry->bytenr,
- (unsigned long long)__entry->num_bytes,
- (unsigned long long)__entry->seq,
- (unsigned long long)__entry->elem_seq,
- show_oper_type(__entry->type))
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account,
-
- TP_PROTO(struct btrfs_qgroup_operation *oper),
-
- TP_ARGS(oper)
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref,
-
- TP_PROTO(struct btrfs_qgroup_operation *oper),
-
- TP_ARGS(oper)
-);
-
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 83d6236a2f08..eaf94919291a 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -19,8 +19,10 @@
#define _UAPI_LINUX_IN_H
#include <linux/types.h>
+#include <linux/libc-compat.h>
#include <linux/socket.h>
+#if __UAPI_DEF_IN_IPPROTO
/* Standard well-defined IP protocols. */
enum {
IPPROTO_IP = 0, /* Dummy protocol for TCP */
@@ -75,12 +77,14 @@ enum {
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_MAX
};
+#endif
-
+#if __UAPI_DEF_IN_ADDR
/* Internet address. */
struct in_addr {
__be32 s_addr;
};
+#endif
#define IP_TOS 1
#define IP_TTL 2
@@ -158,6 +162,7 @@ struct in_addr {
/* Request struct for multicast socket ops */
+#if __UAPI_DEF_IP_MREQ
struct ip_mreq {
struct in_addr imr_multiaddr; /* IP multicast address of group */
struct in_addr imr_interface; /* local IP address of interface */
@@ -209,14 +214,18 @@ struct group_filter {
#define GROUP_FILTER_SIZE(numsrc) \
(sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \
+ (numsrc) * sizeof(struct __kernel_sockaddr_storage))
+#endif
+#if __UAPI_DEF_IN_PKTINFO
struct in_pktinfo {
int ipi_ifindex;
struct in_addr ipi_spec_dst;
struct in_addr ipi_addr;
};
+#endif
/* Structure describing an Internet (IP) socket address. */
+#if __UAPI_DEF_SOCKADDR_IN
#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */
struct sockaddr_in {
__kernel_sa_family_t sin_family; /* Address family */
@@ -228,8 +237,9 @@ struct sockaddr_in {
sizeof(unsigned short int) - sizeof(struct in_addr)];
};
#define sin_zero __pad /* for BSD UNIX comp. -FvK */
+#endif
-
+#if __UAPI_DEF_IN_CLASS
/*
* Definitions of the bits in an Internet address integer.
* On subnets, host and network parts are found according
@@ -280,7 +290,7 @@ struct sockaddr_in {
#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
-
+#endif
/* <asm/byteorder.h> contains the htonl type stuff.. */
#include <asm/byteorder.h>
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index fa673e9cc040..7d024ceb075d 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -56,6 +56,13 @@
/* GLIBC headers included first so don't define anything
* that would already be defined. */
+#define __UAPI_DEF_IN_ADDR 0
+#define __UAPI_DEF_IN_IPPROTO 0
+#define __UAPI_DEF_IN_PKTINFO 0
+#define __UAPI_DEF_IP_MREQ 0
+#define __UAPI_DEF_SOCKADDR_IN 0
+#define __UAPI_DEF_IN_CLASS 0
+
#define __UAPI_DEF_IN6_ADDR 0
/* The exception is the in6_addr macros which must be defined
* if the glibc code didn't define them. This guard matches
@@ -78,6 +85,13 @@
/* Linux headers included first, and we must define everything
* we need. The expectation is that glibc will check the
* __UAPI_DEF_* defines and adjust appropriately. */
+#define __UAPI_DEF_IN_ADDR 1
+#define __UAPI_DEF_IN_IPPROTO 1
+#define __UAPI_DEF_IN_PKTINFO 1
+#define __UAPI_DEF_IP_MREQ 1
+#define __UAPI_DEF_SOCKADDR_IN 1
+#define __UAPI_DEF_IN_CLASS 1
+
#define __UAPI_DEF_IN6_ADDR 1
/* We unconditionally define the in6_addr macros and glibc must
* coordinate. */
@@ -103,6 +117,14 @@
* that we need. */
#else /* !defined(__GLIBC__) */
+/* Definitions for in.h */
+#define __UAPI_DEF_IN_ADDR 1
+#define __UAPI_DEF_IN_IPPROTO 1
+#define __UAPI_DEF_IN_PKTINFO 1
+#define __UAPI_DEF_IP_MREQ 1
+#define __UAPI_DEF_SOCKADDR_IN 1
+#define __UAPI_DEF_IN_CLASS 1
+
/* Definitions for in6.h */
#define __UAPI_DEF_IN6_ADDR 1
#define __UAPI_DEF_IN6_ADDR_ALT 1
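The intended effect of these guards, sketched from the userspace side (which
libc include guard libc-compat.h keys off is an assumption here, not spelled
out by this hunk):

	/* glibc header first: the libc definitions win, and libc-compat.h
	 * flips __UAPI_DEF_IN_ADDR and friends to 0 so the kernel copies
	 * in <linux/in.h> compile out. */
	#include <netinet/in.h>
	#include <linux/in.h>

	/* kernel header first: __UAPI_DEF_* stay 1 and <linux/in.h>
	 * provides the definitions; a coordinating libc is expected to
	 * check the same macros and skip its own. */
	#include <linux/in.h>
	#include <netinet/in.h>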
diff --git a/init/Kconfig b/init/Kconfig
index 7d1ffd2ae536..bcc41bd19999 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1941,26 +1941,21 @@ config MODULE_COMPRESS
bool "Compress modules on installation"
depends on MODULES
help
- This option compresses the kernel modules when 'make
- modules_install' is run.
- The modules will be compressed either using gzip or xz depend on the
- choice made in "Compression algorithm".
+ Compresses kernel modules when 'make modules_install' is run; gzip or
+ xz depending on "Compression algorithm" below.
- module-init-tools has support for gzip format while kmod handle gzip
- and xz compressed modules.
+ module-init-tools MAY support gzip, and kmod MAY support gzip and xz.
- When a kernel module is installed from outside of the main kernel
- source and uses the Kbuild system for installing modules then that
- kernel module will also be compressed when it is installed.
+ Out-of-tree kernel modules installed using Kbuild will also be
+ compressed upon installation.
- This option provides little benefit when the modules are to be used inside
- an initrd or initramfs, it generally is more efficient to compress the whole
- initrd or initramfs instead.
+ Note: for modules inside an initrd or initramfs, it's more efficient
+ to compress the whole initrd or initramfs instead.
- This is fully compatible with signed modules while the signed module is
- compressed. module-init-tools or kmod handles decompression and provide to
- other layer the uncompressed but signed payload.
+ Note: This is fully compatible with signed modules.
+
+ If in doubt, say N.
choice
prompt "Compression algorithm"
@@ -1982,6 +1977,10 @@ endchoice
endif # MODULES
+config MODULES_TREE_LOOKUP
+ def_bool y
+ depends on PERF_EVENTS || TRACING
+
config INIT_ALL_POSSIBLE
bool
help
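For reference, the user-visible effect of the MODULE_COMPRESS help text above,
sketched with an illustrative version string and module path:

	CONFIG_MODULE_COMPRESS=y
	CONFIG_MODULE_COMPRESS_GZIP=y

	$ make modules_install
	$ ls /lib/modules/4.2.0/kernel/fs/xfs/
	xfs.ko.gz	# installed compressed; a gzip-capable kmod loads it as-is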
diff --git a/kernel/configs/xen.config b/kernel/configs/xen.config
new file mode 100644
index 000000000000..ff756221f112
--- /dev/null
+++ b/kernel/configs/xen.config
@@ -0,0 +1,48 @@
+# global stuff - these enable us to allow some
+# of the not so generic stuff below for xen
+CONFIG_PARAVIRT=y
+CONFIG_NET=y
+CONFIG_NET_CORE=y
+CONFIG_NETDEVICES=y
+CONFIG_BLOCK=y
+CONFIG_WATCHDOG=y
+CONFIG_TARGET_CORE=y
+CONFIG_SCSI=y
+CONFIG_FB=y
+CONFIG_INPUT_MISC=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_TTY=y
+# Technically not required but otherwise produces
+# pretty useless systems starting from allnoconfig
+# You want TCP/IP and ELF binaries right?
+CONFIG_INET=y
+CONFIG_BINFMT_ELF=y
+# generic config
+CONFIG_XEN=y
+CONFIG_XEN_DOM0=y
+# backend drivers
+CONFIG_XEN_BACKEND=y
+CONFIG_XEN_BLKDEV_BACKEND=m
+CONFIG_XEN_NETDEV_BACKEND=m
+CONFIG_HVC_XEN=y
+CONFIG_XEN_WDT=m
+CONFIG_XEN_SCSI_BACKEND=m
+# frontend drivers
+CONFIG_XEN_FBDEV_FRONTEND=m
+CONFIG_HVC_XEN_FRONTEND=y
+CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
+CONFIG_XEN_SCSI_FRONTEND=m
+# others
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_SCRUB_PAGES=y
+CONFIG_XEN_DEV_EVTCHN=m
+CONFIG_XEN_BLKDEV_FRONTEND=m
+CONFIG_XEN_NETDEV_FRONTEND=m
+CONFIG_XENFS=m
+CONFIG_XEN_COMPAT_XENFS=y
+CONFIG_XEN_SYS_HYPERVISOR=y
+CONFIG_XEN_XENBUS_FRONTEND=y
+CONFIG_XEN_GNTDEV=m
+CONFIG_XEN_GRANT_DEV_ALLOC=m
+CONFIG_SWIOTLB_XEN=y
+CONFIG_XEN_PRIVCMD=m
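Fragments under kernel/configs/ are meant to be merged on top of a base
configuration. Assuming the usual merge machinery is wired up for this
fragment (the make target name is not shown in this diff), usage would look
like:

	$ make defconfig
	$ ./scripts/kconfig/merge_config.sh .config kernel/configs/xen.config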
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 9019f15deab2..52ebaca1b9fc 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -302,7 +302,7 @@ static int jump_label_add_module(struct module *mod)
continue;
key = iterk;
- if (__module_address(iter->key) == mod) {
+ if (within_module(iter->key, mod)) {
/*
* Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
*/
@@ -339,7 +339,7 @@ static void jump_label_del_module(struct module *mod)
key = (struct static_key *)(unsigned long)iter->key;
- if (__module_address(iter->key) == mod)
+ if (within_module(iter->key, mod))
continue;
prev = &key->next;
@@ -443,14 +443,16 @@ static void jump_label_update(struct static_key *key, int enable)
{
struct jump_entry *stop = __stop___jump_table;
struct jump_entry *entry = jump_label_get_entries(key);
-
#ifdef CONFIG_MODULES
- struct module *mod = __module_address((unsigned long)key);
+ struct module *mod;
__jump_label_mod_update(key, enable);
+ preempt_disable();
+ mod = __module_address((unsigned long)key);
if (mod)
stop = mod->jump_entries + mod->num_jump_entries;
+ preempt_enable();
#endif
/* if there are no users, entry can be NULL */
if (entry)
diff --git a/kernel/module.c b/kernel/module.c
index f80a97f7da1f..3e0e19763d24 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -101,48 +101,201 @@
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
-#ifdef CONFIG_KGDB_KDB
-struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
-#endif /* CONFIG_KGDB_KDB */
-#ifdef CONFIG_MODULE_SIG
-#ifdef CONFIG_MODULE_SIG_FORCE
-static bool sig_enforce = true;
-#else
-static bool sig_enforce = false;
+#ifdef CONFIG_MODULES_TREE_LOOKUP
+
+/*
+ * Use a latched RB-tree for __module_address(); this allows us to use
+ * RCU-sched lookups of the address from any context.
+ *
+ * Because modules have two address ranges: init and core, we need two
+ * latch_tree_node entries. Therefore we need the back-pointer from
+ * mod_tree_node.
+ *
+ * Because init ranges are short-lived we mark them unlikely and have placed
+ * them outside the critical cacheline in struct module.
+ *
+ * This is conditional on PERF_EVENTS || TRACING because those can really hit
+ * __module_address() hard by doing a lot of stack unwinding, potentially from
+ * NMI context.
+ */
-static int param_set_bool_enable_only(const char *val,
- const struct kernel_param *kp)
+static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
{
- int err;
- bool test;
- struct kernel_param dummy_kp = *kp;
+ struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+ struct module *mod = mtn->mod;
- dummy_kp.arg = &test;
+ if (unlikely(mtn == &mod->mtn_init))
+ return (unsigned long)mod->module_init;
- err = param_set_bool(val, &dummy_kp);
- if (err)
- return err;
+ return (unsigned long)mod->module_core;
+}
+
+static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
+{
+ struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+ struct module *mod = mtn->mod;
- /* Don't let them unset it once it's set! */
- if (!test && sig_enforce)
- return -EROFS;
+ if (unlikely(mtn == &mod->mtn_init))
+ return (unsigned long)mod->init_size;
+
+ return (unsigned long)mod->core_size;
+}
+
+static __always_inline bool
+mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
+{
+ return __mod_tree_val(a) < __mod_tree_val(b);
+}
+
+static __always_inline int
+mod_tree_comp(void *key, struct latch_tree_node *n)
+{
+ unsigned long val = (unsigned long)key;
+ unsigned long start, end;
+
+ start = __mod_tree_val(n);
+ if (val < start)
+ return -1;
+
+ end = start + __mod_tree_size(n);
+ if (val >= end)
+ return 1;
- if (test)
- sig_enforce = true;
return 0;
}
-static const struct kernel_param_ops param_ops_bool_enable_only = {
- .flags = KERNEL_PARAM_OPS_FL_NOARG,
- .set = param_set_bool_enable_only,
- .get = param_get_bool,
+static const struct latch_tree_ops mod_tree_ops = {
+ .less = mod_tree_less,
+ .comp = mod_tree_comp,
};
-#define param_check_bool_enable_only param_check_bool
+static struct mod_tree_root {
+ struct latch_tree_root root;
+ unsigned long addr_min;
+ unsigned long addr_max;
+} mod_tree __cacheline_aligned = {
+ .addr_min = -1UL,
+};
+
+#define module_addr_min mod_tree.addr_min
+#define module_addr_max mod_tree.addr_max
+
+static noinline void __mod_tree_insert(struct mod_tree_node *node)
+{
+ latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+static void __mod_tree_remove(struct mod_tree_node *node)
+{
+ latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+/*
+ * These modifications (insert, remove_init and remove) are serialized by the
+ * module_mutex.
+ */
+static void mod_tree_insert(struct module *mod)
+{
+ mod->mtn_core.mod = mod;
+ mod->mtn_init.mod = mod;
+
+ __mod_tree_insert(&mod->mtn_core);
+ if (mod->init_size)
+ __mod_tree_insert(&mod->mtn_init);
+}
+
+static void mod_tree_remove_init(struct module *mod)
+{
+ if (mod->init_size)
+ __mod_tree_remove(&mod->mtn_init);
+}
+
+static void mod_tree_remove(struct module *mod)
+{
+ __mod_tree_remove(&mod->mtn_core);
+ mod_tree_remove_init(mod);
+}
+
+static struct module *mod_find(unsigned long addr)
+{
+ struct latch_tree_node *ltn;
+
+ ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
+ if (!ltn)
+ return NULL;
+
+ return container_of(ltn, struct mod_tree_node, node)->mod;
+}
+
+#else /* MODULES_TREE_LOOKUP */
+
+static unsigned long module_addr_min = -1UL, module_addr_max = 0;
+
+static void mod_tree_insert(struct module *mod) { }
+static void mod_tree_remove_init(struct module *mod) { }
+static void mod_tree_remove(struct module *mod) { }
+
+static struct module *mod_find(unsigned long addr)
+{
+ struct module *mod;
+
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (within_module(addr, mod))
+ return mod;
+ }
+
+ return NULL;
+}
+
+#endif /* MODULES_TREE_LOOKUP */
+
+/*
+ * Bounds of module text, for speeding up __module_address.
+ * Protected by module_mutex.
+ */
+static void __mod_update_bounds(void *base, unsigned int size)
+{
+ unsigned long min = (unsigned long)base;
+ unsigned long max = min + size;
+
+ if (min < module_addr_min)
+ module_addr_min = min;
+ if (max > module_addr_max)
+ module_addr_max = max;
+}
+
+static void mod_update_bounds(struct module *mod)
+{
+ __mod_update_bounds(mod->module_core, mod->core_size);
+ if (mod->init_size)
+ __mod_update_bounds(mod->module_init, mod->init_size);
+}
+
+#ifdef CONFIG_KGDB_KDB
+struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
+#endif /* CONFIG_KGDB_KDB */
+
+static void module_assert_mutex(void)
+{
+ lockdep_assert_held(&module_mutex);
+}
+
+static void module_assert_mutex_or_preempt(void)
+{
+#ifdef CONFIG_LOCKDEP
+ if (unlikely(!debug_locks))
+ return;
+
+ WARN_ON(!rcu_read_lock_sched_held() &&
+ !lockdep_is_held(&module_mutex));
+#endif
+}
+
+static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
+#ifndef CONFIG_MODULE_SIG_FORCE
module_param(sig_enforce, bool_enable_only, 0644);
#endif /* !CONFIG_MODULE_SIG_FORCE */
-#endif /* CONFIG_MODULE_SIG */
/* Block module loading/unloading? */
int modules_disabled = 0;
@@ -153,10 +306,6 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
-/* Bounds of module allocation, for speeding __module_address.
- * Protected by module_mutex. */
-static unsigned long module_addr_min = -1UL, module_addr_max = 0;
-
int register_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -318,6 +467,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
#endif
};
+ module_assert_mutex_or_preempt();
+
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
return true;
@@ -457,6 +608,8 @@ static struct module *find_module_all(const char *name, size_t len,
{
struct module *mod;
+ module_assert_mutex();
+
list_for_each_entry(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
continue;
@@ -1169,11 +1322,17 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
{
const unsigned long *crc;
- /* Since this should be found in kernel (which can't be removed),
- * no locking is necessary. */
+ /*
+ * Since this should be found in kernel (which can't be removed), no
+ * locking is necessary -- use preempt_disable() to placate lockdep.
+ */
+ preempt_disable();
if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL,
- &crc, true, false))
+ &crc, true, false)) {
+ preempt_enable();
BUG();
+ }
+ preempt_enable();
return check_version(sechdrs, versindex,
VMLINUX_SYMBOL_STR(module_layout), mod, crc,
NULL);
@@ -1661,6 +1820,10 @@ static void mod_sysfs_fini(struct module *mod)
mod_kobject_put(mod);
}
+static void init_param_lock(struct module *mod)
+{
+ mutex_init(&mod->param_lock);
+}
#else /* !CONFIG_SYSFS */
static int mod_sysfs_setup(struct module *mod,
@@ -1683,6 +1846,9 @@ static void del_usage_links(struct module *mod)
{
}
+static void init_param_lock(struct module *mod)
+{
+}
#endif /* CONFIG_SYSFS */
static void mod_sysfs_teardown(struct module *mod)
@@ -1852,10 +2018,11 @@ static void free_module(struct module *mod)
mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
+ mod_tree_remove(mod);
/* Remove this module from bug list, this uses list_del_rcu */
module_bug_cleanup(mod);
- /* Wait for RCU synchronizing before releasing mod->list and buglist. */
- synchronize_rcu();
+ /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
+ synchronize_sched();
mutex_unlock(&module_mutex);
/* This may be NULL, but that's OK */
@@ -2384,22 +2551,6 @@ void * __weak module_alloc(unsigned long size)
return vmalloc_exec(size);
}
-static void *module_alloc_update_bounds(unsigned long size)
-{
- void *ret = module_alloc(size);
-
- if (ret) {
- mutex_lock(&module_mutex);
- /* Update module bounds. */
- if ((unsigned long)ret < module_addr_min)
- module_addr_min = (unsigned long)ret;
- if ((unsigned long)ret + size > module_addr_max)
- module_addr_max = (unsigned long)ret + size;
- mutex_unlock(&module_mutex);
- }
- return ret;
-}
-
#ifdef CONFIG_DEBUG_KMEMLEAK
static void kmemleak_load_module(const struct module *mod,
const struct load_info *info)
@@ -2805,7 +2956,7 @@ static int move_module(struct module *mod, struct load_info *info)
void *ptr;
/* Do the allocs. */
- ptr = module_alloc_update_bounds(mod->core_size);
+ ptr = module_alloc(mod->core_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
@@ -2819,7 +2970,7 @@ static int move_module(struct module *mod, struct load_info *info)
mod->module_core = ptr;
if (mod->init_size) {
- ptr = module_alloc_update_bounds(mod->init_size);
+ ptr = module_alloc(mod->init_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
@@ -3119,6 +3270,7 @@ static noinline int do_init_module(struct module *mod)
mod->symtab = mod->core_symtab;
mod->strtab = mod->core_strtab;
#endif
+ mod_tree_remove_init(mod);
unset_module_init_ro_nx(mod);
module_arch_freeing_init(mod);
mod->module_init = NULL;
@@ -3127,11 +3279,11 @@ static noinline int do_init_module(struct module *mod)
mod->init_text_size = 0;
/*
* We want to free module_init, but be aware that kallsyms may be
- * walking this with preempt disabled. In all the failure paths,
- * we call synchronize_rcu/synchronize_sched, but we don't want
- * to slow down the success path, so use actual RCU here.
+ * walking this with preempt disabled. In all the failure paths, we
+ * call synchronize_sched(), but we don't want to slow down the success
+ * path, so use actual RCU here.
*/
- call_rcu(&freeinit->rcu, do_free_init);
+ call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
wake_up_all(&module_wq);
@@ -3188,7 +3340,9 @@ again:
err = -EEXIST;
goto out;
}
+ mod_update_bounds(mod);
list_add_rcu(&mod->list, &modules);
+ mod_tree_insert(mod);
err = 0;
out:
@@ -3304,6 +3458,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
if (err)
goto unlink_mod;
+ init_param_lock(mod);
+
/* Now we've got everything in the final locations, we can
* find optional sections. */
err = find_module_sections(mod, info);
@@ -3402,8 +3558,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
wake_up_all(&module_wq);
- /* Wait for RCU synchronizing before releasing mod->list. */
- synchronize_rcu();
+ /* Wait for RCU-sched synchronizing before releasing mod->list. */
+ synchronize_sched();
mutex_unlock(&module_mutex);
free_module:
/* Free lock-classes; relies on the preceding sync_rcu() */
@@ -3527,19 +3683,15 @@ const char *module_address_lookup(unsigned long addr,
char **modname,
char *namebuf)
{
- struct module *mod;
const char *ret = NULL;
+ struct module *mod;
preempt_disable();
- list_for_each_entry_rcu(mod, &modules, list) {
- if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- if (within_module(addr, mod)) {
- if (modname)
- *modname = mod->name;
- ret = get_ksymbol(mod, addr, size, offset);
- break;
- }
+ mod = __module_address(addr);
+ if (mod) {
+ if (modname)
+ *modname = mod->name;
+ ret = get_ksymbol(mod, addr, size, offset);
}
/* Make a copy in here where it's safe */
if (ret) {
@@ -3547,6 +3699,7 @@ const char *module_address_lookup(unsigned long addr,
ret = namebuf;
}
preempt_enable();
+
return ret;
}
@@ -3670,6 +3823,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
unsigned int i;
int ret;
+ module_assert_mutex();
+
list_for_each_entry(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
@@ -3844,13 +3999,15 @@ struct module *__module_address(unsigned long addr)
if (addr < module_addr_min || addr > module_addr_max)
return NULL;
- list_for_each_entry_rcu(mod, &modules, list) {
+ module_assert_mutex_or_preempt();
+
+ mod = mod_find(addr);
+ if (mod) {
+ BUG_ON(!within_module(addr, mod));
if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- if (within_module(addr, mod))
- return mod;
+ mod = NULL;
}
- return NULL;
+ return mod;
}
EXPORT_SYMBOL_GPL(__module_address);
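What the latched tree buys in practice: __module_address() is now safe from
any context that holds the RCU-sched read side, including NMI. A sketch of a
caller (the function and its buffer handling are invented for illustration):

	static bool addr_in_module(unsigned long addr, char *name, size_t len)
	{
		struct module *mod;

		preempt_disable();	/* RCU-sched read side */
		mod = __module_address(addr);
		if (mod)
			strlcpy(name, mod->name, len);	/* copy before enable */
		preempt_enable();

		return mod != NULL;
	}

The name is copied while preemption is still disabled because, per this
series, module unload waits for synchronize_sched() before freeing.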
diff --git a/kernel/params.c b/kernel/params.c
index 30288c1e15dd..b6554aa71094 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -25,15 +25,34 @@
#include <linux/slab.h>
#include <linux/ctype.h>
-/* Protects all parameters, and incidentally kmalloced_param list. */
+#ifdef CONFIG_SYSFS
+/* Protects all built-in parameters; modules use their own param_lock */
static DEFINE_MUTEX(param_lock);
+/* Use the module's mutex, or if built-in use the built-in mutex */
+#ifdef CONFIG_MODULES
+#define KPARAM_MUTEX(mod) ((mod) ? &(mod)->param_lock : &param_lock)
+#else
+#define KPARAM_MUTEX(mod) (&param_lock)
+#endif
+
+static inline void check_kparam_locked(struct module *mod)
+{
+ BUG_ON(!mutex_is_locked(KPARAM_MUTEX(mod)));
+}
+#else
+static inline void check_kparam_locked(struct module *mod)
+{
+}
+#endif /* !CONFIG_SYSFS */
+
/* This just allows us to keep track of which parameters are kmalloced. */
struct kmalloced_param {
struct list_head list;
char val[];
};
static LIST_HEAD(kmalloced_params);
+static DEFINE_SPINLOCK(kmalloced_params_lock);
static void *kmalloc_parameter(unsigned int size)
{
@@ -43,7 +62,10 @@ static void *kmalloc_parameter(unsigned int size)
if (!p)
return NULL;
+ spin_lock(&kmalloced_params_lock);
list_add(&p->list, &kmalloced_params);
+ spin_unlock(&kmalloced_params_lock);
+
return p->val;
}
@@ -52,6 +74,7 @@ static void maybe_kfree_parameter(void *param)
{
struct kmalloced_param *p;
+ spin_lock(&kmalloced_params_lock);
list_for_each_entry(p, &kmalloced_params, list) {
if (p->val == param) {
list_del(&p->list);
@@ -59,6 +82,7 @@ static void maybe_kfree_parameter(void *param)
break;
}
}
+ spin_unlock(&kmalloced_params_lock);
}
static char dash2underscore(char c)
@@ -119,10 +143,10 @@ static int parse_one(char *param,
return -EINVAL;
pr_debug("handling %s with %p\n", param,
params[i].ops->set);
- mutex_lock(&param_lock);
+ kernel_param_lock(params[i].mod);
param_check_unsafe(&params[i]);
err = params[i].ops->set(val, &params[i]);
- mutex_unlock(&param_lock);
+ kernel_param_unlock(params[i].mod);
return err;
}
}
@@ -254,7 +278,7 @@ char *parse_args(const char *doing,
return scnprintf(buffer, PAGE_SIZE, format, \
*((type *)kp->arg)); \
} \
- struct kernel_param_ops param_ops_##name = { \
+ const struct kernel_param_ops param_ops_##name = { \
.set = param_set_##name, \
.get = param_get_##name, \
}; \
@@ -306,7 +330,7 @@ static void param_free_charp(void *arg)
maybe_kfree_parameter(*((char **)arg));
}
-struct kernel_param_ops param_ops_charp = {
+const struct kernel_param_ops param_ops_charp = {
.set = param_set_charp,
.get = param_get_charp,
.free = param_free_charp,
@@ -331,13 +355,44 @@ int param_get_bool(char *buffer, const struct kernel_param *kp)
}
EXPORT_SYMBOL(param_get_bool);
-struct kernel_param_ops param_ops_bool = {
+const struct kernel_param_ops param_ops_bool = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_bool,
.get = param_get_bool,
};
EXPORT_SYMBOL(param_ops_bool);
+int param_set_bool_enable_only(const char *val, const struct kernel_param *kp)
+{
+ int err = 0;
+ bool new_value;
+ bool orig_value = *(bool *)kp->arg;
+ struct kernel_param dummy_kp = *kp;
+
+ dummy_kp.arg = &new_value;
+
+ err = param_set_bool(val, &dummy_kp);
+ if (err)
+ return err;
+
+ /* Don't let them unset it once it's set! */
+ if (!new_value && orig_value)
+ return -EROFS;
+
+ if (new_value)
+ err = param_set_bool(val, kp);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(param_set_bool_enable_only);
+
+const struct kernel_param_ops param_ops_bool_enable_only = {
+ .flags = KERNEL_PARAM_OPS_FL_NOARG,
+ .set = param_set_bool_enable_only,
+ .get = param_get_bool,
+};
+EXPORT_SYMBOL_GPL(param_ops_bool_enable_only);
+
/* This one must be bool. */
int param_set_invbool(const char *val, const struct kernel_param *kp)
{
@@ -359,7 +414,7 @@ int param_get_invbool(char *buffer, const struct kernel_param *kp)
}
EXPORT_SYMBOL(param_get_invbool);
-struct kernel_param_ops param_ops_invbool = {
+const struct kernel_param_ops param_ops_invbool = {
.set = param_set_invbool,
.get = param_get_invbool,
};
@@ -367,12 +422,11 @@ EXPORT_SYMBOL(param_ops_invbool);
int param_set_bint(const char *val, const struct kernel_param *kp)
{
- struct kernel_param boolkp;
+ /* Match bool exactly, by re-using it. */
+ struct kernel_param boolkp = *kp;
bool v;
int ret;
- /* Match bool exactly, by re-using it. */
- boolkp = *kp;
boolkp.arg = &v;
ret = param_set_bool(val, &boolkp);
@@ -382,7 +436,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
}
EXPORT_SYMBOL(param_set_bint);
-struct kernel_param_ops param_ops_bint = {
+const struct kernel_param_ops param_ops_bint = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_bint,
.get = param_get_int,
@@ -390,7 +444,8 @@ struct kernel_param_ops param_ops_bint = {
EXPORT_SYMBOL(param_ops_bint);
/* We break the rule and mangle the string. */
-static int param_array(const char *name,
+static int param_array(struct module *mod,
+ const char *name,
const char *val,
unsigned int min, unsigned int max,
void *elem, int elemsize,
@@ -421,7 +476,7 @@ static int param_array(const char *name,
/* nul-terminate and parse */
save = val[len];
((char *)val)[len] = '\0';
- BUG_ON(!mutex_is_locked(&param_lock));
+ check_kparam_locked(mod);
ret = set(val, &kp);
if (ret != 0)
@@ -443,7 +498,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp)
const struct kparam_array *arr = kp->arr;
unsigned int temp_num;
- return param_array(kp->name, val, 1, arr->max, arr->elem,
+ return param_array(kp->mod, kp->name, val, 1, arr->max, arr->elem,
arr->elemsize, arr->ops->set, kp->level,
arr->num ?: &temp_num);
}
@@ -452,14 +507,13 @@ static int param_array_get(char *buffer, const struct kernel_param *kp)
{
int i, off, ret;
const struct kparam_array *arr = kp->arr;
- struct kernel_param p;
+ struct kernel_param p = *kp;
- p = *kp;
for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
if (i)
buffer[off++] = ',';
p.arg = arr->elem + arr->elemsize * i;
- BUG_ON(!mutex_is_locked(&param_lock));
+ check_kparam_locked(p.mod);
ret = arr->ops->get(buffer + off, &p);
if (ret < 0)
return ret;
@@ -479,7 +533,7 @@ static void param_array_free(void *arg)
arr->ops->free(arr->elem + arr->elemsize * i);
}
-struct kernel_param_ops param_array_ops = {
+const struct kernel_param_ops param_array_ops = {
.set = param_array_set,
.get = param_array_get,
.free = param_array_free,
@@ -507,7 +561,7 @@ int param_get_string(char *buffer, const struct kernel_param *kp)
}
EXPORT_SYMBOL(param_get_string);
-struct kernel_param_ops param_ops_string = {
+const struct kernel_param_ops param_ops_string = {
.set = param_set_copystring,
.get = param_get_string,
};
@@ -542,9 +596,9 @@ static ssize_t param_attr_show(struct module_attribute *mattr,
if (!attribute->param->ops->get)
return -EPERM;
- mutex_lock(&param_lock);
+ kernel_param_lock(mk->mod);
count = attribute->param->ops->get(buf, attribute->param);
- mutex_unlock(&param_lock);
+ kernel_param_unlock(mk->mod);
if (count > 0) {
strcat(buf, "\n");
++count;
@@ -554,7 +608,7 @@ static ssize_t param_attr_show(struct module_attribute *mattr,
/* sysfs always hands a nul-terminated string in buf. We rely on that. */
static ssize_t param_attr_store(struct module_attribute *mattr,
- struct module_kobject *km,
+ struct module_kobject *mk,
const char *buf, size_t len)
{
int err;
@@ -563,10 +617,10 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
if (!attribute->param->ops->set)
return -EPERM;
- mutex_lock(&param_lock);
+ kernel_param_lock(mk->mod);
param_check_unsafe(attribute->param);
err = attribute->param->ops->set(buf, attribute->param);
- mutex_unlock(&param_lock);
+ kernel_param_unlock(mk->mod);
if (!err)
return len;
return err;
@@ -580,17 +634,18 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
#endif
#ifdef CONFIG_SYSFS
-void __kernel_param_lock(void)
+void kernel_param_lock(struct module *mod)
{
- mutex_lock(&param_lock);
+ mutex_lock(KPARAM_MUTEX(mod));
}
-EXPORT_SYMBOL(__kernel_param_lock);
-void __kernel_param_unlock(void)
+void kernel_param_unlock(struct module *mod)
{
- mutex_unlock(&param_lock);
+ mutex_unlock(KPARAM_MUTEX(mod));
}
-EXPORT_SYMBOL(__kernel_param_unlock);
+
+EXPORT_SYMBOL(kernel_param_lock);
+EXPORT_SYMBOL(kernel_param_unlock);
/*
* add_sysfs_param - add a parameter to sysfs
@@ -856,6 +911,7 @@ static void __init version_sysfs_builtin(void)
mk = locate_module_kobject(vattr->module_name);
if (mk) {
err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr);
+ WARN_ON_ONCE(err);
kobject_uevent(&mk->kobj, KOBJ_ADD);
kobject_put(&mk->kobj);
}
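With the per-module locks, code reading a parameter that sysfs may rewrite
concurrently now locks its own module instead of the former global param_lock.
A sketch (my_param is invented):

	static int my_param = 1;
	module_param(my_param, int, 0644);

	static int my_param_snapshot(void)
	{
		int val;

		kernel_param_lock(THIS_MODULE);	/* serialize vs. sysfs writes */
		val = my_param;
		kernel_param_unlock(THIS_MODULE);

		return val;
	}

The mac80211 hunk below performs exactly this conversion for
ieee80211_default_rc_algo.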
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 7e01f78f0417..9e302315e33d 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -187,7 +187,7 @@ config DPM_WATCHDOG
config DPM_WATCHDOG_TIMEOUT
int "Watchdog timeout in seconds"
range 1 120
- default 12
+ default 60
depends on DPM_WATCHDOG
config PM_TRACE
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 2329daae5255..690f78f210f2 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -552,7 +552,7 @@ int hibernation_platform_enter(void)
error = disable_nonboot_cpus();
if (error)
- goto Platform_finish;
+ goto Enable_cpus;
local_irq_disable();
syscore_suspend();
@@ -568,6 +568,8 @@ int hibernation_platform_enter(void)
Power_up:
syscore_resume();
local_irq_enable();
+
+ Enable_cpus:
enable_nonboot_cpus();
Platform_finish:
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ffc4cc3dcd47..49eca0beed32 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -12,5 +12,3 @@ obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
obj-$(CONFIG_TIMER_STATS) += timer_stats.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
-
-$(obj)/time.o: $(objtree)/include/config/
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 30b7a409bf1e..bca3667a2de1 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -319,32 +319,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
* We want to use this from any context including NMI and tracing /
* instrumenting the timekeeping code itself.
*
- * So we handle this differently than the other timekeeping accessor
- * functions which retry when the sequence count has changed. The
- * update side does:
- *
- * smp_wmb(); <- Ensure that the last base[1] update is visible
- * tkf->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
- * update(tkf->base[0], tkr);
- * smp_wmb(); <- Ensure that the base[0] update is visible
- * tkf->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
- * update(tkf->base[1], tkr);
- *
- * The reader side does:
- *
- * do {
- * seq = tkf->seq;
- * smp_rmb();
- * idx = seq & 0x01;
- * now = now(tkf->base[idx]);
- * smp_rmb();
- * } while (seq != tkf->seq)
- *
- * As long as we update base[0] readers are forced off to
- * base[1]. Once base[0] is updated readers are redirected to base[0]
- * and the base[1] update takes place.
+ * Employ the latch technique; see @raw_write_seqcount_latch.
*
 * So if an NMI hits the update of base[0] then it will use base[1]
 * which is still consistent. In the worst case this can result in a
@@ -407,7 +382,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
u64 now;
do {
- seq = raw_read_seqcount(&tkf->seq);
+ seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
} while (read_seqcount_retry(&tkf->seq, seq));
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 520499dd85af..5e097fa9faf7 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1566,7 +1566,7 @@ static void migrate_timers(int cpu)
BUG_ON(cpu_online(cpu));
old_base = per_cpu_ptr(&tvec_bases, cpu);
- new_base = this_cpu_ptr(&tvec_bases);
+ new_base = get_cpu_ptr(&tvec_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
@@ -1590,6 +1590,7 @@ static void migrate_timers(int cpu)
spin_unlock(&old_base->lock);
spin_unlock_irq(&new_base->lock);
+ put_cpu_ptr(&tvec_bases);
}
static int timer_cpu_notify(struct notifier_block *self,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5243d4b03087..4c4f06176f74 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -285,12 +285,7 @@ static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);
/* see the comment above the definition of WQ_POWER_EFFICIENT */
-#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
-static bool wq_power_efficient = true;
-#else
-static bool wq_power_efficient;
-#endif
-
+static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);
static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
diff --git a/lib/bug.c b/lib/bug.c
index 0c3bd9552b6f..cff145f032a5 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -66,7 +66,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
struct module *mod;
const struct bug_entry *bug = NULL;
- rcu_read_lock();
+ rcu_read_lock_sched();
list_for_each_entry_rcu(mod, &module_bug_list, bug_list) {
unsigned i;
@@ -77,7 +77,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
}
bug = NULL;
out:
- rcu_read_unlock();
+ rcu_read_unlock_sched();
return bug;
}
@@ -88,6 +88,8 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
char *secstrings;
unsigned int i;
+ lockdep_assert_held(&module_mutex);
+
mod->bug_table = NULL;
mod->num_bugs = 0;
@@ -113,6 +115,7 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
void module_bug_cleanup(struct module *mod)
{
+ lockdep_assert_held(&module_mutex);
list_del_rcu(&mod->bug_list);
}
diff --git a/lib/kobject.c b/lib/kobject.c
index 75ee63834fd1..2e3bd01964a9 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -545,6 +545,7 @@ out:
kfree(devpath);
return error;
}
+EXPORT_SYMBOL_GPL(kobject_move);
/**
* kobject_del - unlink kobject from hierarchy.
diff --git a/lib/rbtree.c b/lib/rbtree.c
index c16c81a3d430..1356454e36de 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -44,6 +44,30 @@
* parentheses and have some accompanying text comment.
*/
+/*
+ * Notes on lockless lookups:
+ *
+ * All stores to the tree structure (rb_left and rb_right) must be done using
+ * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
+ * tree structure as seen in program order.
+ *
+ * These two requirements will allow lockless iteration of the tree -- not
+ * correct iteration, mind you; tree rotations are not atomic so a lookup might
+ * miss entire subtrees.
+ *
+ * But they do guarantee that any such traversal will only see valid elements
+ * and that it will indeed complete -- it does not get stuck in a loop.
+ *
+ * It also guarantees that if the lookup returns an element it is the 'correct'
+ * one. But not returning an element does _NOT_ mean it's not present.
+ *
+ * NOTE:
+ *
+ * Stores to __rb_parent_color are not important for simple lookups so those
+ * are left undone as of now. Nor did I check for loops involving parent
+ * pointers.
+ */
+
static inline void rb_set_black(struct rb_node *rb)
{
rb->__rb_parent_color |= RB_BLACK;
@@ -129,8 +153,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
* This still leaves us in violation of 4), the
* continuation into Case 3 will fix that.
*/
- parent->rb_right = tmp = node->rb_left;
- node->rb_left = parent;
+ tmp = node->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp);
+ WRITE_ONCE(node->rb_left, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
@@ -149,8 +174,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
* / \
* n U
*/
- gparent->rb_left = tmp; /* == parent->rb_right */
- parent->rb_right = gparent;
+ WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
+ WRITE_ONCE(parent->rb_right, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -171,8 +196,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
tmp = parent->rb_left;
if (node == tmp) {
/* Case 2 - right rotate at parent */
- parent->rb_left = tmp = node->rb_right;
- node->rb_right = parent;
+ tmp = node->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp);
+ WRITE_ONCE(node->rb_right, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
@@ -183,8 +209,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
}
/* Case 3 - left rotate at gparent */
- gparent->rb_right = tmp; /* == parent->rb_left */
- parent->rb_left = gparent;
+ WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
+ WRITE_ONCE(parent->rb_left, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -224,8 +250,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* / \ / \
* Sl Sr N Sl
*/
- parent->rb_right = tmp1 = sibling->rb_left;
- sibling->rb_left = parent;
+ tmp1 = sibling->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp1);
+ WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
@@ -275,9 +302,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* \
* Sr
*/
- sibling->rb_left = tmp1 = tmp2->rb_right;
- tmp2->rb_right = sibling;
- parent->rb_right = tmp2;
+ tmp1 = tmp2->rb_right;
+ WRITE_ONCE(sibling->rb_left, tmp1);
+ WRITE_ONCE(tmp2->rb_right, sibling);
+ WRITE_ONCE(parent->rb_right, tmp2);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
@@ -297,8 +325,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* / \ / \
* (sl) sr N (sl)
*/
- parent->rb_right = tmp2 = sibling->rb_left;
- sibling->rb_left = parent;
+ tmp2 = sibling->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp2);
+ WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
@@ -310,8 +339,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
sibling = parent->rb_left;
if (rb_is_red(sibling)) {
/* Case 1 - right rotate at parent */
- parent->rb_left = tmp1 = sibling->rb_right;
- sibling->rb_right = parent;
+ tmp1 = sibling->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp1);
+ WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
@@ -336,9 +366,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
break;
}
/* Case 3 - right rotate at sibling */
- sibling->rb_right = tmp1 = tmp2->rb_left;
- tmp2->rb_left = sibling;
- parent->rb_left = tmp2;
+ tmp1 = tmp2->rb_left;
+ WRITE_ONCE(sibling->rb_right, tmp1);
+ WRITE_ONCE(tmp2->rb_left, sibling);
+ WRITE_ONCE(parent->rb_left, tmp2);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
@@ -347,8 +378,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
sibling = tmp2;
}
/* Case 4 - left rotate at parent + color flips */
- parent->rb_left = tmp2 = sibling->rb_right;
- sibling->rb_right = parent;
+ tmp2 = sibling->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp2);
+ WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
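The mechanical pattern of this hunk: every chained assignment that stored into
the tree proper is split so the tree-visible store goes through WRITE_ONCE().
Sketched on the first rotation above:

	/* before: one statement, two stores; the compiler may tear or
	 * re-issue the store to parent->rb_right */
	parent->rb_right = tmp = node->rb_left;

	/* after: the store a lockless reader can observe is a single,
	 * non-torn write */
	tmp = node->rb_left;
	WRITE_ONCE(parent->rb_right, tmp);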
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 983b78694c46..3e5f8f29c286 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -855,7 +855,7 @@ void __init setup_kmalloc_cache_index_table(void)
}
}
-static void new_kmalloc_cache(int idx, unsigned long flags)
+static void __init new_kmalloc_cache(int idx, unsigned long flags)
{
kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
kmalloc_info[idx].size, flags);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 9c891d0412a2..ae3a47f9d1d5 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -57,7 +57,7 @@ static const struct proto_ops ax25_proto_ops;
static void ax25_free_sock(struct sock *sk)
{
- ax25_cb_put(ax25_sk(sk));
+ ax25_cb_put(sk_to_ax25(sk));
}
/*
@@ -306,7 +306,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
while ((skb = skb_dequeue(&ax25->sk->sk_receive_queue)) != NULL) {
if (skb->sk != ax25->sk) {
/* A pending connection */
- ax25_cb *sax25 = ax25_sk(skb->sk);
+ ax25_cb *sax25 = sk_to_ax25(skb->sk);
/* Queue the unaccepted socket for death */
sock_orphan(skb->sk);
@@ -551,7 +551,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
switch (optname) {
case AX25_WINDOW:
@@ -697,7 +697,7 @@ static int ax25_getsockopt(struct socket *sock, int level, int optname,
length = min_t(unsigned int, maxlen, sizeof(int));
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
switch (optname) {
case AX25_WINDOW:
@@ -796,7 +796,7 @@ out:
static struct proto ax25_proto = {
.name = "AX25",
.owner = THIS_MODULE,
- .obj_size = sizeof(struct sock),
+ .obj_size = sizeof(struct ax25_sock),
};
static int ax25_create(struct net *net, struct socket *sock, int protocol,
@@ -858,7 +858,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
if (sk == NULL)
return -ENOMEM;
- ax25 = sk->sk_protinfo = ax25_create_cb();
+ ax25 = ax25_sk(sk)->cb = ax25_create_cb();
if (!ax25) {
sk_free(sk);
return -ENOMEM;
@@ -910,7 +910,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
sk->sk_state = TCP_ESTABLISHED;
sock_copy_flags(sk, osk);
- oax25 = ax25_sk(osk);
+ oax25 = sk_to_ax25(osk);
ax25->modulus = oax25->modulus;
ax25->backoff = oax25->backoff;
@@ -938,7 +938,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
}
}
- sk->sk_protinfo = ax25;
+ ax25_sk(sk)->cb = ax25;
sk->sk_destruct = ax25_free_sock;
ax25->sk = sk;
@@ -956,7 +956,7 @@ static int ax25_release(struct socket *sock)
sock_hold(sk);
sock_orphan(sk);
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
@@ -1066,7 +1066,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (!sock_flag(sk, SOCK_ZAPPED)) {
err = -EINVAL;
goto out;
@@ -1113,7 +1113,7 @@ static int __must_check ax25_connect(struct socket *sock,
struct sockaddr *uaddr, int addr_len, int flags)
{
struct sock *sk = sock->sk;
- ax25_cb *ax25 = ax25_sk(sk), *ax25t;
+ ax25_cb *ax25 = sk_to_ax25(sk), *ax25t;
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
ax25_digi *digi = NULL;
int ct = 0, err = 0;
@@ -1394,7 +1394,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
memset(fsa, 0, sizeof(*fsa));
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (peer != 0) {
if (sk->sk_state != TCP_ESTABLISHED) {
@@ -1446,7 +1446,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
return -EINVAL;
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (sock_flag(sk, SOCK_ZAPPED)) {
err = -EADDRNOTAVAIL;
@@ -1621,7 +1621,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (skb == NULL)
goto out;
- if (!ax25_sk(sk)->pidincl)
+ if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
skb_reset_transport_header(skb);
@@ -1762,7 +1762,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCAX25GETINFO:
case SIOCAX25GETINFOOLD: {
- ax25_cb *ax25 = ax25_sk(sk);
+ ax25_cb *ax25 = sk_to_ax25(sk);
struct ax25_info_struct ax25_info;
ax25_info.t1 = ax25->t1 / HZ;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 29a3687237aa..bb5a0e4e98d9 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -353,7 +353,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
return 0;
}
- ax25 = ax25_sk(make);
+ ax25 = sk_to_ax25(make);
skb_set_owner_r(skb, make);
skb_queue_head(&sk->sk_receive_queue, skb);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 476e5dda59e1..2a834c6179b9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -129,7 +129,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_keyid *key_keyid;
- u8 ip_proto;
+ u8 ip_proto = 0;
if (!data) {
data = skb->data;
diff --git a/net/core/sock.c b/net/core/sock.c
index 1e1fe9a68d83..08f16db46070 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1454,7 +1454,7 @@ void sk_destruct(struct sock *sk)
static void __sk_free(struct sock *sk)
{
- if (unlikely(sock_diag_has_destroy_listeners(sk)))
+ if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
sock_diag_broadcast_destroy(sk);
else
sk_destruct(sk);
@@ -2269,7 +2269,6 @@ static void sock_def_write_space(struct sock *sk)
static void sock_def_destruct(struct sock *sk)
{
- kfree(sk->sk_protinfo);
}
void sk_send_sigurg(struct sock *sk)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 04ffad311704..0917123790ea 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -112,7 +112,7 @@ static int dsa_slave_open(struct net_device *dev)
clear_promisc:
if (dev->flags & IFF_PROMISC)
- dev_set_promiscuity(master, 0);
+ dev_set_promiscuity(master, -1);
clear_allmulti:
if (dev->flags & IFF_ALLMULTI)
dev_set_allmulti(master, -1);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3bfccd83551c..c7358ea4ae93 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1045,7 +1045,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
goto nla_put_failure;
if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
- in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
+ in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
if (in_dev &&
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
rtm->rtm_flags |= RTNH_F_DEAD;
@@ -1074,7 +1074,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtnh->rtnh_flags = nh->nh_flags & 0xFF;
if (nh->nh_flags & RTNH_F_LINKDOWN) {
- in_dev = __in_dev_get_rcu(nh->nh_dev);
+ in_dev = __in_dev_get_rtnl(nh->nh_dev);
if (in_dev &&
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
rtnh->rtnh_flags |= RTNH_F_DEAD;
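fib_dump_info() runs under RTNL rather than inside an RCU read-side section, so __in_dev_get_rtnl() is the accessor whose locking assumption actually holds. A sketch of encoding such assumptions as runtime assertions (the bool flags here are invented; the kernel expresses this with lockdep annotations):

#include <assert.h>
#include <stdbool.h>

static bool rtnl_held;
static bool rcu_read_held;

struct in_dev_like { int ignore_linkdown; };

static struct in_dev_like *in_dev_get_rcu(struct in_dev_like *d)
{
	assert(rcu_read_held);   /* caller must be in an RCU read section */
	return d;
}

static struct in_dev_like *in_dev_get_rtnl(struct in_dev_like *d)
{
	assert(rtnl_held);       /* caller must hold RTNL */
	return d;
}

int main(void)
{
	struct in_dev_like dev = { 1 };

	rtnl_held = true;        /* dump paths hold RTNL, no rcu_read_lock */
	assert(in_dev_get_rtnl(&dev)->ignore_linkdown);

	rcu_read_held = true;    /* fast paths would take the RCU variant */
	assert(in_dev_get_rcu(&dev)->ignore_linkdown);
	return 0;
}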
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 36ba7c4f0283..fda33f961d83 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -103,7 +103,7 @@ ieee80211_rate_control_ops_get(const char *name)
const struct rate_control_ops *ops;
const char *alg_name;
- kparam_block_sysfs_write(ieee80211_default_rc_algo);
+ kernel_param_lock(THIS_MODULE);
if (!name)
alg_name = ieee80211_default_rc_algo;
else
@@ -117,7 +117,7 @@ ieee80211_rate_control_ops_get(const char *name)
/* try built-in one if specific alg requested but not found */
if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
- kparam_unblock_sysfs_write(ieee80211_default_rc_algo);
+ kernel_param_unlock(THIS_MODULE);
return ops;
}
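kparam_block_sysfs_write() pinned a single named parameter; its replacement, kernel_param_lock(THIS_MODULE), takes one mutex covering all of the module's parameters. A rough userspace analogue with pthreads (all names invented):

#include <pthread.h>
#include <stdio.h>

/* invented stand-ins for the module's parameter state */
static pthread_mutex_t param_mutex = PTHREAD_MUTEX_INITIALIZER;
static char default_algo[32] = "minstrel_ht";

static void get_algo(char *out, size_t len)
{
	pthread_mutex_lock(&param_mutex);   /* ~ kernel_param_lock(THIS_MODULE) */
	snprintf(out, len, "%s", default_algo);
	pthread_mutex_unlock(&param_mutex); /* ~ kernel_param_unlock(...) */
}

int main(void)
{
	char buf[32];

	get_algo(buf, sizeof buf);
	puts(buf);
	return 0;
}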
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b92d3f49c23e..9d37ccd95062 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -216,8 +216,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
[TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
- [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
- [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 },
};
static void fl_set_key_val(struct nlattr **tb,
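The cls_flower fix above is a classic designated-initializer pitfall: when the same index appears twice, the later initializer silently wins and the intended slot stays zero-filled, so the UDP port attributes went unvalidated. A compilable illustration (enum names invented):

#include <stdio.h>

enum { KEY_TCP_SRC, KEY_TCP_DST, KEY_UDP_SRC, KEY_UDP_DST, KEY_MAX };

int main(void)
{
	/* the buggy table named KEY_TCP_* twice and KEY_UDP_* never; the
	 * last initializer wins and unnamed slots are zero-filled */
	int policy[KEY_MAX] = {
		[KEY_TCP_SRC] = 16,
		[KEY_TCP_DST] = 16,
		[KEY_TCP_SRC] = 16,   /* duplicate: should have been UDP_SRC */
		[KEY_TCP_DST] = 16,   /* duplicate: should have been UDP_DST */
	};

	printf("udp_src policy = %d\n", policy[KEY_UDP_SRC]); /* prints 0 */
	return 0;
}

GCC does flag the duplicate with -Woverride-init (enabled by -Wextra), which is presumably how copy-paste bugs like this surface.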
diff --git a/net/sctp/output.c b/net/sctp/output.c
index fc5e45b8a832..abe7c2db2412 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -599,7 +599,9 @@ out:
return err;
no_route:
kfree_skb(nskb);
- IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
+
+ if (asoc)
+ IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
/* FIXME: Returning the 'err' will affect all the associations
* associated with a socket, although only one of the paths of the
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 5f6c4e61325b..1425ec2bbd5a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2121,12 +2121,6 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (sp->subscribe.sctp_data_io_event)
sctp_ulpevent_read_sndrcvinfo(event, msg);
-#if 0
- /* FIXME: we should be calling IP/IPv6 layers. */
- if (sk->sk_protinfo.af_inet.cmsg_flags)
- ip_cmsg_recv(msg, skb);
-#endif
-
err = copied;
/* If skb's length exceeds the user's buffer, update the skb and
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 47f38be4155f..02f53674dc39 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -72,7 +72,7 @@ static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp)
#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
-static struct kernel_param_ops param_ops_hashtbl_sz = {
+static const struct kernel_param_ops param_ops_hashtbl_sz = {
.set = param_set_hashtbl_sz,
.get = param_get_hashtbl_sz,
};
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 66891e32c5e3..b0517287075b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2982,7 +2982,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp)
RPC_MAX_RESVPORT);
}
-static struct kernel_param_ops param_ops_portnr = {
+static const struct kernel_param_ops param_ops_portnr = {
.set = param_set_portnr,
.get = param_get_uint,
};
@@ -3001,7 +3001,7 @@ static int param_set_slot_table_size(const char *val,
RPC_MAX_SLOT_TABLE);
}
-static struct kernel_param_ops param_ops_slot_table_size = {
+static const struct kernel_param_ops param_ops_slot_table_size = {
.set = param_set_slot_table_size,
.get = param_get_uint,
};
@@ -3017,7 +3017,7 @@ static int param_set_max_slot_table_size(const char *val,
RPC_MAX_SLOT_TABLE_LIMIT);
}
-static struct kernel_param_ops param_ops_max_slot_table_size = {
+static const struct kernel_param_ops param_ops_max_slot_table_size = {
.set = param_set_max_slot_table_size,
.get = param_get_uint,
};
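Several hunks in this series (sunrpc here, apparmor, ima, and hda_intel below) constify kernel_param_ops tables. Beyond documenting intent, const lets the toolchain place the callback pointers in read-only data, so they cannot be retargeted at runtime. A userspace sketch of the idiom (struct and function names invented):

#include <stdio.h>

struct param_ops {
	int (*set)(const char *val);
	int (*get)(char *buf);
};

static int set_portnr(const char *val)
{
	(void)val;               /* range checking would go here */
	return 0;
}

static int get_uint(char *buf)
{
	buf[0] = '\0';
	return 0;
}

static const struct param_ops param_ops_portnr = {
	.set = set_portnr,
	.get = get_uint,
};

int main(void)
{
	char buf[16];

	param_ops_portnr.get(buf);
	/* param_ops_portnr.set = NULL;  -- now a compile-time error */
	return 0;
}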
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 4906ca3c0f3a..a816382fc8af 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -108,6 +108,11 @@ void tipc_bclink_remove_node(struct net *net, u32 addr)
tipc_bclink_lock(net);
tipc_nmap_remove(&tn->bclink->bcast_nodes, addr);
+
+ /* Last node? => reset backlog queue */
+ if (!tn->bclink->bcast_nodes.count)
+ tipc_link_purge_backlog(&tn->bclink->link);
+
tipc_bclink_unlock(net);
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ca8b8e0f49b5..eaa9fe54b4ae 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -404,7 +404,7 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr)
l_ptr->reasm_buf = NULL;
}
-static void tipc_link_purge_backlog(struct tipc_link *l)
+void tipc_link_purge_backlog(struct tipc_link *l)
{
__skb_queue_purge(&l->backlogq);
l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 0c02c973e985..ae0a0ea572f2 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -218,6 +218,7 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr);
int tipc_link_is_up(struct tipc_link *l_ptr);
int tipc_link_is_active(struct tipc_link *l_ptr);
void tipc_link_purge_queues(struct tipc_link *l_ptr);
+void tipc_link_purge_backlog(struct tipc_link *l);
void tipc_link_reset_all(struct tipc_node *node);
void tipc_link_reset(struct tipc_link *l_ptr);
int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index d9b1fef0c67e..f52abae0ec5f 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -115,6 +115,10 @@ PHONY += kvmconfig
kvmconfig: kvm_guest.config
@:
+PHONY += xenconfig
+xenconfig: xen.config
+ @:
+
PHONY += tinyconfig
tinyconfig:
$(Q)$(MAKE) -f $(srctree)/Makefile allnoconfig tiny.config
@@ -139,7 +143,8 @@ help:
@echo ' randconfig - New config with random answer to all options'
@echo ' listnewconfig - List new options'
@echo ' olddefconfig - Same as silentoldconfig but sets new symbols to their default value'
- @echo ' kvmconfig - Enable additional options for guest kernel support'
+ @echo ' kvmconfig - Enable additional options for kvm guest kernel support'
+ @echo ' xenconfig - Enable additional options for xen dom0 and guest kernel support'
@echo ' tinyconfig - Configure the tiniest possible kernel'
# lxdialog stuff
diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index 1052d4834a44..c2423d913b46 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -47,6 +47,10 @@
#define EM_MICROBLAZE 189
#endif
+#ifndef EM_ARCV2
+#define EM_ARCV2 195
+#endif
+
static int fd_map; /* File descriptor for file being modified. */
static int mmap_failed; /* Boolean flag. */
static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */
@@ -281,6 +285,7 @@ do_file(char const *const fname)
custom_sort = sort_relative_table;
break;
case EM_ARCOMPACT:
+ case EM_ARCV2:
case EM_ARM:
case EM_AARCH64:
case EM_MICROBLAZE:
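sortextable is a host tool, so it cannot assume the build machine's elf.h knows every e_machine value; hence the guarded fallback #define before the new switch case joins the fallthrough group. The idiom in isolation (EM_ARCV2_DEMO is a stand-in name; 195 matches the patch, EM_ARM is the standard value 40):

#include <stdio.h>

#ifndef EM_ARCV2_DEMO
#define EM_ARCV2_DEMO 195    /* fallback for older host headers */
#endif

static int needs_relative_sort(unsigned em)
{
	switch (em) {
	case EM_ARCV2_DEMO:      /* new entry joins the fallthrough group */
	case 40 /* EM_ARM */:
		return 1;
	default:
		return 0;
	}
}

int main(void)
{
	printf("%d\n", needs_relative_sort(195)); /* prints 1 */
	return 0;
}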
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 5696874e8062..dec607c17b64 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -654,7 +654,7 @@ static struct security_hook_list apparmor_hooks[] = {
static int param_set_aabool(const char *val, const struct kernel_param *kp);
static int param_get_aabool(char *buffer, const struct kernel_param *kp);
#define param_check_aabool param_check_bool
-static struct kernel_param_ops param_ops_aabool = {
+static const struct kernel_param_ops param_ops_aabool = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_aabool,
.get = param_get_aabool
@@ -663,7 +663,7 @@ static struct kernel_param_ops param_ops_aabool = {
static int param_set_aauint(const char *val, const struct kernel_param *kp);
static int param_get_aauint(char *buffer, const struct kernel_param *kp);
#define param_check_aauint param_check_uint
-static struct kernel_param_ops param_ops_aauint = {
+static const struct kernel_param_ops param_ops_aauint = {
.set = param_set_aauint,
.get = param_get_aauint
};
@@ -671,7 +671,7 @@ static struct kernel_param_ops param_ops_aauint = {
static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp);
static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp);
#define param_check_aalockpolicy param_check_bool
-static struct kernel_param_ops param_ops_aalockpolicy = {
+static const struct kernel_param_ops param_ops_aalockpolicy = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_aalockpolicy,
.get = param_get_aalockpolicy
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 686355fea7fd..e24121afb2f2 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -55,7 +55,7 @@ static int param_set_bufsize(const char *val, const struct kernel_param *kp)
return 0;
}
-static struct kernel_param_ops param_ops_bufsize = {
+static const struct kernel_param_ops param_ops_bufsize = {
.set = param_set_bufsize,
.get = param_get_uint,
};
diff --git a/sound/core/ctljack.c b/sound/core/ctljack.c
index 9149a4aefa95..84a3cd683068 100644
--- a/sound/core/ctljack.c
+++ b/sound/core/ctljack.c
@@ -41,8 +41,11 @@ static int get_available_index(struct snd_card *card, const char *name)
sid.iface = SNDRV_CTL_ELEM_IFACE_CARD;
strlcpy(sid.name, name, sizeof(sid.name));
- while (snd_ctl_find_id(card, &sid))
+ while (snd_ctl_find_id(card, &sid)) {
sid.index++;
+ /* reset numid; otherwise snd_ctl_find_id() hits this again */
+ sid.numid = 0;
+ }
return sid.index;
}
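Per the patch comment, snd_ctl_find_id() keeps matching the same control unless numid is cleared each pass. Modeling that as the lookup caching the matched numid back into the key (an assumption of this sketch, consistent with the comment), the endless loop and its fix look like:

#include <stdio.h>
#include <string.h>

struct elem_id { unsigned numid; char name[16]; unsigned index; };

static struct elem_id table[] = {
	{ 1, "Headphone Jack", 0 },
	{ 2, "Headphone Jack", 1 },
};

static struct elem_id *find_id(struct elem_id *key)
{
	for (size_t i = 0; i < sizeof table / sizeof table[0]; i++) {
		if (key->numid) {            /* numid shortcut wins */
			if (table[i].numid == key->numid)
				return &table[i];
			continue;
		}
		if (!strcmp(table[i].name, key->name) &&
		    table[i].index == key->index) {
			key->numid = table[i].numid; /* cached in the key */
			return &table[i];
		}
	}
	return NULL;
}

int main(void)
{
	struct elem_id key = { 0, "Headphone Jack", 0 };

	while (find_id(&key)) {
		key.index++;
		key.numid = 0;  /* as in the patch: without this, the cached
		                 * numid matches the same element forever */
	}
	printf("first free index = %u\n", key.index); /* prints 2 */
	return 0;
}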
diff --git a/sound/core/init.c b/sound/core/init.c
index 3e0cebacefe1..20f37fb3800e 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -109,13 +109,12 @@ static void snd_card_id_read(struct snd_info_entry *entry,
static int init_info_for_card(struct snd_card *card)
{
- int err;
struct snd_info_entry *entry;
entry = snd_info_create_card_entry(card, "id", card->proc_root);
if (!entry) {
dev_dbg(card->dev, "unable to create card entry\n");
- return err;
+ return -ENOMEM;
}
entry->c.text.read = snd_card_id_read;
card->proc_id = entry;
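The init_info_for_card() fix replaces a return of a never-assigned local with an explicit -ENOMEM. The bug class in miniature:

#include <errno.h>
#include <stdio.h>

static void *create_entry(void)
{
	return NULL;             /* simulate allocation failure */
}

static int init_info(void)
{
	void *entry = create_entry();

	if (!entry)
		return -ENOMEM;  /* was: `return err;` with err never set */
	return 0;
}

int main(void)
{
	printf("%d\n", init_info()); /* a defined errno, not garbage */
	return 0;
}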
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 7dea7987d2af..745535d1840a 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -171,7 +171,7 @@ MODULE_PARM_DESC(beep_mode, "Select HDA Beep registration mode "
#ifdef CONFIG_PM
static int param_set_xint(const char *val, const struct kernel_param *kp);
-static struct kernel_param_ops param_ops_xint = {
+static const struct kernel_param_ops param_ops_xint = {
.set = param_set_xint,
.get = param_get_int,
};
@@ -2180,6 +2180,8 @@ static const struct pci_device_id azx_ids[] = {
{ PCI_DEVICE(0x1022, 0x780d),
.driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB },
/* ATI HDMI */
+ { PCI_DEVICE(0x1002, 0x1308),
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
{ PCI_DEVICE(0x1002, 0x793b),
.driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
{ PCI_DEVICE(0x1002, 0x7919),
@@ -2188,6 +2190,8 @@ static const struct pci_device_id azx_ids[] = {
.driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
{ PCI_DEVICE(0x1002, 0x970f),
.driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
+ { PCI_DEVICE(0x1002, 0x9840),
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
{ PCI_DEVICE(0x1002, 0xaa00),
.driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
{ PCI_DEVICE(0x1002, 0xaa08),
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index f8527342a150..2f2433845d04 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -591,7 +591,7 @@ static int eld_proc_new(struct hdmi_spec_per_pin *per_pin, int index)
static void eld_proc_free(struct hdmi_spec_per_pin *per_pin)
{
- if (!per_pin->codec->bus->shutdown && per_pin->proc_entry) {
+ if (!per_pin->codec->bus->shutdown) {
snd_info_free_entry(per_pin->proc_entry);
per_pin->proc_entry = NULL;
}
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 431a20b17df4..b3b44681d3cf 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4464,6 +4464,7 @@ enum {
ALC269_FIXUP_LIFEBOOK,
ALC269_FIXUP_LIFEBOOK_EXTMIC,
ALC269_FIXUP_LIFEBOOK_HP_PIN,
+ ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT,
ALC269_FIXUP_AMIC,
ALC269_FIXUP_DMIC,
ALC269VB_FIXUP_AMIC,
@@ -4484,6 +4485,7 @@ enum {
ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
ALC269_FIXUP_HEADSET_MODE,
ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
+ ALC269_FIXUP_ASPIRE_HEADSET_MIC,
ALC269_FIXUP_ASUS_X101_FUNC,
ALC269_FIXUP_ASUS_X101_VERB,
ALC269_FIXUP_ASUS_X101,
@@ -4511,6 +4513,7 @@ enum {
ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC,
ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC292_FIXUP_TPT440_DOCK,
+ ALC292_FIXUP_TPT440_DOCK2,
ALC283_FIXUP_BXBT2807_MIC,
ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED,
ALC282_FIXUP_ASPIRE_V5_PINS,
@@ -4521,6 +4524,8 @@ enum {
ALC288_FIXUP_DELL_HEADSET_MODE,
ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC288_FIXUP_DELL_XPS_13_GPIO6,
+ ALC288_FIXUP_DELL_XPS_13,
+ ALC288_FIXUP_DISABLE_AAMIX,
ALC292_FIXUP_DELL_E7X,
ALC292_FIXUP_DISABLE_AAMIX,
ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
@@ -4630,6 +4635,10 @@ static const struct hda_fixup alc269_fixups[] = {
{ }
},
},
+ [ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_pincfg_no_hp_to_lineout,
+ },
[ALC269_FIXUP_AMIC] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -4758,6 +4767,15 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc_fixup_headset_mode_no_hp_mic,
},
+ [ALC269_FIXUP_ASPIRE_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x01a1913c }, /* headset mic w/o jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE,
+ },
[ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -4960,6 +4978,12 @@ static const struct hda_fixup alc269_fixups[] = {
.chain_id = ALC269_FIXUP_HEADSET_MODE
},
[ALC292_FIXUP_TPT440_DOCK] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_pincfg_no_hp_to_lineout,
+ .chained = true,
+ .chain_id = ALC292_FIXUP_TPT440_DOCK2
+ },
+ [ALC292_FIXUP_TPT440_DOCK2] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
{ 0x16, 0x21211010 }, /* dock headphone */
@@ -5046,9 +5070,23 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC288_FIXUP_DELL1_MIC_NO_PRESENCE
},
+ [ALC288_FIXUP_DISABLE_AAMIX] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_disable_aamix,
+ .chained = true,
+ .chain_id = ALC288_FIXUP_DELL_XPS_13_GPIO6
+ },
+ [ALC288_FIXUP_DELL_XPS_13] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_dell_xps13,
+ .chained = true,
+ .chain_id = ALC288_FIXUP_DISABLE_AAMIX
+ },
[ALC292_FIXUP_DISABLE_AAMIX] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc_fixup_disable_aamix,
+ .chained = true,
+ .chain_id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE
},
[ALC292_FIXUP_DELL_E7X] = {
.type = HDA_FIXUP_FUNC,
@@ -5073,6 +5111,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1025, 0x047c, "Acer AC700", ALC269_FIXUP_ACER_AC700),
+ SND_PCI_QUIRK(0x1025, 0x072d, "Acer Aspire V5-571G", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK),
SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK),
SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
@@ -5086,10 +5126,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
+ SND_PCI_QUIRK(0x1028, 0x062e, "Dell Latitude E7450", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK),
SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
- SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC292_FIXUP_DELL_E7X),
+ SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13),
SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
@@ -5173,6 +5214,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX),
SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK),
+ SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT),
SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
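The TPT440 dock change above shows the fixup-chaining idiom: ALC292_FIXUP_TPT440_DOCK becomes a HDA_FIXUP_FUNC entry that chains to the new ALC292_FIXUP_TPT440_DOCK2 holding the pin table, so the function runs first and either half can be reused by other models. A toy chained dispatcher (all names invented):

#include <stdio.h>

enum fixup_id { FIX_NONE = -1, FIX_DOCK, FIX_DOCK_PINS };

struct fixup {
	void (*func)(void);
	enum fixup_id chain_id;  /* FIX_NONE terminates the chain */
};

static void adjust_pincfg(void)   { puts("adjust pin defaults"); }
static void apply_pin_table(void) { puts("apply dock pin table"); }

static const struct fixup fixups[] = {
	[FIX_DOCK]      = { adjust_pincfg,   FIX_DOCK_PINS },
	[FIX_DOCK_PINS] = { apply_pin_table, FIX_NONE },
};

static void apply_fixup(enum fixup_id id)
{
	while (id != FIX_NONE) {
		fixups[id].func();
		id = fixups[id].chain_id;
	}
}

int main(void)
{
	apply_fixup(FIX_DOCK);   /* runs both stages, in chain order */
	return 0;
}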
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index 0521be8d46a8..da5366405eda 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -241,7 +241,9 @@ static int via_pin_power_ctl_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
- ucontrol->value.enumerated.item[0] = codec->power_save_node;
+ struct via_spec *spec = codec->spec;
+
+ ucontrol->value.enumerated.item[0] = spec->gen.power_down_unused;
return 0;
}
@@ -252,9 +254,9 @@ static int via_pin_power_ctl_put(struct snd_kcontrol *kcontrol,
struct via_spec *spec = codec->spec;
bool val = !!ucontrol->value.enumerated.item[0];
- if (val == codec->power_save_node)
+ if (val == spec->gen.power_down_unused)
return 0;
- codec->power_save_node = val;
+ /* codec->power_save_node = val; */ /* widget PM seems yet broken */
spec->gen.power_down_unused = val;
analog_low_current_mode(codec);
return 1;