summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.clang-format25
-rw-r--r--.mailmap1
-rw-r--r--Documentation/ABI/removed/sysfs-kernel-uids (renamed from Documentation/ABI/testing/sysfs-kernel-uids)2
-rw-r--r--Documentation/Makefile2
-rw-r--r--Documentation/PCI/pci.rst2
-rw-r--r--Documentation/accounting/psi.rst2
-rw-r--r--Documentation/admin-guide/binfmt-misc.rst4
-rw-r--r--Documentation/admin-guide/blockdev/zram.rst2
-rw-r--r--Documentation/admin-guide/bootconfig.rst2
-rw-r--r--Documentation/admin-guide/cgroup-v1/index.rst2
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst28
-rw-r--r--Documentation/admin-guide/edid.rst (renamed from Documentation/driver-api/edid.rst)4
-rw-r--r--Documentation/admin-guide/hw-vuln/tsx_async_abort.rst2
-rw-r--r--Documentation/admin-guide/index.rst1
-rw-r--r--Documentation/admin-guide/iostats.rst5
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt26
-rw-r--r--Documentation/admin-guide/kernel-per-CPU-kthreads.rst2
-rw-r--r--Documentation/admin-guide/perf/imx-ddr.rst3
-rw-r--r--Documentation/admin-guide/sysctl/kernel.rst1049
-rw-r--r--Documentation/arm/tcm.rst6
-rw-r--r--Documentation/arm64/silicon-errata.rst2
-rw-r--r--Documentation/block/capability.rst16
-rw-r--r--Documentation/conf.py6
-rw-r--r--Documentation/core-api/index.rst94
-rw-r--r--Documentation/core-api/kobject.rst (renamed from Documentation/kobject.txt)78
-rw-r--r--Documentation/debugging-modules.txt22
-rw-r--r--Documentation/dev-tools/gcov.rst2
-rw-r--r--Documentation/dev-tools/kmemleak.rst3
-rw-r--r--Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml2
-rw-r--r--Documentation/devicetree/bindings/edac/dmc-520.yaml59
-rw-r--r--Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt1
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fman.txt7
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.yaml2
-rw-r--r--Documentation/driver-api/80211/mac80211-advanced.rst8
-rw-r--r--Documentation/driver-api/dmaengine/index.rst4
-rw-r--r--Documentation/driver-api/dmaengine/provider.rst12
-rw-r--r--Documentation/driver-api/driver-model/driver.rst2
-rw-r--r--Documentation/driver-api/index.rst4
-rw-r--r--Documentation/driver-api/io-mapping.rst (renamed from Documentation/io-mapping.txt)0
-rw-r--r--Documentation/driver-api/io_ordering.rst (renamed from Documentation/io_ordering.txt)0
-rw-r--r--Documentation/driver-api/ioctl.rst (renamed from Documentation/core-api/ioctl.rst)0
-rw-r--r--Documentation/features/vm/pte_special/arch-support.txt2
-rw-r--r--Documentation/filesystems/9p.rst (renamed from Documentation/filesystems/9p.txt)114
-rw-r--r--Documentation/filesystems/adfs.rst (renamed from Documentation/filesystems/adfs.txt)29
-rw-r--r--Documentation/filesystems/affs.rst (renamed from Documentation/filesystems/affs.txt)62
-rw-r--r--Documentation/filesystems/afs.rst (renamed from Documentation/filesystems/afs.txt)73
-rw-r--r--Documentation/filesystems/autofs-mount-control.rst (renamed from Documentation/filesystems/autofs-mount-control.txt)108
-rw-r--r--Documentation/filesystems/befs.rst (renamed from Documentation/filesystems/befs.txt)59
-rw-r--r--Documentation/filesystems/bfs.rst (renamed from Documentation/filesystems/bfs.txt)37
-rw-r--r--Documentation/filesystems/btrfs.rst (renamed from Documentation/filesystems/btrfs.txt)3
-rw-r--r--Documentation/filesystems/ceph.rst (renamed from Documentation/filesystems/ceph.txt)26
-rw-r--r--Documentation/filesystems/cifs/cifsroot.txt2
-rw-r--r--Documentation/filesystems/cramfs.rst (renamed from Documentation/filesystems/cramfs.txt)19
-rw-r--r--Documentation/filesystems/debugfs.rst (renamed from Documentation/filesystems/debugfs.txt)54
-rw-r--r--Documentation/filesystems/dlmfs.rst (renamed from Documentation/filesystems/dlmfs.txt)28
-rw-r--r--Documentation/filesystems/ecryptfs.rst (renamed from Documentation/filesystems/ecryptfs.txt)51
-rw-r--r--Documentation/filesystems/efivarfs.rst (renamed from Documentation/filesystems/efivarfs.txt)5
-rw-r--r--Documentation/filesystems/erofs.rst (renamed from Documentation/filesystems/erofs.txt)177
-rw-r--r--Documentation/filesystems/ext2.rst (renamed from Documentation/filesystems/ext2.txt)41
-rw-r--r--Documentation/filesystems/ext3.rst (renamed from Documentation/filesystems/ext3.txt)2
-rw-r--r--Documentation/filesystems/f2fs.rst (renamed from Documentation/filesystems/f2fs.txt)254
-rw-r--r--Documentation/filesystems/fuse.rst5
-rw-r--r--Documentation/filesystems/gfs2-uevents.rst (renamed from Documentation/filesystems/gfs2-uevents.txt)20
-rw-r--r--Documentation/filesystems/gfs2.rst (renamed from Documentation/filesystems/gfs2.txt)20
-rw-r--r--Documentation/filesystems/hfs.rst (renamed from Documentation/filesystems/hfs.txt)23
-rw-r--r--Documentation/filesystems/hfsplus.rst (renamed from Documentation/filesystems/hfsplus.txt)2
-rw-r--r--Documentation/filesystems/hpfs.rst (renamed from Documentation/filesystems/hpfs.txt)239
-rw-r--r--Documentation/filesystems/index.rst47
-rw-r--r--Documentation/filesystems/inotify.rst (renamed from Documentation/filesystems/inotify.txt)33
-rw-r--r--Documentation/filesystems/isofs.rst64
-rw-r--r--Documentation/filesystems/isofs.txt48
-rw-r--r--Documentation/filesystems/nfs/index.rst13
-rw-r--r--Documentation/filesystems/nfs/knfsd-stats.rst (renamed from Documentation/filesystems/nfs/knfsd-stats.txt)17
-rw-r--r--Documentation/filesystems/nfs/nfs41-server.rst256
-rw-r--r--Documentation/filesystems/nfs/nfs41-server.txt173
-rw-r--r--Documentation/filesystems/nfs/pnfs.rst (renamed from Documentation/filesystems/nfs/pnfs.txt)25
-rw-r--r--Documentation/filesystems/nfs/rpc-cache.rst (renamed from Documentation/filesystems/nfs/rpc-cache.txt)136
-rw-r--r--Documentation/filesystems/nfs/rpc-server-gss.rst (renamed from Documentation/filesystems/nfs/rpc-server-gss.txt)19
-rw-r--r--Documentation/filesystems/nilfs2.rst (renamed from Documentation/filesystems/nilfs2.txt)40
-rw-r--r--Documentation/filesystems/ntfs.rst (renamed from Documentation/filesystems/ntfs.txt)145
-rw-r--r--Documentation/filesystems/ocfs2-online-filecheck.rst (renamed from Documentation/filesystems/ocfs2-online-filecheck.txt)45
-rw-r--r--Documentation/filesystems/ocfs2.rst (renamed from Documentation/filesystems/ocfs2.txt)31
-rw-r--r--Documentation/filesystems/omfs.rst112
-rw-r--r--Documentation/filesystems/omfs.txt106
-rw-r--r--Documentation/filesystems/orangefs.rst (renamed from Documentation/filesystems/orangefs.txt)187
-rw-r--r--Documentation/filesystems/porting.rst8
-rw-r--r--Documentation/filesystems/proc.rst (renamed from Documentation/filesystems/proc.txt)1544
-rw-r--r--Documentation/filesystems/qnx6.rst (renamed from Documentation/filesystems/qnx6.txt)22
-rw-r--r--Documentation/filesystems/ramfs-rootfs-initramfs.rst (renamed from Documentation/filesystems/ramfs-rootfs-initramfs.txt)54
-rw-r--r--Documentation/filesystems/relay.rst (renamed from Documentation/filesystems/relay.txt)139
-rw-r--r--Documentation/filesystems/romfs.rst (renamed from Documentation/filesystems/romfs.txt)42
-rw-r--r--Documentation/filesystems/squashfs.rst (renamed from Documentation/filesystems/squashfs.txt)60
-rw-r--r--Documentation/filesystems/sysfs.rst (renamed from Documentation/filesystems/sysfs.txt)324
-rw-r--r--Documentation/filesystems/sysv-fs.rst (renamed from Documentation/filesystems/sysv-fs.txt)153
-rw-r--r--Documentation/filesystems/tmpfs.rst (renamed from Documentation/filesystems/tmpfs.txt)44
-rw-r--r--Documentation/filesystems/ubifs-authentication.rst10
-rw-r--r--Documentation/filesystems/ubifs.rst (renamed from Documentation/filesystems/ubifs.txt)25
-rw-r--r--Documentation/filesystems/udf.rst (renamed from Documentation/filesystems/udf.txt)21
-rw-r--r--Documentation/filesystems/virtiofs.rst2
-rw-r--r--Documentation/filesystems/zonefs.rst (renamed from Documentation/filesystems/zonefs.txt)126
-rw-r--r--Documentation/gpu/i915.rst4
-rw-r--r--Documentation/index.rst1
-rw-r--r--Documentation/kbuild/gcc-plugins.rst (renamed from Documentation/core-api/gcc-plugins.rst)4
-rw-r--r--Documentation/kbuild/index.rst1
-rw-r--r--Documentation/kbuild/kbuild.rst2
-rw-r--r--Documentation/kbuild/kconfig-macro-language.rst2
-rw-r--r--Documentation/kbuild/makefiles.rst6
-rw-r--r--Documentation/kbuild/modules.rst4
-rw-r--r--Documentation/kernel-hacking/hacking.rst4
-rw-r--r--Documentation/kernel-hacking/locking.rst176
-rw-r--r--Documentation/kref.txt4
-rw-r--r--Documentation/media/kapi/v4l2-controls.rst8
-rw-r--r--Documentation/misc-devices/index.rst1
-rw-r--r--Documentation/misc-devices/mic/index.rst (renamed from Documentation/mic/index.rst)0
-rw-r--r--Documentation/misc-devices/mic/mic_overview.rst (renamed from Documentation/mic/mic_overview.rst)0
-rw-r--r--Documentation/misc-devices/mic/scif_overview.rst (renamed from Documentation/mic/scif_overview.rst)0
-rw-r--r--Documentation/networking/devlink/devlink-region.rst3
-rw-r--r--Documentation/networking/net_failover.rst6
-rw-r--r--Documentation/networking/rds.txt2
-rw-r--r--Documentation/networking/snmp_counter.rst4
-rw-r--r--Documentation/powerpc/ultravisor.rst4
-rw-r--r--Documentation/process/2.Process.rst108
-rw-r--r--Documentation/process/coding-style.rst18
-rw-r--r--Documentation/process/deprecated.rst120
-rw-r--r--Documentation/process/email-clients.rst4
-rw-r--r--Documentation/process/howto.rst17
-rw-r--r--Documentation/process/kernel-docs.rst10
-rw-r--r--Documentation/process/management-style.rst2
-rw-r--r--Documentation/scsi/scsi_mid_low_api.txt21
-rw-r--r--Documentation/security/siphash.rst8
-rw-r--r--Documentation/target/tcmu-design.rst6
-rw-r--r--Documentation/trace/events.rst63
-rw-r--r--Documentation/translations/it_IT/networking/netdev-FAQ.rst2
-rw-r--r--Documentation/translations/it_IT/process/programming-language.rst30
-rw-r--r--Documentation/translations/zh_CN/filesystems/index.rst27
-rw-r--r--Documentation/translations/zh_CN/filesystems/virtiofs.rst58
-rw-r--r--Documentation/translations/zh_CN/index.rst1
-rw-r--r--Documentation/translations/zh_CN/io_ordering.txt4
-rw-r--r--Documentation/translations/zh_CN/process/5.Posting.rst2
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--Documentation/virt/kvm/amd-memory-encryption.rst25
-rw-r--r--Documentation/x86/exception-tables.rst14
-rw-r--r--Documentation/x86/intel-iommu.rst3
-rw-r--r--MAINTAINERS102
-rw-r--r--Makefile4
-rw-r--r--arch/arc/Kconfig4
-rw-r--r--arch/arc/configs/nps_defconfig2
-rw-r--r--arch/arc/configs/nsimosci_defconfig2
-rw-r--r--arch/arc/configs/nsimosci_hs_defconfig2
-rw-r--r--arch/arc/configs/nsimosci_hs_smp_defconfig2
-rw-r--r--arch/arc/include/asm/fpu.h2
-rw-r--r--arch/arc/include/asm/linkage.h2
-rw-r--r--arch/arc/kernel/setup.c2
-rw-r--r--arch/arc/kernel/troubleshoot.c27
-rw-r--r--arch/arm/Makefile4
-rw-r--r--arch/arm/boot/compressed/Makefile4
-rw-r--r--arch/arm/boot/dts/bcm2835-rpi-zero-w.dts1
-rw-r--r--arch/arm/boot/dts/bcm2835-rpi.dtsi1
-rw-r--r--arch/arm/boot/dts/dm8148-evm.dts4
-rw-r--r--arch/arm/boot/dts/dm8148-t410.dts4
-rw-r--r--arch/arm/boot/dts/dra62x-j5eco-evm.dts4
-rw-r--r--arch/arm/boot/dts/dra7.dtsi1
-rw-r--r--arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi4
-rw-r--r--arch/arm/boot/dts/exynos4412-n710x.dts2
-rw-r--r--arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi4
-rw-r--r--arch/arm/boot/dts/motorola-mapphone-common.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-n900.dts44
-rw-r--r--arch/arm/boot/dts/omap5.dtsi1
-rw-r--r--arch/arm/boot/dts/ox810se.dtsi4
-rw-r--r--arch/arm/boot/dts/ox820.dtsi4
-rw-r--r--arch/arm/boot/dts/sun8i-a33.dtsi2
-rw-r--r--arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts7
-rw-r--r--arch/arm/boot/dts/sun8i-a83t.dtsi6
-rw-r--r--arch/arm/boot/dts/sun8i-r40.dtsi125
-rw-r--r--arch/arm/include/asm/floppy.h88
-rw-r--r--arch/arm/kernel/vdso.c2
-rw-r--r--arch/arm/lib/copy_from_user.S2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts4
-rw-r--r--arch/arm64/boot/dts/sprd/sc9863a.dtsi2
-rw-r--r--arch/arm64/crypto/chacha-neon-glue.c8
-rw-r--r--arch/arm64/include/asm/alternative.h2
-rw-r--r--arch/arm64/include/asm/mmu.h4
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h6
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/kernel/smp.c25
-rw-r--r--arch/m68k/emu/nfblock.c3
-rw-r--r--arch/mips/boot/dts/ingenic/ci20.dts44
-rw-r--r--arch/mips/kernel/setup.c3
-rw-r--r--arch/parisc/Kconfig5
-rw-r--r--arch/parisc/Makefile7
-rw-r--r--arch/powerpc/kvm/book3s_pr.c1
-rw-r--r--arch/powerpc/kvm/powerpc.c2
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c9
-rw-r--r--arch/riscv/Kconfig1
-rw-r--r--arch/riscv/Kconfig.socs14
-rw-r--r--arch/riscv/configs/defconfig16
-rw-r--r--arch/riscv/configs/rv32_defconfig16
-rw-r--r--arch/riscv/include/asm/clint.h8
-rw-r--r--arch/riscv/include/asm/pgtable.h78
-rw-r--r--arch/riscv/include/asm/uaccess.h36
-rw-r--r--arch/riscv/kernel/smp.c2
-rw-r--r--arch/riscv/lib/Makefile2
-rw-r--r--arch/s390/kvm/kvm-s390.c18
-rw-r--r--arch/unicore32/include/asm/io.h2
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/crypto/Makefile7
-rw-r--r--arch/x86/events/amd/uncore.c17
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h1
-rw-r--r--arch/x86/include/asm/mce.h6
-rw-r--r--arch/x86/kernel/apic/vector.c14
-rw-r--r--arch/x86/kernel/cpu/amd.c30
-rw-r--r--arch/x86/kernel/cpu/mce/core.c4
-rw-r--r--arch/x86/kernel/cpu/mce/dev-mcelog.c47
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c26
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h2
-rw-r--r--arch/x86/kernel/cpu/mce/therm_throt.c9
-rw-r--r--arch/x86/kvm/Kconfig2
-rw-r--r--arch/x86/kvm/emulate.c1
-rw-r--r--arch/x86/kvm/ioapic.c7
-rw-r--r--arch/x86/kvm/lapic.c8
-rw-r--r--arch/x86/kvm/svm.c28
-rw-r--r--arch/x86/kvm/vmx/nested.c5
-rw-r--r--arch/x86/kvm/vmx/vmx.c18
-rw-r--r--arch/x86/kvm/x86.c14
-rw-r--r--arch/x86/mm/fault.c26
-rw-r--r--arch/x86/mm/ioremap.c21
-rw-r--r--arch/x86/net/bpf_jit_comp32.c10
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c3
-rw-r--r--block/Makefile3
-rw-r--r--block/bfq-cgroup.c87
-rw-r--r--block/bfq-iosched.c18
-rw-r--r--block/bfq-iosched.h1
-rw-r--r--block/bio.c580
-rw-r--r--block/blk-cgroup.c2
-rw-r--r--block/blk-core.c82
-rw-r--r--block/blk-flush.c16
-rw-r--r--block/blk-ioc.c7
-rw-r--r--block/blk-iocost.c5
-rw-r--r--block/blk-map.c508
-rw-r--r--block/blk-mq-sched.c22
-rw-r--r--block/blk-mq.c59
-rw-r--r--block/blk-settings.c39
-rw-r--r--block/blk-zoned.c34
-rw-r--r--block/blk.h138
-rw-r--r--block/genhd.c255
-rw-r--r--block/ioctl.c1
-rw-r--r--block/opal_proto.h1
-rw-r--r--block/partitions/Makefile3
-rw-r--r--block/partitions/acorn.c1
-rw-r--r--block/partitions/acorn.h15
-rw-r--r--block/partitions/aix.c1
-rw-r--r--block/partitions/aix.h2
-rw-r--r--block/partitions/amiga.c11
-rw-r--r--block/partitions/amiga.h7
-rw-r--r--block/partitions/atari.h1
-rw-r--r--block/partitions/check.c198
-rw-r--r--block/partitions/check.h41
-rw-r--r--block/partitions/cmdline.c1
-rw-r--r--block/partitions/cmdline.h3
-rw-r--r--block/partitions/core.c (renamed from block/partition-generic.c)319
-rw-r--r--block/partitions/efi.h3
-rw-r--r--block/partitions/ibm.c1
-rw-r--r--block/partitions/ibm.h2
-rw-r--r--block/partitions/karma.c3
-rw-r--r--block/partitions/karma.h9
-rw-r--r--block/partitions/ldm.c6
-rw-r--r--block/partitions/ldm.h2
-rw-r--r--block/partitions/mac.h1
-rw-r--r--block/partitions/msdos.c172
-rw-r--r--block/partitions/msdos.h9
-rw-r--r--block/partitions/osf.c2
-rw-r--r--block/partitions/osf.h8
-rw-r--r--block/partitions/sgi.c7
-rw-r--r--block/partitions/sgi.h9
-rw-r--r--block/partitions/sun.c9
-rw-r--r--block/partitions/sun.h9
-rw-r--r--block/partitions/sysv68.c1
-rw-r--r--block/partitions/sysv68.h2
-rw-r--r--block/partitions/ultrix.c1
-rw-r--r--block/partitions/ultrix.h6
-rw-r--r--block/sed-opal.c2
-rw-r--r--drivers/acpi/apei/ghes.c2
-rw-r--r--drivers/android/binderfs.c1
-rw-r--r--drivers/ata/Kconfig77
-rw-r--r--drivers/ata/Makefile2
-rw-r--r--drivers/ata/ahci.c10
-rw-r--r--drivers/ata/libata-core.c1126
-rw-r--r--drivers/ata/libata-eh.c224
-rw-r--r--drivers/ata/libata-pata-timings.c192
-rw-r--r--drivers/ata/libata-sata.c1483
-rw-r--r--drivers/ata/libata-scsi.c583
-rw-r--r--drivers/ata/libata-sff.c4
-rw-r--r--drivers/ata/libata-transport.c10
-rw-r--r--drivers/ata/libata.h25
-rw-r--r--drivers/ata/sata_promise.c8
-rw-r--r--drivers/atm/nicstar.c2
-rw-r--r--drivers/auxdisplay/Kconfig16
-rw-r--r--drivers/auxdisplay/charlcd.c2
-rw-r--r--drivers/auxdisplay/img-ascii-lcd.c4
-rw-r--r--drivers/base/memory.c23
-rw-r--r--drivers/base/platform.c25
-rw-r--r--drivers/block/Makefile6
-rw-r--r--drivers/block/aoe/aoeblk.c4
-rw-r--r--drivers/block/brd.c4
-rw-r--r--drivers/block/drbd/drbd_main.c14
-rw-r--r--drivers/block/drbd/drbd_receiver.c1
-rw-r--r--drivers/block/drbd/drbd_worker.c1
-rw-r--r--drivers/block/floppy.c1093
-rw-r--r--drivers/block/loop.c18
-rw-r--r--drivers/block/nbd.c27
-rw-r--r--drivers/block/null_blk_main.c115
-rw-r--r--drivers/block/null_blk_trace.c21
-rw-r--r--drivers/block/null_blk_trace.h79
-rw-r--r--drivers/block/null_blk_zoned.c12
-rw-r--r--drivers/block/pktcdvd.c15
-rw-r--r--drivers/block/ps3vram.c3
-rw-r--r--drivers/block/rsxx/dev.c3
-rw-r--r--drivers/block/rsxx/dma.c2
-rw-r--r--drivers/block/umem.c4
-rw-r--r--drivers/block/virtio_blk.c22
-rw-r--r--drivers/block/xen-blkfront.c6
-rw-r--r--drivers/block/zram/zram_drv.c5
-rw-r--r--drivers/bus/sunxi-rsb.c2
-rw-r--r--drivers/bus/ti-sysc.c3
-rw-r--r--drivers/char/ipmi/ipmi_si_platform.c4
-rw-r--r--drivers/char/tpm/eventlog/common.c12
-rw-r--r--drivers/char/tpm/eventlog/of.c3
-rw-r--r--drivers/char/tpm/eventlog/tpm1.c2
-rw-r--r--drivers/char/tpm/eventlog/tpm2.c2
-rw-r--r--drivers/char/tpm/tpm-chip.c4
-rw-r--r--drivers/char/tpm/tpm.h3
-rw-r--r--drivers/char/tpm/tpm2-cmd.c2
-rw-r--r--drivers/char/tpm/tpm_ibmvtpm.c17
-rw-r--r--drivers/char/tpm/tpm_ibmvtpm.h1
-rw-r--r--drivers/char/tpm/tpm_tis_spi_cr50.c7
-rw-r--r--drivers/char/tpm/tpm_tis_spi_main.c3
-rw-r--r--drivers/clk/clk.c4
-rw-r--r--drivers/clk/imx/clk-imx8mp.c4
-rw-r--r--drivers/clk/imx/clk-scu.c8
-rw-r--r--drivers/clk/qcom/dispcc-sc7180.c19
-rw-r--r--drivers/clk/qcom/videocc-sc7180.c2
-rw-r--r--drivers/clk/ti/clk-43xx.c2
-rw-r--r--drivers/clocksource/hyperv_timer.c6
-rw-r--r--drivers/dma/dmaengine.c2
-rw-r--r--drivers/dma/idxd/cdev.c4
-rw-r--r--drivers/dma/ti/k3-udma-glue.c29
-rw-r--r--drivers/edac/Kconfig7
-rw-r--r--drivers/edac/Makefile1
-rw-r--r--drivers/edac/armada_xp_edac.c26
-rw-r--r--drivers/edac/dmc520_edac.c656
-rw-r--r--drivers/edac/edac_mc.c511
-rw-r--r--drivers/edac/edac_mc.h6
-rw-r--r--drivers/edac/edac_mc_sysfs.c110
-rw-r--r--drivers/edac/edac_module.h1
-rw-r--r--drivers/edac/ghes_edac.c16
-rw-r--r--drivers/edac/mce_amd.c2
-rw-r--r--drivers/edac/synopsys_edac.c22
-rw-r--r--drivers/firmware/efi/efivars.c32
-rw-r--r--drivers/gpio/gpiolib-acpi.c140
-rw-r--r--drivers/gpio/gpiolib.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c114
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c1
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c7
-rw-r--r--drivers/gpu/drm/amd/powerplay/navi10_ppt.c22
-rw-r--r--drivers/gpu/drm/amd/powerplay/renoir_ppt.c5
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_drv.c4
-rw-r--r--drivers/gpu/drm/bochs/bochs_hw.c6
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.c46
-rw-r--r--drivers/gpu/drm/drm_dp_mst_topology.c184
-rw-r--r--drivers/gpu/drm/drm_lease.c3
-rw-r--r--drivers/gpu/drm/drm_prime.c2
-rw-r--r--drivers/gpu/drm/exynos/exynos5433_drm_decon.c5
-rw-r--r--drivers/gpu/drm/exynos/exynos7_drm_decon.c5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dma.c28
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.h6
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fimc.c5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fimd.c5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.c5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gsc.c5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_rotator.c5
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_scaler.c6
-rw-r--r--drivers/gpu/drm/exynos/exynos_mixer.c7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c81
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c25
-rw-r--r--drivers/gpu/drm/i915/gvt/display.c3
-rw-r--r--drivers/gpu/drm/i915/gvt/opregion.c5
-rw-r--r--drivers/gpu/drm/i915/gvt/vgpu.c12
-rw-r--r--drivers/gpu/drm/i915/i915_request.c28
-rw-r--r--drivers/gpu/drm/i915/i915_request.h2
-rw-r--r--drivers/gpu/drm/i915/i915_utils.h5
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c2
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c2
-rw-r--r--drivers/hid/hid-google-hammer.c2
-rw-r--r--drivers/hid/hid-ids.h2
-rw-r--r--drivers/hid/hid-picolcd_fb.c4
-rw-r--r--drivers/hid/hid-quirks.c1
-rw-r--r--drivers/hid/hid-sensor-custom.c6
-rw-r--r--drivers/hwtracing/intel_th/msu.c13
-rw-r--r--drivers/hwtracing/intel_th/pci.c5
-rw-r--r--drivers/hwtracing/stm/p_sys-t.c6
-rw-r--r--drivers/i2c/busses/i2c-designware-pcidrv.c1
-rw-r--r--drivers/i2c/busses/i2c-gpio.c2
-rw-r--r--drivers/i2c/busses/i2c-hix5hd2.c1
-rw-r--r--drivers/i2c/busses/i2c-i801.c45
-rw-r--r--drivers/i2c/busses/i2c-nvidia-gpu.c20
-rw-r--r--drivers/i2c/busses/i2c-pca-platform.c2
-rw-r--r--drivers/i2c/busses/i2c-st.c1
-rw-r--r--drivers/i2c/i2c-core-acpi.c10
-rw-r--r--drivers/i3c/device.c50
-rw-r--r--drivers/i3c/master.c28
-rw-r--r--drivers/i3c/master/dw-i3c-master.c2
-rw-r--r--drivers/i3c/master/i3c-master-cdns.c2
-rw-r--r--drivers/iio/accel/adxl372.c1
-rw-r--r--drivers/iio/accel/st_accel_i2c.c2
-rw-r--r--drivers/iio/adc/at91-sama5d2_adc.c15
-rw-r--r--drivers/iio/adc/stm32-dfsdm-adc.c43
-rw-r--r--drivers/iio/chemical/Kconfig2
-rw-r--r--drivers/iio/light/vcnl4000.c15
-rw-r--r--drivers/iio/magnetometer/ak8974.c2
-rw-r--r--drivers/iio/proximity/ping.c2
-rw-r--r--drivers/iio/trigger/stm32-timer-trigger.c11
-rw-r--r--drivers/infiniband/core/device.c4
-rw-r--r--drivers/infiniband/core/nldev.c6
-rw-r--r--drivers/infiniband/core/security.c11
-rw-r--r--drivers/infiniband/core/umem_odp.c2
-rw-r--r--drivers/infiniband/core/user_mad.c33
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c25
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c27
-rw-r--r--drivers/infiniband/hw/mlx5/main.c5
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h1
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c5
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c2
-rw-r--r--drivers/input/input.c1
-rw-r--r--drivers/input/keyboard/tm2-touchkey.c11
-rw-r--r--drivers/input/mouse/synaptics.c1
-rw-r--r--drivers/input/rmi4/rmi_f11.c4
-rw-r--r--drivers/input/touchscreen/raydium_i2c_ts.c8
-rw-r--r--drivers/iommu/amd_iommu.c4
-rw-r--r--drivers/iommu/dma-iommu.c16
-rw-r--r--drivers/iommu/dmar.c24
-rw-r--r--drivers/iommu/intel-iommu-debugfs.c51
-rw-r--r--drivers/iommu/intel-iommu.c28
-rw-r--r--drivers/iommu/io-pgtable-arm.c4
-rw-r--r--drivers/irqchip/irq-gic-v3.c30
-rw-r--r--drivers/lightnvm/core.c3
-rw-r--r--drivers/lightnvm/pblk-sysfs.c42
-rw-r--r--drivers/macintosh/windfarm_ad7417_sensor.c7
-rw-r--r--drivers/macintosh/windfarm_fcu_controls.c7
-rw-r--r--drivers/macintosh/windfarm_lm75_sensor.c16
-rw-r--r--drivers/macintosh/windfarm_lm87_sensor.c7
-rw-r--r--drivers/macintosh/windfarm_max6690_sensor.c7
-rw-r--r--drivers/macintosh/windfarm_smu_sat.c7
-rw-r--r--drivers/md/bcache/btree.c242
-rw-r--r--drivers/md/bcache/btree.h84
-rw-r--r--drivers/md/bcache/request.c7
-rw-r--r--drivers/md/bcache/request.h3
-rw-r--r--drivers/md/bcache/super.c11
-rw-r--r--drivers/md/bcache/sysfs.c2
-rw-r--r--drivers/md/bcache/writeback.c164
-rw-r--r--drivers/md/bcache/writeback.h19
-rw-r--r--drivers/md/dm.c10
-rw-r--r--drivers/md/md.c11
-rw-r--r--drivers/misc/cardreader/rts5227.c2
-rw-r--r--drivers/misc/cardreader/rts5249.c2
-rw-r--r--drivers/misc/cardreader/rts5260.c2
-rw-r--r--drivers/misc/cardreader/rts5261.c2
-rw-r--r--drivers/misc/eeprom/at24.c3
-rw-r--r--drivers/mmc/core/core.c5
-rw-r--r--drivers/mmc/core/mmc.c7
-rw-r--r--drivers/mmc/core/mmc_ops.c6
-rw-r--r--drivers/mmc/host/rtsx_pci_sdmmc.c13
-rw-r--r--drivers/mmc/host/sdhci-acpi.c84
-rw-r--r--drivers/mmc/host/sdhci-cadence.c18
-rw-r--r--drivers/mmc/host/sdhci-msm.c2
-rw-r--r--drivers/mmc/host/sdhci-of-at91.c8
-rw-r--r--drivers/mmc/host/sdhci-omap.c3
-rw-r--r--drivers/mmc/host/sdhci-pci-gli.c17
-rw-r--r--drivers/mmc/host/sdhci-tegra.c3
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/bonding/bond_alb.c20
-rw-r--r--drivers/net/caif/caif_spi.c72
-rw-r--r--drivers/net/can/dev.c1
-rw-r--r--drivers/net/can/slcan.c3
-rw-r--r--drivers/net/dsa/mt7530.c4
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c2
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.c8
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c3
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c78
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c32
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c15
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c32
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c142
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h3
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c6
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c40
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c49
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c52
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c114
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c6
-rw-r--r--drivers/net/ethernet/freescale/fman/Kconfig28
-rw-r--r--drivers/net/ethernet/freescale/fman/fman.c18
-rw-r--r--drivers/net/ethernet/freescale/fman/fman.h5
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c47
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c3
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c5
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c51
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c26
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c5
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_rx.c3
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_tx.c4
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c24
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h6
-rw-r--r--drivers/net/ethernet/marvell/mvmdio.c2
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mcg.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c8
-rw-r--r--drivers/net/ethernet/micrel/ks8851_mll.c56
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c28
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-config.h2
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-main.h14
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c8
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_if.h2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c22
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_regs.h2
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c18
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c2
-rw-r--r--drivers/net/ethernet/sfc/ef10.c32
-rw-r--r--drivers/net/ethernet/sfc/efx.h1
-rw-r--r--drivers/net/ethernet/sfc/efx_channels.c1
-rw-r--r--drivers/net/ethernet/sfc/mcdi.c32
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h3
-rw-r--r--drivers/net/ethernet/sfc/tx.c38
-rw-r--r--drivers/net/ethernet/sfc/tx_common.c29
-rw-r--r--drivers/net/ethernet/sfc/tx_common.h6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c14
-rw-r--r--drivers/net/geneve.c8
-rw-r--r--drivers/net/ifb.c6
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c19
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c5
-rw-r--r--drivers/net/macsec.c28
-rw-r--r--drivers/net/macvlan.c2
-rw-r--r--drivers/net/netdevsim/ipsec.c30
-rw-r--r--drivers/net/phy/bcm63xx.c1
-rw-r--r--drivers/net/phy/dp83867.c21
-rw-r--r--drivers/net/phy/mdio-bcm-unimac.c6
-rw-r--r--drivers/net/phy/mdio-mux-bcm-iproc.c7
-rw-r--r--drivers/net/phy/phy.c3
-rw-r--r--drivers/net/phy/phy_device.c6
-rw-r--r--drivers/net/phy/phylink.c8
-rw-r--r--drivers/net/phy/sfp-bus.c32
-rw-r--r--drivers/net/slip/slhc.c14
-rw-r--r--drivers/net/team/team.c2
-rw-r--r--drivers/net/usb/qmi_wwan.c1
-rw-r--r--drivers/net/usb/r8152.c8
-rw-r--r--drivers/net/veth.c2
-rw-r--r--drivers/net/vxlan.c11
-rw-r--r--drivers/net/wireguard/device.c2
-rw-r--r--drivers/net/wireguard/netlink.c8
-rw-r--r--drivers/net/wireguard/noise.c55
-rw-r--r--drivers/net/wireguard/noise.h12
-rw-r--r--drivers/net/wireguard/peer.c7
-rw-r--r--drivers/net/wireguard/queueing.h10
-rw-r--r--drivers/net/wireguard/receive.c7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/22000.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.c14
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.h14
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c25
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-drv.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/nvm.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c35
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/time-event.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c3
-rw-r--r--drivers/net/wireless/mediatek/mt76/dma.c9
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h1
-rw-r--r--drivers/net/wireless/ti/wlcore/main.c2
-rw-r--r--drivers/nfc/fdp/fdp.c5
-rw-r--r--drivers/nvdimm/blk.c3
-rw-r--r--drivers/nvdimm/btt.c3
-rw-r--r--drivers/nvdimm/pmem.c3
-rw-r--r--drivers/nvme/host/Kconfig2
-rw-r--r--drivers/nvme/host/core.c257
-rw-r--r--drivers/nvme/host/fabrics.c8
-rw-r--r--drivers/nvme/host/fc.c3
-rw-r--r--drivers/nvme/host/multipath.c24
-rw-r--r--drivers/nvme/host/nvme.h6
-rw-r--r--drivers/nvme/host/pci.c91
-rw-r--r--drivers/nvme/host/rdma.c17
-rw-r--r--drivers/nvme/host/tcp.c120
-rw-r--r--drivers/nvme/target/admin-cmd.c35
-rw-r--r--drivers/nvme/target/configfs.c146
-rw-r--r--drivers/nvme/target/core.c9
-rw-r--r--drivers/nvme/target/loop.c3
-rw-r--r--drivers/nvme/target/nvmet.h11
-rw-r--r--drivers/nvme/target/rdma.c15
-rw-r--r--drivers/nvme/target/tcp.c47
-rw-r--r--drivers/of/of_mdio.c1
-rw-r--r--drivers/pinctrl/cirrus/pinctrl-madera-core.c13
-rw-r--r--drivers/pinctrl/core.c1
-rw-r--r--drivers/pinctrl/freescale/pinctrl-scu.c4
-rw-r--r--drivers/pinctrl/meson/pinctrl-meson-gxl.c4
-rw-r--r--drivers/pinctrl/pinctrl-falcon.c2
-rw-r--r--drivers/pinctrl/qcom/pinctrl-msm.c3
-rw-r--r--drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c2
-rw-r--r--drivers/rtc/Kconfig1
-rw-r--r--drivers/s390/block/dasd.c27
-rw-r--r--drivers/s390/block/dasd_eckd.c163
-rw-r--r--drivers/s390/block/dasd_int.h15
-rw-r--r--drivers/s390/block/dcssblk.c4
-rw-r--r--drivers/s390/block/xpram.c4
-rw-r--r--drivers/s390/net/qeth_core.h4
-rw-r--r--drivers/s390/net/qeth_core_main.c176
-rw-r--r--drivers/s390/net/qeth_core_sys.c9
-rw-r--r--drivers/s390/net/qeth_l2_main.c1
-rw-r--r--drivers/s390/net/qeth_l3_main.c1
-rw-r--r--drivers/s390/net/qeth_l3_sys.c9
-rw-r--r--drivers/scsi/BusLogic.c8
-rw-r--r--drivers/scsi/Kconfig1
-rw-r--r--drivers/scsi/aacraid/linit.c7
-rw-r--r--drivers/scsi/aic7xxx/aic79xx_osm.c13
-rw-r--r--drivers/scsi/aic7xxx/aic7xxx_osm.c13
-rw-r--r--drivers/scsi/arcmsr/arcmsr_hba.c13
-rw-r--r--drivers/scsi/ipr.c3
-rw-r--r--drivers/scsi/ipr.h1
-rw-r--r--drivers/scsi/libsas/Kconfig1
-rw-r--r--drivers/scsi/megaraid.c13
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c4
-rw-r--r--drivers/scsi/scsi_debug.c5
-rw-r--r--drivers/scsi/scsicam.c186
-rw-r--r--drivers/scsi/sd.c7
-rw-r--r--drivers/scsi/ufs/ufshcd.c21
-rw-r--r--drivers/slimbus/qcom-ngd-ctrl.c3
-rw-r--r--drivers/soc/fsl/dpio/dpio-driver.c8
-rw-r--r--drivers/soc/samsung/exynos-chipid.c2
-rw-r--r--drivers/staging/greybus/tools/loopback_test.c21
-rw-r--r--drivers/staging/rtl8188eu/os_dep/usb_intf.c1
-rw-r--r--drivers/staging/speakup/main.c2
-rw-r--r--drivers/staging/wfx/hif_tx.c15
-rw-r--r--drivers/staging/wfx/hif_tx.h2
-rw-r--r--drivers/staging/wfx/hif_tx_mib.h15
-rw-r--r--drivers/staging/wfx/sta.c25
-rw-r--r--drivers/tee/amdtee/core.c3
-rw-r--r--drivers/thunderbolt/switch.c2
-rw-r--r--drivers/tty/tty_io.c14
-rw-r--r--drivers/usb/chipidea/udc.c7
-rw-r--r--drivers/usb/class/cdc-acm.c34
-rw-r--r--drivers/usb/core/quirks.c6
-rw-r--r--drivers/usb/host/xhci-pci.c3
-rw-r--r--drivers/usb/host/xhci-plat.c1
-rw-r--r--drivers/usb/host/xhci-trace.h23
-rw-r--r--drivers/usb/serial/option.c2
-rw-r--r--drivers/usb/serial/pl2303.c1
-rw-r--r--drivers/usb/serial/pl2303.h1
-rw-r--r--drivers/usb/typec/ucsi/displayport.c12
-rw-r--r--drivers/virtio/virtio_balloon.c2
-rw-r--r--drivers/virtio/virtio_ring.c4
-rw-r--r--drivers/watchdog/iTCO_vendor.h2
-rw-r--r--drivers/watchdog/iTCO_vendor_support.c16
-rw-r--r--drivers/watchdog/iTCO_wdt.c28
-rw-r--r--fs/afs/addr_list.c2
-rw-r--r--fs/afs/cmservice.c14
-rw-r--r--fs/afs/fs_probe.c2
-rw-r--r--fs/afs/internal.h14
-rw-r--r--fs/afs/rxrpc.c74
-rw-r--r--fs/block_dev.c20
-rw-r--r--fs/btrfs/block-group.c4
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/buffer.c43
-rw-r--r--fs/ceph/file.c14
-rw-r--r--fs/ceph/snap.c1
-rw-r--r--fs/cifs/dir.c1
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/smb2ops.c4
-rw-r--r--fs/crypto/keysetup.c9
-rw-r--r--fs/erofs/decompressor.c22
-rw-r--r--fs/erofs/internal.h8
-rw-r--r--fs/erofs/super.c2
-rw-r--r--fs/erofs/utils.c90
-rw-r--r--fs/erofs/zdata.c76
-rw-r--r--fs/eventpoll.c8
-rw-r--r--fs/ext4/super.c8
-rw-r--r--fs/ext4/sysfs.c1
-rw-r--r--fs/f2fs/f2fs.h1
-rw-r--r--fs/f2fs/super.c1
-rw-r--r--fs/file.c7
-rw-r--r--fs/fuse/dev.c6
-rw-r--r--fs/fuse/fuse_i.h2
-rw-r--r--fs/gfs2/inode.c2
-rw-r--r--fs/inode.c1
-rw-r--r--fs/internal.h1
-rw-r--r--fs/io-wq.c368
-rw-r--r--fs/io-wq.h65
-rw-r--r--fs/io_uring.c2062
-rw-r--r--fs/locks.c54
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/client.c1
-rw-r--r--fs/nfs/fs_context.c9
-rw-r--r--fs/nfs/fscache.c2
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs4client.c1
-rw-r--r--fs/open.c3
-rw-r--r--fs/overlayfs/Kconfig1
-rw-r--r--fs/overlayfs/file.c6
-rw-r--r--fs/overlayfs/overlayfs.h7
-rw-r--r--fs/overlayfs/super.c9
-rw-r--r--fs/overlayfs/util.c4
-rw-r--r--fs/pstore/inode.c5
-rw-r--r--fs/pstore/platform.c4
-rw-r--r--fs/pstore/ram.c1
-rw-r--r--fs/pstore/ram_core.c2
-rw-r--r--fs/reiserfs/journal.c5
-rw-r--r--fs/splice.c6
-rw-r--r--fs/zonefs/super.c28
-rw-r--r--include/crypto/curve25519.h6
-rw-r--r--include/drm/drm_dp_mst_helper.h4
-rw-r--r--include/dt-bindings/clock/imx8mn-clock.h4
-rw-r--r--include/linux/bio.h15
-rw-r--r--include/linux/blk-mq.h7
-rw-r--r--include/linux/blkdev.h18
-rw-r--r--include/linux/bpf.h1
-rw-r--r--include/linux/ceph/messenger.h7
-rw-r--r--include/linux/ceph/osdmap.h4
-rw-r--r--include/linux/ceph/rados.h6
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/clk-provider.h10
-rw-r--r--include/linux/dmar.h14
-rw-r--r--include/linux/dsa/8021q.h7
-rw-r--r--include/linux/edac.h9
-rw-r--r--include/linux/file.h1
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/futex.h17
-rw-r--r--include/linux/genhd.h534
-rw-r--r--include/linux/i2c.h4
-rw-r--r--include/linux/ieee80211.h4
-rw-r--r--include/linux/inet_diag.h18
-rw-r--r--include/linux/intel-iommu.h2
-rw-r--r--include/linux/io-mapping.h2
-rw-r--r--include/linux/iocontext.h1
-rw-r--r--include/linux/libata.h174
-rw-r--r--include/linux/memcontrol.h12
-rw-r--r--include/linux/mmc/host.h1
-rw-r--r--include/linux/msdos_partition.h50
-rw-r--r--include/linux/netlink.h13
-rw-r--r--include/linux/of_clk.h8
-rw-r--r--include/linux/page-flags.h2
-rw-r--r--include/linux/part_stat.h115
-rw-r--r--include/linux/pci_ids.h2
-rw-r--r--include/linux/phy.h3
-rw-r--r--include/linux/platform_device.h2
-rw-r--r--include/linux/raid/detect.h3
-rw-r--r--include/linux/rhashtable.h2
-rw-r--r--include/linux/seccomp.h3
-rw-r--r--include/linux/skbuff.h36
-rw-r--r--include/linux/socket.h7
-rw-r--r--include/linux/splice.h3
-rw-r--r--include/linux/vmalloc.h5
-rw-r--r--include/linux/workqueue.h16
-rw-r--r--include/net/af_rxrpc.h12
-rw-r--r--include/net/compat.h3
-rw-r--r--include/net/fib_rules.h1
-rw-r--r--include/net/sch_generic.h16
-rw-r--r--include/scsi/scsicam.h7
-rw-r--r--include/soc/mscc/ocelot_dev.h2
-rw-r--r--include/trace/events/afs.h2
-rw-r--r--include/trace/events/io_uring.h103
-rw-r--r--include/uapi/linux/fdreg.h18
-rw-r--r--include/uapi/linux/in.h2
-rw-r--r--include/uapi/linux/input-event-codes.h3
-rw-r--r--include/uapi/linux/io_uring.h42
-rw-r--r--include/uapi/linux/seccomp.h1
-rw-r--r--include/uapi/linux/serio.h10
-rw-r--r--init/Kconfig3
-rw-r--r--init/do_mounts.c12
-rw-r--r--kernel/bpf/bpf_struct_ops.c14
-rw-r--r--kernel/bpf/btf.c5
-rw-r--r--kernel/bpf/cgroup.c7
-rw-r--r--kernel/bpf/syscall.c39
-rw-r--r--kernel/cgroup/cgroup-v1.c3
-rw-r--r--kernel/cgroup/cgroup.c43
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/futex.c93
-rw-r--r--kernel/irq/manage.c11
-rw-r--r--kernel/notifier.c2
-rw-r--r--kernel/pid.c10
-rw-r--r--kernel/seccomp.c15
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/task_work.c18
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/ftrace.c2
-rw-r--r--kernel/workqueue.c14
-rw-r--r--lib/crypto/chacha20poly1305-selftest.c11
-rw-r--r--mm/hugetlb_cgroup.c3
-rw-r--r--mm/madvise.c12
-rw-r--r--mm/memcontrol.c155
-rw-r--r--mm/mmu_notifier.c27
-rw-r--r--mm/nommu.c10
-rw-r--r--mm/slub.c41
-rw-r--r--mm/sparse.c14
-rw-r--r--mm/swapfile.c41
-rw-r--r--mm/vmalloc.c11
-rw-r--r--net/Kconfig3
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/bpfilter/main.c14
-rw-r--r--net/caif/caif_dev.c3
-rw-r--r--net/ceph/messenger.c9
-rw-r--r--net/ceph/osd_client.c14
-rw-r--r--net/ceph/osdmap.c9
-rw-r--r--net/compat.c30
-rw-r--r--net/core/dev.c6
-rw-r--r--net/core/devlink.c33
-rw-r--r--net/core/netclassid_cgroup.c47
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/sock.c5
-rw-r--r--net/core/sock_map.c12
-rw-r--r--net/dsa/dsa_priv.h2
-rw-r--r--net/dsa/port.c44
-rw-r--r--net/dsa/slave.c8
-rw-r--r--net/dsa/tag_8021q.c43
-rw-r--r--net/dsa/tag_brcm.c2
-rw-r--r--net/dsa/tag_sja1105.c19
-rw-r--r--net/ethtool/debug.c4
-rw-r--r--net/ethtool/linkinfo.c4
-rw-r--r--net/ethtool/linkmodes.c4
-rw-r--r--net/ethtool/netlink.c16
-rw-r--r--net/ethtool/wol.c4
-rw-r--r--net/hsr/hsr_framereg.c9
-rw-r--r--net/hsr/hsr_netlink.c70
-rw-r--r--net/hsr/hsr_slave.c8
-rw-r--r--net/ieee802154/nl_policy.c6
-rw-r--r--net/ipv4/Kconfig7
-rw-r--r--net/ipv4/bpf_tcp_ca.c7
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/gre_demux.c12
-rw-r--r--net/ipv4/inet_connection_sock.c20
-rw-r--r--net/ipv4/inet_diag.c44
-rw-r--r--net/ipv4/ip_gre.c105
-rw-r--r--net/ipv4/ip_vti.c38
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/raw_diag.c5
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_output.c12
-rw-r--r--net/ipv4/udp_diag.c5
-rw-r--r--net/ipv6/addrconf.c51
-rw-r--r--net/ipv6/ip6_vti.c34
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/ipv6/seg6_local.c2
-rw-r--r--net/ipv6/xfrm6_tunnel.c2
-rw-r--r--net/mac80211/debugfs_sta.c3
-rw-r--r--net/mac80211/key.c20
-rw-r--r--net/mac80211/mesh_hwmp.c3
-rw-r--r--net/mac80211/sta_info.c7
-rw-r--r--net/mac80211/sta_info.h1
-rw-r--r--net/mac80211/tx.c39
-rw-r--r--net/mptcp/options.c19
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c3
-rw-r--r--net/netfilter/nf_flow_table_ip.c14
-rw-r--r--net/netfilter/nf_flow_table_offload.c1
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c27
-rw-r--r--net/netfilter/nfnetlink_cthelper.c2
-rw-r--r--net/netfilter/nft_chain_nat.c1
-rw-r--r--net/netfilter/nft_fwd_netdev.c12
-rw-r--r--net/netfilter/nft_payload.c1
-rw-r--r--net/netfilter/nft_set_pipapo.c34
-rw-r--r--net/netfilter/nft_set_rbtree.c87
-rw-r--r--net/netfilter/nft_tunnel.c2
-rw-r--r--net/netfilter/x_tables.c6
-rw-r--r--net/netfilter/xt_recent.c2
-rw-r--r--net/netlink/af_netlink.c43
-rw-r--r--net/nfc/hci/core.c19
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/openvswitch/datapath.c1
-rw-r--r--net/packet/af_packet.c34
-rw-r--r--net/packet/internal.h5
-rw-r--r--net/rxrpc/af_rxrpc.c37
-rw-r--r--net/rxrpc/ar-internal.h5
-rw-r--r--net/rxrpc/call_object.c3
-rw-r--r--net/rxrpc/conn_client.c13
-rw-r--r--net/rxrpc/input.c1
-rw-r--r--net/rxrpc/sendmsg.c75
-rw-r--r--net/sched/act_ct.c2
-rw-r--r--net/sched/act_mirred.c6
-rw-r--r--net/sched/cls_route.c4
-rw-r--r--net/sched/cls_tcindex.c3
-rw-r--r--net/sched/sch_cbs.c12
-rw-r--r--net/sched/sch_fq.c1
-rw-r--r--net/sched/sch_taprio.c13
-rw-r--r--net/sctp/diag.c8
-rw-r--r--net/smc/smc_ib.c1
-rw-r--r--net/socket.c33
-rw-r--r--net/tipc/netlink.c1
-rw-r--r--net/wireless/nl80211.c7
-rw-r--r--net/wireless/scan.c6
-rw-r--r--net/xfrm/xfrm_device.c9
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--net/xfrm/xfrm_user.c6
-rw-r--r--scripts/Kconfig.include7
-rw-r--r--scripts/Makefile.extrawarn1
-rwxr-xr-xscripts/check-sysctl-docs181
-rwxr-xr-xscripts/documentation-file-ref-check11
-rw-r--r--scripts/dtc/dtc-lexer.l1
-rwxr-xr-xscripts/export_report.pl2
-rw-r--r--scripts/gcc-plugins/Kconfig2
-rw-r--r--scripts/kallsyms.c8
-rw-r--r--scripts/mod/devicetable-offsets.c7
-rw-r--r--scripts/mod/file2alias.c19
-rw-r--r--scripts/mod/modpost.c27
-rwxr-xr-xscripts/parse-maintainers.pl31
-rwxr-xr-xscripts/sphinx-pre-install17
-rw-r--r--security/keys/key.c2
-rw-r--r--security/keys/keyctl.c4
-rw-r--r--sound/core/oss/pcm_plugin.c12
-rw-r--r--sound/core/seq/oss/seq_oss_midi.c1
-rw-r--r--sound/core/seq/seq_virmidi.c1
-rw-r--r--sound/pci/hda/patch_realtek.c25
-rw-r--r--sound/usb/line6/driver.c2
-rw-r--r--sound/usb/line6/midibuf.c2
-rw-r--r--tools/edid/1024x768.S (renamed from Documentation/EDID/1024x768.S)0
-rw-r--r--tools/edid/1280x1024.S (renamed from Documentation/EDID/1280x1024.S)0
-rw-r--r--tools/edid/1600x1200.S (renamed from Documentation/EDID/1600x1200.S)0
-rw-r--r--tools/edid/1680x1050.S (renamed from Documentation/EDID/1680x1050.S)0
-rw-r--r--tools/edid/1920x1080.S (renamed from Documentation/EDID/1920x1080.S)0
-rw-r--r--tools/edid/800x600.S (renamed from Documentation/EDID/800x600.S)0
-rw-r--r--tools/edid/Makefile (renamed from Documentation/EDID/Makefile)0
-rw-r--r--tools/edid/edid.S (renamed from Documentation/EDID/edid.S)0
-rw-r--r--tools/edid/hex (renamed from Documentation/EDID/hex)0
-rw-r--r--tools/include/uapi/asm/errno.h14
-rw-r--r--tools/include/uapi/linux/in.h2
-rw-r--r--tools/perf/Makefile2
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c20
-rw-r--r--tools/perf/arch/arm64/util/perf_regs.c2
-rw-r--r--tools/perf/arch/powerpc/util/perf_regs.c4
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c14
-rw-r--r--tools/perf/arch/x86/util/event.c12
-rw-r--r--tools/perf/arch/x86/util/header.c4
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c24
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c30
-rw-r--r--tools/perf/arch/x86/util/machine.c6
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c8
-rw-r--r--tools/perf/arch/x86/util/pmu.c6
-rw-r--r--tools/perf/bench/bench.h4
-rw-r--r--tools/perf/bench/epoll-ctl.c8
-rw-r--r--tools/perf/bench/epoll-wait.c12
-rw-r--r--tools/perf/bench/futex-hash.c13
-rw-r--r--tools/perf/bench/futex-lock-pi.c12
-rw-r--r--tools/perf/bench/futex-requeue.c1
-rw-r--r--tools/perf/bench/futex-wake-parallel.c1
-rw-r--r--tools/perf/bench/futex-wake.c5
-rw-r--r--tools/perf/builtin-diff.c3
-rw-r--r--tools/perf/builtin-top.c4
-rw-r--r--tools/perf/pmu-events/jevents.c15
-rw-r--r--tools/perf/tests/bp_account.c2
-rw-r--r--tools/perf/util/block-info.c3
-rw-r--r--tools/perf/util/env.c4
-rw-r--r--tools/perf/util/map.c4
-rw-r--r--tools/perf/util/parse-events.c56
-rw-r--r--tools/perf/util/probe-file.c3
-rw-r--r--tools/perf/util/probe-finder.c11
-rw-r--r--tools/perf/util/setup.py10
-rw-r--r--tools/perf/util/symbol.c13
-rw-r--r--tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h2
-rw-r--r--tools/power/x86/turbostat/Makefile2
-rw-r--r--tools/power/x86/turbostat/turbostat.c142
-rw-r--r--tools/scripts/Makefile.include4
-rwxr-xr-xtools/testing/ktest/ktest.pl16
-rw-r--r--tools/testing/ktest/sample.conf22
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c60
-rw-r--r--tools/testing/selftests/bpf/progs/test_send_signal_kern.c6
-rw-r--r--tools/testing/selftests/bpf/test_btf.c42
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c15
-rw-r--r--tools/testing/selftests/net/Makefile4
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh34
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile76
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/net/forwarding/ethtool_lib.sh0
-rw-r--r--tools/testing/selftests/net/reuseport_addr_any.c4
-rw-r--r--tools/testing/selftests/netfilter/Makefile6
-rw-r--r--tools/testing/selftests/netfilter/config6
-rw-r--r--tools/testing/selftests/netfilter/nf-queue.c352
-rwxr-xr-xtools/testing/selftests/netfilter/nft_queue.sh332
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c74
-rw-r--r--tools/testing/selftests/tc-testing/config1
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh15
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile2
-rw-r--r--tools/testing/selftests/wireguard/qemu/init.c1
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config1
-rw-r--r--usr/Kconfig22
1038 files changed, 20348 insertions, 12568 deletions
diff --git a/.clang-format b/.clang-format
index 196ca317bd1f..6ec5558b516b 100644
--- a/.clang-format
+++ b/.clang-format
@@ -86,6 +86,8 @@ ForEachMacros:
- 'bio_for_each_segment_all'
- 'bio_list_for_each'
- 'bip_for_each_vec'
+ - 'bitmap_for_each_clear_region'
+ - 'bitmap_for_each_set_region'
- 'blkg_for_each_descendant_post'
- 'blkg_for_each_descendant_pre'
- 'blk_queue_for_each_rl'
@@ -115,6 +117,7 @@ ForEachMacros:
- 'drm_client_for_each_connector_iter'
- 'drm_client_for_each_modeset'
- 'drm_connector_for_each_possible_encoder'
+ - 'drm_for_each_bridge_in_chain'
- 'drm_for_each_connector_iter'
- 'drm_for_each_crtc'
- 'drm_for_each_encoder'
@@ -136,9 +139,10 @@ ForEachMacros:
- 'for_each_bio'
- 'for_each_board_func_rsrc'
- 'for_each_bvec'
+ - 'for_each_card_auxs'
+ - 'for_each_card_auxs_safe'
- 'for_each_card_components'
- - 'for_each_card_links'
- - 'for_each_card_links_safe'
+ - 'for_each_card_pre_auxs'
- 'for_each_card_prelinks'
- 'for_each_card_rtds'
- 'for_each_card_rtds_safe'
@@ -166,6 +170,7 @@ ForEachMacros:
- 'for_each_dpcm_fe'
- 'for_each_drhd_unit'
- 'for_each_dss_dev'
+ - 'for_each_efi_handle'
- 'for_each_efi_memory_desc'
- 'for_each_efi_memory_desc_in_map'
- 'for_each_element'
@@ -190,6 +195,7 @@ ForEachMacros:
- 'for_each_lru'
- 'for_each_matching_node'
- 'for_each_matching_node_and_match'
+ - 'for_each_member'
- 'for_each_memblock'
- 'for_each_memblock_type'
- 'for_each_memcg_cache_index'
@@ -200,9 +206,11 @@ ForEachMacros:
- 'for_each_msi_entry'
- 'for_each_msi_entry_safe'
- 'for_each_net'
+ - 'for_each_net_continue_reverse'
- 'for_each_netdev'
- 'for_each_netdev_continue'
- 'for_each_netdev_continue_rcu'
+ - 'for_each_netdev_continue_reverse'
- 'for_each_netdev_feature'
- 'for_each_netdev_in_bond_rcu'
- 'for_each_netdev_rcu'
@@ -254,10 +262,10 @@ ForEachMacros:
- 'for_each_reserved_mem_region'
- 'for_each_rtd_codec_dai'
- 'for_each_rtd_codec_dai_rollback'
- - 'for_each_rtdcom'
- - 'for_each_rtdcom_safe'
+ - 'for_each_rtd_components'
- 'for_each_set_bit'
- 'for_each_set_bit_from'
+ - 'for_each_set_clump8'
- 'for_each_sg'
- 'for_each_sg_dma_page'
- 'for_each_sg_page'
@@ -267,6 +275,7 @@ ForEachMacros:
- 'for_each_subelement_id'
- '__for_each_thread'
- 'for_each_thread'
+ - 'for_each_wakeup_source'
- 'for_each_zone'
- 'for_each_zone_zonelist'
- 'for_each_zone_zonelist_nodemask'
@@ -330,6 +339,7 @@ ForEachMacros:
- 'list_for_each'
- 'list_for_each_codec'
- 'list_for_each_codec_safe'
+ - 'list_for_each_continue'
- 'list_for_each_entry'
- 'list_for_each_entry_continue'
- 'list_for_each_entry_continue_rcu'
@@ -351,6 +361,7 @@ ForEachMacros:
- 'llist_for_each_entry'
- 'llist_for_each_entry_safe'
- 'llist_for_each_safe'
+ - 'mci_for_each_dimm'
- 'media_device_for_each_entity'
- 'media_device_for_each_intf'
- 'media_device_for_each_link'
@@ -444,10 +455,16 @@ ForEachMacros:
- 'virtio_device_for_each_vq'
- 'xa_for_each'
- 'xa_for_each_marked'
+ - 'xa_for_each_range'
- 'xa_for_each_start'
- 'xas_for_each'
- 'xas_for_each_conflict'
- 'xas_for_each_marked'
+ - 'xbc_array_for_each_value'
+ - 'xbc_for_each_key_value'
+ - 'xbc_node_for_each_array_value'
+ - 'xbc_node_for_each_child'
+ - 'xbc_node_for_each_key_value'
- 'zorro_for_each_dev'
#IncludeBlocks: Preserve # Unknown to clang-format-5.0
diff --git a/.mailmap b/.mailmap
index ffb8f28290c7..a0dfce8de1ba 100644
--- a/.mailmap
+++ b/.mailmap
@@ -225,6 +225,7 @@ Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
Praveen BP <praveenbp@ti.com>
Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
Qais Yousef <qsyousef@gmail.com> <qais.yousef@imgtec.com>
+Quentin Monnet <quentin@isovalent.com> <quentin.monnet@netronome.com>
Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
Rajesh Shah <rajesh.shah@intel.com>
diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/removed/sysfs-kernel-uids
index 4182b7061816..dc4463f190a7 100644
--- a/Documentation/ABI/testing/sysfs-kernel-uids
+++ b/Documentation/ABI/removed/sysfs-kernel-uids
@@ -1,5 +1,5 @@
What: /sys/kernel/uids/<uid>/cpu_shares
-Date: December 2007
+Date: December 2007, finally removed in kernel v2.6.34-rc1
Contact: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Description:
diff --git a/Documentation/Makefile b/Documentation/Makefile
index d77bb607aea4..79ecee62d597 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -13,7 +13,7 @@ endif
SPHINXBUILD = sphinx-build
SPHINXOPTS =
SPHINXDIRS = .
-_SPHINXDIRS = $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst))
+_SPHINXDIRS = $(sort $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst)))
SPHINX_CONF = conf.py
PAPER =
BUILDDIR = $(obj)/output
diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst
index 6864f9a70f5f..8c016d8c9862 100644
--- a/Documentation/PCI/pci.rst
+++ b/Documentation/PCI/pci.rst
@@ -239,7 +239,7 @@ from the PCI device config space. Use the values in the pci_dev structure
as the PCI "bus address" might have been remapped to a "host physical"
address by the arch/chip-set specific kernel support.
-See Documentation/io-mapping.txt for how to access device registers
+See Documentation/driver-api/io-mapping.rst for how to access device registers
or device memory.
The device driver needs to call pci_request_region() to verify
diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
index 621111ce5740..f2b3439edcc2 100644
--- a/Documentation/accounting/psi.rst
+++ b/Documentation/accounting/psi.rst
@@ -1,3 +1,5 @@
+.. _psi:
+
================================
PSI - Pressure Stall Information
================================
diff --git a/Documentation/admin-guide/binfmt-misc.rst b/Documentation/admin-guide/binfmt-misc.rst
index 97b0d7927078..95c93bbe408a 100644
--- a/Documentation/admin-guide/binfmt-misc.rst
+++ b/Documentation/admin-guide/binfmt-misc.rst
@@ -1,5 +1,5 @@
-Kernel Support for miscellaneous (your favourite) Binary Formats v1.1
-=====================================================================
+Kernel Support for miscellaneous Binary Formats (binfmt_misc)
+=============================================================
This Kernel feature allows you to invoke almost (for restrictions see below)
every program by simply typing its name in the shell.
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 27c77d853028..a6fd1f9b5faf 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -251,8 +251,6 @@ line of text and contains the following stats separated by whitespace:
================ =============================================================
orig_data_size uncompressed size of data stored in this disk.
- This excludes same-element-filled pages (same_pages) since
- no memory is allocated for them.
Unit: bytes
compr_data_size compressed size of data stored in this disk
mem_used_total the amount of memory allocated for this disk. This
diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst
index cf2edcd09183..d6b3b77a4129 100644
--- a/Documentation/admin-guide/bootconfig.rst
+++ b/Documentation/admin-guide/bootconfig.rst
@@ -23,7 +23,7 @@ of dot-connected-words, and key and value are connected by ``=``. The value
has to be terminated by semi-colon (``;``) or newline (``\n``).
For array value, array entries are separated by comma (``,``). ::
-KEY[.WORD[...]] = VALUE[, VALUE2[...]][;]
+ KEY[.WORD[...]] = VALUE[, VALUE2[...]][;]
Unlike the kernel command line syntax, spaces are OK around the comma and ``=``.
diff --git a/Documentation/admin-guide/cgroup-v1/index.rst b/Documentation/admin-guide/cgroup-v1/index.rst
index 10bf48bae0b0..226f64473e8e 100644
--- a/Documentation/admin-guide/cgroup-v1/index.rst
+++ b/Documentation/admin-guide/cgroup-v1/index.rst
@@ -1,3 +1,5 @@
+.. _cgroup-v1:
+
========================
Control Groups version 1
========================
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 3f801461f0f3..fbb111616705 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -9,7 +9,7 @@ This is the authoritative documentation on the design, interface and
conventions of cgroup v2. It describes all userland-visible aspects
of cgroup including core and specific controller behaviors. All
future changes must be reflected in this document. Documentation for
-v1 is available under Documentation/admin-guide/cgroup-v1/.
+v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgroup-v1>`.
.. CONTENTS
@@ -1023,7 +1023,7 @@ All time durations are in microseconds.
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for CPU. See
- Documentation/accounting/psi.rst for details.
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
cpu.uclamp.min
A read-write single value file which exists on non-root cgroups.
@@ -1103,7 +1103,7 @@ PAGE_SIZE multiple when read back.
proportionally to the overage, reducing reclaim pressure for
smaller overages.
- Effective min boundary is limited by memory.min values of
+ Effective min boundary is limited by memory.min values of
all ancestor cgroups. If there is memory.min overcommitment
(child cgroup or cgroups are requiring more protected memory
than parent will allow), then each child cgroup will get
@@ -1313,53 +1313,41 @@ PAGE_SIZE multiple when read back.
Number of major page faults incurred
workingset_refault
-
Number of refaults of previously evicted pages
workingset_activate
-
Number of refaulted pages that were immediately activated
workingset_nodereclaim
-
Number of times a shadow node has been reclaimed
pgrefill
-
Amount of scanned pages (in an active LRU list)
pgscan
-
Amount of scanned pages (in an inactive LRU list)
pgsteal
-
Amount of reclaimed pages
pgactivate
-
Amount of pages moved to the active LRU list
pgdeactivate
-
Amount of pages moved to the inactive LRU list
pglazyfree
-
Amount of pages postponed to be freed under memory pressure
pglazyfreed
-
Amount of reclaimed lazyfree pages
thp_fault_alloc
-
Number of transparent hugepages which were allocated to satisfy
a page fault, including COW faults. This counter is not present
when CONFIG_TRANSPARENT_HUGEPAGE is not set.
thp_collapse_alloc
-
Number of transparent hugepages which were allocated to allow
collapsing an existing range of pages. This counter is not
present when CONFIG_TRANSPARENT_HUGEPAGE is not set.
@@ -1403,7 +1391,7 @@ PAGE_SIZE multiple when read back.
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for memory. See
- Documentation/accounting/psi.rst for details.
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
Usage Guidelines
@@ -1478,7 +1466,7 @@ IO Interface Files
dios Number of discard IOs
====== =====================
- An example read output follows:
+ An example read output follows::
8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 dbytes=0 dios=0
8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021
@@ -1643,7 +1631,7 @@ IO Interface Files
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for IO. See
- Documentation/accounting/psi.rst for details.
+ :ref:`Documentation/accounting/psi.rst <psi>` for details.
Writeback
@@ -1853,7 +1841,7 @@ Cpuset Interface Files
from the requested CPUs.
The CPU numbers are comma-separated numbers or ranges.
- For example:
+ For example::
# cat cpuset.cpus
0-4,6,8-10
@@ -1892,7 +1880,7 @@ Cpuset Interface Files
from the requested memory nodes.
The memory node numbers are comma-separated numbers or ranges.
- For example:
+ For example::
# cat cpuset.mems
0-1,3
diff --git a/Documentation/driver-api/edid.rst b/Documentation/admin-guide/edid.rst
index b1b5acd501ed..80deeb21a265 100644
--- a/Documentation/driver-api/edid.rst
+++ b/Documentation/admin-guide/edid.rst
@@ -11,11 +11,13 @@ Today, with the advent of Kernel Mode Setting, a graphics board is
either correctly working because all components follow the standards -
or the computer is unusable, because the screen remains dark after
booting or it displays the wrong area. Cases when this happens are:
+
- The graphics board does not recognize the monitor.
- The graphics board is unable to detect any EDID data.
- The graphics board incorrectly forwards EDID data to the driver.
- The monitor sends no or bogus EDID data.
- A KVM sends its own EDID data instead of querying the connected monitor.
+
Adding the kernel parameter "nomodeset" helps in most cases, but causes
restrictions later on.
@@ -32,7 +34,7 @@ individual data for a specific misbehaving monitor, commented sources
and a Makefile environment are given here.
To create binary EDID and C source code files from the existing data
-material, simply type "make".
+material, simply type "make" in tools/edid/.
If you want to create your own EDID file, copy the file 1024x768.S,
replace the settings with your own data and add a new target to the
diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
index af6865b822d2..68d96f0e9c95 100644
--- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
@@ -136,8 +136,6 @@ enables the mitigation by default.
The mitigation can be controlled at boot time via a kernel command line option.
See :ref:`taa_mitigation_control_command_line`.
-.. _virt_mechanism:
-
Virtualization mitigation
^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index f1d0ccffbe72..5a6269fb8593 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -75,6 +75,7 @@ configure specific aspects of kernel behavior to your liking.
cputopology
dell_rbu
device-mapper/index
+ edid
efi-stub
ext4
nfs/index
diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
index df5b8345c41d..9b14b0c2c9c4 100644
--- a/Documentation/admin-guide/iostats.rst
+++ b/Documentation/admin-guide/iostats.rst
@@ -100,7 +100,7 @@ Field 10 -- # of milliseconds spent doing I/Os (unsigned int)
Since 5.0 this field counts jiffies when at least one request was
started or completed. If request runs more than 2 jiffies then some
- I/O time will not be accounted unless there are other requests.
+ I/O time might be not accounted in case of concurrent requests.
Field 11 -- weighted # of milliseconds spent doing I/Os (unsigned int)
This field is incremented at each I/O start, I/O completion, I/O
@@ -143,6 +143,9 @@ are summed (possibly overflowing the unsigned long variable they are
summed to) and the result given to the user. There is no convenient
user interface for accessing the per-CPU counters themselves.
+Since 4.19 request times are measured with nanoseconds precision and
+truncated to milliseconds before showing in this interface.
+
Disks vs Partitions
-------------------
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c07815d230bc..d7da466fd5c3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1099,6 +1099,12 @@
A valid base address must be provided, and the serial
port must already be setup and configured.
+ ec_imx21,<addr>
+ ec_imx6q,<addr>
+ Start an early, polled-mode, output-only console on the
+ Freescale i.MX UART at the specified address. The UART
+ must already be setup and configured.
+
ar3700_uart,<addr>
Start an early, polled-mode console on the
Armada 3700 serial port at the specified
@@ -1779,7 +1785,7 @@
provided by tboot because it makes the system
vulnerable to DMA attacks.
nobounce [Default off]
- Disable bounce buffer for unstrusted devices such as
+ Disable bounce buffer for untrusted devices such as
the Thunderbolt devices. This will treat the untrusted
devices as the trusted ones, hence might expose security
risks of DMA attacks.
@@ -1883,7 +1889,7 @@
No delay
ip= [IP_PNP]
- See Documentation/filesystems/nfs/nfsroot.txt.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
ipcmni_extend [KNL] Extend the maximum number of unique System V
IPC identifiers from 32,768 to 16,777,216.
@@ -2795,7 +2801,7 @@
<name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
mtdparts= [MTD]
- See drivers/mtd/cmdlinepart.c.
+ See drivers/mtd/parsers/cmdlinepart.c.
multitce=off [PPC] This parameter disables the use of the pSeries
firmware feature for updating multiple TCE entries
@@ -2853,13 +2859,13 @@
Default value is 0.
nfsaddrs= [NFS] Deprecated. Use ip= instead.
- See Documentation/filesystems/nfs/nfsroot.txt.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
nfsroot= [NFS] nfs root filesystem for disk-less boxes.
- See Documentation/filesystems/nfs/nfsroot.txt.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
nfsrootdebug [NFS] enable nfsroot debugging messages.
- See Documentation/filesystems/nfs/nfsroot.txt.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
nfs.callback_nr_threads=
[NFSv4] set the total number of threads that the
@@ -4514,10 +4520,10 @@
Format: <integer>
A nonzero value instructs the soft-lockup detector
- to panic the machine when a soft-lockup occurs. This
- is also controlled by CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC
- which is the respective build-time switch to that
- functionality.
+ to panic the machine when a soft-lockup occurs. It is
+ also controlled by the kernel.softlockup_panic sysctl
+ and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the
+ respective build-time switch to that functionality.
softlockup_all_cpu_backtrace=
[KNL] Should the soft-lockup detector generate
diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
index baeeba8762ae..21818aca4708 100644
--- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
+++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
@@ -234,7 +234,7 @@ To reduce its OS jitter, do any of the following:
Such a workqueue can be confined to a given subset of the
CPUs using the ``/sys/devices/virtual/workqueue/*/cpumask`` sysfs
files. The set of WQ_SYSFS workqueues can be displayed using
- "ls sys/devices/virtual/workqueue". That said, the workqueues
+ "ls /sys/devices/virtual/workqueue". That said, the workqueues
maintainer would like to caution people against indiscriminately
sprinkling WQ_SYSFS across all the workqueues. The reason for
caution is that it is easy to add WQ_SYSFS, but because sysfs is
diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst
index 3726a10a03ba..f05f56c73b7d 100644
--- a/Documentation/admin-guide/perf/imx-ddr.rst
+++ b/Documentation/admin-guide/perf/imx-ddr.rst
@@ -43,7 +43,8 @@ value 1 for supported.
AXI_ID and AXI_MASKING are mapped on DPCR1 register in performance counter.
When non-masked bits are matching corresponding AXI_ID bits then counter is
- incremented. Perf counter is incremented if
+ incremented. Perf counter is incremented if::
+
AxID && AXI_MASKING == AXI_ID && AXI_MASKING
This filter doesn't support filter different AXI ID for axid-read and axid-write
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index def074807cee..335696d3360d 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -2,262 +2,197 @@
Documentation for /proc/sys/kernel/
===================================
-kernel version 2.2.10
+.. See scripts/check-sysctl-docs to keep this up to date
+
Copyright (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
Copyright (c) 2009, Shen Feng<shen@cn.fujitsu.com>
-For general info and legal blurb, please look in index.rst.
+For general info and legal blurb, please look in :doc:`index`.
------------------------------------------------------------------------------
This file contains documentation for the sysctl files in
-/proc/sys/kernel/ and is valid for Linux kernel version 2.2.
+``/proc/sys/kernel/`` and is valid for Linux kernel version 2.2.
The files in this directory can be used to tune and monitor
miscellaneous and general things in the operation of the Linux
-kernel. Since some of the files _can_ be used to screw up your
+kernel. Since some of the files *can* be used to screw up your
system, it is advisable to read both documentation and source
before actually making adjustments.
Currently, these files might (depending on your configuration)
-show up in /proc/sys/kernel:
-
-- acct
-- acpi_video_flags
-- auto_msgmni
-- bootloader_type [ X86 only ]
-- bootloader_version [ X86 only ]
-- cap_last_cap
-- core_pattern
-- core_pipe_limit
-- core_uses_pid
-- ctrl-alt-del
-- dmesg_restrict
-- domainname
-- hostname
-- hotplug
-- hardlockup_all_cpu_backtrace
-- hardlockup_panic
-- hung_task_panic
-- hung_task_check_count
-- hung_task_timeout_secs
-- hung_task_check_interval_secs
-- hung_task_warnings
-- hyperv_record_panic_msg
-- kexec_load_disabled
-- kptr_restrict
-- l2cr [ PPC only ]
-- modprobe ==> Documentation/debugging-modules.txt
-- modules_disabled
-- msg_next_id [ sysv ipc ]
-- msgmax
-- msgmnb
-- msgmni
-- nmi_watchdog
-- osrelease
-- ostype
-- overflowgid
-- overflowuid
-- panic
-- panic_on_oops
-- panic_on_stackoverflow
-- panic_on_unrecovered_nmi
-- panic_on_warn
-- panic_print
-- panic_on_rcu_stall
-- perf_cpu_time_max_percent
-- perf_event_paranoid
-- perf_event_max_stack
-- perf_event_mlock_kb
-- perf_event_max_contexts_per_stack
-- pid_max
-- powersave-nap [ PPC only ]
-- printk
-- printk_delay
-- printk_ratelimit
-- printk_ratelimit_burst
-- pty ==> Documentation/filesystems/devpts.txt
-- randomize_va_space
-- real-root-dev ==> Documentation/admin-guide/initrd.rst
-- reboot-cmd [ SPARC only ]
-- rtsig-max
-- rtsig-nr
-- sched_energy_aware
-- seccomp/ ==> Documentation/userspace-api/seccomp_filter.rst
-- sem
-- sem_next_id [ sysv ipc ]
-- sg-big-buff [ generic SCSI device (sg) ]
-- shm_next_id [ sysv ipc ]
-- shm_rmid_forced
-- shmall
-- shmmax [ sysv ipc ]
-- shmmni
-- softlockup_all_cpu_backtrace
-- soft_watchdog
-- stack_erasing
-- stop-a [ SPARC only ]
-- sysrq ==> Documentation/admin-guide/sysrq.rst
-- sysctl_writes_strict
-- tainted ==> Documentation/admin-guide/tainted-kernels.rst
-- threads-max
-- unknown_nmi_panic
-- watchdog
-- watchdog_thresh
-- version
-
-
-acct:
-=====
+show up in ``/proc/sys/kernel``:
+
+.. contents:: :local:
+
+
+acct
+====
+
+::
-highwater lowwater frequency
+ highwater lowwater frequency
If BSD-style process accounting is enabled these values control
its behaviour. If free space on filesystem where the log lives
-goes below <lowwater>% accounting suspends. If free space gets
-above <highwater>% accounting resumes. <Frequency> determines
+goes below ``lowwater``% accounting suspends. If free space gets
+above ``highwater``% accounting resumes. ``frequency`` determines
how often do we check the amount of free space (value is in
seconds). Default:
-4 2 30
-That is, suspend accounting if there left <= 2% free; resume it
-if we got >=4%; consider information about amount of free space
-valid for 30 seconds.
+::
-acpi_video_flags:
-=================
+ 4 2 30
+
+That is, suspend accounting if free space drops below 2%; resume it
+if it increases to at least 4%; consider information about amount of
+free space valid for 30 seconds.
-flags
-See Doc*/kernel/power/video.txt, it allows mode of video boot to be
-set during run time.
+acpi_video_flags
+================
+See :doc:`/power/video`. This allows the video resume mode to be set,
+in a similar fashion to the ``acpi_sleep`` kernel parameter, by
+combining the following values:
+
+= =======
+1 s3_bios
+2 s3_mode
+4 s3_beep
+= =======
-auto_msgmni:
-============
+
+auto_msgmni
+===========
This variable has no effect and may be removed in future kernel
releases. Reading it always returns 0.
-Up to Linux 3.17, it enabled/disabled automatic recomputing of msgmni
-upon memory add/remove or upon ipc namespace creation/removal.
+Up to Linux 3.17, it enabled/disabled automatic recomputing of
+`msgmni`_
+upon memory add/remove or upon IPC namespace creation/removal.
Echoing "1" into this file enabled msgmni automatic recomputing.
-Echoing "0" turned it off. auto_msgmni default value was 1.
-
+Echoing "0" turned it off. The default value was 1.
-bootloader_type:
-================
-x86 bootloader identification
+bootloader_type (x86 only)
+==========================
This gives the bootloader type number as indicated by the bootloader,
shifted left by 4, and OR'd with the low four bits of the bootloader
version. The reason for this encoding is that this used to match the
-type_of_loader field in the kernel header; the encoding is kept for
+``type_of_loader`` field in the kernel header; the encoding is kept for
backwards compatibility. That is, if the full bootloader type number
is 0x15 and the full version number is 0x234, this file will contain
the value 340 = 0x154.
-See the type_of_loader and ext_loader_type fields in
-Documentation/x86/boot.rst for additional information.
-
+See the ``type_of_loader`` and ``ext_loader_type`` fields in
+:doc:`/x86/boot` for additional information.
-bootloader_version:
-===================
-x86 bootloader version
+bootloader_version (x86 only)
+=============================
The complete bootloader version number. In the example above, this
file will contain the value 564 = 0x234.
-See the type_of_loader and ext_loader_ver fields in
-Documentation/x86/boot.rst for additional information.
+See the ``type_of_loader`` and ``ext_loader_ver`` fields in
+:doc:`/x86/boot` for additional information.
-cap_last_cap:
-=============
+cap_last_cap
+============
Highest valid capability of the running kernel. Exports
-CAP_LAST_CAP from the kernel.
+``CAP_LAST_CAP`` from the kernel.
-core_pattern:
-=============
+core_pattern
+============
-core_pattern is used to specify a core dumpfile pattern name.
+``core_pattern`` is used to specify a core dumpfile pattern name.
* max length 127 characters; default value is "core"
-* core_pattern is used as a pattern template for the output filename;
- certain string patterns (beginning with '%') are substituted with
- their actual values.
-* backward compatibility with core_uses_pid:
+* ``core_pattern`` is used as a pattern template for the output
+ filename; certain string patterns (beginning with '%') are
+ substituted with their actual values.
+* backward compatibility with ``core_uses_pid``:
- If core_pattern does not include "%p" (default does not)
- and core_uses_pid is set, then .PID will be appended to
+ If ``core_pattern`` does not include "%p" (default does not)
+ and ``core_uses_pid`` is set, then .PID will be appended to
the filename.
-* corename format specifiers::
-
- %<NUL> '%' is dropped
- %% output one '%'
- %p pid
- %P global pid (init PID namespace)
- %i tid
- %I global tid (init PID namespace)
- %u uid (in initial user namespace)
- %g gid (in initial user namespace)
- %d dump mode, matches PR_SET_DUMPABLE and
- /proc/sys/fs/suid_dumpable
- %s signal number
- %t UNIX time of dump
- %h hostname
- %e executable filename (may be shortened)
- %E executable path
- %<OTHER> both are dropped
+* corename format specifiers
+
+ ======== ==========================================
+ %<NUL> '%' is dropped
+ %% output one '%'
+ %p pid
+ %P global pid (init PID namespace)
+ %i tid
+ %I global tid (init PID namespace)
+ %u uid (in initial user namespace)
+ %g gid (in initial user namespace)
+ %d dump mode, matches ``PR_SET_DUMPABLE`` and
+ ``/proc/sys/fs/suid_dumpable``
+ %s signal number
+ %t UNIX time of dump
+ %h hostname
+ %e executable filename (may be shortened)
+ %E executable path
+ %c maximum size of core file by resource limit RLIMIT_CORE
+ %<OTHER> both are dropped
+ ======== ==========================================
* If the first character of the pattern is a '|', the kernel will treat
the rest of the pattern as a command to run. The core dump will be
written to the standard input of that program instead of to a file.
-core_pipe_limit:
-================
+core_pipe_limit
+===============
-This sysctl is only applicable when core_pattern is configured to pipe
-core files to a user space helper (when the first character of
-core_pattern is a '|', see above). When collecting cores via a pipe
-to an application, it is occasionally useful for the collecting
-application to gather data about the crashing process from its
-/proc/pid directory. In order to do this safely, the kernel must wait
-for the collecting process to exit, so as not to remove the crashing
-processes proc files prematurely. This in turn creates the
-possibility that a misbehaving userspace collecting process can block
-the reaping of a crashed process simply by never exiting. This sysctl
-defends against that. It defines how many concurrent crashing
-processes may be piped to user space applications in parallel. If
-this value is exceeded, then those crashing processes above that value
-are noted via the kernel log and their cores are skipped. 0 is a
-special value, indicating that unlimited processes may be captured in
-parallel, but that no waiting will take place (i.e. the collecting
-process is not guaranteed access to /proc/<crashing pid>/). This
-value defaults to 0.
-
-
-core_uses_pid:
-==============
+This sysctl is only applicable when `core_pattern`_ is configured to
+pipe core files to a user space helper (when the first character of
+``core_pattern`` is a '|', see above).
+When collecting cores via a pipe to an application, it is occasionally
+useful for the collecting application to gather data about the
+crashing process from its ``/proc/pid`` directory.
+In order to do this safely, the kernel must wait for the collecting
+process to exit, so as not to remove the crashing process's proc files
+prematurely.
+This in turn creates the possibility that a misbehaving userspace
+collecting process can block the reaping of a crashed process simply
+by never exiting.
+This sysctl defends against that.
+It defines how many concurrent crashing processes may be piped to user
+space applications in parallel.
+If this value is exceeded, then those crashing processes above that
+value are noted via the kernel log and their cores are skipped.
+0 is a special value, indicating that unlimited processes may be
+captured in parallel, but that no waiting will take place (i.e. the
+collecting process is not guaranteed access to ``/proc/<crashing
+pid>/``).
+This value defaults to 0.
+
+
+core_uses_pid
+=============
The default coredump filename is "core". By setting
-core_uses_pid to 1, the coredump filename becomes core.PID.
-If core_pattern does not include "%p" (default does not)
-and core_uses_pid is set, then .PID will be appended to
+``core_uses_pid`` to 1, the coredump filename becomes core.PID.
+If `core_pattern`_ does not include "%p" (default does not)
+and ``core_uses_pid`` is set, then .PID will be appended to
the filename.
-ctrl-alt-del:
-=============
+ctrl-alt-del
+============
When the value in this file is 0, ctrl-alt-del is trapped and
-sent to the init(1) program to handle a graceful restart.
+sent to the ``init(1)`` program to handle a graceful restart.
When, however, the value is > 0, Linux's reaction to a Vulcan
Nerve Pinch (tm) will be an immediate reboot, without even
syncing its dirty buffers.
@@ -269,21 +204,22 @@ Note:
to decide what to do with it.
-dmesg_restrict:
-===============
+dmesg_restrict
+==============
This toggle indicates whether unprivileged users are prevented
-from using dmesg(8) to view messages from the kernel's log buffer.
-When dmesg_restrict is set to (0) there are no restrictions. When
-dmesg_restrict is set set to (1), users must have CAP_SYSLOG to use
-dmesg(8).
+from using ``dmesg(8)`` to view messages from the kernel's log
+buffer.
+When ``dmesg_restrict`` is set to 0 there are no restrictions.
+When ``dmesg_restrict`` is set to 1, users must have
+``CAP_SYSLOG`` to use ``dmesg(8)``.
-The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
-default value of dmesg_restrict.
+The kernel config option ``CONFIG_SECURITY_DMESG_RESTRICT`` sets the
+default value of ``dmesg_restrict``.
-domainname & hostname:
-======================
+domainname & hostname
+=====================
These files can be used to set the NIS/YP domainname and the
hostname of your box in exactly the same way as the commands
@@ -302,167 +238,206 @@ hostname "darkstar" and DNS (Internet Domain Name Server)
domainname "frop.org", not to be confused with the NIS (Network
Information Service) or YP (Yellow Pages) domainname. These two
domain names are in general different. For a detailed discussion
-see the hostname(1) man page.
+see the ``hostname(1)`` man page.
-hardlockup_all_cpu_backtrace:
-=============================
+hardlockup_all_cpu_backtrace
+============================
This value controls the hard lockup detector behavior when a hard
lockup condition is detected as to whether or not to gather further
debug information. If enabled, arch-specific all-CPU stack dumping
will be initiated.
-0: do nothing. This is the default behavior.
-
-1: on detection capture more debug information.
+= ============================================
+0 Do nothing. This is the default behavior.
+1 On detection capture more debug information.
+= ============================================
-hardlockup_panic:
-=================
+hardlockup_panic
+================
This parameter can be used to control whether the kernel panics
when a hard lockup is detected.
- 0 - don't panic on hard lockup
- 1 - panic on hard lockup
+= ===========================
+0 Don't panic on hard lockup.
+1 Panic on hard lockup.
+= ===========================
-See Documentation/admin-guide/lockup-watchdogs.rst for more information. This can
-also be set using the nmi_watchdog kernel parameter.
+See :doc:`/admin-guide/lockup-watchdogs` for more information.
+This can also be set using the nmi_watchdog kernel parameter.
-hotplug:
-========
+hotplug
+=======
Path for the hotplug policy agent.
-Default value is "/sbin/hotplug".
+Default value is "``/sbin/hotplug``".
-hung_task_panic:
-================
+hung_task_panic
+===============
Controls the kernel's behavior when a hung task is detected.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
-
-0: continue operation. This is the default behavior.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-1: panic immediately.
+= =================================================
+0 Continue operation. This is the default behavior.
+1 Panic immediately.
+= =================================================
-hung_task_check_count:
-======================
+hung_task_check_count
+=====================
The upper bound on the number of tasks that are checked.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-hung_task_timeout_secs:
-=======================
+hung_task_timeout_secs
+======================
When a task in D state did not get scheduled
for more than this value report a warning.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-0: means infinite timeout - no checking done.
+0 means infinite timeout, no checking is done.
-Possible values to set are in range {0..LONG_MAX/HZ}.
+Possible values to set are in range {0:``LONG_MAX``/``HZ``}.
-hung_task_check_interval_secs:
-==============================
+hung_task_check_interval_secs
+=============================
Hung task check interval. If hung task checking is enabled
-(see hung_task_timeout_secs), the check is done every
-hung_task_check_interval_secs seconds.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+(see `hung_task_timeout_secs`_), the check is done every
+``hung_task_check_interval_secs`` seconds.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-0 (default): means use hung_task_timeout_secs as checking interval.
-Possible values to set are in range {0..LONG_MAX/HZ}.
+0 (default) means use ``hung_task_timeout_secs`` as checking
+interval.
+Possible values to set are in range {0:``LONG_MAX``/``HZ``}.
-hung_task_warnings:
-===================
+
+hung_task_warnings
+==================
The maximum number of warnings to report. During a check interval
if a hung task is detected, this value is decreased by 1.
When this value reaches 0, no more warnings will be reported.
-This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-1: report an infinite number of warnings.
-hyperv_record_panic_msg:
-========================
+hyperv_record_panic_msg
+=======================
Controls whether the panic kmsg data should be reported to Hyper-V.
-0: do not report panic kmsg data.
+= =========================================================
+0 Do not report panic kmsg data.
+1 Report the panic kmsg data. This is the default behavior.
+= =========================================================
-1: report the panic kmsg data. This is the default behavior.
+kexec_load_disabled
+===================
-kexec_load_disabled:
-====================
-
-A toggle indicating if the kexec_load syscall has been disabled. This
-value defaults to 0 (false: kexec_load enabled), but can be set to 1
-(true: kexec_load disabled). Once true, kexec can no longer be used, and
-the toggle cannot be set back to false. This allows a kexec image to be
-loaded before disabling the syscall, allowing a system to set up (and
-later use) an image without it being altered. Generally used together
-with the "modules_disabled" sysctl.
+A toggle indicating if the ``kexec_load`` syscall has been disabled.
+This value defaults to 0 (false: ``kexec_load`` enabled), but can be
+set to 1 (true: ``kexec_load`` disabled).
+Once true, kexec can no longer be used, and the toggle cannot be set
+back to false.
+This allows a kexec image to be loaded before disabling the syscall,
+allowing a system to set up (and later use) an image without it being
+altered.
+Generally used together with the `modules_disabled`_ sysctl.
-kptr_restrict:
-==============
+kptr_restrict
+=============
This toggle indicates whether restrictions are placed on
-exposing kernel addresses via /proc and other interfaces.
+exposing kernel addresses via ``/proc`` and other interfaces.
+
+When ``kptr_restrict`` is set to 0 (the default) the address is hashed
+before printing.
+(This is the equivalent to %p.)
+
+When ``kptr_restrict`` is set to 1, kernel pointers printed using the
+%pK format specifier will be replaced with 0s unless the user has
+``CAP_SYSLOG`` and effective user and group ids are equal to the real
+ids.
+This is because %pK checks are done at read() time rather than open()
+time, so if permissions are elevated between the open() and the read()
+(e.g. via a setuid binary) then %pK will not leak kernel pointers to
+unprivileged users.
+Note, this is a temporary solution only.
+The correct long-term solution is to do the permission checks at
+open() time.
+Consider removing world read permissions from files that use %pK, and
+using `dmesg_restrict`_ to protect against uses of %pK in ``dmesg(8)``
+if leaking kernel pointer values to unprivileged users is a concern.
+
+When ``kptr_restrict`` is set to 2, kernel pointers printed using
+%pK will be replaced with 0s regardless of privileges.
+
+
+modprobe
+========
-When kptr_restrict is set to 0 (the default) the address is hashed before
-printing. (This is the equivalent to %p.)
+This gives the full path of the modprobe command which the kernel will
+use to load modules. This can be used to debug module loading
+requests::
-When kptr_restrict is set to (1), kernel pointers printed using the %pK
-format specifier will be replaced with 0's unless the user has CAP_SYSLOG
-and effective user and group ids are equal to the real ids. This is
-because %pK checks are done at read() time rather than open() time, so
-if permissions are elevated between the open() and the read() (e.g via
-a setuid binary) then %pK will not leak kernel pointers to unprivileged
-users. Note, this is a temporary solution only. The correct long-term
-solution is to do the permission checks at open() time. Consider removing
-world read permissions from files that use %pK, and using dmesg_restrict
-to protect against uses of %pK in dmesg(8) if leaking kernel pointer
-values to unprivileged users is a concern.
+ echo '#! /bin/sh' > /tmp/modprobe
+ echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe
+ echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe
+ chmod a+x /tmp/modprobe
+ echo /tmp/modprobe > /proc/sys/kernel/modprobe
-When kptr_restrict is set to (2), kernel pointers printed using
-%pK will be replaced with 0's regardless of privileges.
+This only applies when the *kernel* is requesting that the module be
+loaded; it won't have any effect if the module is being loaded
+explicitly using ``modprobe`` from userspace.
-l2cr: (PPC only)
+modules_disabled
================
-This flag controls the L2 cache of G3 processor boards. If
-0, the cache is disabled. Enabled if nonzero.
-
-
-modules_disabled:
-=================
-
A toggle value indicating if modules are allowed to be loaded
in an otherwise modular kernel. This toggle defaults to off
(0), but can be set true (1). Once true, modules can be
neither loaded nor unloaded, and the toggle cannot be set back
-to false. Generally used with the "kexec_load_disabled" toggle.
+to false. Generally used with the `kexec_load_disabled`_ toggle.
+
+
+.. _msgmni:
+
+msgmax, msgmnb, and msgmni
+==========================
+
+``msgmax`` is the maximum size of an IPC message, in bytes. 8192 by
+default (``MSGMAX``).
+``msgmnb`` is the maximum size of an IPC queue, in bytes. 16384 by
+default (``MSGMNB``).
-msg_next_id, sem_next_id, and shm_next_id:
-==========================================
+``msgmni`` is the maximum number of IPC queues. 32000 by default
+(``MSGMNI``).
+
+
+msg_next_id, sem_next_id, and shm_next_id (System V IPC)
+========================================================
These three toggles allow specifying the desired id for the next allocated IPC
object: message, semaphore or shared memory respectively.
By default they are equal to -1, which means generic allocation logic.
-Possible values to set are in range {0..INT_MAX}.
+Possible values to set are in range {0:``INT_MAX``}.
Notes:
1) kernel doesn't guarantee, that new object will have desired id. So,
@@ -472,15 +447,16 @@ Notes:
fails, it is undefined if the value remains unmodified or is reset to -1.
-nmi_watchdog:
-=============
+nmi_watchdog
+============
This parameter can be used to control the NMI watchdog
(i.e. the hard lockup detector) on x86 systems.
-0 - disable the hard lockup detector
-
-1 - enable the hard lockup detector
+= =================================
+0 Disable the hard lockup detector.
+1 Enable the hard lockup detector.
+= =================================
The hard lockup detector monitors each CPU for its ability to respond to
timer interrupts. The mechanism utilizes CPU performance counter registers
@@ -492,11 +468,11 @@ in a KVM virtual machine. This default can be overridden by adding::
nmi_watchdog=1
-to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst).
+to the guest kernel command line (see :doc:`/admin-guide/kernel-parameters`).
-numa_balancing:
-===============
+numa_balancing
+==============
Enables/disables automatic page fault based NUMA memory
balancing. Memory is moved automatically to nodes
@@ -514,9 +490,10 @@ ideally is offset by improved memory locality but there is no universal
guarantee. If the target workload is already bound to NUMA nodes then this
feature should be disabled. Otherwise, if the system overhead from the
feature is too high then the rate the kernel samples for NUMA hinting
-faults may be controlled by the numa_balancing_scan_period_min_ms,
+faults may be controlled by the `numa_balancing_scan_period_min_ms,
numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
-numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls.
+numa_balancing_scan_size_mb`_, and numa_balancing_settle_count sysctls.
+
numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
===============================================================================================================================
@@ -542,23 +519,23 @@ workload pattern changes and minimises performance impact due to remote
memory accesses. These sysctls control the thresholds for scan delays and
the number of pages scanned.
-numa_balancing_scan_period_min_ms is the minimum time in milliseconds to
+``numa_balancing_scan_period_min_ms`` is the minimum time in milliseconds to
scan a task's virtual memory. It effectively controls the maximum scanning
rate for each task.
-numa_balancing_scan_delay_ms is the starting "scan delay" used for a task
+``numa_balancing_scan_delay_ms`` is the starting "scan delay" used for a task
when it initially forks.
-numa_balancing_scan_period_max_ms is the maximum time in milliseconds to
+``numa_balancing_scan_period_max_ms`` is the maximum time in milliseconds to
scan a task's virtual memory. It effectively controls the minimum scanning
rate for each task.
-numa_balancing_scan_size_mb is how many megabytes worth of pages are
+``numa_balancing_scan_size_mb`` is how many megabytes worth of pages are
scanned for a given scan.
-osrelease, ostype & version:
-============================
+osrelease, ostype & version
+===========================
::
@@ -569,15 +546,16 @@ osrelease, ostype & version:
# cat version
#5 Wed Feb 25 21:49:24 MET 1998
-The files osrelease and ostype should be clear enough. Version
+The files ``osrelease`` and ``ostype`` should be clear enough.
+``version``
needs a little more clarification however. The '#5' means that
this is the fifth kernel built from this source base and the
date behind it indicates the time the kernel was built.
The only way to tune these values is to rebuild the kernel :-)
-overflowgid & overflowuid:
-==========================
+overflowgid & overflowuid
+=========================
if your architecture did not always support 32-bit UIDs (i.e. arm,
i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
@@ -588,108 +566,119 @@ These sysctls allow you to change the value of the fixed UID and GID.
The default is 65534.
+panic
+=====
+
+The value in this file determines the behaviour of the kernel on a
panic:
-======
-The value in this file represents the number of seconds the kernel
-waits before rebooting on a panic. When you use the software watchdog,
-the recommended setting is 60.
+* if zero, the kernel will loop forever;
+* if negative, the kernel will reboot immediately;
+* if positive, the kernel will reboot after the corresponding number
+ of seconds.
+When you use the software watchdog, the recommended setting is 60.
-panic_on_io_nmi:
-================
+
+panic_on_io_nmi
+===============
Controls the kernel's behavior when a CPU receives an NMI caused by
an IO error.
-0: try to continue operation (default)
-
-1: panic immediately. The IO error triggered an NMI. This indicates a
- serious system condition which could result in IO data corruption.
- Rather than continuing, panicking might be a better choice. Some
- servers issue this sort of NMI when the dump button is pushed,
- and you can use this option to take a crash dump.
+= ==================================================================
+0 Try to continue operation (default).
+1 Panic immediately. The IO error triggered an NMI. This indicates a
+ serious system condition which could result in IO data corruption.
+ Rather than continuing, panicking might be a better choice. Some
+ servers issue this sort of NMI when the dump button is pushed,
+ and you can use this option to take a crash dump.
+= ==================================================================
-panic_on_oops:
-==============
+panic_on_oops
+=============
Controls the kernel's behaviour when an oops or BUG is encountered.
-0: try to continue operation
-
-1: panic immediately. If the `panic` sysctl is also non-zero then the
- machine will be rebooted.
+= ===================================================================
+0 Try to continue operation.
+1 Panic immediately. If the `panic` sysctl is also non-zero then the
+ machine will be rebooted.
+= ===================================================================
-panic_on_stackoverflow:
-=======================
+panic_on_stackoverflow
+======================
Controls the kernel's behavior when detecting the overflows of
kernel, IRQ and exception stacks except a user stack.
-This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
-
-0: try to continue operation.
+This file shows up if ``CONFIG_DEBUG_STACKOVERFLOW`` is enabled.
-1: panic immediately.
+= ==========================
+0 Try to continue operation.
+1 Panic immediately.
+= ==========================
-panic_on_unrecovered_nmi:
-=========================
+panic_on_unrecovered_nmi
+========================
The default Linux behaviour on an NMI of either memory or unknown is
to continue operation. For many environments such as scientific
computing it is preferable that the box is taken out and the error
dealt with than an uncorrected parity/ECC error get propagated.
-A small number of systems do generate NMI's for bizarre random reasons
+A small number of systems do generate NMIs for bizarre random reasons
such as power management so the default is off. That sysctl works like
the existing panic controls already in that directory.
-panic_on_warn:
-==============
+panic_on_warn
+=============
Calls panic() in the WARN() path when set to 1. This is useful to avoid
a kernel rebuild when attempting to kdump at the location of a WARN().
-0: only WARN(), default behaviour.
-
-1: call panic() after printing out WARN() location.
+= ================================================
+0 Only WARN(), default behaviour.
+1 Call panic() after printing out WARN() location.
+= ================================================
-panic_print:
-============
+panic_print
+===========
Bitmask for printing system info when panic happens. User can choose
combination of the following bits:
-===== ========================================
+===== ============================================
bit 0 print all tasks info
bit 1 print system memory info
bit 2 print timer info
-bit 3 print locks info if CONFIG_LOCKDEP is on
+bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
bit 4 print ftrace buffer
-===== ========================================
+===== ============================================
So for example to print tasks and memory info on panic, user can::
echo 3 > /proc/sys/kernel/panic_print
-panic_on_rcu_stall:
-===================
+panic_on_rcu_stall
+==================
When set to 1, calls panic() after RCU stall detection messages. This
is useful to define the root cause of RCU stalls using a vmcore.
-0: do not panic() when RCU stall takes place, default behavior.
+= ============================================================
+0 Do not panic() when RCU stall takes place, default behavior.
+1 panic() after printing RCU stall messages.
+= ============================================================
-1: panic() after printing RCU stall messages.
-
-perf_cpu_time_max_percent:
-==========================
+perf_cpu_time_max_percent
+=========================
Hints to the kernel how much CPU time it should be allowed to
use to handle perf sampling events. If the perf subsystem
@@ -702,171 +691,179 @@ unexpectedly take too long to execute, the NMIs can become
stacked up next to each other so much that nothing else is
allowed to execute.
-0:
- disable the mechanism. Do not monitor or correct perf's
- sampling rate no matter how CPU time it takes.
+===== ========================================================
+0 Disable the mechanism. Do not monitor or correct perf's
+ sampling rate no matter how much CPU time it takes.
-1-100:
- attempt to throttle perf's sample rate to this
- percentage of CPU. Note: the kernel calculates an
- "expected" length of each sample event. 100 here means
- 100% of that expected length. Even if this is set to
- 100, you may still see sample throttling if this
- length is exceeded. Set to 0 if you truly do not care
- how much CPU is consumed.
+1-100 Attempt to throttle perf's sample rate to this
+ percentage of CPU. Note: the kernel calculates an
+ "expected" length of each sample event. 100 here means
+ 100% of that expected length. Even if this is set to
+ 100, you may still see sample throttling if this
+ length is exceeded. Set to 0 if you truly do not care
+ how much CPU is consumed.
+===== ========================================================
-perf_event_paranoid:
-====================
+perf_event_paranoid
+===================
Controls use of the performance events system by unprivileged
users (without CAP_SYS_ADMIN). The default value is 2.
=== ==================================================================
- -1 Allow use of (almost) all events by all users
+ -1 Allow use of (almost) all events by all users.
- Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
+ Ignore mlock limit after perf_event_mlock_kb without
+ ``CAP_IPC_LOCK``.
->=0 Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
+>=0 Disallow ftrace function tracepoint by users without
+ ``CAP_SYS_ADMIN``.
- Disallow raw tracepoint access by users without CAP_SYS_ADMIN
+ Disallow raw tracepoint access by users without ``CAP_SYS_ADMIN``.
->=1 Disallow CPU event access by users without CAP_SYS_ADMIN
+>=1 Disallow CPU event access by users without ``CAP_SYS_ADMIN``.
->=2 Disallow kernel profiling by users without CAP_SYS_ADMIN
+>=2 Disallow kernel profiling by users without ``CAP_SYS_ADMIN``.
=== ==================================================================
-perf_event_max_stack:
-=====================
+perf_event_max_stack
+====================
-Controls maximum number of stack frames to copy for (attr.sample_type &
-PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
-'perf record -g' or 'perf trace --call-graph fp'.
+Controls maximum number of stack frames to copy for (``attr.sample_type &
+PERF_SAMPLE_CALLCHAIN``) configured events, for instance, when using
+'``perf record -g``' or '``perf trace --call-graph fp``'.
This can only be done when no events are in use that have callchains
-enabled, otherwise writing to this file will return -EBUSY.
+enabled, otherwise writing to this file will return ``-EBUSY``.
The default value is 127.
-perf_event_mlock_kb:
-====================
+perf_event_mlock_kb
+===================
Control size of per-cpu ring buffer not counted against mlock limit.
The default value is 512 + 1 page
-perf_event_max_contexts_per_stack:
-==================================
+perf_event_max_contexts_per_stack
+=================================
Controls maximum number of stack frame context entries for
-(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for
-instance, when using 'perf record -g' or 'perf trace --call-graph fp'.
+(``attr.sample_type & PERF_SAMPLE_CALLCHAIN``) configured events, for
+instance, when using '``perf record -g``' or '``perf trace --call-graph fp``'.
This can only be done when no events are in use that have callchains
-enabled, otherwise writing to this file will return -EBUSY.
+enabled, otherwise writing to this file will return ``-EBUSY``.
The default value is 8.
-pid_max:
-========
+pid_max
+=======
PID allocation wrap value. When the kernel's next PID value
reaches this value, it wraps back to a minimum PID value.
-PIDs of value pid_max or larger are not allocated.
+PIDs of value ``pid_max`` or larger are not allocated.
-ns_last_pid:
-============
+ns_last_pid
+===========
The last pid allocated in the current (the one task using this sysctl
lives in) pid namespace. When selecting a pid for a next task on fork
kernel tries to allocate a number starting from this one.
-powersave-nap: (PPC only)
-=========================
+powersave-nap (PPC only)
+========================
If set, Linux-PPC will use the 'nap' mode of powersaving,
otherwise the 'doze' mode will be used.
+
==============================================================
-printk:
-=======
+printk
+======
-The four values in printk denote: console_loglevel,
-default_message_loglevel, minimum_console_loglevel and
-default_console_loglevel respectively.
+The four values in printk denote: ``console_loglevel``,
+``default_message_loglevel``, ``minimum_console_loglevel`` and
+``default_console_loglevel`` respectively.
These values influence printk() behavior when printing or
-logging error messages. See 'man 2 syslog' for more info on
+logging error messages. See '``man 2 syslog``' for more info on
the different loglevels.
-- console_loglevel:
- messages with a higher priority than
- this will be printed to the console
-- default_message_loglevel:
- messages without an explicit priority
- will be printed with this priority
-- minimum_console_loglevel:
- minimum (highest) value to which
- console_loglevel can be set
-- default_console_loglevel:
- default value for console_loglevel
+======================== =====================================
+console_loglevel messages with a higher priority than
+ this will be printed to the console
+default_message_loglevel messages without an explicit priority
+ will be printed with this priority
+minimum_console_loglevel minimum (highest) value to which
+ console_loglevel can be set
+default_console_loglevel default value for console_loglevel
+======================== =====================================
-printk_delay:
-=============
+printk_delay
+============
-Delay each printk message in printk_delay milliseconds
+Delay each printk message in ``printk_delay`` milliseconds
Value from 0 - 10000 is allowed.
-printk_ratelimit:
-=================
+printk_ratelimit
+================
-Some warning messages are rate limited. printk_ratelimit specifies
+Some warning messages are rate limited. ``printk_ratelimit`` specifies
the minimum length of time between these messages (in seconds).
The default value is 5 seconds.
A value of 0 will disable rate limiting.
-printk_ratelimit_burst:
-=======================
+printk_ratelimit_burst
+======================
-While long term we enforce one message per printk_ratelimit
+While long term we enforce one message per `printk_ratelimit`_
seconds, we do allow a burst of messages to pass through.
-printk_ratelimit_burst specifies the number of messages we can
+``printk_ratelimit_burst`` specifies the number of messages we can
send before ratelimiting kicks in.
The default value is 10 messages.
-printk_devkmsg:
-===============
-
-Control the logging to /dev/kmsg from userspace:
-
-ratelimit:
- default, ratelimited
+printk_devkmsg
+==============
-on: unlimited logging to /dev/kmsg from userspace
+Control the logging to ``/dev/kmsg`` from userspace:
-off: logging to /dev/kmsg disabled
+========= =============================================
+ratelimit default, ratelimited
+on unlimited logging to /dev/kmsg from userspace
+off logging to /dev/kmsg disabled
+========= =============================================
-The kernel command line parameter printk.devkmsg= overrides this and is
+The kernel command line parameter ``printk.devkmsg=`` overrides this and is
a one-time setting until next reboot: once set, it cannot be changed by
this sysctl interface anymore.
+==============================================================
-randomize_va_space:
-===================
+
+pty
+===
+
+See Documentation/filesystems/devpts.txt.
+
+
+randomize_va_space
+==================
This option can be used to select the type of process address
space randomization that is used in the system, for architectures
@@ -881,10 +878,10 @@ that support this feature.
This, among other things, implies that shared libraries will be
loaded to random addresses. Also for PIE-linked binaries, the
location of code start is randomized. This is the default if the
- CONFIG_COMPAT_BRK option is enabled.
+ ``CONFIG_COMPAT_BRK`` option is enabled.
2 Additionally enable heap randomization. This is the default if
- CONFIG_COMPAT_BRK is disabled.
+ ``CONFIG_COMPAT_BRK`` is disabled.
There are a few legacy applications out there (such as some ancient
versions of libc.so.5 from 1996) that assume that brk area starts
@@ -894,31 +891,27 @@ that support this feature.
systems it is safe to choose full randomization.
Systems with ancient and/or broken binaries should be configured
- with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
+ with ``CONFIG_COMPAT_BRK`` enabled, which excludes the heap from process
address space randomization.
== ===========================================================================
-reboot-cmd: (Sparc only)
-========================
-
-??? This seems to be a way to give an argument to the Sparc
-ROM/Flash boot loader. Maybe to tell it what to do after
-rebooting. ???
+real-root-dev
+=============
+See :doc:`/admin-guide/initrd`.
-rtsig-max & rtsig-nr:
-=====================
-The file rtsig-max can be used to tune the maximum number
-of POSIX realtime (queued) signals that can be outstanding
-in the system.
+reboot-cmd (SPARC only)
+=======================
-rtsig-nr shows the number of RT signals currently queued.
+??? This seems to be a way to give an argument to the Sparc
+ROM/Flash boot loader. Maybe to tell it what to do after
+rebooting. ???
-sched_energy_aware:
-===================
+sched_energy_aware
+==================
Enables/disables Energy Aware Scheduling (EAS). EAS starts
automatically on platforms where it can run (that is,
@@ -928,75 +921,88 @@ requirements for EAS but you do not want to use it, change
this value to 0.
-sched_schedstats:
-=================
+sched_schedstats
+================
Enables/disables scheduler statistics. Enabling this feature
incurs a small amount of overhead in the scheduler but is
useful for debugging and performance tuning.
-sg-big-buff:
-============
+seccomp
+=======
+
+See :doc:`/userspace-api/seccomp_filter`.
+
+
+sg-big-buff
+===========
This file shows the size of the generic SCSI (sg) buffer.
You can't tune it just yet, but you could change it on
-compile time by editing include/scsi/sg.h and changing
-the value of SG_BIG_BUFF.
+compile time by editing ``include/scsi/sg.h`` and changing
+the value of ``SG_BIG_BUFF``.
There shouldn't be any reason to change this value. If
you can come up with one, you probably know what you
are doing anyway :)
-shmall:
-=======
+shmall
+======
This parameter sets the total amount of shared memory pages that
-can be used system wide. Hence, SHMALL should always be at least
-ceil(shmmax/PAGE_SIZE).
+can be used system wide. Hence, ``shmall`` should always be at least
+``ceil(shmmax/PAGE_SIZE)``.
-If you are not sure what the default PAGE_SIZE is on your Linux
-system, you can run the following command:
+If you are not sure what the default ``PAGE_SIZE`` is on your Linux
+system, you can run the following command::
# getconf PAGE_SIZE
-shmmax:
-=======
+shmmax
+======
This value can be used to query and set the run time limit
on the maximum shared memory segment size that can be created.
Shared memory segments up to 1Gb are now supported in the
-kernel. This value defaults to SHMMAX.
+kernel. This value defaults to ``SHMMAX``.
-shm_rmid_forced:
-================
+shmmni
+======
+
+This value determines the maximum number of shared memory segments.
+4096 by default (``SHMMNI``).
+
+
+shm_rmid_forced
+===============
Linux lets you set resource limits, including how much memory one
-process can consume, via setrlimit(2). Unfortunately, shared memory
+process can consume, via ``setrlimit(2)``. Unfortunately, shared memory
segments are allowed to exist without association with any process, and
thus might not be counted against any resource limits. If enabled,
shared memory segments are automatically destroyed when their attach
count becomes zero after a detach or a process termination. It will
also destroy segments that were created, but never attached to, on exit
-from the process. The only use left for IPC_RMID is to immediately
+from the process. The only use left for ``IPC_RMID`` is to immediately
destroy an unattached segment. Of course, this breaks the way things are
defined, so some applications might stop working. Note that this
feature will do you no good unless you also configure your resource
-limits (in particular, RLIMIT_AS and RLIMIT_NPROC). Most systems don't
+limits (in particular, ``RLIMIT_AS`` and ``RLIMIT_NPROC``). Most systems don't
need this.
Note that if you change this from 0 to 1, already created segments
without users and with a dead originative process will be destroyed.
-sysctl_writes_strict:
-=====================
+sysctl_writes_strict
+====================
Control how file position affects the behavior of updating sysctl values
-via the /proc/sys interface:
+via the ``/proc/sys`` interface:
== ======================================================================
-1 Legacy per-write sysctl value handling, with no printk warnings.
@@ -1013,8 +1019,8 @@ via the /proc/sys interface:
== ======================================================================
-softlockup_all_cpu_backtrace:
-=============================
+softlockup_all_cpu_backtrace
+============================
This value controls the soft lockup detector thread's behavior
when a soft lockup condition is detected as to whether or not
@@ -1024,43 +1030,80 @@ be issued an NMI and instructed to capture stack trace.
This feature is only applicable for architectures which support
NMI.
-0: do nothing. This is the default behavior.
+= ============================================
+0 Do nothing. This is the default behavior.
+1 On detection capture more debug information.
+= ============================================
-1: on detection capture more debug information.
+softlockup_panic
+================
-soft_watchdog:
-==============
+This parameter can be used to control whether the kernel panics
+when a soft lockup is detected.
-This parameter can be used to control the soft lockup detector.
+= ============================================
+0 Don't panic on soft lockup.
+1 Panic on soft lockup.
+= ============================================
- 0 - disable the soft lockup detector
+This can also be set using the softlockup_panic kernel parameter.
- 1 - enable the soft lockup detector
+
+soft_watchdog
+=============
+
+This parameter can be used to control the soft lockup detector.
+
+= =================================
+0 Disable the soft lockup detector.
+1 Enable the soft lockup detector.
+= =================================
The soft lockup detector monitors CPUs for threads that are hogging the CPUs
without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
from running. The mechanism depends on the CPU's ability to respond to timer
interrupts which are needed for the 'watchdog/N' threads to be woken up by
-the watchdog timer function, otherwise the NMI watchdog - if enabled - can
+the watchdog timer function, otherwise the NMI watchdog — if enabled — can
detect a hard lockup condition.
-stack_erasing:
-==============
+stack_erasing
+=============
This parameter can be used to control kernel stack erasing at the end
-of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
+of syscalls for kernels built with ``CONFIG_GCC_PLUGIN_STACKLEAK``.
That erasing reduces the information which kernel stack leak bugs
can reveal and blocks some uninitialized stack variable attacks.
The tradeoff is the performance impact: on a single CPU system kernel
compilation sees a 1% slowdown, other systems and workloads may vary.
- 0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+= ====================================================================
+0 Kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+1 Kernel stack erasing is enabled (default), it is performed before
+ returning to the userspace at the end of syscalls.
+= ====================================================================
+
+
+stop-a (SPARC only)
+===================
+
+Controls Stop-A:
+
+= ====================================
+0 Stop-A has no effect.
+1 Stop-A breaks to the PROM (default).
+= ====================================
+
+Stop-A is always enabled on a panic, so that the user can return to
+the boot PROM.
- 1: kernel stack erasing is enabled (default), it is performed before
- returning to the userspace at the end of syscalls.
+
+sysrq
+=====
+
+See :doc:`/admin-guide/sysrq`.
tainted
@@ -1090,30 +1133,30 @@ ORed together. The letters are seen in "Tainted" line of Oops reports.
131072 `(T)` The kernel was built with the struct randomization plugin
====== ===== ==============================================================
-See Documentation/admin-guide/tainted-kernels.rst for more information.
+See :doc:`/admin-guide/tainted-kernels` for more information.
-threads-max:
-============
+threads-max
+===========
This value controls the maximum number of threads that can be created
-using fork().
+using ``fork()``.
During initialization the kernel sets this value such that even if the
maximum number of threads is created, the thread structures occupy only
a part (1/8th) of the available RAM pages.
-The minimum value that can be written to threads-max is 1.
+The minimum value that can be written to ``threads-max`` is 1.
-The maximum value that can be written to threads-max is given by the
-constant FUTEX_TID_MASK (0x3fffffff).
+The maximum value that can be written to ``threads-max`` is given by the
+constant ``FUTEX_TID_MASK`` (0x3fffffff).
-If a value outside of this range is written to threads-max an error
-EINVAL occurs.
+If a value outside of this range is written to ``threads-max`` an
+``EINVAL`` error occurs.
-unknown_nmi_panic:
-==================
+unknown_nmi_panic
+=================
The value in this file affects behavior of handling NMI. When the
value is non-zero, unknown NMI is trapped and then panic occurs. At
@@ -1123,37 +1166,39 @@ NMI switch that most IA32 servers have fires unknown NMI up, for
example. If a system hangs up, try pressing the NMI switch.
-watchdog:
-=========
+watchdog
+========
This parameter can be used to disable or enable the soft lockup detector
-_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
-
- 0 - disable both lockup detectors
+*and* the NMI watchdog (i.e. the hard lockup detector) at the same time.
- 1 - enable both lockup detectors
+= ==============================
+0 Disable both lockup detectors.
+1 Enable both lockup detectors.
+= ==============================
The soft lockup detector and the NMI watchdog can also be disabled or
-enabled individually, using the soft_watchdog and nmi_watchdog parameters.
-If the watchdog parameter is read, for example by executing::
+enabled individually, using the ``soft_watchdog`` and ``nmi_watchdog``
+parameters.
+If the ``watchdog`` parameter is read, for example by executing::
cat /proc/sys/kernel/watchdog
-the output of this command (0 or 1) shows the logical OR of soft_watchdog
-and nmi_watchdog.
+the output of this command (0 or 1) shows the logical OR of
+``soft_watchdog`` and ``nmi_watchdog``.
-watchdog_cpumask:
-=================
+watchdog_cpumask
+================
This value can be used to control on which cpus the watchdog may run.
-The default cpumask is all possible cores, but if NO_HZ_FULL is
+The default cpumask is all possible cores, but if ``NO_HZ_FULL`` is
enabled in the kernel config, and cores are specified with the
-nohz_full= boot argument, those cores are excluded by default.
+``nohz_full=`` boot argument, those cores are excluded by default.
Offline cores can be included in this mask, and if the core is later
brought online, the watchdog will be started based on the mask value.
-Typically this value would only be touched in the nohz_full case
+Typically this value would only be touched in the ``nohz_full`` case
to re-enable cores that by default were not running the watchdog,
if a kernel lockup was suspected on those cores.
@@ -1164,12 +1209,12 @@ might say::
echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask
-watchdog_thresh:
-================
+watchdog_thresh
+===============
This value can be used to control the frequency of hrtimer and NMI
events and the soft and hard lockup thresholds. The default threshold
is 10 seconds.
-The softlockup threshold is (2 * watchdog_thresh). Setting this
+The softlockup threshold is (``2 * watchdog_thresh``). Setting this
tunable to zero will disable lockup detection altogether.
diff --git a/Documentation/arm/tcm.rst b/Documentation/arm/tcm.rst
index effd9c7bc968..b256f9783883 100644
--- a/Documentation/arm/tcm.rst
+++ b/Documentation/arm/tcm.rst
@@ -4,18 +4,18 @@ ARM TCM (Tightly-Coupled Memory) handling in Linux
Written by Linus Walleij <linus.walleij@stericsson.com>
-Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory).
+Some ARM SoCs have a so-called TCM (Tightly-Coupled Memory).
This is usually just a few (4-64) KiB of RAM inside the ARM
processor.
-Due to being embedded inside the CPU The TCM has a
+Due to being embedded inside the CPU, the TCM has a
Harvard-architecture, so there is an ITCM (instruction TCM)
and a DTCM (data TCM). The DTCM can not contain any
instructions, but the ITCM can actually contain data.
The size of DTCM or ITCM is minimum 4KiB so the typical
minimum configuration is 4KiB ITCM and 4KiB DTCM.
-ARM CPU:s have special registers to read out status, physical
+ARM CPUs have special registers to read out status, physical
location and size of TCM memories. arch/arm/include/asm/cputype.h
defines a CPUID_TCM register that you can read out from the
system control coprocessor. Documentation from ARM can be found
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 9120e59578dc..2c08c628febd 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -110,6 +110,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
+----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX GICv3 | #38539 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
+----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 |
diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst
index 2cf258d64bbe..160a5148b915 100644
--- a/Documentation/block/capability.rst
+++ b/Documentation/block/capability.rst
@@ -2,17 +2,9 @@
Generic Block Device Capability
===============================
-This file documents the sysfs file block/<disk>/capability
+This file documents the sysfs file ``block/<disk>/capability``.
-capability is a hex word indicating which capabilities a specific disk
-supports. For more information on bits not listed here, see
-include/linux/genhd.h
+``capability`` is a bitfield, printed in hexadecimal, indicating which
+capabilities a specific block device supports:
-GENHD_FL_MEDIA_CHANGE_NOTIFY
-----------------------------
-
-Value: 4
-
-When this bit is set, the disk supports Asynchronous Notification
-of media change events. These events will be broadcast to user
-space via kernel uevent.
+.. kernel-doc:: include/linux/genhd.h
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 3c7bdf4cd31f..9ae8e9abf846 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -38,7 +38,11 @@ needs_sphinx = '1.3'
# ones.
extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include', 'cdomain',
'kfigure', 'sphinx.ext.ifconfig', 'automarkup',
- 'maintainers_include']
+ 'maintainers_include', 'sphinx.ext.autosectionlabel' ]
+
+# Ensure that autosectionlabel will produce unique names
+autosectionlabel_prefix_document = True
+autosectionlabel_maxdepth = 2
# The name of the math extension changed on Sphinx 1.4
if (major == 1 and minor > 3) or (major > 1):
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index a501dc1c90d0..0897ad12c119 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -8,41 +8,81 @@ This is the beginning of a manual for core kernel APIs. The conversion
Core utilities
==============
+This section has general and "core core" documentation. The first is a
+massive grab-bag of kerneldoc info left over from the docbook days; it
+should really be broken up someday when somebody finds the energy to do
+it.
+
.. toctree::
:maxdepth: 1
kernel-api
+ workqueue
+ printk-formats
+ symbol-namespaces
+
+Data structures and low-level utilities
+=======================================
+
+Library functionality that is used throughout the kernel.
+
+.. toctree::
+ :maxdepth: 1
+
+ kobject
assoc_array
+ xarray
+ idr
+ circular-buffers
+ generic-radix-tree
+ packing
+ timekeeping
+ errseq
+
+Concurrency primitives
+======================
+
+How Linux keeps everything from happening at the same time. See
+:doc:`/locking/index` for more related documentation.
+
+.. toctree::
+ :maxdepth: 1
+
atomic_ops
- cachetlb
refcount-vs-atomic
- cpu_hotplug
- idr
local_ops
- workqueue
+ padata
+ ../RCU/index
+
+Low-level hardware management
+=============================
+
+Cache management, managing CPU hotplug, etc.
+
+.. toctree::
+ :maxdepth: 1
+
+ cachetlb
+ cpu_hotplug
+ memory-hotplug
genericirq
- xarray
- librs
- genalloc
- errseq
- packing
- printk-formats
- circular-buffers
- generic-radix-tree
+ protection-keys
+
+Memory management
+=================
+
+How to allocate and use memory in the kernel. Note that there is a lot
+more memory-management documentation in :doc:`/vm/index`.
+
+.. toctree::
+ :maxdepth: 1
+
memory-allocation
mm-api
+ genalloc
pin_user_pages
- gfp_mask-from-fs-io
- timekeeping
boot-time-mm
- memory-hotplug
- protection-keys
- ../RCU/index
- gcc-plugins
- symbol-namespaces
- padata
- ioctl
-
+ gfp_mask-from-fs-io
Interfaces for kernel debugging
===============================
@@ -53,6 +93,16 @@ Interfaces for kernel debugging
debug-objects
tracepoint
+Everything else
+===============
+
+Documents that don't fit elsewhere or which have yet to be categorized.
+
+.. toctree::
+ :maxdepth: 1
+
+ librs
+
.. only:: subproject and html
Indices
diff --git a/Documentation/kobject.txt b/Documentation/core-api/kobject.rst
index ff4c25098119..1f62d4d7d966 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/core-api/kobject.rst
@@ -25,7 +25,7 @@ some terms we will be working with.
usually embedded within some other structure which contains the stuff
the code is really interested in.
- No structure should EVER have more than one kobject embedded within it.
+ No structure should **EVER** have more than one kobject embedded within it.
If it does, the reference counting for the object is sure to be messed
up and incorrect, and your code will be buggy. So do not do this.
@@ -55,7 +55,7 @@ a larger, domain-specific object. To this end, kobjects will be found
embedded in other structures. If you are used to thinking of things in
object-oriented terms, kobjects can be seen as a top-level, abstract class
from which other classes are derived. A kobject implements a set of
-capabilities which are not particularly useful by themselves, but which are
+capabilities which are not particularly useful by themselves, but are
nice to have in other objects. The C language does not allow for the
direct expression of inheritance, so other techniques - such as structure
embedding - must be used.
@@ -65,12 +65,12 @@ this is analogous as to how "list_head" structs are rarely useful on
their own, but are invariably found embedded in the larger objects of
interest.)
-So, for example, the UIO code in drivers/uio/uio.c has a structure that
+So, for example, the UIO code in ``drivers/uio/uio.c`` has a structure that
defines the memory region associated with a uio device::
struct uio_map {
- struct kobject kobj;
- struct uio_mem *mem;
+ struct kobject kobj;
+ struct uio_mem *mem;
};
If you have a struct uio_map structure, finding its embedded kobject is
@@ -78,30 +78,30 @@ just a matter of using the kobj member. Code that works with kobjects will
often have the opposite problem, however: given a struct kobject pointer,
what is the pointer to the containing structure? You must avoid tricks
(such as assuming that the kobject is at the beginning of the structure)
-and, instead, use the container_of() macro, found in <linux/kernel.h>::
+and, instead, use the container_of() macro, found in ``<linux/kernel.h>``::
container_of(pointer, type, member)
where:
- * "pointer" is the pointer to the embedded kobject,
- * "type" is the type of the containing structure, and
- * "member" is the name of the structure field to which "pointer" points.
+ * ``pointer`` is the pointer to the embedded kobject,
+ * ``type`` is the type of the containing structure, and
+ * ``member`` is the name of the structure field to which ``pointer`` points.
The return value from container_of() is a pointer to the corresponding
-container type. So, for example, a pointer "kp" to a struct kobject
-embedded *within* a struct uio_map could be converted to a pointer to the
-*containing* uio_map structure with::
+container type. So, for example, a pointer ``kp`` to a struct kobject
+embedded **within** a struct uio_map could be converted to a pointer to the
+**containing** uio_map structure with::
struct uio_map *u_map = container_of(kp, struct uio_map, kobj);
-For convenience, programmers often define a simple macro for "back-casting"
+For convenience, programmers often define a simple macro for **back-casting**
kobject pointers to the containing type. Exactly this happens in the
-earlier drivers/uio/uio.c, as you can see here::
+earlier ``drivers/uio/uio.c``, as you can see here::
struct uio_map {
- struct kobject kobj;
- struct uio_mem *mem;
+ struct kobject kobj;
+ struct uio_mem *mem;
};
#define to_map(map) container_of(map, struct uio_map, kobj)
@@ -125,7 +125,7 @@ must have an associated kobj_type. After calling kobject_init(), to
register the kobject with sysfs, the function kobject_add() must be called::
int kobject_add(struct kobject *kobj, struct kobject *parent,
- const char *fmt, ...);
+ const char *fmt, ...);
This sets up the parent of the kobject and the name for the kobject
properly. If the kobject is to be associated with a specific kset,
@@ -172,13 +172,13 @@ call to kobject_uevent()::
int kobject_uevent(struct kobject *kobj, enum kobject_action action);
-Use the KOBJ_ADD action for when the kobject is first added to the kernel.
+Use the ``KOBJ_ADD`` action for when the kobject is first added to the kernel.
This should be done only after any attributes or children of the kobject
have been initialized properly, as userspace will instantly start to look
for them when this call happens.
When the kobject is removed from the kernel (details on how to do that are
-below), the uevent for KOBJ_REMOVE will be automatically created by the
+below), the uevent for ``KOBJ_REMOVE`` will be automatically created by the
kobject core, so the caller does not have to worry about doing that by
hand.
@@ -238,7 +238,7 @@ Both types of attributes used here, with a kobject that has been created
with the kobject_create_and_add(), can be of type kobj_attribute, so no
special custom attribute is needed to be created.
-See the example module, samples/kobject/kobject-example.c for an
+See the example module, ``samples/kobject/kobject-example.c`` for an
implementation of a simple kobject and attributes.
@@ -270,10 +270,10 @@ such a method has a form like::
void my_object_release(struct kobject *kobj)
{
- struct my_object *mine = container_of(kobj, struct my_object, kobj);
+ struct my_object *mine = container_of(kobj, struct my_object, kobj);
- /* Perform any additional cleanup on this object, then... */
- kfree(mine);
+ /* Perform any additional cleanup on this object, then... */
+ kfree(mine);
}
One important point cannot be overstated: every kobject must have a
@@ -297,11 +297,11 @@ instead, it is associated with the ktype. So let us introduce struct
kobj_type::
struct kobj_type {
- void (*release)(struct kobject *kobj);
- const struct sysfs_ops *sysfs_ops;
- struct attribute **default_attrs;
- const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
- const void *(*namespace)(struct kobject *kobj);
+ void (*release)(struct kobject *kobj);
+ const struct sysfs_ops *sysfs_ops;
+ struct attribute **default_attrs;
+ const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
+ const void *(*namespace)(struct kobject *kobj);
};
This structure is used to describe a particular type of kobject (or, more
@@ -352,8 +352,8 @@ created and never declared statically or on the stack. To create a new
kset use::
struct kset *kset_create_and_add(const char *name,
- struct kset_uevent_ops *u,
- struct kobject *parent);
+ struct kset_uevent_ops *u,
+ struct kobject *parent);
When you are finished with the kset, call::
@@ -365,16 +365,16 @@ Because other references to the kset may still exist, the release may happen
after kset_unregister() returns.
An example of using a kset can be seen in the
-samples/kobject/kset-example.c file in the kernel tree.
+``samples/kobject/kset-example.c`` file in the kernel tree.
If a kset wishes to control the uevent operations of the kobjects
associated with it, it can use the struct kset_uevent_ops to handle it::
struct kset_uevent_ops {
- int (*filter)(struct kset *kset, struct kobject *kobj);
- const char *(*name)(struct kset *kset, struct kobject *kobj);
- int (*uevent)(struct kset *kset, struct kobject *kobj,
- struct kobj_uevent_env *env);
+ int (*filter)(struct kset *kset, struct kobject *kobj);
+ const char *(*name)(struct kset *kset, struct kobject *kobj);
+ int (*uevent)(struct kset *kset, struct kobject *kobj,
+ struct kobj_uevent_env *env);
};
@@ -408,8 +408,8 @@ Kobject removal
After a kobject has been registered with the kobject core successfully, it
must be cleaned up when the code is finished with it. To do that, call
kobject_put(). By doing this, the kobject core will automatically clean up
-all of the memory allocated by this kobject. If a KOBJ_ADD uevent has been
-sent for the object, a corresponding KOBJ_REMOVE uevent will be sent, and
+all of the memory allocated by this kobject. If a ``KOBJ_ADD`` uevent has been
+sent for the object, a corresponding ``KOBJ_REMOVE`` uevent will be sent, and
any other sysfs housekeeping will be handled for the caller properly.
If you need to do a two-stage delete of the kobject (say you are not
@@ -430,5 +430,5 @@ Example code to copy from
=========================
For a more complete example of using ksets and kobjects properly, see the
-example programs samples/kobject/{kobject-example.c,kset-example.c},
-which will be built as loadable modules if you select CONFIG_SAMPLE_KOBJECT.
+example programs ``samples/kobject/{kobject-example.c,kset-example.c}``,
+which will be built as loadable modules if you select ``CONFIG_SAMPLE_KOBJECT``.
diff --git a/Documentation/debugging-modules.txt b/Documentation/debugging-modules.txt
deleted file mode 100644
index 172ad4aec493..000000000000
--- a/Documentation/debugging-modules.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Debugging Modules after 2.6.3
------------------------------
-
-In almost all distributions, the kernel asks for modules which don't
-exist, such as "net-pf-10" or whatever. Changing "modprobe -q" to
-"succeed" in this case is hacky and breaks some setups, and also we
-want to know if it failed for the fallback code for old aliases in
-fs/char_dev.c, for example.
-
-In the past a debugging message which would fill people's logs was
-emitted. This debugging message has been removed. The correct way
-of debugging module problems is something like this:
-
-echo '#! /bin/sh' > /tmp/modprobe
-echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe
-echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe
-chmod a+x /tmp/modprobe
-echo /tmp/modprobe > /proc/sys/kernel/modprobe
-
-Note that the above applies only when the *kernel* is requesting
-that the module be loaded -- it won't have any effect if that module
-is being loaded explicitly using "modprobe" from userspace.
diff --git a/Documentation/dev-tools/gcov.rst b/Documentation/dev-tools/gcov.rst
index 46aae52a41d0..7bd013596217 100644
--- a/Documentation/dev-tools/gcov.rst
+++ b/Documentation/dev-tools/gcov.rst
@@ -203,7 +203,7 @@ Cause
may not correctly copy files from sysfs.
Solution
- Use ``cat``' to read ``.gcda`` files and ``cp -d`` to copy links.
+ Use ``cat`` to read ``.gcda`` files and ``cp -d`` to copy links.
Alternatively use the mechanism shown in Appendix B.
diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst
index 3a289e8a1d12..fce262883984 100644
--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@@ -8,7 +8,8 @@ with the difference that the orphan objects are not freed but only
reported via /sys/kernel/debug/kmemleak. A similar method is used by the
Valgrind tool (``memcheck --leak-check``) to detect the memory leaks in
user-space applications.
-Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze, ppc, mips, s390 and tile.
+Kmemleak is supported on x86, arm, arm64, powerpc, sparc, sh, microblaze, mips,
+s390, nds32, arc and xtensa.
Usage
-----
diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
index 33c7842917f6..8b9a8f337f16 100644
--- a/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
+++ b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
@@ -23,6 +23,8 @@ properties:
- items:
- const: allwinner,sun7i-a20-crypto
- const: allwinner,sun4i-a10-crypto
+ - items:
+ - const: allwinner,sun8i-a33-crypto
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/edac/dmc-520.yaml b/Documentation/devicetree/bindings/edac/dmc-520.yaml
new file mode 100644
index 000000000000..9272d2bd8634
--- /dev/null
+++ b/Documentation/devicetree/bindings/edac/dmc-520.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/edac/dmc-520.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM DMC-520 EDAC bindings
+
+maintainers:
+ - Lei Wang <lewan@microsoft.com>
+
+description: |+
+ DMC-520 node is defined to describe DRAM error detection and correction.
+
+ https://static.docs.arm.com/100000/0200/corelink_dmc520_trm_100000_0200_01_en.pdf
+
+properties:
+ compatible:
+ items:
+ - const: brcm,dmc-520
+ - const: arm,dmc-520
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 1
+ maxItems: 10
+
+ interrupt-names:
+ minItems: 1
+ maxItems: 10
+ items:
+ enum:
+ - ram_ecc_errc
+ - ram_ecc_errd
+ - dram_ecc_errc
+ - dram_ecc_errd
+ - failed_access
+ - failed_prog
+ - link_err
+ - temperature_event
+ - arch_fsm
+ - phy_request
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-names
+
+examples:
+ - |
+ dmc0: dmc@200000 {
+ compatible = "brcm,dmc-520", "arm,dmc-520";
+ reg = <0x200000 0x80000>;
+ interrupts = <0x0 0x349 0x4>, <0x0 0x34B 0x4>;
+ interrupt-names = "dram_ecc_errc", "dram_ecc_errd";
+ };
diff --git a/Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt b/Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt
index ef2ae729718f..921172f689b8 100644
--- a/Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt
+++ b/Documentation/devicetree/bindings/input/cypress,tm2-touchkey.txt
@@ -5,6 +5,7 @@ Required properties:
* "cypress,tm2-touchkey" - for the touchkey found on the tm2 board
* "cypress,midas-touchkey" - for the touchkey found on midas boards
* "cypress,aries-touchkey" - for the touchkey found on aries boards
+ * "coreriver,tc360-touchkey" - for the Coreriver TouchCore 360 touchkey
- reg: I2C address of the chip.
- interrupts: interrupt to which the chip is connected (see interrupt
binding[0]).
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index 250f8d8cdce4..c00fb0d22c7b 100644
--- a/Documentation/devicetree/bindings/net/fsl-fman.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fman.txt
@@ -110,6 +110,13 @@ PROPERTIES
Usage: required
Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt
+- fsl,erratum-a050385
+ Usage: optional
+ Value type: boolean
+ Definition: A boolean property. Indicates the presence of the
+ erratum A050385 which indicates that DMA transactions that are
+ split can result in a FMan lock.
+
=============================================================================
FMan MURAM Node
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 9e67944bec9c..b3c8c623744a 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -205,6 +205,8 @@ patternProperties:
description: Colorful GRP, Shenzhen Xueyushi Technology Ltd.
"^compulab,.*":
description: CompuLab Ltd.
+ "^coreriver,.*":
+ description: CORERIVER Semiconductor Co., Ltd.
"^corpro,.*":
description: Chengdu Corpro Technology Co., Ltd.
"^cortina,.*":
diff --git a/Documentation/driver-api/80211/mac80211-advanced.rst b/Documentation/driver-api/80211/mac80211-advanced.rst
index 9f1c5bb7ac35..24cb64b3b715 100644
--- a/Documentation/driver-api/80211/mac80211-advanced.rst
+++ b/Documentation/driver-api/80211/mac80211-advanced.rst
@@ -272,8 +272,8 @@ STA information lifetime rules
.. kernel-doc:: net/mac80211/sta_info.c
:doc: STA information lifetime rules
-Aggregation
-===========
+Aggregation Functions
+=====================
.. kernel-doc:: net/mac80211/sta_info.h
:functions: sta_ampdu_mlme
@@ -284,8 +284,8 @@ Aggregation
.. kernel-doc:: net/mac80211/sta_info.h
:functions: tid_ampdu_rx
-Synchronisation
-===============
+Synchronisation Functions
+=========================
TBD
diff --git a/Documentation/driver-api/dmaengine/index.rst b/Documentation/driver-api/dmaengine/index.rst
index b9df904d0a79..bdc45d8b4cfb 100644
--- a/Documentation/driver-api/dmaengine/index.rst
+++ b/Documentation/driver-api/dmaengine/index.rst
@@ -5,8 +5,8 @@ DMAEngine documentation
DMAEngine documentation provides documents for various aspects of DMAEngine
framework.
-DMAEngine documentation
------------------------
+DMAEngine development documentation
+-----------------------------------
This book helps with DMAengine internal APIs and guide for DMAEngine device
driver writers.
diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
index 790a15089f1f..56e5833e8a07 100644
--- a/Documentation/driver-api/dmaengine/provider.rst
+++ b/Documentation/driver-api/dmaengine/provider.rst
@@ -266,11 +266,15 @@ to use.
attached (via the dmaengine_desc_attach_metadata() helper to the descriptor.
From the DMA driver the following is expected for this mode:
+
- DMA_MEM_TO_DEV / DEV_MEM_TO_MEM
+
The data from the provided metadata buffer should be prepared for the DMA
controller to be sent alongside of the payload data. Either by copying to a
hardware descriptor, or highly coupled packet.
+
- DMA_DEV_TO_MEM
+
On transfer completion the DMA driver must copy the metadata to the client
provided metadata buffer before notifying the client about the completion.
After the transfer completion, DMA drivers must not touch the metadata
@@ -284,10 +288,14 @@ to use.
and dmaengine_desc_set_metadata_len() is provided as helper functions.
From the DMA driver the following is expected for this mode:
- - get_metadata_ptr
+
+ - get_metadata_ptr()
+
Should return a pointer for the metadata buffer, the maximum size of the
metadata buffer and the currently used / valid (if any) bytes in the buffer.
- - set_metadata_len
+
+ - set_metadata_len()
+
It is called by the clients after it have placed the metadata to the buffer
to let the DMA driver know the number of valid bytes provided.
diff --git a/Documentation/driver-api/driver-model/driver.rst b/Documentation/driver-api/driver-model/driver.rst
index baa6a85c8287..63887b813005 100644
--- a/Documentation/driver-api/driver-model/driver.rst
+++ b/Documentation/driver-api/driver-model/driver.rst
@@ -210,7 +210,7 @@ probed.
While the typical use case for sync_state() is to have the kernel cleanly take
over management of devices from the bootloader, the usage of sync_state() is
not restricted to that. Use it whenever it makes sense to take an action after
-all the consumers of a device have probed.
+all the consumers of a device have probed::
int (*remove) (struct device *dev);
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 0ebe205efd0c..d4e78cb3ef4d 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -17,6 +17,7 @@ available subsections can be seen below.
driver-model/index
basics
infrastructure
+ ioctl
early-userspace/index
pm/index
clk
@@ -74,11 +75,12 @@ available subsections can be seen below.
connector
console
dcdbas
- edid
eisa
ipmb
isa
isapnp
+ io-mapping
+ io_ordering
generic-counter
lightnvm-pblk
memory-devices/index
diff --git a/Documentation/io-mapping.txt b/Documentation/driver-api/io-mapping.rst
index a966239f04e4..a966239f04e4 100644
--- a/Documentation/io-mapping.txt
+++ b/Documentation/driver-api/io-mapping.rst
diff --git a/Documentation/io_ordering.txt b/Documentation/driver-api/io_ordering.rst
index 2ab303ce9a0d..2ab303ce9a0d 100644
--- a/Documentation/io_ordering.txt
+++ b/Documentation/driver-api/io_ordering.rst
diff --git a/Documentation/core-api/ioctl.rst b/Documentation/driver-api/ioctl.rst
index c455db0e1627..c455db0e1627 100644
--- a/Documentation/core-api/ioctl.rst
+++ b/Documentation/driver-api/ioctl.rst
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt
index 2dc5df6a1cf5..3d492a34c8ee 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -23,7 +23,7 @@
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.rst
index fec7144e817c..f054d1c45e86 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.rst
@@ -1,7 +1,10 @@
- v9fs: Plan 9 Resource Sharing for Linux
- =======================================
+.. SPDX-License-Identifier: GPL-2.0
-ABOUT
+=======================================
+v9fs: Plan 9 Resource Sharing for Linux
+=======================================
+
+About
=====
v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol.
@@ -14,32 +17,34 @@ and Maya Gokhale. Additional development by Greg Watson
The best detailed explanation of the Linux implementation and applications of
the 9p client is available in the form of a USENIX paper:
+
http://www.usenix.org/events/usenix05/tech/freenix/hensbergen.html
Other applications are described in the following papers:
+
* XCPU & Clustering
- http://xcpu.org/papers/xcpu-talk.pdf
+ http://xcpu.org/papers/xcpu-talk.pdf
* KVMFS: control file system for KVM
- http://xcpu.org/papers/kvmfs.pdf
+ http://xcpu.org/papers/kvmfs.pdf
* CellFS: A New Programming Model for the Cell BE
- http://xcpu.org/papers/cellfs-talk.pdf
+ http://xcpu.org/papers/cellfs-talk.pdf
* PROSE I/O: Using 9p to enable Application Partitions
- http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
+ http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
* VirtFS: A Virtualization Aware File System pass-through
- http://goo.gl/3WPDg
+ http://goo.gl/3WPDg
-USAGE
+Usage
=====
-For remote file server:
+For remote file server::
mount -t 9p 10.10.1.2 /mnt/9
-For Plan 9 From User Space applications (http://swtch.com/plan9)
+For Plan 9 From User Space applications (http://swtch.com/plan9)::
mount -t 9p `namespace`/acme /mnt/9 -o trans=unix,uname=$USER
-For server running on QEMU host with virtio transport:
+For server running on QEMU host with virtio transport::
mount -t 9p -o trans=virtio <mount_tag> /mnt/9
@@ -48,18 +53,22 @@ mount points. Each 9P export is seen by the client as a virtio device with an
associated "mount_tag" property. Available mount tags can be
seen by reading /sys/bus/virtio/drivers/9pnet_virtio/virtio<n>/mount_tag files.
-OPTIONS
+Options
=======
+ ============= ===============================================================
trans=name select an alternative transport. Valid options are
currently:
- unix - specifying a named pipe mount point
- tcp - specifying a normal TCP/IP connection
- fd - used passed file descriptors for connection
- (see rfdno and wfdno)
- virtio - connect to the next virtio channel available
- (from QEMU with trans_virtio module)
- rdma - connect to a specified RDMA channel
+
+ ======== ============================================
+ unix specifying a named pipe mount point
+ tcp specifying a normal TCP/IP connection
+ fd use passed file descriptors for connection
+ (see rfdno and wfdno)
+ virtio connect to the next virtio channel available
+ (from QEMU with trans_virtio module)
+ rdma connect to a specified RDMA channel
+ ======== ============================================
uname=name user name to attempt mount as on the remote server. The
server may override or ignore this value. Certain user
@@ -69,28 +78,36 @@ OPTIONS
offering several exported file systems.
cache=mode specifies a caching policy. By default, no caches are used.
- none = default no cache policy, metadata and data
+
+ none
+ default no cache policy, metadata and data
alike are synchronous.
- loose = no attempts are made at consistency,
+ loose
+ no attempts are made at consistency,
intended for exclusive, read-only mounts
- fscache = use FS-Cache for a persistent, read-only
+ fscache
+ use FS-Cache for a persistent, read-only
cache backend.
- mmap = minimal cache that is only used for read-write
+ mmap
+ minimal cache that is only used for read-write
mmap. Northing else is cached, like cache=none
debug=n specifies debug level. The debug level is a bitmask.
- 0x01 = display verbose error messages
- 0x02 = developer debug (DEBUG_CURRENT)
- 0x04 = display 9p trace
- 0x08 = display VFS trace
- 0x10 = display Marshalling debug
- 0x20 = display RPC debug
- 0x40 = display transport debug
- 0x80 = display allocation debug
- 0x100 = display protocol message debug
- 0x200 = display Fid debug
- 0x400 = display packet debug
- 0x800 = display fscache tracing debug
+
+ ===== ================================
+ 0x01 display verbose error messages
+ 0x02 developer debug (DEBUG_CURRENT)
+ 0x04 display 9p trace
+ 0x08 display VFS trace
+ 0x10 display Marshalling debug
+ 0x20 display RPC debug
+ 0x40 display transport debug
+ 0x80 display allocation debug
+ 0x100 display protocol message debug
+ 0x200 display Fid debug
+ 0x400 display packet debug
+ 0x800 display fscache tracing debug
+ ===== ================================
rfdno=n the file descriptor for reading with trans=fd
@@ -103,9 +120,12 @@ OPTIONS
noextend force legacy mode (no 9p2000.u or 9p2000.L semantics)
version=name Select 9P protocol version. Valid options are:
- 9p2000 - Legacy mode (same as noextend)
- 9p2000.u - Use 9P2000.u protocol
- 9p2000.L - Use 9P2000.L protocol
+
+ ======== ==============================
+ 9p2000 Legacy mode (same as noextend)
+ 9p2000.u Use 9P2000.u protocol
+ 9p2000.L Use 9P2000.L protocol
+ ======== ==============================
dfltuid attempt to mount as a particular uid
@@ -118,22 +138,27 @@ OPTIONS
hosts. This functionality will be expanded in later versions.
access there are four access modes.
- user = if a user tries to access a file on v9fs
+ user
+ if a user tries to access a file on v9fs
filesystem for the first time, v9fs sends an
attach command (Tattach) for that user.
This is the default mode.
- <uid> = allows only user with uid=<uid> to access
+ <uid>
+ allows only user with uid=<uid> to access
the files on the mounted filesystem
- any = v9fs does single attach and performs all
+ any
+ v9fs does single attach and performs all
operations as one user
- client = ACL based access check on the 9p client
+ client
+ ACL based access check on the 9p client
side for access validation
cachetag cache tag to use the specified persistent cache.
cache tags for existing cache sessions can be listed at
/sys/fs/9p/caches. (applies only to cache=fscache)
+ ============= ===============================================================
-RESOURCES
+Resources
=========
Protocol specifications are maintained on github:
@@ -158,4 +183,3 @@ http://plan9.bell-labs.com/plan9
For information on Plan 9 from User Space (Plan 9 applications and libraries
ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9
-
diff --git a/Documentation/filesystems/adfs.txt b/Documentation/filesystems/adfs.rst
index 0baa8e8c1fc1..5b22cae38e5e 100644
--- a/Documentation/filesystems/adfs.txt
+++ b/Documentation/filesystems/adfs.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+Acorn Disc Filing System - ADFS
+===============================
+
Filesystems supported by ADFS
-----------------------------
@@ -25,6 +31,7 @@ directory updates, specifically updating the access mode and timestamp.
Mount options for ADFS
----------------------
+ ============ ======================================================
uid=nnn All files in the partition will be owned by
user id nnn. Default 0 (root).
gid=nnn All files in the partition will be in group
@@ -36,22 +43,23 @@ Mount options for ADFS
ftsuffix=n When ftsuffix=0, no file type suffix will be applied.
When ftsuffix=1, a hexadecimal suffix corresponding to
the RISC OS file type will be added. Default 0.
+ ============ ======================================================
Mapping of ADFS permissions to Linux permissions
------------------------------------------------
ADFS permissions consist of the following:
- Owner read
- Owner write
- Other read
- Other write
+ - Owner read
+ - Owner write
+ - Other read
+ - Other write
(In older versions, an 'execute' permission did exist, but this
- does not hold the same meaning as the Linux 'execute' permission
- and is now obsolete).
+ does not hold the same meaning as the Linux 'execute' permission
+ and is now obsolete).
- The mapping is performed as follows:
+ The mapping is performed as follows::
Owner read -> -r--r--r--
Owner write -> --w--w---w
@@ -66,17 +74,18 @@ Mapping of ADFS permissions to Linux permissions
Possible other mode permissions -> ----rwxrwx
Hence, with the default masks, if a file is owner read/write, and
- not a UnixExec filetype, then the permissions will be:
+ not a UnixExec filetype, then the permissions will be::
-rw-------
However, if the masks were ownmask=0770,othmask=0007, then this would
- be modified to:
+ be modified to::
+
-rw-rw----
There is no restriction on what you can do with these masks. You may
wish that either read bits give read access to the file for all, but
- keep the default write protection (ownmask=0755,othmask=0577):
+ keep the default write protection (ownmask=0755,othmask=0577)::
-rw-r--r--
diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.rst
index 71b63c2b9841..7f1a40dce6d3 100644
--- a/Documentation/filesystems/affs.txt
+++ b/Documentation/filesystems/affs.rst
@@ -1,9 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================
Overview of Amiga Filesystems
=============================
Not all varieties of the Amiga filesystems are supported for reading and
writing. The Amiga currently knows six different filesystems:
+============== ===============================================================
DOS\0 The old or original filesystem, not really suited for
hard disks and normally not used on them, either.
Supported read/write.
@@ -23,6 +27,7 @@ DOS\4 The original filesystem with directory cache. The directory
sense on hard disks. Supported read only.
DOS\5 The Fast File System with directory cache. Supported read only.
+============== ===============================================================
All of the above filesystems allow block sizes from 512 to 32K bytes.
Supported block sizes are: 512, 1024, 2048 and 4096 bytes. Larger blocks
@@ -36,14 +41,18 @@ are supported, too.
Mount options for the AFFS
==========================
-protect If this option is set, the protection bits cannot be altered.
+protect
+ If this option is set, the protection bits cannot be altered.
-setuid[=uid] This sets the owner of all files and directories in the file
+setuid[=uid]
+ This sets the owner of all files and directories in the file
system to uid or the uid of the current user, respectively.
-setgid[=gid] Same as above, but for gid.
+setgid[=gid]
+ Same as above, but for gid.
-mode=mode Sets the mode flags to the given (octal) value, regardless
+mode=mode
+ Sets the mode flags to the given (octal) value, regardless
of the original permissions. Directories will get an x
permission if the corresponding r bit is set.
This is useful since most of the plain AmigaOS files
@@ -53,33 +62,41 @@ nofilenametruncate
The file system will return an error when filename exceeds
standard maximum filename length (30 characters).
-reserved=num Sets the number of reserved blocks at the start of the
+reserved=num
+ Sets the number of reserved blocks at the start of the
partition to num. You should never need this option.
Default is 2.
-root=block Sets the block number of the root block. This should never
+root=block
+ Sets the block number of the root block. This should never
be necessary.
-bs=blksize Sets the blocksize to blksize. Valid block sizes are 512,
+bs=blksize
+ Sets the blocksize to blksize. Valid block sizes are 512,
1024, 2048 and 4096. Like the root option, this should
never be necessary, as the affs can figure it out itself.
-quiet The file system will not return an error for disallowed
+quiet
+ The file system will not return an error for disallowed
mode changes.
-verbose The volume name, file system type and block size will
+verbose
+ The volume name, file system type and block size will
be written to the syslog when the filesystem is mounted.
-mufs The filesystem is really a muFS, also it doesn't
+mufs
+ The filesystem is really a muFS, although it doesn't
identify itself as one. This option is necessary if
the filesystem wasn't formatted as muFS, but is used
as one.
-prefix=path Path will be prefixed to every absolute path name of
+prefix=path
+ Path will be prefixed to every absolute path name of
symbolic links on an AFFS partition. Default = "/".
(See below.)
-volume=name When symbolic links with an absolute path are created
+volume=name
+ When symbolic links with an absolute path are created
on an AFFS partition, name will be prepended as the
volume name. Default = "" (empty string).
(See below.)
@@ -119,7 +136,7 @@ The Linux rwxrwxrwx file mode is handled as follows:
- All other flags (suid, sgid, ...) are ignored and will
not be retained.
-
+
Newly created files and directories will get the user and group ID
of the current user and a mode according to the umask.
@@ -148,11 +165,13 @@ might be "User", "WB" and "Graphics", the mount points /amiga/User,
Examples
========
-Command line:
+Command line::
+
mount Archive/Amiga/Workbench3.1.adf /mnt -t affs -o loop,verbose
mount /dev/sda3 /Amiga -t affs
-/etc/fstab entry:
+/etc/fstab entry::
+
/dev/sdb5 /amiga/Workbench affs noauto,user,exec,verbose 0 0
IMPORTANT NOTE
@@ -170,7 +189,8 @@ before booting Windows!
If the damage is already done, the following should fix the RDB
(where <disk> is the device name).
-DO AT YOUR OWN RISK:
+
+DO AT YOUR OWN RISK::
dd if=/dev/<disk> of=rdb.tmp count=1
cp rdb.tmp rdb.fixed
@@ -189,10 +209,14 @@ By default, filenames are truncated to 30 characters without warning.
'nofilenametruncate' mount option can change that behavior.
Case is ignored by the affs in filename matching, but Linux shells
-do care about the case. Example (with /wb being an affs mounted fs):
+do care about the case. Example (with /wb being an affs mounted fs)::
+
rm /wb/WRONGCASE
-will remove /mnt/wrongcase, but
+
+will remove /wb/wrongcase, but::
+
rm /wb/WR*
+
will not since the names are matched by the shell.
The block allocation is designed for hard disk partitions. If more
@@ -219,4 +243,4 @@ due to an incompatibility with the Amiga floppy controller.
If you are interested in an Amiga Emulator for Linux, look at
-http://web.archive.org/web/*/http://www.freiburg.linux.de/~uae/
+http://web.archive.org/web/%2A/http://www.freiburg.linux.de/~uae/
diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.rst
index 8c6ea7b41048..c4ec39a5966e 100644
--- a/Documentation/filesystems/afs.txt
+++ b/Documentation/filesystems/afs.rst
@@ -1,8 +1,10 @@
- ====================
- kAFS: AFS FILESYSTEM
- ====================
+.. SPDX-License-Identifier: GPL-2.0
-Contents:
+====================
+kAFS: AFS FILESYSTEM
+====================
+
+.. Contents:
- Overview.
- Usage.
@@ -14,8 +16,7 @@ Contents:
- The @sys substitution.
-========
-OVERVIEW
+Overview
========
This filesystem provides a fairly simple secure AFS filesystem driver. It is
@@ -35,35 +36,33 @@ It does not yet support the following AFS features:
(*) pioctl() system call.
-===========
-COMPILATION
+Compilation
===========
The filesystem should be enabled by turning on the kernel configuration
-options:
+options::
CONFIG_AF_RXRPC - The RxRPC protocol transport
CONFIG_RXKAD - The RxRPC Kerberos security handler
CONFIG_AFS - The AFS filesystem
-Additionally, the following can be turned on to aid debugging:
+Additionally, the following can be turned on to aid debugging::
CONFIG_AF_RXRPC_DEBUG - Permit AF_RXRPC debugging to be enabled
CONFIG_AFS_DEBUG - Permit AFS debugging to be enabled
They permit the debugging messages to be turned on dynamically by manipulating
-the masks in the following files:
+the masks in the following files::
/sys/module/af_rxrpc/parameters/debug
/sys/module/kafs/parameters/debug
-=====
-USAGE
+Usage
=====
When inserting the driver modules the root cell must be specified along with a
-list of volume location server IP addresses:
+list of volume location server IP addresses::
modprobe rxrpc
modprobe kafs rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
@@ -77,14 +76,14 @@ The second module is the kerberos RxRPC security driver, and the third module
is the actual filesystem driver for the AFS filesystem.
Once the module has been loaded, more modules can be added by the following
-procedure:
+procedure::
echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells
Where the parameters to the "add" command are the name of a cell and a list of
volume location servers within that cell, with the latter separated by colons.
-Filesystems can be mounted anywhere by commands similar to the following:
+Filesystems can be mounted anywhere by commands similar to the following::
mount -t afs "%cambridge.redhat.com:root.afs." /afs
mount -t afs "#cambridge.redhat.com:root.cell." /afs/cambridge
@@ -104,8 +103,7 @@ named volume will be looked up in the cell specified during modprobe.
Additional cells can be added through /proc (see later section).
-===========
-MOUNTPOINTS
+Mountpoints
===========
AFS has a concept of mountpoints. In AFS terms, these are specially formatted
@@ -123,42 +121,40 @@ culled first. If all are culled, then the requested volume will also be
unmounted, otherwise error EBUSY will be returned.
This can be used by the administrator to attempt to unmount the whole AFS tree
-mounted on /afs in one go by doing:
+mounted on /afs in one go by doing::
umount /afs
-============
-DYNAMIC ROOT
+Dynamic Root
============
A mount option is available to create a serverless mount that is only usable
-for dynamic lookup. Creating such a mount can be done by, for example:
+for dynamic lookup. Creating such a mount can be done by, for example::
mount -t afs none /afs -o dyn
This creates a mount that just has an empty directory at the root. Attempting
to look up a name in this directory will cause a mountpoint to be created that
-looks up a cell of the same name, for example:
+looks up a cell of the same name, for example::
ls /afs/grand.central.org/
-===============
-PROC FILESYSTEM
+Proc Filesystem
===============
The AFS modules creates a "/proc/fs/afs/" directory and populates it:
(*) A "cells" file that lists cells currently known to the afs module and
- their usage counts:
+ their usage counts::
[root@andromeda ~]# cat /proc/fs/afs/cells
USE NAME
3 cambridge.redhat.com
(*) A directory per cell that contains files that list volume location
- servers, volumes, and active servers known within that cell.
+ servers, volumes, and active servers known within that cell::
[root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/servers
USE ADDR STATE
@@ -171,8 +167,7 @@ The AFS modules creates a "/proc/fs/afs/" directory and populates it:
1 Val 20000000 20000001 20000002 root.afs
-=================
-THE CELL DATABASE
+The Cell Database
=================
The filesystem maintains an internal database of all the cells it knows and the
@@ -181,7 +176,7 @@ the system belongs is added to the database when modprobe is performed by the
"rootcell=" argument or, if compiled in, using a "kafs.rootcell=" argument on
the kernel command line.
-Further cells can be added by commands similar to the following:
+Further cells can be added by commands similar to the following::
echo add CELLNAME VLADDR[:VLADDR][:VLADDR]... >/proc/fs/afs/cells
echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells
@@ -189,8 +184,7 @@ Further cells can be added by commands similar to the following:
No other cell database operations are available at this time.
-========
-SECURITY
+Security
========
Secure operations are initiated by acquiring a key using the klog program. A
@@ -198,17 +192,17 @@ very primitive klog program is available at:
http://people.redhat.com/~dhowells/rxrpc/klog.c
-This should be compiled by:
+This should be compiled by::
make klog LDLIBS="-lcrypto -lcrypt -lkrb4 -lkeyutils"
-And then run as:
+And then run as::
./klog
Assuming it's successful, this adds a key of type RxRPC, named for the service
and cell, eg: "afs@<cellname>". This can be viewed with the keyctl program or
-by cat'ing /proc/keys:
+by cat'ing /proc/keys::
[root@andromeda ~]# keyctl show
Session Keyring
@@ -232,20 +226,19 @@ socket), then the operations on the file will be made with key that was used to
open the file.
-=====================
-THE @SYS SUBSTITUTION
+The @sys Substitution
=====================
The list of up to 16 @sys substitutions for the current network namespace can
-be configured by writing a list to /proc/fs/afs/sysname:
+be configured by writing a list to /proc/fs/afs/sysname::
[root@andromeda ~]# echo foo amd64_linux_26 >/proc/fs/afs/sysname
-or cleared entirely by writing an empty list:
+or cleared entirely by writing an empty list::
[root@andromeda ~]# echo >/proc/fs/afs/sysname
-The current list for current network namespace can be retrieved by:
+The current list for current network namespace can be retrieved by::
[root@andromeda ~]# cat /proc/fs/afs/sysname
foo
diff --git a/Documentation/filesystems/autofs-mount-control.txt b/Documentation/filesystems/autofs-mount-control.rst
index acc02fc57993..2903aed92316 100644
--- a/Documentation/filesystems/autofs-mount-control.txt
+++ b/Documentation/filesystems/autofs-mount-control.rst
@@ -1,4 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+====================================================================
Miscellaneous Device control operations for the autofs kernel module
====================================================================
@@ -36,24 +38,24 @@ For example, there are two types of automount maps, direct (in the kernel
module source you will see a third type called an offset, which is just
a direct mount in disguise) and indirect.
-Here is a master map with direct and indirect map entries:
+Here is a master map with direct and indirect map entries::
-/- /etc/auto.direct
-/test /etc/auto.indirect
+ /- /etc/auto.direct
+ /test /etc/auto.indirect
-and the corresponding map files:
+and the corresponding map files::
-/etc/auto.direct:
+ /etc/auto.direct:
-/automount/dparse/g6 budgie:/autofs/export1
-/automount/dparse/g1 shark:/autofs/export1
-and so on.
+ /automount/dparse/g6 budgie:/autofs/export1
+ /automount/dparse/g1 shark:/autofs/export1
+ and so on.
-/etc/auto.indirect:
+/etc/auto.indirect::
-g1 shark:/autofs/export1
-g6 budgie:/autofs/export1
-and so on.
+ g1 shark:/autofs/export1
+ g6 budgie:/autofs/export1
+ and so on.
For the above indirect map an autofs file system is mounted on /test and
mounts are triggered for each sub-directory key by the inode lookup
@@ -69,23 +71,23 @@ use the follow_link inode operation to trigger the mount.
But, each entry in direct and indirect maps can have offsets (making
them multi-mount map entries).
-For example, an indirect mount map entry could also be:
+For example, an indirect mount map entry could also be::
-g1 \
- / shark:/autofs/export5/testing/test \
- /s1 shark:/autofs/export/testing/test/s1 \
- /s2 shark:/autofs/export5/testing/test/s2 \
- /s1/ss1 shark:/autofs/export1 \
- /s2/ss2 shark:/autofs/export2
+ g1 \
+ / shark:/autofs/export5/testing/test \
+ /s1 shark:/autofs/export/testing/test/s1 \
+ /s2 shark:/autofs/export5/testing/test/s2 \
+ /s1/ss1 shark:/autofs/export1 \
+ /s2/ss2 shark:/autofs/export2
-and a similarly a direct mount map entry could also be:
+and similarly a direct mount map entry could also be::
-/automount/dparse/g1 \
- / shark:/autofs/export5/testing/test \
- /s1 shark:/autofs/export/testing/test/s1 \
- /s2 shark:/autofs/export5/testing/test/s2 \
- /s1/ss1 shark:/autofs/export2 \
- /s2/ss2 shark:/autofs/export2
+ /automount/dparse/g1 \
+ / shark:/autofs/export5/testing/test \
+ /s1 shark:/autofs/export/testing/test/s1 \
+ /s2 shark:/autofs/export5/testing/test/s2 \
+ /s1/ss1 shark:/autofs/export2 \
+ /s2/ss2 shark:/autofs/export2
One of the issues with version 4 of autofs was that, when mounting an
entry with a large number of offsets, possibly with nesting, we needed
@@ -170,32 +172,32 @@ autofs Miscellaneous Device mount control interface
The control interface is opening a device node, typically /dev/autofs.
All the ioctls use a common structure to pass the needed parameter
-information and return operation results:
-
-struct autofs_dev_ioctl {
- __u32 ver_major;
- __u32 ver_minor;
- __u32 size; /* total size of data passed in
- * including this struct */
- __s32 ioctlfd; /* automount command fd */
-
- /* Command parameters */
- union {
- struct args_protover protover;
- struct args_protosubver protosubver;
- struct args_openmount openmount;
- struct args_ready ready;
- struct args_fail fail;
- struct args_setpipefd setpipefd;
- struct args_timeout timeout;
- struct args_requester requester;
- struct args_expire expire;
- struct args_askumount askumount;
- struct args_ismountpoint ismountpoint;
- };
-
- char path[0];
-};
+information and return operation results::
+
+ struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
+
+ /* Command parameters */
+ union {
+ struct args_protover protover;
+ struct args_protosubver protosubver;
+ struct args_openmount openmount;
+ struct args_ready ready;
+ struct args_fail fail;
+ struct args_setpipefd setpipefd;
+ struct args_timeout timeout;
+ struct args_requester requester;
+ struct args_expire expire;
+ struct args_askumount askumount;
+ struct args_ismountpoint ismountpoint;
+ };
+
+ char path[0];
+ };
The ioctlfd field is a mount point file descriptor of an autofs mount
point. It is returned by the open call and is used by all calls except
@@ -212,7 +214,7 @@ is used account for the increased structure length when translating the
structure sent from user space.
This structure can be initialized before setting specific fields by using
-the void function call init_autofs_dev_ioctl(struct autofs_dev_ioctl *).
+the void function call init_autofs_dev_ioctl(``struct autofs_dev_ioctl *``).
All of the ioctls perform a copy of this structure from user space to
kernel space and return -EINVAL if the size parameter is smaller than
diff --git a/Documentation/filesystems/befs.txt b/Documentation/filesystems/befs.rst
index da45e6c842b8..79f9740d76ff 100644
--- a/Documentation/filesystems/befs.txt
+++ b/Documentation/filesystems/befs.rst
@@ -1,48 +1,54 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
BeOS filesystem for Linux
+=========================
Document last updated: Dec 6, 2001
-WARNING
+Warning
=======
Make sure you understand that this is alpha software. This means that the
-implementation is neither complete nor well-tested.
+implementation is neither complete nor well-tested.
I DISCLAIM ALL RESPONSIBILITY FOR ANY POSSIBLE BAD EFFECTS OF THIS CODE!
-LICENSE
-=====
-This software is covered by the GNU General Public License.
+License
+=======
+This software is covered by the GNU General Public License.
See the file COPYING for the complete text of the license.
Or the GNU website: <http://www.gnu.org/licenses/licenses.html>
-AUTHOR
-=====
+Author
+======
The largest part of the code written by Will Dyson <will_dyson@pobox.com>
He has been working on the code since Aug 13, 2001. See the changelog for
details.
Original Author: Makoto Kato <m_kato@ga2.so-net.ne.jp>
+
His original code can still be found at:
<http://hp.vector.co.jp/authors/VA008030/bfs/>
+
Does anyone know of a more current email address for Makoto? He doesn't
respond to the address given above...
This filesystem doesn't have a maintainer.
-WHAT IS THIS DRIVER?
-==================
-This module implements the native filesystem of BeOS http://www.beincorporated.com/
+What is this Driver?
+====================
+This module implements the native filesystem of BeOS http://www.beincorporated.com/
for the linux 2.4.1 and later kernels. Currently it is a read-only
implementation.
Which is it, BFS or BEFS?
-================
-Be, Inc said, "BeOS Filesystem is officially called BFS, not BeFS".
+=========================
+Be, Inc said, "BeOS Filesystem is officially called BFS, not BeFS".
But Unixware Boot Filesystem is called bfs, too. And they are already in
the kernel. Because of this naming conflict, on Linux the BeOS
filesystem is called befs.
-HOW TO INSTALL
+How to Install
==============
step 1. Install the BeFS patch into the source code tree of linux.
@@ -54,16 +60,16 @@ is called patch-befs-xxx, you would do the following:
patch -p1 < /path/to/patch-befs-xxx
if the patching step fails (i.e. there are rejected hunks), you can try to
-figure it out yourself (it shouldn't be hard), or mail the maintainer
+figure it out yourself (it shouldn't be hard), or mail the maintainer
(Will Dyson <will_dyson@pobox.com>) for help.
step 2. Configuration & make kernel
The linux kernel has many compile-time options. Most of them are beyond the
scope of this document. I suggest the Kernel-HOWTO document as a good general
-reference on this topic. http://www.linuxdocs.org/HOWTOs/Kernel-HOWTO-4.html
+reference on this topic. http://www.linuxdocs.org/HOWTOs/Kernel-HOWTO-4.html
-However, to use the BeFS module, you must enable it at configure time.
+However, to use the BeFS module, you must enable it at configure time::
cd /foo/bar/linux
make menuconfig (or xconfig)
@@ -82,35 +88,40 @@ step 3. Install
See the kernel howto <http://www.linux.com/howto/Kernel-HOWTO.html> for
instructions on this critical step.
-USING BFS
+Using BFS
=========
To use the BeOS filesystem, use filesystem type 'befs'.
-ex)
+ex::
+
mount -t befs /dev/fd0 /beos
-MOUNT OPTIONS
+Mount Options
=============
+
+============= ===========================================================
uid=nnn All files in the partition will be owned by user id nnn.
gid=nnn All files in the partition will be in group nnn.
iocharset=xxx Use xxx as the name of the NLS translation table.
debug The driver will output debugging information to the syslog.
+============= ===========================================================
-HOW TO GET LASTEST VERSION
+How to Get Latest Version
==========================
The latest version is currently available at:
<http://befs-driver.sourceforge.net/>
-ANY KNOWN BUGS?
-===========
+Any Known Bugs?
+===============
As of Jan 20, 2002:
-
+
None
-SPECIAL THANKS
+Special Thanks
==============
Dominic Giampalo ... Writing "Practical file system design with Be filesystem"
+
Hiroyuki Yamada ... Testing LinuxPPC.
diff --git a/Documentation/filesystems/bfs.txt b/Documentation/filesystems/bfs.rst
index 843ce91a2e40..ce14b9018807 100644
--- a/Documentation/filesystems/bfs.txt
+++ b/Documentation/filesystems/bfs.rst
@@ -1,4 +1,7 @@
-BFS FILESYSTEM FOR LINUX
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+BFS Filesystem for Linux
========================
The BFS filesystem is used by SCO UnixWare OS for the /stand slice, which
@@ -9,22 +12,22 @@ In order to access /stand partition under Linux you obviously need to
know the partition number and the kernel must support UnixWare disk slices
(CONFIG_UNIXWARE_DISKLABEL config option). However BFS support does not
depend on having UnixWare disklabel support because one can also mount
-BFS filesystem via loopback:
+BFS filesystem via loopback::
-# losetup /dev/loop0 stand.img
-# mount -t bfs /dev/loop0 /mnt/stand
+ # losetup /dev/loop0 stand.img
+ # mount -t bfs /dev/loop0 /mnt/stand
-where stand.img is a file containing the image of BFS filesystem.
+where stand.img is a file containing the image of BFS filesystem.
When you have finished using it and umounted you need to also deallocate
-/dev/loop0 device by:
+/dev/loop0 device by::
-# losetup -d /dev/loop0
+ # losetup -d /dev/loop0
-You can simplify mounting by just typing:
+You can simplify mounting by just typing::
-# mount -t bfs -o loop stand.img /mnt/stand
+ # mount -t bfs -o loop stand.img /mnt/stand
-this will allocate the first available loopback device (and load loop.o
+this will allocate the first available loopback device (and load loop.o
kernel module if necessary) automatically. If the loopback driver is not
loaded automatically, make sure that you have compiled the module and
that modprobe is functioning. Beware that umount will not deallocate
@@ -33,21 +36,21 @@ that modprobe is functioning. Beware that umount will not deallocate
losetup(8). Read losetup(8) manpage for more info.
To create the BFS image under UnixWare you need to find out first which
-slice contains it. The command prtvtoc(1M) is your friend:
+slice contains it. The command prtvtoc(1M) is your friend::
-# prtvtoc /dev/rdsk/c0b0t0d0s0
+ # prtvtoc /dev/rdsk/c0b0t0d0s0
(assuming your root disk is on target=0, lun=0, bus=0, controller=0). Then you
look for the slice with tag "STAND", which is usually slice 10. With this
-information you can use dd(1) to create the BFS image:
+information you can use dd(1) to create the BFS image::
-# umount /stand
-# dd if=/dev/rdsk/c0b0t0d0sa of=stand.img bs=512
+ # umount /stand
+ # dd if=/dev/rdsk/c0b0t0d0sa of=stand.img bs=512
Just in case, you can verify that you have done the right thing by checking
-the magic number:
+the magic number::
-# od -Ad -tx4 stand.img | more
+ # od -Ad -tx4 stand.img | more
The first 4 bytes should be 0x1badface.
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.rst
index f9dad22d95ce..d0904f602819 100644
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
BTRFS
=====
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.rst
index b19b6a03f91c..b46a7218248f 100644
--- a/Documentation/filesystems/ceph.txt
+++ b/Documentation/filesystems/ceph.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
Ceph Distributed File System
============================
@@ -15,6 +18,7 @@ Basic features include:
* Easy deployment: most FS components are userspace daemons
Also,
+
* Flexible snapshots (on any directory)
* Recursive accounting (nested files, directories, bytes)
@@ -63,7 +67,7 @@ no 'du' or similar recursive scan of the file system is required.
Finally, Ceph also allows quotas to be set on any directory in the system.
The quota can restrict the number of bytes or the number of files stored
beneath that point in the directory hierarchy. Quotas can be set using
-extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg:
+extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg::
setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir
getfattr -n ceph.quota.max_bytes /some/dir
@@ -76,7 +80,7 @@ from writing as much data as it needs.
Mount Syntax
============
-The basic mount syntax is:
+The basic mount syntax is::
# mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
@@ -84,7 +88,7 @@ You only need to specify a single monitor, as the client will get the
full list when it connects. (However, if the monitor you specify
happens to be down, the mount won't succeed.) The port can be left
off if the monitor is using the default. So if the monitor is at
-1.2.3.4,
+1.2.3.4::
# mount -t ceph 1.2.3.4:/ /mnt/ceph
@@ -163,14 +167,14 @@ Mount Options
available modes are "no" and "clean". The default is "no".
* no: never attempt to reconnect when client detects that it has been
- blacklisted. Operations will generally fail after being blacklisted.
+ blacklisted. Operations will generally fail after being blacklisted.
* clean: client reconnects to the ceph cluster automatically when it
- detects that it has been blacklisted. During reconnect, client drops
- dirty data/metadata, invalidates page caches and writable file handles.
- After reconnect, file locks become stale because the MDS loses track
- of them. If an inode contains any stale file locks, read/write on the
- inode is not allowed until applications release all stale file locks.
+ detects that it has been blacklisted. During reconnect, client drops
+ dirty data/metadata, invalidates page caches and writable file handles.
+ After reconnect, file locks become stale because the MDS loses track
+ of them. If an inode contains any stale file locks, read/write on the
+ inode is not allowed until applications release all stale file locks.
More Information
================
@@ -179,8 +183,8 @@ For more information on Ceph, see the home page at
https://ceph.com/
The Linux kernel client source tree is available at
- https://github.com/ceph/ceph-client.git
- git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+ - https://github.com/ceph/ceph-client.git
+ - git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
and the source for the full system is at
https://github.com/ceph/ceph.git
diff --git a/Documentation/filesystems/cifs/cifsroot.txt b/Documentation/filesystems/cifs/cifsroot.txt
index 0fa1a2c36a40..947b7ec6ce9e 100644
--- a/Documentation/filesystems/cifs/cifsroot.txt
+++ b/Documentation/filesystems/cifs/cifsroot.txt
@@ -13,7 +13,7 @@ network by utilizing SMB or CIFS protocol.
In order to mount, the network stack will also need to be set up by
using 'ip=' config option. For more details, see
-Documentation/filesystems/nfs/nfsroot.txt.
+Documentation/admin-guide/nfs/nfsroot.rst.
A CIFS root mount currently requires the use of SMB1+UNIX Extensions
which is only supported by the Samba server. SMB1 is the older
diff --git a/Documentation/filesystems/cramfs.txt b/Documentation/filesystems/cramfs.rst
index 8e19a53d648b..afbdbde98bd2 100644
--- a/Documentation/filesystems/cramfs.txt
+++ b/Documentation/filesystems/cramfs.rst
@@ -1,12 +1,15 @@
+.. SPDX-License-Identifier: GPL-2.0
- Cramfs - cram a filesystem onto a small ROM
+===========================================
+Cramfs - cram a filesystem onto a small ROM
+===========================================
-cramfs is designed to be simple and small, and to compress things well.
+cramfs is designed to be simple and small, and to compress things well.
It uses the zlib routines to compress a file one page at a time, and
allows random page access. The meta-data is not compressed, but is
expressed in a very terse representation to make it use much less
-diskspace than traditional filesystems.
+diskspace than traditional filesystems.
You can't write to a cramfs filesystem (making it compressible and
compact also makes it _very_ hard to update on-the-fly), so you have to
@@ -28,9 +31,9 @@ issue.
Hard links are supported, but hard linked files
will still have a link count of 1 in the cramfs image.
-Cramfs directories have no `.' or `..' entries. Directories (like
+Cramfs directories have no ``.`` or ``..`` entries. Directories (like
every other file on cramfs) always have a link count of 1. (There's
-no need to use -noleaf in `find', btw.)
+no need to use -noleaf in ``find``, btw.)
No timestamps are stored in a cramfs, so these default to the epoch
(1970 GMT). Recently-accessed files may have updated timestamps, but
@@ -70,9 +73,9 @@ MTD drivers are cfi_cmdset_0001 (Intel/Sharp CFI flash) or physmap
(Flash device in physical memory map). MTD partitions based on such devices
are fine too. Then that device should be specified with the "mtd:" prefix
as the mount device argument. For example, to mount the MTD device named
-"fs_partition" on the /mnt directory:
+"fs_partition" on the /mnt directory::
-$ mount -t cramfs mtd:fs_partition /mnt
+ $ mount -t cramfs mtd:fs_partition /mnt
To boot a kernel with this as root filesystem, suffice to specify
something like "root=mtd:fs_partition" on the kernel command line.
@@ -90,6 +93,7 @@ https://github.com/npitre/cramfs-tools
For /usr/share/magic
--------------------
+===== ======================= =======================
0 ulelong 0x28cd3d45 Linux cramfs offset 0
>4 ulelong x size %d
>8 ulelong x flags 0x%x
@@ -110,6 +114,7 @@ For /usr/share/magic
>552 ulelong x fsid.blocks %d
>556 ulelong x fsid.files %d
>560 string >\0 name "%.16s"
+===== ======================= =======================
Hacker Notes
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.rst
index 55336a47a110..80f332b8eb68 100644
--- a/Documentation/filesystems/debugfs.txt
+++ b/Documentation/filesystems/debugfs.rst
@@ -1,4 +1,11 @@
-Copyright 2009 Jonathan Corbet <corbet@lwn.net>
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=======
+DebugFS
+=======
+
+Copyright |copy| 2009 Jonathan Corbet <corbet@lwn.net>
Debugfs exists as a simple way for kernel developers to make information
available to user space. Unlike /proc, which is only meant for information
@@ -6,11 +13,11 @@ about a process, or sysfs, which has strict one-value-per-file rules,
debugfs has no rules at all. Developers can put any information they want
there. The debugfs filesystem is also intended to not serve as a stable
ABI to user space; in theory, there are no stability constraints placed on
-files exported there. The real world is not always so simple, though [1];
+files exported there. The real world is not always so simple, though [1]_;
even debugfs interfaces are best designed with the idea that they will need
to be maintained forever.
-Debugfs is typically mounted with a command like:
+Debugfs is typically mounted with a command like::
mount -t debugfs none /sys/kernel/debug
@@ -23,7 +30,7 @@ Note that the debugfs API is exported GPL-only to modules.
Code using debugfs should include <linux/debugfs.h>. Then, the first order
of business will be to create at least one directory to hold a set of
-debugfs files:
+debugfs files::
struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
@@ -36,7 +43,7 @@ something went wrong. If ERR_PTR(-ENODEV) is returned, that is an
indication that the kernel has been built without debugfs support and none
of the functions described below will work.
-The most general way to create a file within a debugfs directory is with:
+The most general way to create a file within a debugfs directory is with::
struct dentry *debugfs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
@@ -53,7 +60,7 @@ ERR_PTR(-ERROR) on error, or ERR_PTR(-ENODEV) if debugfs support is
missing.
Create a file with an initial size, the following function can be used
-instead:
+instead::
struct dentry *debugfs_create_file_size(const char *name, umode_t mode,
struct dentry *parent, void *data,
@@ -66,7 +73,7 @@ as the function debugfs_create_file.
In a number of cases, the creation of a set of file operations is not
actually necessary; the debugfs code provides a number of helper functions
for simple situations. Files containing a single integer value can be
-created with any of:
+created with any of::
void debugfs_create_u8(const char *name, umode_t mode,
struct dentry *parent, u8 *value);
@@ -80,7 +87,7 @@ created with any of:
These files support both reading and writing the given value; if a specific
file should not be written to, simply set the mode bits accordingly. The
values in these files are in decimal; if hexadecimal is more appropriate,
-the following functions can be used instead:
+the following functions can be used instead::
void debugfs_create_x8(const char *name, umode_t mode,
struct dentry *parent, u8 *value);
@@ -94,7 +101,7 @@ the following functions can be used instead:
These functions are useful as long as the developer knows the size of the
value to be exported. Some types can have different widths on different
architectures, though, complicating the situation somewhat. There are
-functions meant to help out in such special cases:
+functions meant to help out in such special cases::
void debugfs_create_size_t(const char *name, umode_t mode,
struct dentry *parent, size_t *value);
@@ -103,7 +110,7 @@ As might be expected, this function will create a debugfs file to represent
a variable of type size_t.
Similarly, there are helpers for variables of type unsigned long, in decimal
-and hexadecimal:
+and hexadecimal::
struct dentry *debugfs_create_ulong(const char *name, umode_t mode,
struct dentry *parent,
@@ -111,7 +118,7 @@ and hexadecimal:
void debugfs_create_xul(const char *name, umode_t mode,
struct dentry *parent, unsigned long *value);
-Boolean values can be placed in debugfs with:
+Boolean values can be placed in debugfs with::
struct dentry *debugfs_create_bool(const char *name, umode_t mode,
struct dentry *parent, bool *value);
@@ -120,7 +127,7 @@ A read on the resulting file will yield either Y (for non-zero values) or
N, followed by a newline. If written to, it will accept either upper- or
lower-case values, or 1 or 0. Any other input will be silently ignored.
-Also, atomic_t values can be placed in debugfs with:
+Also, atomic_t values can be placed in debugfs with::
void debugfs_create_atomic_t(const char *name, umode_t mode,
struct dentry *parent, atomic_t *value)
@@ -129,7 +136,7 @@ A read of this file will get atomic_t values, and a write of this file
will set atomic_t values.
Another option is exporting a block of arbitrary binary data, with
-this structure and function:
+this structure and function::
struct debugfs_blob_wrapper {
void *data;
@@ -151,7 +158,7 @@ If you want to dump a block of registers (something that happens quite
often during development, even if little such code reaches mainline),
debugfs offers two functions: one to make a registers-only file, and
another to insert a register block in the middle of another sequential
-file.
+file::
struct debugfs_reg32 {
char *name;
@@ -175,7 +182,7 @@ The "base" argument may be 0, but you may want to build the reg32 array
using __stringify, and a number of register names (macros) are actually
byte offsets over a base for the register block.
-If you want to dump an u32 array in debugfs, you can create file with:
+If you want to dump an u32 array in debugfs, you can create file with::
void debugfs_create_u32_array(const char *name, umode_t mode,
struct dentry *parent,
@@ -185,7 +192,7 @@ The "array" argument provides data, and the "elements" argument is
the number of elements in the array. Note: Once the array is created, its
size cannot be changed.
-There is a helper function to create device related seq_file:
+There is a helper function to create device related seq_file::
struct dentry *debugfs_create_devm_seqfile(struct device *dev,
const char *name,
@@ -197,14 +204,14 @@ The "dev" argument is the device related to this debugfs file, and
the "read_fn" is a function pointer which is to be called to print the
seq_file content.
-There are a couple of other directory-oriented helper functions:
+There are a couple of other directory-oriented helper functions::
- struct dentry *debugfs_rename(struct dentry *old_dir,
+ struct dentry *debugfs_rename(struct dentry *old_dir,
struct dentry *old_dentry,
- struct dentry *new_dir,
+ struct dentry *new_dir,
const char *new_name);
- struct dentry *debugfs_create_symlink(const char *name,
+ struct dentry *debugfs_create_symlink(const char *name,
struct dentry *parent,
const char *target);
@@ -219,7 +226,7 @@ module is unloaded without explicitly removing debugfs entries, the result
will be a lot of stale pointers and no end of highly antisocial behavior.
So all debugfs users - at least those which can be built as modules - must
be prepared to remove all files and directories they create there. A file
-can be removed with:
+can be removed with::
void debugfs_remove(struct dentry *dentry);
@@ -229,7 +236,7 @@ be removed.
Once upon a time, debugfs users were required to remember the dentry
pointer for every debugfs file they created so that all files could be
cleaned up. We live in more civilized times now, though, and debugfs users
-can call:
+can call::
void debugfs_remove_recursive(struct dentry *dentry);
@@ -237,5 +244,4 @@ If this function is passed a pointer for the dentry corresponding to the
top-level directory, the entire hierarchy below that directory will be
removed.
-Notes:
- [1] http://lwn.net/Articles/309298/
+.. [1] http://lwn.net/Articles/309298/
diff --git a/Documentation/filesystems/dlmfs.txt b/Documentation/filesystems/dlmfs.rst
index fcf4d509d118..68daaa7facf9 100644
--- a/Documentation/filesystems/dlmfs.txt
+++ b/Documentation/filesystems/dlmfs.rst
@@ -1,20 +1,25 @@
-dlmfs
-==================
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=====
+DLMFS
+=====
+
A minimal DLM userspace interface implemented via a virtual file
system.
dlmfs is built with OCFS2 as it requires most of its infrastructure.
-Project web page: http://ocfs2.wiki.kernel.org
-Tools web page: https://github.com/markfasheh/ocfs2-tools
-OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
+:Project web page: http://ocfs2.wiki.kernel.org
+:Tools web page: https://github.com/markfasheh/ocfs2-tools
+:OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
All code copyright 2005 Oracle except when otherwise noted.
-CREDITS
+Credits
=======
-Some code taken from ramfs which is Copyright (C) 2000 Linus Torvalds
+Some code taken from ramfs which is Copyright |copy| 2000 Linus Torvalds
and Transmeta Corp.
Mark Fasheh <mark.fasheh@oracle.com>
@@ -96,14 +101,19 @@ operation. If the lock succeeds, you'll get an fd.
open(2) with O_CREAT to ensure the resource inode is created - dlmfs does
not automatically create inodes for existing lock resources.
+============ ===========================
Open Flag Lock Request Type
---------- -----------------
+============ ===========================
O_RDONLY Shared Read
O_RDWR Exclusive
+============ ===========================
+
+============ ===========================
Open Flag Resulting Locking Behavior
---------- --------------------------
+============ ===========================
O_NONBLOCK Trylock operation
+============ ===========================
You must provide exactly one of O_RDONLY or O_RDWR.
diff --git a/Documentation/filesystems/ecryptfs.txt b/Documentation/filesystems/ecryptfs.rst
index 01d8a08351ac..1f2edef4c57a 100644
--- a/Documentation/filesystems/ecryptfs.txt
+++ b/Documentation/filesystems/ecryptfs.rst
@@ -1,14 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
eCryptfs: A stacked cryptographic filesystem for Linux
+======================================================
eCryptfs is free software. Please see the file COPYING for details.
For documentation, please see the files in the doc/ subdirectory. For
building and installation instructions please see the INSTALL file.
-Maintainer: Phillip Hellewell
-Lead developer: Michael A. Halcrow <mhalcrow@us.ibm.com>
-Developers: Michael C. Thompson
- Kent Yoder
-Web Site: http://ecryptfs.sf.net
+:Maintainer: Phillip Hellewell
+:Lead developer: Michael A. Halcrow <mhalcrow@us.ibm.com>
+:Developers: Michael C. Thompson
+ Kent Yoder
+:Web Site: http://ecryptfs.sf.net
This software is currently undergoing development. Make sure to
maintain a backup copy of any data you write into eCryptfs.
@@ -19,34 +23,36 @@ SourceForge site:
http://sourceforge.net/projects/ecryptfs/
Userspace requirements include:
- - David Howells' userspace keyring headers and libraries (version
- 1.0 or higher), obtainable from
- http://people.redhat.com/~dhowells/keyutils/
- - Libgcrypt
+
+- David Howells' userspace keyring headers and libraries (version
+ 1.0 or higher), obtainable from
+ http://people.redhat.com/~dhowells/keyutils/
+- Libgcrypt
-NOTES
+.. note::
-In the beta/experimental releases of eCryptfs, when you upgrade
-eCryptfs, you should copy the files to an unencrypted location and
-then copy the files back into the new eCryptfs mount to migrate the
-files.
+ In the beta/experimental releases of eCryptfs, when you upgrade
+ eCryptfs, you should copy the files to an unencrypted location and
+ then copy the files back into the new eCryptfs mount to migrate the
+ files.
-MOUNT-WIDE PASSPHRASE
+Mount-wide Passphrase
+=====================
Create a new directory into which eCryptfs will write its encrypted
files (i.e., /root/crypt). Then, create the mount point directory
-(i.e., /mnt/crypt). Now it's time to mount eCryptfs:
+(i.e., /mnt/crypt). Now it's time to mount eCryptfs::
-mount -t ecryptfs /root/crypt /mnt/crypt
+ mount -t ecryptfs /root/crypt /mnt/crypt
You should be prompted for a passphrase and a salt (the salt may be
blank).
-Try writing a new file:
+Try writing a new file::
-echo "Hello, World" > /mnt/crypt/hello.txt
+ echo "Hello, World" > /mnt/crypt/hello.txt
The operation will complete. Notice that there is a new file in
/root/crypt that is at least 12288 bytes in size (depending on your
@@ -59,10 +65,13 @@ keyctl clear @u
Then umount /mnt/crypt and mount again per the instructions given
above.
-cat /mnt/crypt/hello.txt
+::
+
+ cat /mnt/crypt/hello.txt
-NOTES
+Notes
+=====
eCryptfs version 0.1 should only be mounted on (1) empty directories
or (2) directories containing files only created by eCryptfs. If you
diff --git a/Documentation/filesystems/efivarfs.txt b/Documentation/filesystems/efivarfs.rst
index 686a64bba775..90ac65683e7e 100644
--- a/Documentation/filesystems/efivarfs.txt
+++ b/Documentation/filesystems/efivarfs.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+=======================================
efivarfs - a (U)EFI variable filesystem
+=======================================
The efivarfs filesystem was created to address the shortcomings of
using entries in sysfs to maintain EFI variables. The old sysfs EFI
@@ -11,7 +14,7 @@ than a single page, sysfs isn't the best interface for this.
Variables can be created, deleted and modified with the efivarfs
filesystem.
-efivarfs is typically mounted like this,
+efivarfs is typically mounted like this::
mount -t efivarfs none /sys/firmware/efi/efivars
diff --git a/Documentation/filesystems/erofs.txt b/Documentation/filesystems/erofs.rst
index db6d39c3ae71..bf145171c2bf 100644
--- a/Documentation/filesystems/erofs.txt
+++ b/Documentation/filesystems/erofs.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+Enhanced Read-Only File System - EROFS
+======================================
+
Overview
========
@@ -6,6 +12,7 @@ from other read-only file systems, it aims to be designed for flexibility,
scalability, but be kept simple and high performance.
It is designed as a better filesystem solution for the following scenarios:
+
- read-only storage media or
- part of a fully trusted read-only solution, which means it needs to be
@@ -17,6 +24,7 @@ It is designed as a better filesystem solution for the following scenarios:
for those embedded devices with limited memory (ex, smartphone);
Here are the main features of EROFS:
+
- Little endian on-disk design;
- Currently 4KB block size (nobh) and therefore maximum 16TB address space;
@@ -24,13 +32,17 @@ Here is the main features of EROFS:
- Metadata & data could be mixed by design;
- 2 inode versions for different requirements:
+
+ ===================== ============ =====================================
compact (v1) extended (v2)
- Inode metadata size: 32 bytes 64 bytes
- Max file size: 4 GB 16 EB (also limited by max. vol size)
- Max uids/gids: 65536 4294967296
- File change time: no yes (64 + 32-bit timestamp)
- Max hardlinks: 65536 4294967296
- Metadata reserved: 4 bytes 14 bytes
+ ===================== ============ =====================================
+ Inode metadata size 32 bytes 64 bytes
+ Max file size 4 GB 16 EB (also limited by max. vol size)
+ Max uids/gids 65536 4294967296
+ File change time no yes (64 + 32-bit timestamp)
+ Max hardlinks 65536 4294967296
+ Metadata reserved 4 bytes 14 bytes
+ ===================== ============ =====================================
- Support extended attributes (xattrs) as an option;
@@ -43,29 +55,36 @@ Here is the main features of EROFS:
The following git tree provides the file system user-space tools under
development (ex, formatting tool mkfs.erofs):
->> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
+
+- git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
Bugs and patches are welcome, please kindly help us and send to the following
linux-erofs mailing list:
->> linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
+
+- linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
Mount options
=============
+=================== =========================================================
(no)user_xattr Setup Extended User Attributes. Note: xattr is enabled
by default if CONFIG_EROFS_FS_XATTR is selected.
(no)acl Setup POSIX Access Control List. Note: acl is enabled
by default if CONFIG_EROFS_FS_POSIX_ACL is selected.
cache_strategy=%s Select a strategy for cached decompression from now on:
- disabled: In-place I/O decompression only;
- readahead: Cache the last incomplete compressed physical
+
+ ========== =============================================
+ disabled In-place I/O decompression only;
+ readahead Cache the last incomplete compressed physical
cluster for further reading. It still does
in-place I/O decompression for the rest
compressed physical clusters;
- readaround: Cache the both ends of incomplete compressed
+ readaround Cache the both ends of incomplete compressed
physical clusters for further reading.
It still does in-place I/O decompression
for the rest compressed physical clusters.
+ ========== =============================================
+=================== =========================================================
On-disk details
===============
@@ -73,7 +92,7 @@ On-disk details
Summary
-------
Different from other read-only file systems, an EROFS volume is designed
-to be as simple as possible:
+to be as simple as possible::
|-> aligned with the block size
____________________________________________________________
@@ -83,41 +102,45 @@ to be as simple as possible:
All data areas should be aligned with the block size, but metadata areas
may not. All metadata can now be observed in two different spaces (views):
+
1. Inode metadata space
+
Each valid inode should be aligned with an inode slot, which is a fixed
value (32 bytes) and designed to be kept in line with compact inode size.
Each inode can be directly found with the following formula:
inode offset = meta_blkaddr * block_size + 32 * nid
- |-> aligned with 8B
- |-> followed closely
- + meta_blkaddr blocks |-> another slot
- _____________________________________________________________________
- | ... | inode | xattrs | extents | data inline | ... | inode ...
- |________|_______|(optional)|(optional)|__(optional)_|_____|__________
- |-> aligned with the inode slot size
- . .
- . .
- . .
- . .
- . .
- . .
- .____________________________________________________|-> aligned with 4B
- | xattr_ibody_header | shared xattrs | inline xattrs |
- |____________________|_______________|_______________|
- |-> 12 bytes <-|->x * 4 bytes<-| .
- . . .
- . . .
- . . .
- ._______________________________.______________________.
- | id | id | id | id | ... | id | ent | ... | ent| ... |
- |____|____|____|____|______|____|_____|_____|____|_____|
- |-> aligned with 4B
- |-> aligned with 4B
+ ::
+
+ |-> aligned with 8B
+ |-> followed closely
+ + meta_blkaddr blocks |-> another slot
+ _____________________________________________________________________
+ | ... | inode | xattrs | extents | data inline | ... | inode ...
+ |________|_______|(optional)|(optional)|__(optional)_|_____|__________
+ |-> aligned with the inode slot size
+ . .
+ . .
+ . .
+ . .
+ . .
+ . .
+ .____________________________________________________|-> aligned with 4B
+ | xattr_ibody_header | shared xattrs | inline xattrs |
+ |____________________|_______________|_______________|
+ |-> 12 bytes <-|->x * 4 bytes<-| .
+ . . .
+ . . .
+ . . .
+ ._______________________________.______________________.
+ | id | id | id | id | ... | id | ent | ... | ent| ... |
+ |____|____|____|____|______|____|_____|_____|____|_____|
+ |-> aligned with 4B
+ |-> aligned with 4B
Inode could be 32 or 64 bytes, which can be distinguished from a common
- field which all inode versions have -- i_format:
+ field which all inode versions have -- i_format::
__________________ __________________
| i_format | | i_format |
@@ -132,16 +155,19 @@ may not. All metadatas can be now observed in two different spaces (views):
proper alignment, and they could be optional for different data mappings.
_currently_ total 4 valid data mappings are supported:
+ == ====================================================================
0 flat file data without data inline (no extent);
1 fixed-sized output data compression (with non-compacted indexes);
2 flat file data with tail packing data inline (no extent);
3 fixed-sized output data compression (with compacted indexes, v5.3+).
+ == ====================================================================
The size of the optional xattrs is indicated by i_xattr_count in inode
header. Large xattrs or xattrs shared by many different files can be
stored in shared xattrs metadata rather than inlined right after inode.
2. Shared xattrs metadata space
+
Shared xattrs space is similar to the above inode space, started with
a specific block indicated by xattr_blkaddr, organized one by one with
proper alignment.
@@ -149,11 +175,13 @@ may not. All metadatas can be now observed in two different spaces (views):
Each shared xattr can also be directly found by the following formula:
xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
- |-> aligned by 4 bytes
- + xattr_blkaddr blocks |-> aligned with 4 bytes
- _________________________________________________________________________
- | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ...
- |________|_____________|_____________|_____|______________|_______________
+ ::
+
+ |-> aligned by 4 bytes
+ + xattr_blkaddr blocks |-> aligned with 4 bytes
+ _________________________________________________________________________
+ | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ...
+ |________|_____________|_____________|_____|______________|_______________
Directories
-----------
@@ -163,19 +191,21 @@ random file lookup, and all directory entries are _strictly_ recorded in
alphabetical order in order to support improved prefix binary search
algorithm (could refer to the related source code).
- ___________________________
- / |
- / ______________|________________
- / / | nameoff1 | nameoffN-1
- ____________.______________._______________v________________v__________
-| dirent | dirent | ... | dirent | filename | filename | ... | filename |
-|___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
- \ ^
- \ | * could have
- \ | trailing '\0'
- \________________________| nameoff0
+::
+
+ ___________________________
+ / |
+ / ______________|________________
+ / / | nameoff1 | nameoffN-1
+ ____________.______________._______________v________________v__________
+ | dirent | dirent | ... | dirent | filename | filename | ... | filename |
+ |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
+ \ ^
+ \ | * could have
+ \ | trailing '\0'
+ \________________________| nameoff0
- Directory block
+ Directory block
Note that apart from the offset of the first filename, nameoff0 also indicates
the total number of directory entries in this block since there is no need to
@@ -184,28 +214,27 @@ introduce another on-disk field at all.
Compression
-----------
Currently, EROFS supports 4KB fixed-sized output transparent file compression,
-as illustrated below:
-
- |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
- clusterofs clusterofs clusterofs
- | | | logical data
-_________v_______________________________v_____________________v_______________
-... | . | | . | | . | ...
-____|____.________|_____________|________.____|_____________|__.__________|____
- |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
- size size size size size
- . . . .
- . . . .
- . . . .
- _______._____________._____________._____________._____________________
- ... | | | | ... physical data
- _______|_____________|_____________|_____________|_____________________
- |-> cluster <-|-> cluster <-|-> cluster <-|
- size size size
+as illustrated below::
+
+ |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
+ clusterofs clusterofs clusterofs
+ | | | logical data
+ _________v_______________________________v_____________________v_______________
+ ... | . | | . | | . | ...
+ ____|____.________|_____________|________.____|_____________|__.__________|____
+ |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
+ size size size size size
+ . . . .
+ . . . .
+ . . . .
+ _______._____________._____________._____________._____________________
+ ... | | | | ... physical data
+ _______|_____________|_____________|_____________|_____________________
+ |-> cluster <-|-> cluster <-|-> cluster <-|
+ size size size
Currently each on-disk physical cluster can contain 4KB (un)compressed data
at most. For each logical cluster, there is a corresponding on-disk index to
describe its cluster type, physical cluster address, etc.
See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
-
diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.rst
index 94c2cf0292f5..d83dbbb162e2 100644
--- a/Documentation/filesystems/ext2.txt
+++ b/Documentation/filesystems/ext2.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
The Second Extended Filesystem
==============================
@@ -14,8 +16,9 @@ Options
Most defaults are determined by the filesystem superblock, and can be
set using tune2fs(8). Kernel-determined defaults are indicated by (*).
-bsddf (*) Makes `df' act like BSD.
-minixdf Makes `df' act like Minix.
+==================== === ================================================
+bsddf (*) Makes ``df`` act like BSD.
+minixdf Makes ``df`` act like Minix.
check=none, nocheck (*) Don't do extra checking of bitmaps on mount
(check=normal and check=strict options removed)
@@ -62,6 +65,7 @@ quota, usrquota Enable user disk quota support
grpquota Enable group disk quota support
(requires CONFIG_QUOTA).
+==================== === ================================================
noquota option is silently ignored by ext2.
@@ -294,9 +298,9 @@ respective fsck programs.
If you're exceptionally paranoid, there are 3 ways of making metadata
writes synchronous on ext2:
-per-file if you have the program source: use the O_SYNC flag to open()
-per-file if you don't have the source: use "chattr +S" on the file
-per-filesystem: add the "sync" option to mount (or in /etc/fstab)
+- per-file if you have the program source: use the O_SYNC flag to open()
+- per-file if you don't have the source: use "chattr +S" on the file
+- per-filesystem: add the "sync" option to mount (or in /etc/fstab)
the first and last are not ext2 specific but do force the metadata to
be written synchronously. See also Journaling below.
@@ -316,10 +320,12 @@ Most of these limits could be overcome with slight changes in the on-disk
format and using a compatibility flag to signal the format change (at
the expense of some compatibility).
-Filesystem block size: 1kB 2kB 4kB 8kB
-
-File size limit: 16GB 256GB 2048GB 2048GB
-Filesystem size limit: 2047GB 8192GB 16384GB 32768GB
+===================== ======= ======= ======= ========
+Filesystem block size 1kB 2kB 4kB 8kB
+===================== ======= ======= ======= ========
+File size limit 16GB 256GB 2048GB 2048GB
+Filesystem size limit 2047GB 8192GB 16384GB 32768GB
+===================== ======= ======= ======= ========
There is a 2.4 kernel limit of 2048GB for a single block device, so no
filesystem larger than that can be created at this time. There is also
@@ -370,19 +376,24 @@ ext4 and journaling.
References
==========
+======================= ===============================================
The kernel source file:/usr/src/linux/fs/ext2/
e2fsprogs (e2fsck) http://e2fsprogs.sourceforge.net/
Design & Implementation http://e2fsprogs.sourceforge.net/ext2intro.html
Journaling (ext3) ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/
Filesystem Resizing http://ext2resize.sourceforge.net/
-Compression (*) http://e2compr.sourceforge.net/
+Compression [1]_ http://e2compr.sourceforge.net/
+======================= ===============================================
Implementations for:
+
+======================= ===========================================================
Windows 95/98/NT/2000 http://www.chrysocome.net/explore2fs
-Windows 95 (*) http://www.yipton.net/content.html#FSDEXT2
-DOS client (*) ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
-OS/2 (+) ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
+Windows 95 [1]_ http://www.yipton.net/content.html#FSDEXT2
+DOS client [1]_ ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
+OS/2 [2]_ ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
RISC OS client http://www.esw-heim.tu-clausthal.de/~marco/smorbrod/IscaFS/
+======================= ===========================================================
-(*) no longer actively developed/supported (as of Apr 2001)
-(+) no longer actively developed/supported (as of Mar 2009)
+.. [1] no longer actively developed/supported (as of Apr 2001)
+.. [2] no longer actively developed/supported (as of Mar 2009)
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.rst
index 58758fbef9e0..c06cec3a8fdc 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.rst
@@ -1,4 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+===============
Ext3 Filesystem
===============
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.rst
index 4eb3e2ddd00e..d681203728d7 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.rst
@@ -1,6 +1,8 @@
-================================================================================
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================================
WHAT IS Flash-Friendly File System (F2FS)?
-================================================================================
+==========================================
NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have
been equipped on a variety of systems ranging from mobile to server systems. Since
@@ -20,14 +22,15 @@ layout, but also for selecting allocation and cleaning algorithms.
The following git tree provides the file system formatting tool (mkfs.f2fs),
a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs).
->> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
+
+- git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
For reporting bugs and sending patches, please use the following mailing list:
->> linux-f2fs-devel@lists.sourceforge.net
-================================================================================
-BACKGROUND AND DESIGN ISSUES
-================================================================================
+- linux-f2fs-devel@lists.sourceforge.net
+
+Background and Design issues
+============================
Log-structured File System (LFS)
--------------------------------
@@ -61,6 +64,7 @@ needs to reclaim these obsolete blocks seamlessly to users. This job is called
as a cleaning process.
The process consists of three operations as follows.
+
1. A victim segment is selected through referencing segment usage table.
2. It loads parent index structures of all the data in the victim identified by
segment summary blocks.
@@ -71,9 +75,8 @@ This cleaning job may cause unexpected long delays, so the most important goal
is to hide the latencies to users. And also definitely, it should reduce the
amount of valid data to be moved, and move them quickly as well.
-================================================================================
-KEY FEATURES
-================================================================================
+Key Features
+============
Flash Awareness
---------------
@@ -94,10 +97,11 @@ Cleaning Overhead
- Support multi-head logs for static/dynamic hot and cold data separation
- Introduce adaptive logging for efficient block allocation
-================================================================================
-MOUNT OPTIONS
-================================================================================
+Mount Options
+=============
+
+====================== ============================================================
background_gc=%s Turn on/off cleaning operations, namely garbage
collection, triggered in background when I/O subsystem is
idle. If background_gc=on, it will turn on the garbage
@@ -167,7 +171,10 @@ fault_injection=%d Enable fault injection in all supported types with
fault_type=%d Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
+
+ =================== ===========
Type_Name Type_Value
+ =================== ===========
FAULT_KMALLOC 0x000000001
FAULT_KVMALLOC 0x000000002
FAULT_PAGE_ALLOC 0x000000004
@@ -183,6 +190,7 @@ fault_type=%d Support configuring fault injection type, should be
FAULT_CHECKPOINT 0x000001000
FAULT_DISCARD 0x000002000
FAULT_WRITE_IO 0x000004000
+ =================== ===========
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
@@ -219,7 +227,7 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix",
non-atomic files likewise "nobarrier" mount option.
test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt
context. The fake fscrypt context is used by xfstests.
-checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable"
+checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable"
to reenable checkpointing. Is enabled by default. While
disabled, any unmounting or unexpected shutdowns will cause
the filesystem contents to appear as they did when the
@@ -246,22 +254,22 @@ compress_extension=%s Support adding specified extension, so that f2fs can enab
on compression extension list and enable compression on
these file by default rather than to enable it via ioctl.
For other files, we can still enable compression via ioctl.
+====================== ============================================================
-================================================================================
-DEBUGFS ENTRIES
-================================================================================
+Debugfs Entries
+===============
/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as
f2fs. Each file shows the whole f2fs information.
/sys/kernel/debug/f2fs/status includes:
+
- major file system information managed by f2fs currently
- average SIT information about whole segments
- current memory footprint consumed by f2fs.
-================================================================================
-SYSFS ENTRIES
-================================================================================
+Sysfs Entries
+=============
Information about mounted f2fs file systems can be found in
/sys/fs/f2fs. Each mounted filesystem will have a directory in
@@ -271,22 +279,24 @@ The files in each per-device directory are shown in table below.
Files in /sys/fs/f2fs/<devname>
(see also Documentation/ABI/testing/sysfs-fs-f2fs)
-================================================================================
-USAGE
-================================================================================
+Usage
+=====
1. Download userland tools and compile them.
2. Skip, if f2fs was compiled statically inside kernel.
- Otherwise, insert the f2fs.ko module.
- # insmod f2fs.ko
+ Otherwise, insert the f2fs.ko module::
+
+ # insmod f2fs.ko
-3. Create a directory trying to mount
- # mkdir /mnt/f2fs
+3. Create a directory trying to mount::
-4. Format the block device, and then mount as f2fs
- # mkfs.f2fs -l label /dev/block_device
- # mount -t f2fs /dev/block_device /mnt/f2fs
+ # mkdir /mnt/f2fs
+
+4. Format the block device, and then mount as f2fs::
+
+ # mkfs.f2fs -l label /dev/block_device
+ # mount -t f2fs /dev/block_device /mnt/f2fs
mkfs.f2fs
---------
@@ -294,18 +304,26 @@ The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem,
which builds a basic on-disk layout.
The options consist of:
--l [label] : Give a volume label, up to 512 unicode name.
--a [0 or 1] : Split start location of each area for heap-based allocation.
- 1 is set by default, which performs this.
--o [int] : Set overprovision ratio in percent over volume size.
- 5 is set by default.
--s [int] : Set the number of segments per section.
- 1 is set by default.
--z [int] : Set the number of sections per zone.
- 1 is set by default.
--e [str] : Set basic extension list. e.g. "mp3,gif,mov"
--t [0 or 1] : Disable discard command or not.
- 1 is set by default, which conducts discard.
+
+=============== ===========================================================
+``-l [label]`` Give a volume label, up to 512 unicode name.
+``-a [0 or 1]`` Split start location of each area for heap-based allocation.
+
+ 1 is set by default, which performs this.
+``-o [int]`` Set overprovision ratio in percent over volume size.
+
+ 5 is set by default.
+``-s [int]`` Set the number of segments per section.
+
+ 1 is set by default.
+``-z [int]`` Set the number of sections per zone.
+
+ 1 is set by default.
+``-e [str]`` Set basic extension list. e.g. "mp3,gif,mov"
+``-t [0 or 1]`` Disable discard command or not.
+
+ 1 is set by default, which conducts discard.
+=============== ===========================================================
fsck.f2fs
---------
@@ -314,7 +332,8 @@ partition, which examines whether the filesystem metadata and user-made data
are cross-referenced correctly or not.
Note that, initial version of the tool does not fix any inconsistency.
-The options consist of:
+The options consist of::
+
-d debug level [default:0]
dump.f2fs
@@ -327,20 +346,21 @@ It shows on-disk inode information recognized by a given inode number, and is
able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
./dump_sit respectively.
-The options consist of:
+The options consist of::
+
-d debug level [default:0]
-i inode no (hex)
-s [SIT dump segno from #1~#2 (decimal), for all 0~-1]
-a [SSA dump segno from #1~#2 (decimal), for all 0~-1]
-Examples:
-# dump.f2fs -i [ino] /dev/sdx
-# dump.f2fs -s 0~-1 /dev/sdx (SIT dump)
-# dump.f2fs -a 0~-1 /dev/sdx (SSA dump)
+Examples::
+
+ # dump.f2fs -i [ino] /dev/sdx
+ # dump.f2fs -s 0~-1 /dev/sdx (SIT dump)
+ # dump.f2fs -a 0~-1 /dev/sdx (SSA dump)
-================================================================================
-DESIGN
-================================================================================
+Design
+======
On-disk Layout
--------------
@@ -351,7 +371,7 @@ consists of a set of sections. By default, section and zone sizes are set to one
segment size identically, but users can easily modify the sizes by mkfs.
F2FS splits the entire volume into six areas, and all the areas except superblock
-consists of multiple segments as described below.
+consists of multiple segments as described below::
align with the zone size <-|
|-> align with the segment size
@@ -373,28 +393,28 @@ consists of multiple segments as described below.
|__zone__|
- Superblock (SB)
- : It is located at the beginning of the partition, and there exist two copies
+ It is located at the beginning of the partition, and there exist two copies
to avoid file system crash. It contains basic partition information and some
default parameters of f2fs.
- Checkpoint (CP)
- : It contains file system information, bitmaps for valid NAT/SIT sets, orphan
+ It contains file system information, bitmaps for valid NAT/SIT sets, orphan
inode lists, and summary entries of current active segments.
- Segment Information Table (SIT)
- : It contains segment information such as valid block count and bitmap for the
+ It contains segment information such as valid block count and bitmap for the
validity of all the blocks.
- Node Address Table (NAT)
- : It is composed of a block address table for all the node blocks stored in
+ It is composed of a block address table for all the node blocks stored in
Main area.
- Segment Summary Area (SSA)
- : It contains summary entries which contains the owner information of all the
+ It contains summary entries which contains the owner information of all the
data and node blocks stored in Main area.
- Main Area
- : It contains file and directory data including their indices.
+ It contains file and directory data including their indices.
In order to avoid misalignment between file system and flash-based storage, F2FS
aligns the start block address of CP with the segment size. Also, it aligns the
@@ -414,7 +434,7 @@ One of them always indicates the last valid data, which is called as shadow copy
mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism.
For file system consistency, each CP points to which NAT and SIT copies are
-valid, as shown as below.
+valid, as shown as below::
+--------+----------+---------+
| CP | SIT | NAT |
@@ -438,7 +458,7 @@ indirect node. F2FS assigns 4KB to an inode block which contains 923 data block
indices, two direct node pointers, two indirect node pointers, and one double
indirect node pointer as described below. One direct node block contains 1018
data blocks, and one indirect node block contains also 1018 node blocks. Thus,
-one inode block (i.e., a file) covers:
+one inode block (i.e., a file) covers::
4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB.
@@ -473,6 +493,8 @@ A dentry block consists of 214 dentry slots and file names. Therein a bitmap is
used to represent whether each dentry is valid or not. A dentry block occupies
4KB with the following composition.
+::
+
Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) +
dentries(11 * 214 bytes) + file name (8 * 214 bytes)
@@ -498,23 +520,25 @@ F2FS implements multi-level hash tables for directory structure. Each level has
a hash table with dedicated number of hash buckets as shown below. Note that
"A(2B)" means a bucket includes 2 data blocks.
-----------------------
-A : bucket
-B : block
-N : MAX_DIR_HASH_DEPTH
-----------------------
+::
+
+ ----------------------
+ A : bucket
+ B : block
+ N : MAX_DIR_HASH_DEPTH
+ ----------------------
-level #0 | A(2B)
- |
-level #1 | A(2B) - A(2B)
- |
-level #2 | A(2B) - A(2B) - A(2B) - A(2B)
- . | . . . .
-level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
- . | . . . .
-level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)
+ level #0 | A(2B)
+ |
+ level #1 | A(2B) - A(2B)
+ |
+ level #2 | A(2B) - A(2B) - A(2B) - A(2B)
+ . | . . . .
+ level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
+ . | . . . .
+ level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)
-The number of blocks and buckets are determined by,
+The number of blocks and buckets are determined by::
,- 2, if n < MAX_DIR_HASH_DEPTH / 2,
# of blocks in level #n = |
@@ -532,7 +556,7 @@ dentry consisting of the file name and its inode number. If not found, F2FS
scans the next hash table in level #1. In this way, F2FS scans hash tables in
each levels incrementally from 1 to N. In each levels F2FS needs to scan only
one bucket determined by the following equation, which shows O(log(# of files))
-complexity.
+complexity::
bucket number to scan in level #n = (hash value) % (# of buckets in level #n)
@@ -540,7 +564,8 @@ In the case of file creation, F2FS finds empty consecutive slots that cover the
file name. F2FS searches the empty slots in the hash tables of whole levels from
1 to N in the same way as the lookup operation.
-The following figure shows an example of two cases holding children.
+The following figure shows an example of two cases holding children::
+
--------------> Dir <--------------
| |
child child
@@ -611,14 +636,15 @@ Write-hint Policy
2) whint_mode=user-based. F2FS tries to pass down hints given by
users.
+===================== ======================== ===================
User F2FS Block
----- ---- -----
+===================== ======================== ===================
META WRITE_LIFE_NOT_SET
HOT_NODE "
WARM_NODE "
COLD_NODE "
-*ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
-*extension list " "
+ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
+extension list " "
-- buffered io
WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
@@ -635,11 +661,13 @@ WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
WRITE_LIFE_NONE " WRITE_LIFE_NONE
WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
WRITE_LIFE_LONG " WRITE_LIFE_LONG
+===================== ======================== ===================
3) whint_mode=fs-based. F2FS passes down hints with its policy.
+===================== ======================== ===================
User F2FS Block
----- ---- -----
+===================== ======================== ===================
META WRITE_LIFE_MEDIUM;
HOT_NODE WRITE_LIFE_NOT_SET
WARM_NODE "
@@ -662,6 +690,7 @@ WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
WRITE_LIFE_NONE " WRITE_LIFE_NONE
WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
WRITE_LIFE_LONG " WRITE_LIFE_LONG
+===================== ======================== ===================
Fallocate(2) Policy
-------------------
@@ -681,6 +710,7 @@ Allocating disk space
However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addresses having
zero or random data, which is useful to the below scenario where:
+
1. create(fd)
2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
3. fallocate(fd, 0, 0, size)
@@ -692,39 +722,41 @@ Compression implementation
--------------------------
- New term named cluster is defined as basic unit of compression, file can
-be divided into multiple clusters logically. One cluster includes 4 << n
-(n >= 0) logical pages, compression size is also cluster size, each of
-cluster can be compressed or not.
+ be divided into multiple clusters logically. One cluster includes 4 << n
+ (n >= 0) logical pages, compression size is also cluster size, each of
+ cluster can be compressed or not.
- In cluster metadata layout, one special block address is used to indicate
-cluster is compressed one or normal one, for compressed cluster, following
-metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs
-stores data including compress header and compressed data.
+ cluster is compressed one or normal one, for compressed cluster, following
+ metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs
+ stores data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
-support compression on write-once file, data can be compressed only when
-all logical blocks in file are valid and cluster compress ratio is lower
-than specified threshold.
+ support compression on write-once file, data can be compressed only when
+ all logical blocks in file are valid and cluster compress ratio is lower
+ than specified threshold.
- To enable compression on regular inode, there are three ways:
-* chattr +c file
-* chattr +c dir; touch dir/file
-* mount w/ -o compress_extension=ext; touch file.ext
-
-Compress metadata layout:
- [Dnode Structure]
- +-----------------------------------------------+
- | cluster 1 | cluster 2 | ......... | cluster N |
- +-----------------------------------------------+
- . . . .
- . . . .
- . Compressed Cluster . . Normal Cluster .
-+----------+---------+---------+---------+ +---------+---------+---------+---------+
-|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
-+----------+---------+---------+---------+ +---------+---------+---------+---------+
- . .
- . .
- . .
- +-------------+-------------+----------+----------------------------+
- | data length | data chksum | reserved | compressed data |
- +-------------+-------------+----------+----------------------------+
+
+ * chattr +c file
+ * chattr +c dir; touch dir/file
+ * mount w/ -o compress_extension=ext; touch file.ext
+
+Compress metadata layout::
+
+ [Dnode Structure]
+ +-----------------------------------------------+
+ | cluster 1 | cluster 2 | ......... | cluster N |
+ +-----------------------------------------------+
+ . . . .
+ . . . .
+ . Compressed Cluster . . Normal Cluster .
+ +----------+---------+---------+---------+ +---------+---------+---------+---------+
+ |compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+ +----------+---------+---------+---------+ +---------+---------+---------+---------+
+ . .
+ . .
+ . .
+ +-------------+-------------+----------+----------------------------+
+ | data length | data chksum | reserved | compressed data |
+ +-------------+-------------+----------+----------------------------+
diff --git a/Documentation/filesystems/fuse.rst b/Documentation/filesystems/fuse.rst
index 8e455065ce9e..cd717f9bf940 100644
--- a/Documentation/filesystems/fuse.rst
+++ b/Documentation/filesystems/fuse.rst
@@ -1,7 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-==============
+
+====
FUSE
-==============
+====
Definitions
===========
diff --git a/Documentation/filesystems/gfs2-uevents.txt b/Documentation/filesystems/gfs2-uevents.rst
index 19a19ebebc34..f162a2c76c69 100644
--- a/Documentation/filesystems/gfs2-uevents.txt
+++ b/Documentation/filesystems/gfs2-uevents.rst
@@ -1,14 +1,18 @@
- uevents and GFS2
- ==================
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+uevents and GFS2
+================
During the lifetime of a GFS2 mount, a number of uevents are generated.
This document explains what the events are and what they are used
for (by gfs_controld in gfs2-utils).
A list of GFS2 uevents
------------------------
+======================
1. ADD
+------
The ADD event occurs at mount time. It will always be the first
uevent generated by the newly created filesystem. If the mount
@@ -21,6 +25,7 @@ with no journal assigned), and read-only (with journal assigned) status
of the filesystem respectively.
2. ONLINE
+---------
The ONLINE uevent is generated after a successful mount or remount. It
has the same environment variables as the ADD uevent. The ONLINE
@@ -29,6 +34,7 @@ RDONLY are a relatively recent addition (2.6.32-rc+) and will not
be generated by older kernels.
3. CHANGE
+---------
The CHANGE uevent is used in two places. One is when reporting the
successful mount of the filesystem by the first node (FIRSTMOUNT=Done).
@@ -52,6 +58,7 @@ cluster. For this reason the ONLINE uevent was used when adding a new
uevent for a successful mount or remount.
4. OFFLINE
+----------
The OFFLINE uevent is only generated due to filesystem errors and is used
as part of the "withdraw" mechanism. Currently this doesn't give any
@@ -59,6 +66,7 @@ information about what the error is, which is something that needs to
be fixed.
5. REMOVE
+---------
The REMOVE uevent is generated at the end of an unsuccessful mount
or at the end of a umount of the filesystem. All REMOVE uevents will
@@ -68,9 +76,10 @@ kobject subsystem.
Information common to all GFS2 uevents (uevent environment variables)
-----------------------------------------------------------------------
+=====================================================================
1. LOCKTABLE=
+--------------
The LOCKTABLE is a string, as supplied on the mount command
line (locktable=) or via fstab. It is used as a filesystem label
@@ -78,6 +87,7 @@ as well as providing the information for a lock_dlm mount to be
able to join the cluster.
2. LOCKPROTO=
+-------------
The LOCKPROTO is a string, and its value depends on what is set
on the mount command line, or via fstab. It will be either
@@ -85,12 +95,14 @@ lock_nolock or lock_dlm. In the future other lock managers
may be supported.
3. JOURNALID=
+-------------
If a journal is in use by the filesystem (journals are not
assigned for spectator mounts) then this will give the
numeric journal id in all GFS2 uevents.
4. UUID=
+--------
With recent versions of gfs2-utils, mkfs.gfs2 writes a UUID
into the filesystem superblock. If it exists, this will
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.rst
index cc4f2306609e..8d1ab589ce18 100644
--- a/Documentation/filesystems/gfs2.txt
+++ b/Documentation/filesystems/gfs2.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
Global File System
-------------------
+==================
https://fedorahosted.org/cluster/wiki/HomePage
@@ -14,16 +17,18 @@ on one machine show up immediately on all other machines in the cluster.
GFS uses interchangeable inter-node locking mechanisms, the currently
supported mechanisms are:
- lock_nolock -- allows gfs to be used as a local file system
+ lock_nolock
+ - allows gfs to be used as a local file system
- lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking
- The dlm is found at linux/fs/dlm/
+ lock_dlm
+ - uses a distributed lock manager (dlm) for inter-node locking.
+ The dlm is found at linux/fs/dlm/
Lock_dlm depends on user space cluster management systems found
at the URL above.
To use gfs as a local file system, no external clustering systems are
-needed, simply:
+needed, simply::
$ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device
$ mount -t gfs2 /dev/block_device /dir
@@ -37,9 +42,12 @@ GFS2 is not on-disk compatible with previous versions of GFS, but it
is pretty close.
The following man pages can be found at the URL above:
+
+ ============ =============================================
fsck.gfs2 to repair a filesystem
gfs2_grow to expand a filesystem online
gfs2_jadd to add journals to a filesystem online
tunegfs2 to manipulate, examine and tune a filesystem
- gfs2_convert to convert a gfs filesystem to gfs2 in-place
+ gfs2_convert to convert a gfs filesystem to gfs2 in-place
mkfs.gfs2 to make a filesystem
+ ============ =============================================
diff --git a/Documentation/filesystems/hfs.txt b/Documentation/filesystems/hfs.rst
index d096df6db07a..ab17a005e9b1 100644
--- a/Documentation/filesystems/hfs.txt
+++ b/Documentation/filesystems/hfs.rst
@@ -1,11 +1,16 @@
-Note: This filesystem doesn't have a maintainer.
+.. SPDX-License-Identifier: GPL-2.0
+==================================
Macintosh HFS Filesystem for Linux
==================================
-HFS stands for ``Hierarchical File System'' and is the filesystem used
+
+.. Note:: This filesystem doesn't have a maintainer.
+
+
+HFS stands for ``Hierarchical File System`` and is the filesystem used
by the Mac Plus and all later Macintosh models. Earlier Macintosh
-models used MFS (``Macintosh File System''), which is not supported,
+models used MFS (``Macintosh File System``), which is not supported.
MacOS 8.1 and newer support a filesystem called HFS+ that's similar to
HFS but is extended in various areas. Use the hfsplus filesystem driver
to access such filesystems from Linux.
@@ -49,25 +54,25 @@ Writing to HFS Filesystems
HFS is not a UNIX filesystem, thus it does not have the usual features you'd
expect:
- o You can't modify the set-uid, set-gid, sticky or executable bits or the uid
+ * You can't modify the set-uid, set-gid, sticky or executable bits or the uid
and gid of files.
- o You can't create hard- or symlinks, device files, sockets or FIFOs.
+ * You can't create hard- or symlinks, device files, sockets or FIFOs.
HFS does on the other hand have the concept of multiple forks per file. These
non-standard forks are represented as hidden additional files in the normal
filesystems namespace which is kind of a kludge and makes the semantics for
them a little strange:
- o You can't create, delete or rename resource forks of files or the
+ * You can't create, delete or rename resource forks of files or the
Finder's metadata.
- o They are however created (with default values), deleted and renamed
+ * They are however created (with default values), deleted and renamed
along with the corresponding data fork or directory.
- o Copying files to a different filesystem will loose those attributes
+ * Copying files to a different filesystem will lose those attributes
that are essential for MacOS to work.
Creating HFS filesystems
-===================================
+========================
The hfsutils package from Robert Leslie contains a program called
hformat that can be used to create HFS filesystem. See
diff --git a/Documentation/filesystems/hfsplus.txt b/Documentation/filesystems/hfsplus.rst
index 59f7569fc9ed..f02f4f5fc020 100644
--- a/Documentation/filesystems/hfsplus.txt
+++ b/Documentation/filesystems/hfsplus.rst
@@ -1,4 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+======================================
Macintosh HFSPlus Filesystem for Linux
======================================
diff --git a/Documentation/filesystems/hpfs.txt b/Documentation/filesystems/hpfs.rst
index 74630bd504fb..0db152278572 100644
--- a/Documentation/filesystems/hpfs.txt
+++ b/Documentation/filesystems/hpfs.rst
@@ -1,13 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
Read/Write HPFS 2.09
+====================
+
1998-2004, Mikulas Patocka
-email: mikulas@artax.karlin.mff.cuni.cz
-homepage: http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi
+:email: mikulas@artax.karlin.mff.cuni.cz
+:homepage: http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi
-CREDITS:
+Credits
+=======
Chris Smith, 1993, original read-only HPFS, some code and hpfs structures file
is taken from it
+
Jacques Gelinas, MSDos mmap, Inspired by fs/nfs/mmap.c (Jon Tombs 15 Aug 1993)
+
Werner Almesberger, 1992, 1993, MSDos option parser & CR/LF conversion
Mount options
@@ -50,6 +58,7 @@ timeshift=(-)nnn (default 0)
File names
+==========
As in OS/2, filenames are case insensitive. However, shell thinks that names
are case sensitive, so for example when you create a file FOO, you can use
@@ -64,6 +73,7 @@ access it under names 'a.', 'a..', 'a . . . ' etc.
Extended attributes
+===================
On HPFS partitions, OS/2 can associate to each file a special information called
extended attributes. Extended attributes are pairs of (key,value) where key is
@@ -88,6 +98,7 @@ values doesn't work.
Symlinks
+========
You can do symlinks on HPFS partition, symlinks are achieved by setting extended
attribute named "SYMLINK" with symlink value. Like on ext2, you can chown and
@@ -101,6 +112,7 @@ to analyze or change OS2SYS.INI.
Codepages
+=========
HPFS can contain several uppercasing tables for several codepages and each
file has a pointer to codepage its name is in. However OS/2 was created in
@@ -128,6 +140,7 @@ this codepage - if you don't try to do what I described above :-)
Known bugs
+==========
HPFS386 on OS/2 server is not supported. HPFS386 installed on normal OS/2 client
should work. If you have OS/2 server, use only read-only mode. I don't know how
@@ -152,7 +165,8 @@ would result in directory tree splitting, that takes disk space. Workaround is
to delete other files that are leaf (probability that the file is non-leaf is
about 1/50) or to truncate file first to make some space.
You encounter this problem only if you have many directories so that
-preallocated directory band is full i.e.
+preallocated directory band is full i.e.::
+
number_of_directories / size_of_filesystem_in_mb > 4.
You can't delete open directories.
@@ -174,6 +188,7 @@ anybody know what does it mean?
What does "unbalanced tree" message mean?
+=========================================
Old versions of this driver created sometimes unbalanced dnode trees. OS/2
chkdsk doesn't scream if the tree is unbalanced (and sometimes creates
@@ -187,6 +202,7 @@ whole created by this driver, it is BUG - let me know about it.
Bugs in OS/2
+============
When you have two (or more) lost directories pointing each to other, chkdsk
locks up when repairing filesystem.
@@ -199,98 +215,139 @@ File names like "a .b" are marked as 'long' by OS/2 but chkdsk "corrects" it and
marks them as short (and writes "minor fs error corrected"). This bug is not in
HPFS386.
-Codepage bugs described above.
+Codepage bugs described above
+=============================
If you don't install fixpacks, there are many, many more...
History
+=======
+
+====== =========================================================================
+0.90 First public release
+0.91 Fixed bug that caused shooting to memory when write_inode was called on
+ open inode (rarely happened)
+0.92 Fixed a little memory leak in freeing directory inodes
+0.93 Fixed bug that locked up the machine when there were too many filenames
+ with first 15 characters same
+ Fixed write_file to zero file when writing behind file end
+0.94 Fixed a little memory leak when trying to delete busy file or directory
+0.95 Fixed a bug that i_hpfs_parent_dir was not updated when moving files
+1.90 First version for 2.1.1xx kernels
+1.91 Fixed a bug that chk_sectors failed when sectors were at the end of disk
+ Fixed a race-condition when write_inode is called while deleting file
+ Fixed a bug that could possibly happen (with very low probability) when
+ using 0xff in filenames.
+
+ Rewritten locking to avoid race-conditions
+
+ Mount option 'eas' now works
+
+ Fsync no longer returns error
+
+ Files beginning with '.' are marked hidden
+
+ Remount support added
+
+ Alloc is not so slow when filesystem becomes full
+
+ Atimes are no more updated because it slows down operation
+
+ Code cleanup (removed all commented debug prints)
+1.92 Corrected a bug when sync was called just before closing file
+1.93 Modified, so that it works with kernels >= 2.1.131, I don't know if it
+ works with previous versions
+
+ Fixed a possible problem with disks > 64G (but I don't have one, so I can't
+ test it)
+
+ Fixed a file overflow at 2G
+
+ Added new option 'timeshift'
+
+ Changed behaviour on HPFS386: It is now possible to operate on HPFS386 in
+ read-only mode
+
+ Fixed a bug that slowed down alloc and prevented allocating 100% space
+ (this bug was not destructive)
+1.94 Added workaround for one bug in Linux
+
+ Fixed one buffer leak
+
+ Fixed some incompatibilities with large extended attributes (but it's still
+ not 100% ok, I have no info on it and OS/2 doesn't want to create them)
+
+ Rewritten allocation
-0.90 First public release
-0.91 Fixed bug that caused shooting to memory when write_inode was called on
- open inode (rarely happened)
-0.92 Fixed a little memory leak in freeing directory inodes
-0.93 Fixed bug that locked up the machine when there were too many filenames
- with first 15 characters same
- Fixed write_file to zero file when writing behind file end
-0.94 Fixed a little memory leak when trying to delete busy file or directory
-0.95 Fixed a bug that i_hpfs_parent_dir was not updated when moving files
-1.90 First version for 2.1.1xx kernels
-1.91 Fixed a bug that chk_sectors failed when sectors were at the end of disk
- Fixed a race-condition when write_inode is called while deleting file
- Fixed a bug that could possibly happen (with very low probability) when
- using 0xff in filenames
- Rewritten locking to avoid race-conditions
- Mount option 'eas' now works
- Fsync no longer returns error
- Files beginning with '.' are marked hidden
- Remount support added
- Alloc is not so slow when filesystem becomes full
- Atimes are no more updated because it slows down operation
- Code cleanup (removed all commented debug prints)
-1.92 Corrected a bug when sync was called just before closing file
-1.93 Modified, so that it works with kernels >= 2.1.131, I don't know if it
- works with previous versions
- Fixed a possible problem with disks > 64G (but I don't have one, so I can't
- test it)
- Fixed a file overflow at 2G
- Added new option 'timeshift'
- Changed behaviour on HPFS386: It is now possible to operate on HPFS386 in
- read-only mode
- Fixed a bug that slowed down alloc and prevented allocating 100% space
- (this bug was not destructive)
-1.94 Added workaround for one bug in Linux
- Fixed one buffer leak
- Fixed some incompatibilities with large extended attributes (but it's still
- not 100% ok, I have no info on it and OS/2 doesn't want to create them)
- Rewritten allocation
- Fixed a bug with i_blocks (du sometimes didn't display correct values)
- Directories have no longer archive attribute set (some programs don't like
- it)
- Fixed a bug that it set badly one flag in large anode tree (it was not
- destructive)
-1.95 Fixed one buffer leak, that could happen on corrupted filesystem
- Fixed one bug in allocation in 1.94
-1.96 Added workaround for one bug in OS/2 (HPFS locked up, HPFS386 reported
- error sometimes when opening directories in PMSHELL)
- Fixed a possible bitmap race
- Fixed possible problem on large disks
- You can now delete open files
- Fixed a nondestructive race in rename
-1.97 Support for HPFS v3 (on large partitions)
- Fixed a bug that it didn't allow creation of files > 128M (it should be 2G)
+ Fixed a bug with i_blocks (du sometimes didn't display correct values)
+
+ Directories have no longer archive attribute set (some programs don't like
+ it)
+
+ Fixed a bug that it set badly one flag in large anode tree (it was not
+ destructive)
+1.95 Fixed one buffer leak, that could happen on corrupted filesystem
+
+ Fixed one bug in allocation in 1.94
+1.96 Added workaround for one bug in OS/2 (HPFS locked up, HPFS386 reported
+ error sometimes when opening directories in PMSHELL)
+
+ Fixed a possible bitmap race
+
+ Fixed possible problem on large disks
+
+ You can now delete open files
+
+ Fixed a nondestructive race in rename
+1.97 Support for HPFS v3 (on large partitions)
+
+ Fixed a bug that it didn't allow creation of files > 128M
+ (it should be 2G)
1.97.1 Changed names of global symbols
+
Fixed a bug when chmoding or chowning root directory
-1.98 Fixed a deadlock when using old_readdir
- Better directory handling; workaround for "unbalanced tree" bug in OS/2
-1.99 Corrected a possible problem when there's not enough space while deleting
- file
- Now it tries to truncate the file if there's not enough space when deleting
- Removed a lot of redundant code
-2.00 Fixed a bug in rename (it was there since 1.96)
- Better anti-fragmentation strategy
-2.01 Fixed problem with directory listing over NFS
- Directory lseek now checks for proper parameters
- Fixed race-condition in buffer code - it is in all filesystems in Linux;
- when reading device (cat /dev/hda) while creating files on it, files
- could be damaged
-2.02 Workaround for bug in breada in Linux. breada could cause accesses beyond
- end of partition
-2.03 Char, block devices and pipes are correctly created
- Fixed non-crashing race in unlink (Alexander Viro)
- Now it works with Japanese version of OS/2
-2.04 Fixed error when ftruncate used to extend file
-2.05 Fixed crash when got mount parameters without =
- Fixed crash when allocation of anode failed due to full disk
- Fixed some crashes when block io or inode allocation failed
-2.06 Fixed some crash on corrupted disk structures
- Better allocation strategy
- Reschedule points added so that it doesn't lock CPU long time
- It should work in read-only mode on Warp Server
-2.07 More fixes for Warp Server. Now it really works
-2.08 Creating new files is not so slow on large disks
- An attempt to sync deleted file does not generate filesystem error
-2.09 Fixed error on extremely fragmented files
-
-
- vim: set textwidth=80:
+1.98 Fixed a deadlock when using old_readdir
+ Better directory handling; workaround for "unbalanced tree" bug in OS/2
+1.99 Corrected a possible problem when there's not enough space while deleting
+ file
+
+ Now it tries to truncate the file if there's not enough space when
+ deleting
+
+ Removed a lot of redundant code
+2.00 Fixed a bug in rename (it was there since 1.96)
+ Better anti-fragmentation strategy
+2.01 Fixed problem with directory listing over NFS
+
+ Directory lseek now checks for proper parameters
+
+ Fixed race-condition in buffer code - it is in all filesystems in Linux;
+ when reading device (cat /dev/hda) while creating files on it, files
+ could be damaged
+2.02 Workaround for bug in breada in Linux. breada could cause accesses beyond
+ end of partition
+2.03 Char, block devices and pipes are correctly created
+
+ Fixed non-crashing race in unlink (Alexander Viro)
+
+ Now it works with Japanese version of OS/2
+2.04 Fixed error when ftruncate used to extend file
+2.05 Fixed crash when got mount parameters without =
+
+ Fixed crash when allocation of anode failed due to full disk
+
+ Fixed some crashes when block io or inode allocation failed
+2.06 Fixed some crash on corrupted disk structures
+
+ Better allocation strategy
+
+ Reschedule points added so that it doesn't lock CPU long time
+
+ It should work in read-only mode on Warp Server
+2.07 More fixes for Warp Server. Now it really works
+2.08 Creating new files is not so slow on large disks
+
+ An attempt to sync deleted file does not generate filesystem error
+2.09 Fixed error on extremely fragmented files
+====== =========================================================================
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 386eaad008b2..e7b46dac7079 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -1,3 +1,5 @@
+.. _filesystems_index:
+
===============================
Filesystems in the Linux kernel
===============================
@@ -46,8 +48,53 @@ Documentation for filesystem implementations.
.. toctree::
:maxdepth: 2
+ 9p
+ adfs
+ affs
+ afs
autofs
+ autofs-mount-control
+ befs
+ bfs
+ btrfs
+ ceph
+ cramfs
+ debugfs
+ dlmfs
+ ecryptfs
+ efivarfs
+ erofs
+ ext2
+ ext3
+ f2fs
+ gfs2
+ gfs2-uevents
+ hfs
+ hfsplus
+ hpfs
fuse
+ inotify
+ isofs
+ nilfs2
+ nfs/index
+ ntfs
+ ocfs2
+ ocfs2-online-filecheck
+ omfs
+ orangefs
overlayfs
+ proc
+ qnx6
+ ramfs-rootfs-initramfs
+ relay
+ romfs
+ squashfs
+ sysfs
+ sysv-fs
+ tmpfs
+ ubifs
+ ubifs-authentication
+ udf
virtiofs
vfat
+ zonefs
diff --git a/Documentation/filesystems/inotify.txt b/Documentation/filesystems/inotify.rst
index 51f61db787fb..7f7ef8af0e1e 100644
--- a/Documentation/filesystems/inotify.txt
+++ b/Documentation/filesystems/inotify.rst
@@ -1,27 +1,36 @@
- inotify
- a powerful yet simple file change notification system
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================================
+Inotify - A Powerful yet Simple File Change Notification System
+===============================================================
Document started 15 Mar 2005 by Robert Love <rml@novell.com>
+
Document updated 4 Jan 2015 by Zhang Zhen <zhenzhang.zhang@huawei.com>
- --Deleted obsoleted interface, just refer to manpages for user interface.
+
+ - Deleted obsoleted interface, just refer to manpages for user interface.
(i) Rationale
-Q: What is the design decision behind not tying the watch to the open fd of
+Q:
+ What is the design decision behind not tying the watch to the open fd of
the watched object?
-A: Watches are associated with an open inotify device, not an open file.
+A:
+ Watches are associated with an open inotify device, not an open file.
This solves the primary problem with dnotify: keeping the file open pins
the file and thus, worse, pins the mount. Dnotify is therefore infeasible
for use on a desktop system with removable media as the media cannot be
unmounted. Watching a file should not require that it be open.
-Q: What is the design decision behind using an-fd-per-instance as opposed to
+Q:
+ What is the design decision behind using an-fd-per-instance as opposed to
an fd-per-watch?
-A: An fd-per-watch quickly consumes more file descriptors than are allowed,
+A:
+ An fd-per-watch quickly consumes more file descriptors than are allowed,
more fd's than are feasible to manage, and more fd's than are optimally
select()-able. Yes, root can bump the per-process fd limit and yes, users
can use epoll, but requiring both is a silly and extraneous requirement.
@@ -29,8 +38,8 @@ A: An fd-per-watch quickly consumes more file descriptors than are allowed,
spaces is thus sensible. The current design is what user-space developers
want: Users initialize inotify, once, and add n watches, requiring but one
fd and no twiddling with fd limits. Initializing an inotify instance two
- thousand times is silly. If we can implement user-space's preferences
- cleanly--and we can, the idr layer makes stuff like this trivial--then we
+ thousand times is silly. If we can implement user-space's preferences
+ cleanly--and we can, the idr layer makes stuff like this trivial--then we
should.
There are other good arguments. With a single fd, there is a single
@@ -65,9 +74,11 @@ A: An fd-per-watch quickly consumes more file descriptors than are allowed,
need not be a one-fd-per-process mapping; it is one-fd-per-queue and a
process can easily want more than one queue.
-Q: Why the system call approach?
+Q:
+ Why the system call approach?
-A: The poor user-space interface is the second biggest problem with dnotify.
+A:
+ The poor user-space interface is the second biggest problem with dnotify.
Signals are a terrible, terrible interface for file notification. Or for
anything, for that matter. The ideal solution, from all perspectives, is a
file descriptor-based one that allows basic file I/O and poll/select.
diff --git a/Documentation/filesystems/isofs.rst b/Documentation/filesystems/isofs.rst
new file mode 100644
index 000000000000..08fd469091d4
--- /dev/null
+++ b/Documentation/filesystems/isofs.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+ISO9660 Filesystem
+==================
+
+Mount options that are the same as for msdos and vfat partitions.
+
+ ========= ========================================================
+ gid=nnn All files in the partition will be in group nnn.
+ uid=nnn All files in the partition will be owned by user id nnn.
+ umask=nnn The permission mask (see umask(1)) for the partition.
+ ========= ========================================================
+
+Mount options that are the same as for vfat partitions. These are only useful
+when using discs encoded using Microsoft's Joliet extensions.
+
+ ============== =============================================================
+ iocharset=name Character set to use for converting from Unicode to
+ ASCII. Joliet filenames are stored in Unicode format, but
+ Unix for the most part doesn't know how to deal with Unicode.
+ There is also an option of doing UTF-8 translations with the
+ utf8 option.
+ utf8 Encode Unicode names in UTF-8 format. Default is no.
+ ============== =============================================================
+
+Mount options unique to the isofs filesystem.
+
+ ================= ============================================================
+ block=512 Set the block size for the disk to 512 bytes
+ block=1024 Set the block size for the disk to 1024 bytes
+ block=2048 Set the block size for the disk to 2048 bytes
+ check=relaxed Matches filenames with different cases
+ check=strict Matches only filenames with the exact same case
+ cruft Try to handle badly formatted CDs.
+ map=off Do not map non-Rock Ridge filenames to lower case
+ map=normal Map non-Rock Ridge filenames to lower case
+ map=acorn As map=normal but also apply Acorn extensions if present
+ mode=xxx Sets the permissions on files to xxx unless Rock Ridge
+ extensions set the permissions otherwise
+ dmode=xxx Sets the permissions on directories to xxx unless Rock Ridge
+ extensions set the permissions otherwise
+ overriderockperm Set permissions on files and directories according to
+ 'mode' and 'dmode' even though Rock Ridge extensions are
+ present.
+ nojoliet Ignore Joliet extensions if they are present.
+ norock Ignore Rock Ridge extensions if they are present.
+ hide Completely strip hidden files from the file system.
+ showassoc Show files marked with the 'associated' bit
+ unhide Deprecated; showing hidden files is now default;
+ If given, it is a synonym for 'showassoc' which will
+ recreate previous unhide behavior
+ session=x Select number of session on multisession CD
+ sbsector=xxx Session begins from sector xxx
+ ================= ============================================================
+
+Recommended documents about ISO 9660 standard are located at:
+
+- http://www.y-adagio.com/
+- ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf
+
+Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically
+identical with ISO 9660.", so it is a valid and gratis substitute of the
+official ISO specification.
diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt
deleted file mode 100644
index ba0a93384de0..000000000000
--- a/Documentation/filesystems/isofs.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-Mount options that are the same as for msdos and vfat partitions.
-
- gid=nnn All files in the partition will be in group nnn.
- uid=nnn All files in the partition will be owned by user id nnn.
- umask=nnn The permission mask (see umask(1)) for the partition.
-
-Mount options that are the same as vfat partitions. These are only useful
-when using discs encoded using Microsoft's Joliet extensions.
- iocharset=name Character set to use for converting from Unicode to
- ASCII. Joliet filenames are stored in Unicode format, but
- Unix for the most part doesn't know how to deal with Unicode.
- There is also an option of doing UTF-8 translations with the
- utf8 option.
- utf8 Encode Unicode names in UTF-8 format. Default is no.
-
-Mount options unique to the isofs filesystem.
- block=512 Set the block size for the disk to 512 bytes
- block=1024 Set the block size for the disk to 1024 bytes
- block=2048 Set the block size for the disk to 2048 bytes
- check=relaxed Matches filenames with different cases
- check=strict Matches only filenames with the exact same case
- cruft Try to handle badly formatted CDs.
- map=off Do not map non-Rock Ridge filenames to lower case
- map=normal Map non-Rock Ridge filenames to lower case
- map=acorn As map=normal but also apply Acorn extensions if present
- mode=xxx Sets the permissions on files to xxx unless Rock Ridge
- extensions set the permissions otherwise
- dmode=xxx Sets the permissions on directories to xxx unless Rock Ridge
- extensions set the permissions otherwise
- overriderockperm Set permissions on files and directories according to
- 'mode' and 'dmode' even though Rock Ridge extensions are
- present.
- nojoliet Ignore Joliet extensions if they are present.
- norock Ignore Rock Ridge extensions if they are present.
- hide Completely strip hidden files from the file system.
- showassoc Show files marked with the 'associated' bit
- unhide Deprecated; showing hidden files is now default;
- If given, it is a synonym for 'showassoc' which will
- recreate previous unhide behavior
- session=x Select number of session on multisession CD
- sbsector=xxx Session begins from sector xxx
-
-Recommended documents about ISO 9660 standard are located at:
-http://www.y-adagio.com/
-ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf
-Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically
-identical with ISO 9660.", so it is a valid and gratis substitute of the
-official ISO specification.
diff --git a/Documentation/filesystems/nfs/index.rst b/Documentation/filesystems/nfs/index.rst
new file mode 100644
index 000000000000..65805624e39b
--- /dev/null
+++ b/Documentation/filesystems/nfs/index.rst
@@ -0,0 +1,13 @@
+===============================
+NFS
+===============================
+
+
+.. toctree::
+ :maxdepth: 1
+
+ pnfs
+ rpc-cache
+ rpc-server-gss
+ nfs41-server
+ knfsd-stats
diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.rst
index 1a5d82180b84..80bcf13550de 100644
--- a/Documentation/filesystems/nfs/knfsd-stats.txt
+++ b/Documentation/filesystems/nfs/knfsd-stats.rst
@@ -1,7 +1,9 @@
-
+============================
Kernel NFS Server Statistics
============================
+:Authors: Greg Banks <gnb@sgi.com> - 26 Mar 2009
+
This document describes the format and semantics of the statistics
which the kernel NFS server makes available to userspace. These
statistics are available in several text form pseudo files, each of
@@ -18,7 +20,7 @@ by parsing routines. All other lines contain a sequence of fields
separated by whitespace.
/proc/fs/nfsd/pool_stats
-------------------------
+========================
This file is available in kernels from 2.6.30 onwards, if the
/proc/fs/nfsd filesystem is mounted (it almost always should be).
@@ -109,15 +111,12 @@ this case), or the transport can be enqueued for later attention
(sockets-enqueued counts this case), or the packet can be temporarily
deferred because the transport is currently being used by an nfsd
thread. This last case is not very interesting and is not explicitly
-counted, but can be inferred from the other counters thus:
+counted, but can be inferred from the other counters thus::
-packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken )
+ packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken )
More
-----
-Descriptions of the other statistics file should go here.
-
+====
-Greg Banks <gnb@sgi.com>
-26 Mar 2009
+Descriptions of the other statistics files should go here.
diff --git a/Documentation/filesystems/nfs/nfs41-server.rst b/Documentation/filesystems/nfs/nfs41-server.rst
new file mode 100644
index 000000000000..16b5f02f81c3
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs41-server.rst
@@ -0,0 +1,256 @@
+=============================
+NFSv4.1 Server Implementation
+=============================
+
+Server support for minorversion 1 can be controlled using the
+/proc/fs/nfsd/versions control file. The string output returned
+by reading this file will contain either "+4.1" or "-4.1"
+correspondingly.
+
+Currently, server support for minorversion 1 is enabled by default.
+It can be disabled at run time by writing the string "-4.1" to
+the /proc/fs/nfsd/versions control file. Note that to write this
+control file, the nfsd service must be taken down. You can use rpc.nfsd
+for this; see rpc.nfsd(8).
+
+(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
+"-4", respectively. Therefore, code meant to work on both new and old
+kernels must turn 4.1 on or off *before* turning support for version 4
+on or off; rpc.nfsd does this correctly.)
+
+The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
+on RFC 5661.
+
+From the many new features in NFSv4.1 the current implementation
+focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
+"exactly once" semantics and better control and throttling of the
+resources allocated for each client.
+
+The table below, taken from the NFSv4.1 document, lists
+the operations that are mandatory to implement (REQ), optional
+(OPT), and NFSv4.0 operations that are required not to implement (MNI)
+in minor version 1. The first column indicates the operations that
+are not supported yet by the Linux server implementation.
+
+The OPTIONAL features identified and their abbreviations are as follows:
+
+- **pNFS** Parallel NFS
+- **FDELG** File Delegations
+- **DDELG** Directory Delegations
+
+The following abbreviations indicate the linux server implementation status.
+
+- **I** Implemented NFSv4.1 operations.
+- **NS** Not Supported.
+- **NS\*** Unimplemented optional feature.
+
+Operations
+==========
+
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| Implementation status | Operation | REQ,REC, OPT or MNI | Feature (REQ, REC or OPT) | Definition |
++=======================+======================+=====================+===========================+================+
+| | ACCESS | REQ | | Section 18.1 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | BACKCHANNEL_CTL | REQ | | Section 18.33 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | CLOSE | REQ | | Section 18.2 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | COMMIT | REQ | | Section 18.3 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | CREATE | REQ | | Section 18.4 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | CREATE_SESSION | REQ | | Section 18.36 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | DELEGRETURN | OPT | FDELG, | Section 18.6 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | | | DDELG, pNFS | |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | | | (REQ) | |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | DESTROY_CLIENTID | REQ | | Section 18.50 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | DESTROY_SESSION | REQ | | Section 18.37 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | EXCHANGE_ID | REQ | | Section 18.35 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | FREE_STATEID | REQ | | Section 18.38 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | GETATTR | REQ | | Section 18.7 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | GETFH | REQ | | Section 18.8 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LINK | OPT | | Section 18.9 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOCK | REQ | | Section 18.10 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOCKT | REQ | | Section 18.11 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOCKU | REQ | | Section 18.12 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOOKUP | REQ | | Section 18.13 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | LOOKUPP | REQ | | Section 18.14 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | NVERIFY | REQ | | Section 18.15 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | OPEN | REQ | | Section 18.16 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | OPENATTR | OPT | | Section 18.17 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | OPEN_CONFIRM | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | OPEN_DOWNGRADE | REQ | | Section 18.18 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | PUTFH | REQ | | Section 18.19 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | PUTPUBFH | REQ | | Section 18.20 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | PUTROOTFH | REQ | | Section 18.21 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | READ | REQ | | Section 18.22 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | READDIR | REQ | | Section 18.23 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | READLINK | OPT | | Section 18.24 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RECLAIM_COMPLETE | REQ | | Section 18.51 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RELEASE_LOCKOWNER | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | REMOVE | REQ | | Section 18.25 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RENAME | REQ | | Section 18.26 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RENEW | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | RESTOREFH | REQ | | Section 18.27 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SAVEFH | REQ | | Section 18.28 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SECINFO | REQ | | Section 18.29 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | | | layout (REQ) | Section 13.12 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | SEQUENCE | REQ | | Section 18.46 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SETATTR | REQ | | Section 18.30 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SETCLIENTID | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | SETCLIENTID_CONFIRM | MNI | | N/A |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS | SET_SSV | REQ | | Section 18.47 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| I | TEST_STATEID | REQ | | Section 18.48 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | VERIFY | REQ | | Section 18.31 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| NS* | WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+| | WRITE | REQ | | Section 18.32 |
++-----------------------+----------------------+---------------------+---------------------------+----------------+
+
+
+Callback Operations
+===================
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| Implementation status | Operation | REQ,REC, OPT or MNI | Feature (REQ, REC or OPT) | Definition |
++=======================+=========================+=====================+===========================+===============+
+| | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| I | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_NOTIFY_LOCK | OPT | | Section 20.11 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | CB_RECALL | OPT | FDELG, | Section 20.2 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | DDELG, pNFS | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_RECALL_ANY | OPT | FDELG, | Section 20.6 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | DDELG, pNFS | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS | CB_RECALL_SLOT | REQ | | Section 20.8 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | DDELG, pNFS | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| NS* | CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | DDELG, pNFS | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+| | | | (REQ) | |
++-----------------------+-------------------------+---------------------+---------------------------+---------------+
+
+
+Implementation notes:
+=====================
+
+SSV:
+ The spec claims this is mandatory, but we don't actually know of any
+ implementations, so we're ignoring it for now. The server returns
+ NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof.
+
+GSS on the backchannel:
+ Again, theoretically required but not widely implemented (in
+ particular, the current Linux client doesn't request it). We return
+ NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION.
+
+DELEGPURGE:
+ mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
+ CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
+ persist across client reboots). Thus we need not implement this for
+ now.
+
+EXCHANGE_ID:
+ implementation ids are ignored
+
+CREATE_SESSION:
+ backchannel attributes are ignored
+
+SEQUENCE:
+ no support for dynamic slot table renegotiation (optional)
+
+Nonstandard compound limitations:
+ No support for a sessions fore channel RPC compound that requires both a
+ ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
+ fail to live up to the promise we made in CREATE_SESSION fore channel
+ negotiation.
+
+See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
deleted file mode 100644
index 682a59fabe3f..000000000000
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ /dev/null
@@ -1,173 +0,0 @@
-NFSv4.1 Server Implementation
-
-Server support for minorversion 1 can be controlled using the
-/proc/fs/nfsd/versions control file. The string output returned
-by reading this file will contain either "+4.1" or "-4.1"
-correspondingly.
-
-Currently, server support for minorversion 1 is enabled by default.
-It can be disabled at run time by writing the string "-4.1" to
-the /proc/fs/nfsd/versions control file. Note that to write this
-control file, the nfsd service must be taken down. You can use rpc.nfsd
-for this; see rpc.nfsd(8).
-
-(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
-"-4", respectively. Therefore, code meant to work on both new and old
-kernels must turn 4.1 on or off *before* turning support for version 4
-on or off; rpc.nfsd does this correctly.)
-
-The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
-on RFC 5661.
-
-From the many new features in NFSv4.1 the current implementation
-focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
-"exactly once" semantics and better control and throttling of the
-resources allocated for each client.
-
-The table below, taken from the NFSv4.1 document, lists
-the operations that are mandatory to implement (REQ), optional
-(OPT), and NFSv4.0 operations that are required not to implement (MNI)
-in minor version 1. The first column indicates the operations that
-are not supported yet by the linux server implementation.
-
-The OPTIONAL features identified and their abbreviations are as follows:
- pNFS Parallel NFS
- FDELG File Delegations
- DDELG Directory Delegations
-
-The following abbreviations indicate the linux server implementation status.
- I Implemented NFSv4.1 operations.
- NS Not Supported.
- NS* Unimplemented optional feature.
-
-Operations
-
- +----------------------+------------+--------------+----------------+
- | Operation | REQ, REC, | Feature | Definition |
- | | OPT, or | (REQ, REC, | |
- | | MNI | or OPT) | |
- +----------------------+------------+--------------+----------------+
- | ACCESS | REQ | | Section 18.1 |
-I | BACKCHANNEL_CTL | REQ | | Section 18.33 |
-I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 |
- | CLOSE | REQ | | Section 18.2 |
- | COMMIT | REQ | | Section 18.3 |
- | CREATE | REQ | | Section 18.4 |
-I | CREATE_SESSION | REQ | | Section 18.36 |
-NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 |
- | DELEGRETURN | OPT | FDELG, | Section 18.6 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
-I | DESTROY_CLIENTID | REQ | | Section 18.50 |
-I | DESTROY_SESSION | REQ | | Section 18.37 |
-I | EXCHANGE_ID | REQ | | Section 18.35 |
-I | FREE_STATEID | REQ | | Section 18.38 |
- | GETATTR | REQ | | Section 18.7 |
-I | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 |
-NS*| GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 |
- | GETFH | REQ | | Section 18.8 |
-NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 |
-I | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 |
-I | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 |
-I | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 |
- | LINK | OPT | | Section 18.9 |
- | LOCK | REQ | | Section 18.10 |
- | LOCKT | REQ | | Section 18.11 |
- | LOCKU | REQ | | Section 18.12 |
- | LOOKUP | REQ | | Section 18.13 |
- | LOOKUPP | REQ | | Section 18.14 |
- | NVERIFY | REQ | | Section 18.15 |
- | OPEN | REQ | | Section 18.16 |
-NS*| OPENATTR | OPT | | Section 18.17 |
- | OPEN_CONFIRM | MNI | | N/A |
- | OPEN_DOWNGRADE | REQ | | Section 18.18 |
- | PUTFH | REQ | | Section 18.19 |
- | PUTPUBFH | REQ | | Section 18.20 |
- | PUTROOTFH | REQ | | Section 18.21 |
- | READ | REQ | | Section 18.22 |
- | READDIR | REQ | | Section 18.23 |
- | READLINK | OPT | | Section 18.24 |
- | RECLAIM_COMPLETE | REQ | | Section 18.51 |
- | RELEASE_LOCKOWNER | MNI | | N/A |
- | REMOVE | REQ | | Section 18.25 |
- | RENAME | REQ | | Section 18.26 |
- | RENEW | MNI | | N/A |
- | RESTOREFH | REQ | | Section 18.27 |
- | SAVEFH | REQ | | Section 18.28 |
- | SECINFO | REQ | | Section 18.29 |
-I | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, |
- | | | layout (REQ) | Section 13.12 |
-I | SEQUENCE | REQ | | Section 18.46 |
- | SETATTR | REQ | | Section 18.30 |
- | SETCLIENTID | MNI | | N/A |
- | SETCLIENTID_CONFIRM | MNI | | N/A |
-NS | SET_SSV | REQ | | Section 18.47 |
-I | TEST_STATEID | REQ | | Section 18.48 |
- | VERIFY | REQ | | Section 18.31 |
-NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 |
- | WRITE | REQ | | Section 18.32 |
-
-Callback Operations
-
- +-------------------------+-----------+-------------+---------------+
- | Operation | REQ, REC, | Feature | Definition |
- | | OPT, or | (REQ, REC, | |
- | | MNI | or OPT) | |
- +-------------------------+-----------+-------------+---------------+
- | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 |
-I | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 |
-NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 |
-NS*| CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 |
-NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 |
-NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 |
- | CB_RECALL | OPT | FDELG, | Section 20.2 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
-NS*| CB_RECALL_ANY | OPT | FDELG, | Section 20.6 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
-NS | CB_RECALL_SLOT | REQ | | Section 20.8 |
-NS*| CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 |
- | | | (REQ) | |
-I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
-NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
- | | | DDELG, pNFS | |
- | | | (REQ) | |
- +-------------------------+-----------+-------------+---------------+
-
-Implementation notes:
-
-SSV:
-* The spec claims this is mandatory, but we don't actually know of any
- implementations, so we're ignoring it for now. The server returns
- NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof.
-
-GSS on the backchannel:
-* Again, theoretically required but not widely implemented (in
- particular, the current Linux client doesn't request it). We return
- NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION.
-
-DELEGPURGE:
-* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
- CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
- persist across client reboots). Thus we need not implement this for
- now.
-
-EXCHANGE_ID:
-* implementation ids are ignored
-
-CREATE_SESSION:
-* backchannel attributes are ignored
-
-SEQUENCE:
-* no support for dynamic slot table renegotiation (optional)
-
-Nonstandard compound limitations:
-* No support for a sessions fore channel RPC compound that requires both a
- ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
- fail to live up to the promise we made in CREATE_SESSION fore channel
- negotiation.
-
-See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.rst
index 80dc0bdc302a..7c470ecdc3a9 100644
--- a/Documentation/filesystems/nfs/pnfs.txt
+++ b/Documentation/filesystems/nfs/pnfs.rst
@@ -1,15 +1,17 @@
-Reference counting in pnfs:
+==========================
+Reference counting in pnfs
==========================
The are several inter-related caches. We have layouts which can
reference multiple devices, each of which can reference multiple data servers.
Each data server can be referenced by multiple devices. Each device
-can be referenced by multiple layouts. To keep all of this straight,
+can be referenced by multiple layouts. To keep all of this straight,
we need to reference count.
struct pnfs_layout_hdr
-----------------------
+======================
+
The on-the-wire command LAYOUTGET corresponds to struct
pnfs_layout_segment, usually referred to by the variable name lseg.
Each nfs_inode may hold a pointer to a cache of these layout
@@ -25,7 +27,8 @@ the reference count, as the layout is kept around by the lseg that
keeps it in the list.
deviceid_cache
---------------
+==============
+
lsegs reference device ids, which are resolved per nfs_client and
layout driver type. The device ids are held in a RCU cache (struct
nfs4_deviceid_cache). The cache itself is referenced across each
@@ -38,24 +41,26 @@ justification, but seems reasonable given that we can have multiple
deviceid's per filesystem, and multiple filesystems per nfs_client.
The hash code is copied from the nfsd code base. A discussion of
-hashing and variations of this algorithm can be found at:
-http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809
+hashing and variations of this algorithm can be found `here
+<http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809>`_.
data server cache
------------------
+=================
+
file driver devices refer to data servers, which are kept in a module
level cache. Its reference is held over the lifetime of the deviceid
pointing to it.
lseg
-----
+====
+
lseg maintains an extra reference corresponding to the NFS_LSEG_VALID
bit which holds it in the pnfs_layout_hdr's list. When the final lseg
is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED
bit is set, preventing any new lsegs from being added.
layout drivers
---------------
+==============
PNFS utilizes what is called layout drivers. The STD defines 4 basic
layout types: "files", "objects", "blocks", and "flexfiles". For each
@@ -68,6 +73,6 @@ Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory
Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory
blocks-layout setup
--------------------
+===================
TODO: Document the setup needs of the blocks layout driver
diff --git a/Documentation/filesystems/nfs/rpc-cache.txt b/Documentation/filesystems/nfs/rpc-cache.rst
index c4dac829db0f..bb164eea969b 100644
--- a/Documentation/filesystems/nfs/rpc-cache.txt
+++ b/Documentation/filesystems/nfs/rpc-cache.rst
@@ -1,9 +1,14 @@
- This document gives a brief introduction to the caching
+=========
+RPC Cache
+=========
+
+This document gives a brief introduction to the caching
mechanisms in the sunrpc layer that is used, in particular,
for NFS authentication.
-CACHES
+Caches
======
+
The caching replaces the old exports table and allows for
a wide variety of values to be caches.
@@ -12,6 +17,7 @@ quite possibly very different in content and use. There is a corpus
of common code for managing these caches.
Examples of caches that are likely to be needed are:
+
- mapping from IP address to client name
- mapping from client name and filesystem to export options
- mapping from UID to list of GIDs, to work around NFS's limitation
@@ -21,6 +27,7 @@ Examples of caches that are likely to be needed are:
- mapping from network identify to public key for crypto authentication.
The common code handles such things as:
+
- general cache lookup with correct locking
- supporting 'NEGATIVE' as well as positive entries
- allowing an EXPIRED time on cache items, and removing
@@ -35,60 +42,66 @@ The common code handles such things as:
Creating a Cache
----------------
-1/ A cache needs a datum to store. This is in the form of a
- structure definition that must contain a
- struct cache_head
+- A cache needs a datum to store. This is in the form of a
+ structure definition that must contain a struct cache_head
as an element, usually the first.
It will also contain a key and some content.
Each cache element is reference counted and contains
expiry and update times for use in cache management.
-2/ A cache needs a "cache_detail" structure that
+- A cache needs a "cache_detail" structure that
describes the cache. This stores the hash table, some
parameters for cache management, and some operations detailing how
to work with particular cache items.
- The operations requires are:
- struct cache_head *alloc(void)
- This simply allocates appropriate memory and returns
- a pointer to the cache_detail embedded within the
- structure
- void cache_put(struct kref *)
- This is called when the last reference to an item is
- dropped. The pointer passed is to the 'ref' field
- in the cache_head. cache_put should release any
- references create by 'cache_init' and, if CACHE_VALID
- is set, any references created by cache_update.
- It should then release the memory allocated by
- 'alloc'.
- int match(struct cache_head *orig, struct cache_head *new)
- test if the keys in the two structures match. Return
- 1 if they do, 0 if they don't.
- void init(struct cache_head *orig, struct cache_head *new)
- Set the 'key' fields in 'new' from 'orig'. This may
- include taking references to shared objects.
- void update(struct cache_head *orig, struct cache_head *new)
- Set the 'content' fileds in 'new' from 'orig'.
- int cache_show(struct seq_file *m, struct cache_detail *cd,
- struct cache_head *h)
- Optional. Used to provide a /proc file that lists the
- contents of a cache. This should show one item,
- usually on just one line.
- int cache_request(struct cache_detail *cd, struct cache_head *h,
- char **bpp, int *blen)
- Format a request to be send to user-space for an item
- to be instantiated. *bpp is a buffer of size *blen.
- bpp should be moved forward over the encoded message,
- and *blen should be reduced to show how much free
- space remains. Return 0 on success or <0 if not
- enough room or other problem.
- int cache_parse(struct cache_detail *cd, char *buf, int len)
- A message from user space has arrived to fill out a
- cache entry. It is in 'buf' of length 'len'.
- cache_parse should parse this, find the item in the
- cache with sunrpc_cache_lookup_rcu, and update the item
- with sunrpc_cache_update.
-
-
-3/ A cache needs to be registered using cache_register(). This
+
+ The operations are:
+
+ struct cache_head \*alloc(void)
+ This simply allocates appropriate memory and returns
+ a pointer to the cache_detail embedded within the
+ structure
+
+ void cache_put(struct kref \*)
+ This is called when the last reference to an item is
+ dropped. The pointer passed is to the 'ref' field
+ in the cache_head. cache_put should release any
+ references created by 'cache_init' and, if CACHE_VALID
+ is set, any references created by cache_update.
+ It should then release the memory allocated by
+ 'alloc'.
+
+ int match(struct cache_head \*orig, struct cache_head \*new)
+ test if the keys in the two structures match. Return
+ 1 if they do, 0 if they don't.
+
+ void init(struct cache_head \*orig, struct cache_head \*new)
+ Set the 'key' fields in 'new' from 'orig'. This may
+ include taking references to shared objects.
+
+ void update(struct cache_head \*orig, struct cache_head \*new)
+ Set the 'content' fields in 'new' from 'orig'.
+
+ int cache_show(struct seq_file \*m, struct cache_detail \*cd, struct cache_head \*h)
+ Optional. Used to provide a /proc file that lists the
+ contents of a cache. This should show one item,
+ usually on just one line.
+
+ int cache_request(struct cache_detail \*cd, struct cache_head \*h, char \*\*bpp, int \*blen)
+ Format a request to be sent to user-space for an item
+ to be instantiated. \*bpp is a buffer of size \*blen.
+ bpp should be moved forward over the encoded message,
+ and \*blen should be reduced to show how much free
+ space remains. Return 0 on success or <0 if not
+ enough room or other problem.
+
+ int cache_parse(struct cache_detail \*cd, char \*buf, int len)
+ A message from user space has arrived to fill out a
+ cache entry. It is in 'buf' of length 'len'.
+ cache_parse should parse this, find the item in the
+ cache with sunrpc_cache_lookup_rcu, and update the item
+ with sunrpc_cache_update.
+
+
+- A cache needs to be registered using cache_register(). This
includes it on a list of caches that will be regularly
cleaned to discard old data.
@@ -107,7 +120,7 @@ cache_check will return -ENOENT in the entry is negative or if an up
call is needed but not possible, -EAGAIN if an upcall is pending,
or 0 if the data is valid;
-cache_check can be passed a "struct cache_req *". This structure is
+cache_check can be passed a "struct cache_req\*". This structure is
typically embedded in the actual request and can be used to create a
deferred copy of the request (struct cache_deferred_req). This is
done when the found cache item is not uptodate, but the is reason to
@@ -139,9 +152,11 @@ The 'channel' works a bit like a datagram socket. Each 'write' is
passed as a whole to the cache for parsing and interpretation.
Each cache can treat the write requests differently, but it is
expected that a message written will contain:
+
- a key
- an expiry time
- a content.
+
with the intention that an item in the cache with the give key
should be create or updated to have the given content, and the
expiry time should be set on that item.
@@ -156,7 +171,8 @@ If there are no more requests to return, read will return EOF, but a
select or poll for read will block waiting for another request to be
added.
-Thus a user-space helper is likely to:
+Thus a user-space helper is likely to::
+
open the channel.
select for readable
read a request
@@ -175,12 +191,13 @@ Each cache should also define a "cache_request" method which
takes a cache item and encodes a request into the buffer
provided.
-Note: If a cache has no active readers on the channel, and has had not
-active readers for more than 60 seconds, further requests will not be
-added to the channel but instead all lookups that do not find a valid
-entry will fail. This is partly for backward compatibility: The
-previous nfs exports table was deemed to be authoritative and a
-failed lookup meant a definite 'no'.
+.. note::
+ If a cache has no active readers on the channel, and has had no
+ active readers for more than 60 seconds, further requests will not be
+ added to the channel but instead all lookups that do not find a valid
+ entry will fail. This is partly for backward compatibility: The
+ previous nfs exports table was deemed to be authoritative and a
+ failed lookup meant a definite 'no'.
request/response format
-----------------------
@@ -193,10 +210,11 @@ with precisely one newline character which should be at the end.
Fields within the record should be separated by spaces, normally one.
If spaces, newlines, or nul characters are needed in a field they
much be quoted. two mechanisms are available:
-1/ If a field begins '\x' then it must contain an even number of
+
+- If a field begins '\x' then it must contain an even number of
hex digits, and pairs of these digits provide the bytes in the
field.
-2/ otherwise a \ in the field must be followed by 3 octal digits
+- otherwise a \ in the field must be followed by 3 octal digits
which give the code for a byte. Other characters are treated
as them selves. At the very least, space, newline, nul, and
'\' must be quoted in this way.
diff --git a/Documentation/filesystems/nfs/rpc-server-gss.txt b/Documentation/filesystems/nfs/rpc-server-gss.rst
index 310bbbaf9080..812754576845 100644
--- a/Documentation/filesystems/nfs/rpc-server-gss.txt
+++ b/Documentation/filesystems/nfs/rpc-server-gss.rst
@@ -1,4 +1,4 @@
-
+=========================================
rpcsec_gss support for kernel RPC servers
=========================================
@@ -9,14 +9,17 @@ NFSv4.1 and higher don't require the client to act as a server for the
purposes of authentication.)
RPCGSS is specified in a few IETF documents:
+
- RFC2203 v1: http://tools.ietf.org/rfc/rfc2203.txt
- RFC5403 v2: http://tools.ietf.org/rfc/rfc5403.txt
+
and there is a 3rd version being proposed:
+
- http://tools.ietf.org/id/draft-williams-rpcsecgssv3.txt
(At draft n. 02 at the time of writing)
Background
-----------
+==========
The RPCGSS Authentication method describes a way to perform GSSAPI
Authentication for NFS. Although GSSAPI is itself completely mechanism
@@ -29,6 +32,7 @@ depends on GSSAPI extensions that are KRB5 specific.
GSSAPI is a complex library, and implementing it completely in kernel is
unwarranted. However GSSAPI operations are fundementally separable in 2
parts:
+
- initial context establishment
- integrity/privacy protection (signing and encrypting of individual
packets)
@@ -41,7 +45,7 @@ kernel, but leave the initial context establishment to userspace. We
need upcalls to request userspace to perform context establishment.
NFS Server Legacy Upcall Mechanism
-----------------------------------
+==================================
The classic upcall mechanism uses a custom text based upcall mechanism
to talk to a custom daemon called rpc.svcgssd that is provide by the
@@ -62,21 +66,20 @@ groups) due to limitation on the size of the buffer that can be send
back to the kernel (4KiB).
NFS Server New RPC Upcall Mechanism
------------------------------------
+===================================
The newer upcall mechanism uses RPC over a unix socket to a daemon
called gss-proxy, implemented by a userspace program called Gssproxy.
-The gss_proxy RPC protocol is currently documented here:
-
- https://fedorahosted.org/gss-proxy/wiki/ProtocolDocumentation
+The gss_proxy RPC protocol is currently documented `here
+<https://fedorahosted.org/gss-proxy/wiki/ProtocolDocumentation>`_.
This upcall mechanism uses the kernel rpc client and connects to the gssproxy
userspace program over a regular unix socket. The gssproxy protocol does not
suffer from the size limitations of the legacy protocol.
Negotiating Upcall Mechanisms
------------------------------
+=============================
To provide backward compatibility, the kernel defaults to using the
legacy mechanism. To switch to the new mechanism, gss-proxy must bind
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.rst
index f2f3f8592a6f..6c49f04e9e0a 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======
NILFS2
-------
+======
NILFS2 is a log-structured file system (LFS) supporting continuous
snapshotting. In addition to versioning capability of the entire file
@@ -25,9 +28,9 @@ available from the following download page. At least "mkfs.nilfs2",
cleaner or garbage collector) are required. Details on the tools are
described in the man pages included in the package.
-Project web page: https://nilfs.sourceforge.io/
-Download page: https://nilfs.sourceforge.io/en/download.html
-List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
+:Project web page: https://nilfs.sourceforge.io/
+:Download page: https://nilfs.sourceforge.io/en/download.html
+:List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
Caveats
=======
@@ -47,6 +50,7 @@ Mount options
NILFS2 supports the following mount options:
(*) == default
+======================= =======================================================
barrier(*) This enables/disables the use of write barriers. This
nobarrier requires an IO stack which can support barriers, and
if nilfs gets an error on a barrier write, it will
@@ -79,6 +83,7 @@ discard This enables/disables the use of discard/TRIM commands.
nodiscard(*) The discard/TRIM commands are sent to the underlying
block device when blocks are freed. This is useful
for SSD devices and sparse/thinly-provisioned LUNs.
+======================= =======================================================
Ioctls
======
@@ -87,9 +92,11 @@ There is some NILFS2 specific functionality which can be accessed by application
through the system call interfaces. The list of all NILFS2 specific ioctls are
shown in the table below.
-Table of NILFS2 specific ioctls
-..............................................................................
+Table of NILFS2 specific ioctls:
+
+ ============================== ===============================================
Ioctl Description
+ ============================== ===============================================
NILFS_IOCTL_CHANGE_CPMODE Change mode of given checkpoint between
checkpoint and snapshot state. This ioctl is
used in chcp and mkcp utilities.
@@ -142,11 +149,12 @@ Table of NILFS2 specific ioctls
NILFS_IOCTL_SET_ALLOC_RANGE Define lower limit of segments in bytes and
upper limit of segments in bytes. This ioctl
is used by nilfs_resize utility.
+ ============================== ===============================================
NILFS2 usage
============
-To use nilfs2 as a local file system, simply:
+To use nilfs2 as a local file system, simply::
# mkfs -t nilfs2 /dev/block_device
# mount -t nilfs2 /dev/block_device /dir
@@ -157,18 +165,20 @@ This will also invoke the cleaner through the mount helper program
Checkpoints and snapshots are managed by the following commands.
Their manpages are included in the nilfs-utils package above.
+ ==== ===========================================================
lscp list checkpoints or snapshots.
mkcp make a checkpoint or a snapshot.
chcp change an existing checkpoint to a snapshot or vice versa.
rmcp invalidate specified checkpoint(s).
+ ==== ===========================================================
-To mount a snapshot,
+To mount a snapshot::
# mount -t nilfs2 -r -o cp=<cno> /dev/block_device /snap_dir
where <cno> is the checkpoint number of the snapshot.
-To unmount the NILFS2 mount point or snapshot, simply:
+To unmount the NILFS2 mount point or snapshot, simply::
# umount /dir
@@ -181,7 +191,7 @@ Disk format
A nilfs2 volume is equally divided into a number of segments except
for the super block (SB) and segment #0. A segment is the container
of logs. Each log is composed of summary information blocks, payload
-blocks, and an optional super root block (SR):
+blocks, and an optional super root block (SR)::
______________________________________________________
| |SB| | Segment | Segment | Segment | ... | Segment | |
@@ -200,7 +210,7 @@ blocks, and an optional super root block (SR):
|_blocks__|_________________|__|
The payload blocks are organized per file, and each file consists of
-data blocks and B-tree node blocks:
+data blocks and B-tree node blocks::
|<--- File-A --->|<--- File-B --->|
_______________________________________________________________
@@ -213,7 +223,7 @@ files without data blocks or B-tree node blocks.
The organization of the blocks is recorded in the summary information
blocks, which contains a header structure (nilfs_segment_summary), per
-file structures (nilfs_finfo), and per block structures (nilfs_binfo):
+file structures (nilfs_finfo), and per block structures (nilfs_binfo)::
_________________________________________________________________________
| Summary | finfo | binfo | ... | binfo | finfo | binfo | ... | binfo |...
@@ -223,7 +233,7 @@ file structures (nilfs_finfo), and per block structures (nilfs_binfo):
The logs include regular files, directory files, symbolic link files
and several meta data files. The mata data files are the files used
to maintain file system meta data. The current version of NILFS2 uses
-the following meta data files:
+the following meta data files::
1) Inode file (ifile) -- Stores on-disk inodes
2) Checkpoint file (cpfile) -- Stores checkpoints
@@ -232,7 +242,7 @@ the following meta data files:
(DAT) block numbers. This file serves to
make on-disk blocks relocatable.
-The following figure shows a typical organization of the logs:
+The following figure shows a typical organization of the logs::
_________________________________________________________________________
| Summary | regular file | file | ... | ifile | cpfile | sufile | DAT |SR|
@@ -250,7 +260,7 @@ three special inodes, inodes for the DAT, cpfile, and sufile. Inodes
of regular files, directories, symlinks and other special files, are
included in the ifile. The inode of ifile itself is included in the
corresponding checkpoint entry in the cpfile. Thus, the hierarchy
-among NILFS2 files can be depicted as follows:
+among NILFS2 files can be depicted as follows::
Super block (SB)
|
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.rst
index 553f10d03076..5bb093a26485 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.rst
@@ -1,19 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
The Linux NTFS filesystem driver
================================
-Table of contents
-=================
+.. Table of contents
-- Overview
-- Web site
-- Features
-- Supported mount options
-- Known bugs and (mis-)features
-- Using NTFS volume and stripe sets
- - The Device-Mapper driver
- - The Software RAID / MD driver
- - Limitations when using the MD driver
+ - Overview
+ - Web site
+ - Features
+ - Supported mount options
+ - Known bugs and (mis-)features
+ - Using NTFS volume and stripe sets
+ - The Device-Mapper driver
+ - The Software RAID / MD driver
+ - Limitations when using the MD driver
Overview
@@ -66,8 +68,10 @@ Features
partition by creating a large file while in Windows and then loopback
mounting the file while in Linux and creating a Linux filesystem on it that
is used to install Linux on it.
-- A comparison of the two drivers using:
+- A comparison of the two drivers using::
+
time find . -type f -exec md5sum "{}" \;
+
run three times in sequence with each driver (after a reboot) on a 1.4GiB
NTFS partition, showed the new driver to be 20% faster in total time elapsed
(from 9:43 minutes on average down to 7:53). The time spent in user space
@@ -104,6 +108,7 @@ In addition to the generic mount options described by the manual page for the
mount command (man 8 mount, also see man 5 fstab), the NTFS driver supports the
following mount options:
+======================= =======================================================
iocharset=name Deprecated option. Still supported but please use
nls=name in the future. See description for nls=name.
@@ -175,16 +180,22 @@ disable_sparse=<BOOL> If disable_sparse is specified, creation of sparse
errors=opt What to do when critical filesystem errors are found.
Following values can be used for "opt":
- continue: DEFAULT, try to clean-up as much as
+
+ ======== =========================================
+ continue DEFAULT, try to clean-up as much as
possible, e.g. marking a corrupt inode as
bad so it is no longer accessed, and then
continue.
- recover: At present only supported is recovery of
+ recover At present only supported is recovery of
the boot sector from the backup copy.
If read-only mount, the recovery is done
in memory only and not written to disk.
- Note that the options are additive, i.e. specifying:
+ ======== =========================================
+
+ Note that the options are additive, i.e. specifying::
+
errors=continue,errors=recover
+
means the driver will attempt to recover and if that
fails it will clean-up as much as possible and
continue.
@@ -202,12 +213,18 @@ mft_zone_multiplier= Set the MFT zone multiplier for the volume (this
In general use the default. If you have a lot of small
files then use a higher value. The values have the
following meaning:
+
+ ===== =================================
Value MFT zone size (% of volume size)
+ ===== =================================
1 12.5%
2 25%
3 37.5%
4 50%
+ ===== =================================
+
Note this option is irrelevant for read-only mounts.
+======================= =======================================================
Known bugs and (mis-)features
@@ -252,18 +269,18 @@ To create the table describing your volume you will need to know each of its
components and their sizes in sectors, i.e. multiples of 512-byte blocks.
For NT4 fault tolerant volumes you can obtain the sizes using fdisk. So for
-example if one of your partitions is /dev/hda2 you would do:
+example if one of your partitions is /dev/hda2 you would do::
-$ fdisk -ul /dev/hda
+ $ fdisk -ul /dev/hda
-Disk /dev/hda: 81.9 GB, 81964302336 bytes
-255 heads, 63 sectors/track, 9964 cylinders, total 160086528 sectors
-Units = sectors of 1 * 512 = 512 bytes
+ Disk /dev/hda: 81.9 GB, 81964302336 bytes
+ 255 heads, 63 sectors/track, 9964 cylinders, total 160086528 sectors
+ Units = sectors of 1 * 512 = 512 bytes
- Device Boot Start End Blocks Id System
- /dev/hda1 * 63 4209029 2104483+ 83 Linux
- /dev/hda2 4209030 37768814 16779892+ 86 NTFS
- /dev/hda3 37768815 46170809 4200997+ 83 Linux
+ Device Boot Start End Blocks Id System
+ /dev/hda1 * 63 4209029 2104483+ 83 Linux
+ /dev/hda2 4209030 37768814 16779892+ 86 NTFS
+ /dev/hda3 37768815 46170809 4200997+ 83 Linux
And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 =
33559785 sectors.
@@ -271,15 +288,17 @@ And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 =
For Win2k and later dynamic disks, you can for example use the ldminfo utility
which is part of the Linux LDM tools (the latest version at the time of
writing is linux-ldm-0.0.8.tar.bz2). You can download it from:
+
http://www.linux-ntfs.org/
+
Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go
into it (cd linux-ldm-0.0.8) and change to the test directory (cd test). You
will find the precompiled (i386) ldminfo utility there. NOTE: You will not be
able to compile this yourself easily so use the binary version!
-Then you would use ldminfo in dump mode to obtain the necessary information:
+Then you would use ldminfo in dump mode to obtain the necessary information::
-$ ./ldminfo --dump /dev/hda
+ $ ./ldminfo --dump /dev/hda
This would dump the LDM database found on /dev/hda which describes all of your
dynamic disks and all the volumes on them. At the bottom you will see the
@@ -305,42 +324,36 @@ give you the correct information to do this.
Assuming you know all your devices and their sizes things are easy.
For a linear raid the table would look like this (note all values are in
-512-byte sectors):
+512-byte sectors)::
---- cut here ---
-# Offset into Size of this Raid type Device Start sector
-# volume device of device
-0 1028161 linear /dev/hda1 0
-1028161 3903762 linear /dev/hdb2 0
-4931923 2103211 linear /dev/hdc1 0
---- cut here ---
+ # Offset into Size of this Raid type Device Start sector
+ # volume device of device
+ 0 1028161 linear /dev/hda1 0
+ 1028161 3903762 linear /dev/hdb2 0
+ 4931923 2103211 linear /dev/hdc1 0
For a striped volume, i.e. raid level 0, you will need to know the chunk size
you used when creating the volume. Windows uses 64kiB as the default, so it
will probably be this unless you changed the defaults when creating the array.
For a raid level 0 the table would look like this (note all values are in
-512-byte sectors):
+512-byte sectors)::
---- cut here ---
-# Offset Size Raid Number Chunk 1st Start 2nd Start
-# into of the type of size Device in Device in
-# volume volume stripes device device
-0 2056320 striped 2 128 /dev/hda1 0 /dev/hdb1 0
---- cut here ---
+ # Offset Size Raid Number Chunk 1st Start 2nd Start
+ # into of the type of size Device in Device in
+ # volume volume stripes device device
+ 0 2056320 striped 2 128 /dev/hda1 0 /dev/hdb1 0
If there are more than two devices, just add each of them to the end of the
line.
Finally, for a mirrored volume, i.e. raid level 1, the table would look like
-this (note all values are in 512-byte sectors):
+this (note all values are in 512-byte sectors)::
---- cut here ---
-# Ofs Size Raid Log Number Region Should Number Source Start Target Start
-# in of the type type of log size sync? of Device in Device in
-# vol volume params mirrors Device Device
-0 2056320 mirror core 2 16 nosync 2 /dev/hda1 0 /dev/hdb1 0
---- cut here ---
+ # Ofs Size Raid Log Number Region Should Number Source Start Target Start
+ # in of the type type of log size sync? of Device in Device in
+ # vol volume params mirrors Device Device
+ 0 2056320 mirror core 2 16 nosync 2 /dev/hda1 0 /dev/hdb1 0
If you are mirroring to multiple devices you can specify further targets at the
end of the line.
@@ -353,17 +366,17 @@ to the "Target Device" or if you specified multiple target devices to all of
them.
Once you have your table, save it in a file somewhere (e.g. /etc/ntfsvolume1),
-and hand it over to dmsetup to work with, like so:
+and hand it over to dmsetup to work with, like so::
-$ dmsetup create myvolume1 /etc/ntfsvolume1
+ $ dmsetup create myvolume1 /etc/ntfsvolume1
You can obviously replace "myvolume1" with whatever name you like.
If it all worked, you will now have the device /dev/device-mapper/myvolume1
which you can then just use as an argument to the mount command as usual to
-mount the ntfs volume. For example:
+mount the ntfs volume. For example::
-$ mount -t ntfs -o ro /dev/device-mapper/myvolume1 /mnt/myvol1
+ $ mount -t ntfs -o ro /dev/device-mapper/myvolume1 /mnt/myvol1
(You need to create the directory /mnt/myvol1 first and of course you can use
anything you like instead of /mnt/myvol1 as long as it is an existing
@@ -395,18 +408,18 @@ Windows by default uses a stripe chunk size of 64k, so you probably want the
"chunk-size 64k" option for each raid-disk, too.
For example, if you have a stripe set consisting of two partitions /dev/hda5
-and /dev/hdb1 your /etc/raidtab would look like this:
-
-raiddev /dev/md0
- raid-level 0
- nr-raid-disks 2
- nr-spare-disks 0
- persistent-superblock 0
- chunk-size 64k
- device /dev/hda5
- raid-disk 0
- device /dev/hdb1
- raid-disk 1
+and /dev/hdb1 your /etc/raidtab would look like this::
+
+ raiddev /dev/md0
+ raid-level 0
+ nr-raid-disks 2
+ nr-spare-disks 0
+ persistent-superblock 0
+ chunk-size 64k
+ device /dev/hda5
+ raid-disk 0
+ device /dev/hdb1
+ raid-disk 1
For linear raid, just change the raid-level above to "raid-level linear", for
mirrors, change it to "raid-level 1", and for stripe sets with parity, change
@@ -427,7 +440,9 @@ Once the raidtab is setup, run for example raid0run -a to start all devices or
raid0run /dev/md0 to start a particular md device, in this case /dev/md0.
Then just use the mount command as usual to mount the ntfs volume using for
-example: mount -t ntfs -o ro /dev/md0 /mnt/myntfsvolume
+example::
+
+ mount -t ntfs -o ro /dev/md0 /mnt/myntfsvolume
It is advisable to do the mount read-only to see if the md volume has been
setup correctly to avoid the possibility of causing damage to the data on the
diff --git a/Documentation/filesystems/ocfs2-online-filecheck.txt b/Documentation/filesystems/ocfs2-online-filecheck.rst
index 139fab175c8a..2257bb53edc1 100644
--- a/Documentation/filesystems/ocfs2-online-filecheck.txt
+++ b/Documentation/filesystems/ocfs2-online-filecheck.rst
@@ -1,5 +1,8 @@
- OCFS2 online file check
- -----------------------
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+OCFS2 file system - online file check
+=====================================
This document will describe OCFS2 online file check feature.
@@ -40,7 +43,7 @@ When there are errors in the OCFS2 filesystem, they are usually accompanied
by the inode number which caused the error. This inode number would be the
input to check/fix the file.
-There is a sysfs directory for each OCFS2 file system mounting:
+There is a sysfs directory for each OCFS2 file system mounting::
/sys/fs/ocfs2/<devname>/filecheck
@@ -50,34 +53,36 @@ communicate with kernel space, tell which file(inode number) will be checked or
fixed. Currently, three operations are supported, which includes checking
inode, fixing inode and setting the size of result record history.
-1. If you want to know what error exactly happened to <inode> before fixing, do
+1. If you want to know what error exactly happened to <inode> before fixing, do::
+
+ # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/check
+ # cat /sys/fs/ocfs2/<devname>/filecheck/check
+
+The output is like this::
- # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/check
- # cat /sys/fs/ocfs2/<devname>/filecheck/check
+ INO DONE ERROR
+ 39502 1 GENERATION
-The output is like this:
- INO DONE ERROR
-39502 1 GENERATION
+ <INO> lists the inode numbers.
+ <DONE> indicates whether the operation has been finished.
+ <ERROR> says what kind of errors was found. For the detailed error numbers,
+ please refer to the file linux/fs/ocfs2/filecheck.h.
-<INO> lists the inode numbers.
-<DONE> indicates whether the operation has been finished.
-<ERROR> says what kind of errors was found. For the detailed error numbers,
-please refer to the file linux/fs/ocfs2/filecheck.h.
+2. If you determine to fix this inode, do::
-2. If you determine to fix this inode, do
+ # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/fix
+ # cat /sys/fs/ocfs2/<devname>/filecheck/fix
- # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/fix
- # cat /sys/fs/ocfs2/<devname>/filecheck/fix
+The output is like this::
-The output is like this:
- INO DONE ERROR
-39502 1 SUCCESS
+ INO DONE ERROR
+ 39502 1 SUCCESS
This time, the <ERROR> column indicates whether this fix is successful or not.
3. The record cache is used to store the history of check/fix results. Its
default size is 10, and can be adjusted within the range of 10 ~ 100. You can
-adjust the size like this:
+adjust the size like this::
# echo "<size>" > /sys/fs/ocfs2/<devname>/filecheck/set
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.rst
index 4c49e5410595..412386bc6506 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.rst
@@ -1,5 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
OCFS2 filesystem
-==================
+================
+
OCFS2 is a general purpose extent based shared disk cluster file
system with many similarities to ext3. It supports 64 bit inode
numbers, and has automatically extending metadata groups which may
@@ -14,22 +18,26 @@ OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
All code copyright 2005 Oracle except when otherwise noted.
-CREDITS:
+Credits
+=======
+
Lots of code taken from ext3 and other projects.
Authors in alphabetical order:
-Joel Becker <joel.becker@oracle.com>
-Zach Brown <zach.brown@oracle.com>
-Mark Fasheh <mfasheh@suse.com>
-Kurt Hackel <kurt.hackel@oracle.com>
-Tao Ma <tao.ma@oracle.com>
-Sunil Mushran <sunil.mushran@oracle.com>
-Manish Singh <manish.singh@oracle.com>
-Tiger Yang <tiger.yang@oracle.com>
+
+- Joel Becker <joel.becker@oracle.com>
+- Zach Brown <zach.brown@oracle.com>
+- Mark Fasheh <mfasheh@suse.com>
+- Kurt Hackel <kurt.hackel@oracle.com>
+- Tao Ma <tao.ma@oracle.com>
+- Sunil Mushran <sunil.mushran@oracle.com>
+- Manish Singh <manish.singh@oracle.com>
+- Tiger Yang <tiger.yang@oracle.com>
Caveats
=======
Features which OCFS2 does not support yet:
+
- Directory change notification (F_NOTIFY)
- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
@@ -37,8 +45,10 @@ Mount options
=============
OCFS2 supports the following mount options:
+
(*) == default
+======================= ========================================================
barrier=1 This enables/disables barriers. barrier=0 disables it,
barrier=1 enables it.
errors=remount-ro(*) Remount the filesystem read-only on an error.
@@ -104,3 +114,4 @@ journal_async_commit Commit block can be written to disk without waiting
for descriptor blocks. If enabled older kernels cannot
mount the device. This will enable 'journal_checksum'
internally.
+======================= ========================================================
diff --git a/Documentation/filesystems/omfs.rst b/Documentation/filesystems/omfs.rst
new file mode 100644
index 000000000000..4c8bb3074169
--- /dev/null
+++ b/Documentation/filesystems/omfs.rst
@@ -0,0 +1,112 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+Optimized MPEG Filesystem (OMFS)
+================================
+
+Overview
+========
+
+OMFS is a filesystem created by SonicBlue for use in the ReplayTV DVR
+and Rio Karma MP3 player. The filesystem is extent-based, utilizing
+block sizes from 2k to 8k, with hash-based directories. This
+filesystem driver may be used to read and write disks from these
+devices.
+
+Note, it is not recommended that this FS be used in place of a general
+filesystem for your own streaming media device. Native Linux filesystems
+will likely perform better.
+
+More information is available at:
+
+ http://linux-karma.sf.net/
+
+Various utilities, including mkomfs and omfsck, are included with
+omfsprogs, available at:
+
+ http://bobcopeland.com/karma/
+
+Instructions are included in its README.
+
+Options
+=======
+
+OMFS supports the following mount-time options:
+
+ ============ ========================================
+ uid=n make all files owned by specified user
+ gid=n make all files owned by specified group
+ umask=xxx set permission umask to xxx
+ fmask=xxx set umask to xxx for files
+ dmask=xxx set umask to xxx for directories
+ ============ ========================================
+
+Disk format
+===========
+
+OMFS discriminates between "sysblocks" and normal data blocks. The sysblock
+group consists of super block information, file metadata, directory structures,
+and extents. Each sysblock has a header containing CRCs of the entire
+sysblock, and may be mirrored in successive blocks on the disk. A sysblock may
+have a smaller size than a data block, but since they are both addressed by the
+same 64-bit block number, any remaining space in the smaller sysblock is
+unused.
+
+Sysblock header information::
+
+ struct omfs_header {
+ __be64 h_self; /* FS block where this is located */
+ __be32 h_body_size; /* size of useful data after header */
+ __be16 h_crc; /* crc-ccitt of body_size bytes */
+ char h_fill1[2];
+ u8 h_version; /* version, always 1 */
+ char h_type; /* OMFS_INODE_X */
+ u8 h_magic; /* OMFS_IMAGIC */
+ u8 h_check_xor; /* XOR of header bytes before this */
+ __be32 h_fill2;
+ };
+
+Files and directories are both represented by omfs_inode::
+
+ struct omfs_inode {
+ struct omfs_header i_head; /* header */
+ __be64 i_parent; /* parent containing this inode */
+ __be64 i_sibling; /* next inode in hash bucket */
+ __be64 i_ctime; /* ctime, in milliseconds */
+ char i_fill1[35];
+ char i_type; /* OMFS_[DIR,FILE] */
+ __be32 i_fill2;
+ char i_fill3[64];
+ char i_name[OMFS_NAMELEN]; /* filename */
+ __be64 i_size; /* size of file, in bytes */
+ };
+
+Directories in OMFS are implemented as a large hash table. Filenames are
+hashed then prepended into the bucket list beginning at OMFS_DIR_START.
+Lookup requires hashing the filename, then seeking across i_sibling pointers
+until a match is found on i_name. Empty buckets are represented by block
+pointers with all-1s (~0).
+
+A file is an omfs_inode structure followed by an extent table beginning at
+OMFS_EXTENT_START::
+
+ struct omfs_extent_entry {
+ __be64 e_cluster; /* start location of a set of blocks */
+ __be64 e_blocks; /* number of blocks after e_cluster */
+ };
+
+ struct omfs_extent {
+ __be64 e_next; /* next extent table location */
+ __be32 e_extent_count; /* total # extents in this table */
+ __be32 e_fill;
+ struct omfs_extent_entry e_entry; /* start of extent entries */
+ };
+
+Each extent holds the block offset followed by number of blocks allocated to
+the extent. The final extent in each table is a terminator with e_cluster
+being ~0 and e_blocks being ones'-complement of the total number of blocks
+in the table.
+
+If this table overflows, a continuation inode is written and pointed to by
+e_next. These have a header but lack the rest of the inode structure.
+
diff --git a/Documentation/filesystems/omfs.txt b/Documentation/filesystems/omfs.txt
deleted file mode 100644
index 1d0d41ff5c65..000000000000
--- a/Documentation/filesystems/omfs.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-Optimized MPEG Filesystem (OMFS)
-
-Overview
-========
-
-OMFS is a filesystem created by SonicBlue for use in the ReplayTV DVR
-and Rio Karma MP3 player. The filesystem is extent-based, utilizing
-block sizes from 2k to 8k, with hash-based directories. This
-filesystem driver may be used to read and write disks from these
-devices.
-
-Note, it is not recommended that this FS be used in place of a general
-filesystem for your own streaming media device. Native Linux filesystems
-will likely perform better.
-
-More information is available at:
-
- http://linux-karma.sf.net/
-
-Various utilities, including mkomfs and omfsck, are included with
-omfsprogs, available at:
-
- http://bobcopeland.com/karma/
-
-Instructions are included in its README.
-
-Options
-=======
-
-OMFS supports the following mount-time options:
-
- uid=n - make all files owned by specified user
- gid=n - make all files owned by specified group
- umask=xxx - set permission umask to xxx
- fmask=xxx - set umask to xxx for files
- dmask=xxx - set umask to xxx for directories
-
-Disk format
-===========
-
-OMFS discriminates between "sysblocks" and normal data blocks. The sysblock
-group consists of super block information, file metadata, directory structures,
-and extents. Each sysblock has a header containing CRCs of the entire
-sysblock, and may be mirrored in successive blocks on the disk. A sysblock may
-have a smaller size than a data block, but since they are both addressed by the
-same 64-bit block number, any remaining space in the smaller sysblock is
-unused.
-
-Sysblock header information:
-
-struct omfs_header {
- __be64 h_self; /* FS block where this is located */
- __be32 h_body_size; /* size of useful data after header */
- __be16 h_crc; /* crc-ccitt of body_size bytes */
- char h_fill1[2];
- u8 h_version; /* version, always 1 */
- char h_type; /* OMFS_INODE_X */
- u8 h_magic; /* OMFS_IMAGIC */
- u8 h_check_xor; /* XOR of header bytes before this */
- __be32 h_fill2;
-};
-
-Files and directories are both represented by omfs_inode:
-
-struct omfs_inode {
- struct omfs_header i_head; /* header */
- __be64 i_parent; /* parent containing this inode */
- __be64 i_sibling; /* next inode in hash bucket */
- __be64 i_ctime; /* ctime, in milliseconds */
- char i_fill1[35];
- char i_type; /* OMFS_[DIR,FILE] */
- __be32 i_fill2;
- char i_fill3[64];
- char i_name[OMFS_NAMELEN]; /* filename */
- __be64 i_size; /* size of file, in bytes */
-};
-
-Directories in OMFS are implemented as a large hash table. Filenames are
-hashed then prepended into the bucket list beginning at OMFS_DIR_START.
-Lookup requires hashing the filename, then seeking across i_sibling pointers
-until a match is found on i_name. Empty buckets are represented by block
-pointers with all-1s (~0).
-
-A file is an omfs_inode structure followed by an extent table beginning at
-OMFS_EXTENT_START:
-
-struct omfs_extent_entry {
- __be64 e_cluster; /* start location of a set of blocks */
- __be64 e_blocks; /* number of blocks after e_cluster */
-};
-
-struct omfs_extent {
- __be64 e_next; /* next extent table location */
- __be32 e_extent_count; /* total # extents in this table */
- __be32 e_fill;
- struct omfs_extent_entry e_entry; /* start of extent entries */
-};
-
-Each extent holds the block offset followed by number of blocks allocated to
-the extent. The final extent in each table is a terminator with e_cluster
-being ~0 and e_blocks being ones'-complement of the total number of blocks
-in the table.
-
-If this table overflows, a continuation inode is written and pointed to by
-e_next. These have a header but lack the rest of the inode structure.
-
diff --git a/Documentation/filesystems/orangefs.txt b/Documentation/filesystems/orangefs.rst
index f4ba94950e3f..7d6d4cad73c4 100644
--- a/Documentation/filesystems/orangefs.txt
+++ b/Documentation/filesystems/orangefs.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
ORANGEFS
========
@@ -21,25 +24,25 @@ Orangefs features include:
* Stateless
-MAILING LIST ARCHIVES
+Mailing List Archives
=====================
http://lists.orangefs.org/pipermail/devel_lists.orangefs.org/
-MAILING LIST SUBMISSIONS
+Mailing List Submissions
========================
devel@lists.orangefs.org
-DOCUMENTATION
+Documentation
=============
http://www.orangefs.org/documentation/
-USERSPACE FILESYSTEM SOURCE
+Userspace Filesystem Source
===========================
http://www.orangefs.org/download
@@ -48,16 +51,16 @@ Orangefs versions prior to 2.9.3 would not be compatible with the
upstream version of the kernel client.
-RUNNING ORANGEFS ON A SINGLE SERVER
+Running ORANGEFS On a Single Server
===================================
OrangeFS is usually run in large installations with multiple servers and
clients, but a complete filesystem can be run on a single machine for
development and testing.
-On Fedora, install orangefs and orangefs-server.
+On Fedora, install orangefs and orangefs-server::
-dnf -y install orangefs orangefs-server
+ dnf -y install orangefs orangefs-server
There is an example server configuration file in
/etc/orangefs/orangefs.conf. Change localhost to your hostname if
@@ -70,29 +73,29 @@ single line. Uncomment it and change the hostname if necessary. This
controls clients which use libpvfs2. This does not control the
pvfs2-client-core.
-Create the filesystem.
+Create the filesystem::
-pvfs2-server -f /etc/orangefs/orangefs.conf
+ pvfs2-server -f /etc/orangefs/orangefs.conf
-Start the server.
+Start the server::
-systemctl start orangefs-server
+ systemctl start orangefs-server
-Test the server.
+Test the server::
-pvfs2-ping -m /pvfsmnt
+ pvfs2-ping -m /pvfsmnt
Start the client. The module must be compiled in or loaded before this
-point.
+point::
-systemctl start orangefs-client
+ systemctl start orangefs-client
-Mount the filesystem.
+Mount the filesystem::
-mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
+ mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
-BUILDING ORANGEFS ON A SINGLE SERVER
+Building ORANGEFS on a Single Server
====================================
Where OrangeFS cannot be installed from distribution packages, it may be
@@ -102,49 +105,51 @@ You can omit --prefix if you don't care that things are sprinkled around
in /usr/local. As of version 2.9.6, OrangeFS uses Berkeley DB by
default, we will probably be changing the default to LMDB soon.
-./configure --prefix=/opt/ofs --with-db-backend=lmdb
+::
-make
+ ./configure --prefix=/opt/ofs --with-db-backend=lmdb
-make install
+ make
-Create an orangefs config file.
+ make install
-/opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf
+Create an orangefs config file::
-Create an /etc/pvfs2tab file.
+ /opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf
-echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \
- /etc/pvfs2tab
+Create an /etc/pvfs2tab file::
-Create the mount point you specified in the tab file if needed.
+ echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \
+ /etc/pvfs2tab
-mkdir /pvfsmnt
+Create the mount point you specified in the tab file if needed::
-Bootstrap the server.
+ mkdir /pvfsmnt
-/opt/ofs/sbin/pvfs2-server -f /etc/pvfs2.conf
+Bootstrap the server::
-Start the server.
+ /opt/ofs/sbin/pvfs2-server -f /etc/pvfs2.conf
-/opt/osf/sbin/pvfs2-server /etc/pvfs2.conf
+Start the server::
+
+ /opt/ofs/sbin/pvfs2-server /etc/pvfs2.conf
Now the server should be running. Pvfs2-ls is a simple
-test to verify that the server is running.
+test to verify that the server is running::
-/opt/ofs/bin/pvfs2-ls /pvfsmnt
+ /opt/ofs/bin/pvfs2-ls /pvfsmnt
If stuff seems to be working, load the kernel module and
-turn on the client core.
+turn on the client core::
-/opt/ofs/sbin/pvfs2-client -p /opt/osf/sbin/pvfs2-client-core
+ /opt/ofs/sbin/pvfs2-client -p /opt/ofs/sbin/pvfs2-client-core
-Mount your filesystem.
+Mount your filesystem::
-mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
+ mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
-RUNNING XFSTESTS
+Running xfstests
================
It is useful to use a scratch filesystem with xfstests. This can be
@@ -159,21 +164,23 @@ Then there are two FileSystem sections: orangefs and scratch.
This change should be made before creating the filesystem.
-pvfs2-server -f /etc/orangefs/orangefs.conf
+::
+
+ pvfs2-server -f /etc/orangefs/orangefs.conf
-To run xfstests, create /etc/xfsqa.config.
+To run xfstests, create /etc/xfsqa.config::
-TEST_DIR=/orangefs
-TEST_DEV=tcp://localhost:3334/orangefs
-SCRATCH_MNT=/scratch
-SCRATCH_DEV=tcp://localhost:3334/scratch
+ TEST_DIR=/orangefs
+ TEST_DEV=tcp://localhost:3334/orangefs
+ SCRATCH_MNT=/scratch
+ SCRATCH_DEV=tcp://localhost:3334/scratch
-Then xfstests can be run
+Then xfstests can be run::
-./check -pvfs2
+ ./check -pvfs2
-OPTIONS
+Options
=======
The following mount options are accepted:
@@ -193,32 +200,32 @@ The following mount options are accepted:
Distributed locking is being worked on for the future.
-DEBUGGING
+Debugging
=========
If you want the debug (GOSSIP) statements in a particular
-source file (inode.c for example) go to syslog:
+source file (inode.c for example) go to syslog::
echo inode > /sys/kernel/debug/orangefs/kernel-debug
-No debugging (the default):
+No debugging (the default)::
echo none > /sys/kernel/debug/orangefs/kernel-debug
-Debugging from several source files:
+Debugging from several source files::
echo inode,dir > /sys/kernel/debug/orangefs/kernel-debug
-All debugging:
+All debugging::
echo all > /sys/kernel/debug/orangefs/kernel-debug
-Get a list of all debugging keywords:
+Get a list of all debugging keywords::
cat /sys/kernel/debug/orangefs/debug-help
-PROTOCOL BETWEEN KERNEL MODULE AND USERSPACE
+Protocol between Kernel Module and Userspace
============================================
Orangefs is a user space filesystem and an associated kernel module.
@@ -234,7 +241,8 @@ The kernel module implements a pseudo device that userspace
can read from and write to. Userspace can also manipulate the
kernel module through the pseudo device with ioctl.
-THE BUFMAP:
+The Bufmap
+----------
At startup userspace allocates two page-size-aligned (posix_memalign)
mlocked memory buffers, one is used for IO and one is used for readdir
@@ -250,7 +258,8 @@ copied from user space to kernel space with copy_from_user and is used
to initialize the kernel module's "bufmap" (struct orangefs_bufmap), which
then contains:
- * refcnt - a reference counter
+ * refcnt
+ - a reference counter
* desc_size - PVFS2_BUFMAP_DEFAULT_DESC_SIZE (4194304) - the IO buffer's
partition size, which represents the filesystem's block size and
is used for s_blocksize in super blocks.
@@ -259,17 +268,19 @@ then contains:
* desc_shift - log2(desc_size), used for s_blocksize_bits in super blocks.
* total_size - the total size of the IO buffer.
* page_count - the number of 4096 byte pages in the IO buffer.
- * page_array - a pointer to page_count * (sizeof(struct page*)) bytes
+ * page_array - a pointer to ``page_count * (sizeof(struct page*))`` bytes
of kcalloced memory. This memory is used as an array of pointers
to each of the pages in the IO buffer through a call to get_user_pages.
- * desc_array - a pointer to desc_count * (sizeof(struct orangefs_bufmap_desc))
+ * desc_array - a pointer to ``desc_count * (sizeof(struct orangefs_bufmap_desc))``
bytes of kcalloced memory. This memory is further initialized:
user_desc is the kernel's copy of the IO buffer's ORANGEFS_dev_map_desc
structure. user_desc->ptr points to the IO buffer.
- pages_per_desc = bufmap->desc_size / PAGE_SIZE
- offset = 0
+ ::
+
+ pages_per_desc = bufmap->desc_size / PAGE_SIZE
+ offset = 0
bufmap->desc_array[0].page_array = &bufmap->page_array[offset]
bufmap->desc_array[0].array_count = pages_per_desc = 1024
@@ -293,7 +304,8 @@ then contains:
* readdir_index_lock - a spinlock to protect readdir_index_array during
update.
-OPERATIONS:
+Operations
+----------
The kernel module builds an "op" (struct orangefs_kernel_op_s) when it
needs to communicate with userspace. Part of the op contains the "upcall"
@@ -308,13 +320,19 @@ in flight at any given time.
Ops are stateful:
- * unknown - op was just initialized
- * waiting - op is on request_list (upward bound)
- * inprogr - op is in progress (waiting for downcall)
- * serviced - op has matching downcall; ok
- * purged - op has to start a timer since client-core
+ * unknown
+ - op was just initialized
+ * waiting
+ - op is on request_list (upward bound)
+ * inprogr
+ - op is in progress (waiting for downcall)
+ * serviced
+ - op has matching downcall; ok
+ * purged
+ - op has to start a timer since client-core
exited uncleanly before servicing op
- * given up - submitter has given up waiting for it
+ * given up
+ - submitter has given up waiting for it
When some arbitrary userspace program needs to perform a
filesystem operation on Orangefs (readdir, I/O, create, whatever)
@@ -389,10 +407,15 @@ union of structs, each of which is associated with a particular
response type.
The several members outside of the union are:
- - int32_t type - type of operation.
- - int32_t status - return code for the operation.
- - int64_t trailer_size - 0 unless readdir operation.
- - char *trailer_buf - initialized to NULL, used during readdir operations.
+
+ ``int32_t type``
+ - type of operation.
+ ``int32_t status``
+ - return code for the operation.
+ ``int64_t trailer_size``
+ - 0 unless readdir operation.
+ ``char *trailer_buf``
+ - initialized to NULL, used during readdir operations.
The appropriate member inside the union is filled out for any
particular response.
@@ -449,18 +472,20 @@ Userspace uses writev() on /dev/pvfs2-req to pass responses to the requests
made by the kernel side.
A buffer_list containing:
+
- a pointer to the prepared response to the request from the
kernel (struct pvfs2_downcall_t).
- and also, in the case of a readdir request, a pointer to a
buffer containing descriptors for the objects in the target
directory.
+
... is sent to the function (PINT_dev_write_list) which performs
the writev.
PINT_dev_write_list has a local iovec array: struct iovec io_array[10];
The first four elements of io_array are initialized like this for all
-responses:
+responses::
io_array[0].iov_base = address of local variable "proto_ver" (int32_t)
io_array[0].iov_len = sizeof(int32_t)
@@ -475,7 +500,7 @@ responses:
of global variable vfs_request (vfs_request_t)
io_array[3].iov_len = sizeof(pvfs2_downcall_t)
-Readdir responses initialize the fifth element io_array like this:
+Readdir responses initialize the fifth element io_array like this::
io_array[4].iov_base = contents of member trailer_buf (char *)
from out_downcall member of global variable
@@ -517,13 +542,13 @@ from a dentry is cheap, obtaining it from userspace is relatively expensive,
hence the motivation to use the dentry when possible.
The timeout values d_time and getattr_time are jiffy based, and the
-code is designed to avoid the jiffy-wrap problem:
+code is designed to avoid the jiffy-wrap problem::
-"In general, if the clock may have wrapped around more than once, there
-is no way to tell how much time has elapsed. However, if the times t1
-and t2 are known to be fairly close, we can reliably compute the
-difference in a way that takes into account the possibility that the
-clock may have wrapped between times."
+ "In general, if the clock may have wrapped around more than once, there
+ is no way to tell how much time has elapsed. However, if the times t1
+ and t2 are known to be fairly close, we can reliably compute the
+ difference in a way that takes into account the possibility that the
+ clock may have wrapped between times."
- from course notes by instructor Andy Wang
+from course notes by instructor Andy Wang
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index f18506083ced..26c093969573 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -850,3 +850,11 @@ business doing so.
d_alloc_pseudo() is internal-only; uses outside of alloc_file_pseudo() are
very suspect (and won't work in modules). Such uses are very likely to
be misspelled d_alloc_anon().
+
+---
+
+**mandatory**
+
+[should've been added in 2016] stale comment in finish_open() notwithstanding,
+failure exits in ->atomic_open() instances should *NOT* fput() the file,
+no matter what. Everything is handled by the caller.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.rst
index 99ca040e3f90..38b606991065 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.rst
@@ -1,19 +1,20 @@
-------------------------------------------------------------------------------
- T H E /proc F I L E S Y S T E M
-------------------------------------------------------------------------------
-/proc/sys Terrehon Bowden <terrehon@pacbell.net> October 7 1999
- Bodo Bauer <bb@ricochet.net>
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+The /proc Filesystem
+====================
+
+===================== ======================================= ================
+/proc/sys Terrehon Bowden <terrehon@pacbell.net>, October 7 1999
+ Bodo Bauer <bb@ricochet.net>
+2.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000
+move /proc/sys Shen Feng <shen@cn.fujitsu.com> April 1 2009
+fixes/update part 1.1 Stefani Seibold <stefani@seibold.net> June 9 2009
+===================== ======================================= ================
+
-2.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000
-move /proc/sys Shen Feng <shen@cn.fujitsu.com> April 1 2009
-------------------------------------------------------------------------------
-Version 1.3 Kernel version 2.2.12
- Kernel version 2.4.0-test11-pre4
-------------------------------------------------------------------------------
-fixes/update part 1.1 Stefani Seibold <stefani@seibold.net> June 9 2009
-Table of Contents
------------------
+.. Table of Contents
0 Preface
0.1 Introduction/Credits
@@ -50,9 +51,8 @@ Table of Contents
4 Configuring procfs
4.1 Mount options
-------------------------------------------------------------------------------
Preface
-------------------------------------------------------------------------------
+=======
0.1 Introduction/Credits
------------------------
@@ -95,20 +95,18 @@ We don't guarantee the correctness of this document, and if you come to us
complaining about how you screwed up your system because of incorrect
documentation, we won't feel responsible...
-------------------------------------------------------------------------------
-CHAPTER 1: COLLECTING SYSTEM INFORMATION
-------------------------------------------------------------------------------
+Chapter 1: Collecting System Information
+========================================
-------------------------------------------------------------------------------
In This Chapter
-------------------------------------------------------------------------------
+---------------
* Investigating the properties of the pseudo file system /proc and its
ability to provide information on the running Linux system
* Examining /proc's structure
* Uncovering various information about the kernel and the processes running
on the system
-------------------------------------------------------------------------------
+------------------------------------------------------------------------------
The proc file system acts as an interface to internal data structures in the
kernel. It can be used to obtain information about the system and to change
@@ -134,9 +132,11 @@ never act on any new process that the kernel may, through chance, have
also assigned the process ID <pid>. Instead, operations on these FDs
usually fail with ESRCH.
-Table 1-1: Process specific entries in /proc
-..............................................................................
+.. table:: Table 1-1: Process specific entries in /proc
+
+ ============= ===============================================================
File Content
+ ============= ===============================================================
clear_refs Clears page referenced bits shown in smaps output
cmdline Command line arguments
cpu Current and last cpu in which it was executed (2.4)(smp)
@@ -160,10 +160,10 @@ Table 1-1: Process specific entries in /proc
can be derived from smaps, but is faster and more convenient
numa_maps An extension based on maps, showing the memory locality and
binding policy as well as mem usage (in pages) of each mapping.
-..............................................................................
+ ============= ===============================================================
For example, to get the status information of a process, all you have to do is
-read the file /proc/PID/status:
+read the file /proc/PID/status::
>cat /proc/self/status
Name: cat
@@ -222,14 +222,17 @@ contains details information about the process itself. Its fields are
explained in Table 1-4.
(for SMP CONFIG users)
+
For making accounting scalable, RSS-related information is handled in an
asynchronous manner and the value may not be very precise. To see a precise
snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
It's slow but very precise.
-Table 1-2: Contents of the status files (as of 4.19)
-..............................................................................
+.. table:: Table 1-2: Contents of the status files (as of 4.19)
+
+ ========================== ===================================================
Field Content
+ ========================== ===================================================
Name filename of the executable
Umask file mode creation mask
State state (R is running, S is sleeping, D is sleeping
@@ -254,7 +257,8 @@ Table 1-2: Contents of the status files (as of 4.19)
VmPin pinned memory size
VmHWM peak resident set size ("high water mark")
VmRSS size of memory portions. It contains the three
- following parts (VmRSS = RssAnon + RssFile + RssShmem)
+ following parts
+ (VmRSS = RssAnon + RssFile + RssShmem)
RssAnon size of resident anonymous memory
RssFile size of resident file mappings
RssShmem size of resident shmem memory (includes SysV shm,
@@ -292,27 +296,32 @@ Table 1-2: Contents of the status files (as of 4.19)
Mems_allowed_list Same as previous, but in "list format"
voluntary_ctxt_switches number of voluntary context switches
nonvoluntary_ctxt_switches number of non voluntary context switches
-..............................................................................
+ ========================== ===================================================
-Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
-..............................................................................
+
+.. table:: Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
+
+ ======== =============================== ==============================
Field Content
+ ======== =============================== ==============================
size total program size (pages) (same as VmSize in status)
resident size of memory portions (pages) (same as VmRSS in status)
shared number of pages that are shared (i.e. backed by a file, same
as RssFile+RssShmem in status)
trs number of pages that are 'code' (not including libs; broken,
- includes data segment)
+ includes data segment)
lrs number of pages of library (always 0 on 2.6)
drs number of pages of data/stack (including libs; broken,
- includes library text)
+ includes library text)
dt number of dirty pages (always 0 on 2.6)
-..............................................................................
+ ======== =============================== ==============================
+
+.. table:: Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
-Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
-..............................................................................
- Field Content
+ ============= ===============================================================
+ Field Content
+ ============= ===============================================================
pid process id
tcomm filename of the executable
state state (R is running, S is sleeping, D is sleeping in an
@@ -348,7 +357,8 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
blocked bitmap of blocked signals
sigign bitmap of ignored signals
sigcatch bitmap of caught signals
- 0 (place holder, used to be the wchan address, use /proc/PID/wchan instead)
+ 0 (place holder, used to be the wchan address,
+ use /proc/PID/wchan instead)
0 (place holder)
0 (place holder)
exit_signal signal to send to parent thread on exit
@@ -365,39 +375,40 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
arg_end address below which program command line is placed
env_start address above which program environment is placed
env_end address below which program environment is placed
- exit_code the thread's exit_code in the form reported by the waitpid system call
-..............................................................................
+ exit_code the thread's exit_code in the form reported by the waitpid
+ system call
+ ============= ===============================================================
The /proc/PID/maps file contains the currently mapped memory regions and
their access permissions.
-The format is:
-
-address perms offset dev inode pathname
-
-08048000-08049000 r-xp 00000000 03:00 8312 /opt/test
-08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
-0804a000-0806b000 rw-p 00000000 00:00 0 [heap]
-a7cb1000-a7cb2000 ---p 00000000 00:00 0
-a7cb2000-a7eb2000 rw-p 00000000 00:00 0
-a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0
-a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
-a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6
-a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6
-a800b000-a800e000 rw-p 00000000 00:00 0
-a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0
-a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0
-a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0
-a8024000-a8027000 rw-p 00000000 00:00 0
-a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2
-a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2
-a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2
-aff35000-aff4a000 rw-p 00000000 00:00 0 [stack]
-ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso]
+The format is::
+
+ address perms offset dev inode pathname
+
+ 08048000-08049000 r-xp 00000000 03:00 8312 /opt/test
+ 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
+ 0804a000-0806b000 rw-p 00000000 00:00 0 [heap]
+ a7cb1000-a7cb2000 ---p 00000000 00:00 0
+ a7cb2000-a7eb2000 rw-p 00000000 00:00 0
+ a7eb2000-a7eb3000 ---p 00000000 00:00 0
+ a7eb3000-a7ed5000 rw-p 00000000 00:00 0
+ a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
+ a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6
+ a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6
+ a800b000-a800e000 rw-p 00000000 00:00 0
+ a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0
+ a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0
+ a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0
+ a8024000-a8027000 rw-p 00000000 00:00 0
+ a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2
+ a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2
+ a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2
+ aff35000-aff4a000 rw-p 00000000 00:00 0 [stack]
+ ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso]
where "address" is the address space in the process that it occupies, "perms"
-is a set of permissions:
+is a set of permissions::
r = read
w = write
@@ -411,42 +422,44 @@ with the memory region, as the case would be with BSS (uninitialized data).
The "pathname" shows the name associated file for this mapping. If the mapping
is not associated with a file:
- [heap] = the heap of the program
- [stack] = the stack of the main process
- [vdso] = the "virtual dynamic shared object",
+ ======= ====================================
+ [heap] the heap of the program
+ [stack] the stack of the main process
+ [vdso] the "virtual dynamic shared object",
the kernel system call handler
+ ======= ====================================
or if empty, the mapping is anonymous.
The /proc/PID/smaps is an extension based on maps, showing the memory
consumption for each of the process's mappings. For each mapping (aka Virtual
-Memory Area, or VMA) there is a series of lines such as the following:
-
-08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash
-
-Size: 1084 kB
-KernelPageSize: 4 kB
-MMUPageSize: 4 kB
-Rss: 892 kB
-Pss: 374 kB
-Shared_Clean: 892 kB
-Shared_Dirty: 0 kB
-Private_Clean: 0 kB
-Private_Dirty: 0 kB
-Referenced: 892 kB
-Anonymous: 0 kB
-LazyFree: 0 kB
-AnonHugePages: 0 kB
-ShmemPmdMapped: 0 kB
-Shared_Hugetlb: 0 kB
-Private_Hugetlb: 0 kB
-Swap: 0 kB
-SwapPss: 0 kB
-KernelPageSize: 4 kB
-MMUPageSize: 4 kB
-Locked: 0 kB
-THPeligible: 0
-VmFlags: rd ex mr mw me dw
+Memory Area, or VMA) there is a series of lines such as the following::
+
+ 08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash
+
+ Size: 1084 kB
+ KernelPageSize: 4 kB
+ MMUPageSize: 4 kB
+ Rss: 892 kB
+ Pss: 374 kB
+ Shared_Clean: 892 kB
+ Shared_Dirty: 0 kB
+ Private_Clean: 0 kB
+ Private_Dirty: 0 kB
+ Referenced: 892 kB
+ Anonymous: 0 kB
+ LazyFree: 0 kB
+ AnonHugePages: 0 kB
+ ShmemPmdMapped: 0 kB
+ Shared_Hugetlb: 0 kB
+ Private_Hugetlb: 0 kB
+ Swap: 0 kB
+ SwapPss: 0 kB
+ KernelPageSize: 4 kB
+ MMUPageSize: 4 kB
+ Locked: 0 kB
+ THPeligible: 0
+ VmFlags: rd ex mr mw me dw
The first of these lines shows the same information as is displayed for the
mapping in /proc/PID/maps. Following lines show the size of the mapping
@@ -461,26 +474,35 @@ The "proportional set size" (PSS) of a process is the count of pages it has
in memory, where each page is divided by the number of processes sharing it.
So if a process has 1000 pages all to itself, and 1000 shared with one other
process, its PSS will be 1500.
+
Note that even a page which is part of a MAP_SHARED mapping, but has only
a single pte mapped, i.e. is currently used by only one process, is accounted
as private and not as shared.
+
"Referenced" indicates the amount of memory currently marked as referenced or
accessed.
+
"Anonymous" shows the amount of memory that does not belong to any file. Even
a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE
and a page is modified, the file page is replaced by a private anonymous copy.
+
"LazyFree" shows the amount of memory which is marked by madvise(MADV_FREE).
The memory isn't freed immediately with madvise(). It's freed in memory
pressure if the memory is clean. Please note that the printed value might
be lower than the real value due to optimizations used in the current
implementation. If this is not desirable please file a bug report.
+
"AnonHugePages" shows the amount of memory backed by transparent hugepage.
+
"ShmemPmdMapped" shows the amount of shared (shmem/tmpfs) memory backed by
huge pages.
+
"Shared_Hugetlb" and "Private_Hugetlb" show the amounts of memory backed by
hugetlbfs page which is *not* counted in "RSS" or "PSS" field for historical
reasons. And these are not included in {Shared,Private}_{Clean,Dirty} field.
+
"Swap" shows how much would-be-anonymous memory is also used, but out on swap.
+
For shmem mappings, "Swap" includes also the size of the mapped (and not
replaced by copy-on-write) part of the underlying shmem object out on swap.
"SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this
@@ -489,36 +511,39 @@ does not take into account swapped out page of underlying shmem objects.
"THPeligible" indicates whether the mapping is eligible for allocating THP
pages - 1 if true, 0 otherwise. It just shows the current status.
-"VmFlags" field deserves a separate description. This member represents the kernel
-flags associated with the particular virtual memory area in two letter encoded
-manner. The codes are the following:
- rd - readable
- wr - writeable
- ex - executable
- sh - shared
- mr - may read
- mw - may write
- me - may execute
- ms - may share
- gd - stack segment growns down
- pf - pure PFN range
- dw - disabled write to the mapped file
- lo - pages are locked in memory
- io - memory mapped I/O area
- sr - sequential read advise provided
- rr - random read advise provided
- dc - do not copy area on fork
- de - do not expand area on remapping
- ac - area is accountable
- nr - swap space is not reserved for the area
- ht - area uses huge tlb pages
- ar - architecture specific flag
- dd - do not include area into core dump
- sd - soft-dirty flag
- mm - mixed map area
- hg - huge page advise flag
- nh - no-huge page advise flag
- mg - mergable advise flag
+"VmFlags" field deserves a separate description. This member represents the
+kernel flags associated with the particular virtual memory area in two letter
+encoded manner. The codes are the following:
+
+ == =======================================
+ rd readable
+ wr writeable
+ ex executable
+ sh shared
+ mr may read
+ mw may write
+ me may execute
+ ms may share
+ gd stack segment grows down
+ pf pure PFN range
+ dw disabled write to the mapped file
+ lo pages are locked in memory
+ io memory mapped I/O area
+ sr sequential read advise provided
+ rr random read advise provided
+ dc do not copy area on fork
+ de do not expand area on remapping
+ ac area is accountable
+ nr swap space is not reserved for the area
+ ht area uses huge tlb pages
+ ar architecture specific flag
+ dd do not include area into core dump
+ sd soft dirty flag
+ mm mixed map area
+ hg huge page advise flag
+ nh no huge page advise flag
+ mg mergeable advise flag
+ == =======================================
Note that there is no guarantee that every flag and associated mnemonic will
be present in all further kernel releases. Things get changed, the flags may
@@ -531,6 +556,7 @@ enabled.
Note: reading /proc/PID/maps or /proc/PID/smaps is inherently racy (consistent
output can be achieved only in the single read call).
+
This typically manifests when doing partial reads of these files while the
memory map is being modified. Despite the races, we do provide the following
guarantees:
@@ -544,9 +570,9 @@ The /proc/PID/smaps_rollup file includes the same fields as /proc/PID/smaps,
but their values are the sums of the corresponding values for all mappings of
the process. Additionally, it contains these fields:
-Pss_Anon
-Pss_File
-Pss_Shmem
+- Pss_Anon
+- Pss_File
+- Pss_Shmem
They represent the proportional shares of anonymous, file, and shmem pages, as
described for smaps above. These fields are omitted in smaps since each
@@ -558,20 +584,25 @@ The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
bits on both physical and virtual pages associated with a process, and the
soft-dirty bit on pte (see Documentation/admin-guide/mm/soft-dirty.rst
for details).
-To clear the bits for all the pages associated with the process
+To clear the bits for all the pages associated with the process::
+
> echo 1 > /proc/PID/clear_refs
-To clear the bits for the anonymous pages associated with the process
+To clear the bits for the anonymous pages associated with the process::
+
> echo 2 > /proc/PID/clear_refs
-To clear the bits for the file mapped pages associated with the process
+To clear the bits for the file mapped pages associated with the process::
+
> echo 3 > /proc/PID/clear_refs
-To clear the soft-dirty bit
+To clear the soft-dirty bit::
+
> echo 4 > /proc/PID/clear_refs
To reset the peak resident set size ("high water mark") to the process's
-current value:
+current value::
+
> echo 5 > /proc/PID/clear_refs
Any other value written to /proc/PID/clear_refs will have no effect.
@@ -584,30 +615,33 @@ Documentation/admin-guide/mm/pagemap.rst.
The /proc/pid/numa_maps is an extension based on maps, showing the memory
locality and binding policy, as well as the memory usage (in pages) of
each mapping. The output follows a general format where mapping details get
-summarized separated by blank spaces, one mapping per each file line:
-
-address policy mapping details
-
-00400000 default file=/usr/local/bin/app mapped=1 active=0 N3=1 kernelpagesize_kB=4
-00600000 default file=/usr/local/bin/app anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206000000 default file=/lib64/ld-2.12.so mapped=26 mapmax=6 N0=24 N3=2 kernelpagesize_kB=4
-320621f000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206220000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206221000 default anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206800000 default file=/lib64/libc-2.12.so mapped=59 mapmax=21 active=55 N0=41 N3=18 kernelpagesize_kB=4
-320698b000 default file=/lib64/libc-2.12.so
-3206b8a000 default file=/lib64/libc-2.12.so anon=2 dirty=2 N3=2 kernelpagesize_kB=4
-3206b8e000 default file=/lib64/libc-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
-3206b8f000 default anon=3 dirty=3 active=1 N3=3 kernelpagesize_kB=4
-7f4dc10a2000 default anon=3 dirty=3 N3=3 kernelpagesize_kB=4
-7f4dc10b4000 default anon=2 dirty=2 active=1 N3=2 kernelpagesize_kB=4
-7f4dc1200000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N3=1 kernelpagesize_kB=2048
-7fff335f0000 default stack anon=3 dirty=3 N3=3 kernelpagesize_kB=4
-7fff3369d000 default mapped=1 mapmax=35 active=0 N3=1 kernelpagesize_kB=4
+summarized separated by blank spaces, one mapping per each file line::
+
+ address policy mapping details
+
+ 00400000 default file=/usr/local/bin/app mapped=1 active=0 N3=1 kernelpagesize_kB=4
+ 00600000 default file=/usr/local/bin/app anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206000000 default file=/lib64/ld-2.12.so mapped=26 mapmax=6 N0=24 N3=2 kernelpagesize_kB=4
+ 320621f000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206220000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206221000 default anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206800000 default file=/lib64/libc-2.12.so mapped=59 mapmax=21 active=55 N0=41 N3=18 kernelpagesize_kB=4
+ 320698b000 default file=/lib64/libc-2.12.so
+ 3206b8a000 default file=/lib64/libc-2.12.so anon=2 dirty=2 N3=2 kernelpagesize_kB=4
+ 3206b8e000 default file=/lib64/libc-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+ 3206b8f000 default anon=3 dirty=3 active=1 N3=3 kernelpagesize_kB=4
+ 7f4dc10a2000 default anon=3 dirty=3 N3=3 kernelpagesize_kB=4
+ 7f4dc10b4000 default anon=2 dirty=2 active=1 N3=2 kernelpagesize_kB=4
+ 7f4dc1200000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N3=1 kernelpagesize_kB=2048
+ 7fff335f0000 default stack anon=3 dirty=3 N3=3 kernelpagesize_kB=4
+ 7fff3369d000 default mapped=1 mapmax=35 active=0 N3=1 kernelpagesize_kB=4
Where:
+
"address" is the starting address for the mapping;
+
"policy" reports the NUMA memory policy set for the mapping (see Documentation/admin-guide/mm/numa_memory_policy.rst);
+
"mapping details" summarizes mapping data such as mapping type, page usage counters,
node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page
size, in KB, that is backing the mapping up.
@@ -621,81 +655,83 @@ the running kernel. The files used to obtain this information are contained in
system. It depends on the kernel configuration and the loaded modules, which
files are there, and which are missing.
-Table 1-5: Kernel info in /proc
-..............................................................................
- File Content
- apm Advanced power management info
- buddyinfo Kernel memory allocator information (see text) (2.5)
- bus Directory containing bus specific information
- cmdline Kernel command line
- cpuinfo Info about the CPU
- devices Available devices (block and character)
- dma Used DMS channels
- filesystems Supported filesystems
- driver Various drivers grouped here, currently rtc (2.4)
- execdomains Execdomains, related to security (2.4)
- fb Frame Buffer devices (2.4)
- fs File system parameters, currently nfs/exports (2.4)
- ide Directory containing info about the IDE subsystem
- interrupts Interrupt usage
- iomem Memory map (2.4)
- ioports I/O port usage
- irq Masks for irq to cpu affinity (2.4)(smp?)
- isapnp ISA PnP (Plug&Play) Info (2.4)
- kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4))
- kmsg Kernel messages
- ksyms Kernel symbol table
- loadavg Load average of last 1, 5 & 15 minutes
- locks Kernel locks
- meminfo Memory info
- misc Miscellaneous
- modules List of loaded modules
- mounts Mounted filesystems
- net Networking info (see text)
+.. table:: Table 1-5: Kernel info in /proc
+
+ ============ ===============================================================
+ File Content
+ ============ ===============================================================
+ apm Advanced power management info
+ buddyinfo Kernel memory allocator information (see text) (2.5)
+ bus Directory containing bus specific information
+ cmdline Kernel command line
+ cpuinfo Info about the CPU
+ devices Available devices (block and character)
+ dma Used DMA channels
+ filesystems Supported filesystems
+ driver Various drivers grouped here, currently rtc (2.4)
+ execdomains Execdomains, related to security (2.4)
+ fb Frame Buffer devices (2.4)
+ fs File system parameters, currently nfs/exports (2.4)
+ ide Directory containing info about the IDE subsystem
+ interrupts Interrupt usage
+ iomem Memory map (2.4)
+ ioports I/O port usage
+ irq Masks for irq to cpu affinity (2.4)(smp?)
+ isapnp ISA PnP (Plug&Play) Info (2.4)
+ kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4))
+ kmsg Kernel messages
+ ksyms Kernel symbol table
+ loadavg Load average of last 1, 5 & 15 minutes
+ locks Kernel locks
+ meminfo Memory info
+ misc Miscellaneous
+ modules List of loaded modules
+ mounts Mounted filesystems
+ net Networking info (see text)
pagetypeinfo Additional page allocator information (see text) (2.5)
- partitions Table of partitions known to the system
- pci Deprecated info of PCI bus (new way -> /proc/bus/pci/,
- decoupled by lspci (2.4)
- rtc Real time clock
- scsi SCSI info (see text)
- slabinfo Slab pool info
- softirqs softirq usage
- stat Overall statistics
- swaps Swap space utilization
- sys See chapter 2
- sysvipc Info of SysVIPC Resources (msg, sem, shm) (2.4)
- tty Info of tty drivers
- uptime Wall clock since boot, combined idle time of all cpus
- version Kernel version
- video bttv info of video resources (2.4)
- vmallocinfo Show vmalloced areas
-..............................................................................
+ partitions Table of partitions known to the system
+ pci Deprecated info of PCI bus (new way -> /proc/bus/pci/,
+ decoupled by lspci) (2.4)
+ rtc Real time clock
+ scsi SCSI info (see text)
+ slabinfo Slab pool info
+ softirqs softirq usage
+ stat Overall statistics
+ swaps Swap space utilization
+ sys See chapter 2
+ sysvipc Info of SysVIPC Resources (msg, sem, shm) (2.4)
+ tty Info of tty drivers
+ uptime Wall clock since boot, combined idle time of all cpus
+ version Kernel version
+ video bttv info of video resources (2.4)
+ vmallocinfo Show vmalloced areas
+ ============ ===============================================================
You can, for example, check which interrupts are currently in use and what
-they are used for by looking in the file /proc/interrupts:
-
- > cat /proc/interrupts
- CPU0
- 0: 8728810 XT-PIC timer
- 1: 895 XT-PIC keyboard
- 2: 0 XT-PIC cascade
- 3: 531695 XT-PIC aha152x
- 4: 2014133 XT-PIC serial
- 5: 44401 XT-PIC pcnet_cs
- 8: 2 XT-PIC rtc
- 11: 8 XT-PIC i82365
- 12: 182918 XT-PIC PS/2 Mouse
- 13: 1 XT-PIC fpu
- 14: 1232265 XT-PIC ide0
- 15: 7 XT-PIC ide1
- NMI: 0
+they are used for by looking in the file /proc/interrupts::
+
+ > cat /proc/interrupts
+ CPU0
+ 0: 8728810 XT-PIC timer
+ 1: 895 XT-PIC keyboard
+ 2: 0 XT-PIC cascade
+ 3: 531695 XT-PIC aha152x
+ 4: 2014133 XT-PIC serial
+ 5: 44401 XT-PIC pcnet_cs
+ 8: 2 XT-PIC rtc
+ 11: 8 XT-PIC i82365
+ 12: 182918 XT-PIC PS/2 Mouse
+ 13: 1 XT-PIC fpu
+ 14: 1232265 XT-PIC ide0
+ 15: 7 XT-PIC ide1
+ NMI: 0
In 2.4.* a couple of lines were added to this file LOC & ERR (this time is the
-output of a SMP machine):
+output of a SMP machine)::
- > cat /proc/interrupts
+ > cat /proc/interrupts
- CPU0 CPU1
+ CPU0 CPU1
0: 1243498 1214548 IO-APIC-edge timer
1: 8949 8958 IO-APIC-edge keyboard
2: 0 0 XT-PIC cascade
@@ -708,8 +744,8 @@ output of a SMP machine):
15: 2183 2415 IO-APIC-edge ide1
17: 30564 30414 IO-APIC-level eth0
18: 177 164 IO-APIC-level bttv
- NMI: 2457961 2457959
- LOC: 2457882 2457881
+ NMI: 2457961 2457959
+ LOC: 2457882 2457881
ERR: 2155
NMI is incremented in this case because every timer interrupt generates a NMI
@@ -726,21 +762,25 @@ In 2.6.2* /proc/interrupts was expanded again. This time the goal was for
/proc/interrupts to display every IRQ vector in use by the system, not
just those considered 'most important'. The new vectors are:
- THR -- interrupt raised when a machine check threshold counter
+THR
+ interrupt raised when a machine check threshold counter
(typically counting ECC corrected errors of memory or cache) exceeds
a configurable threshold. Only available on some systems.
- TRM -- a thermal event interrupt occurs when a temperature threshold
+TRM
+ a thermal event interrupt occurs when a temperature threshold
has been exceeded for the CPU. This interrupt may also be generated
when the temperature drops back to normal.
- SPU -- a spurious interrupt is some interrupt that was raised then lowered
+SPU
+ a spurious interrupt is some interrupt that was raised then lowered
by some IO device before it could be fully processed by the APIC. Hence
the APIC sees the interrupt but does not know what device it came from.
For this case the APIC will generate the interrupt with a IRQ vector
of 0xff. This might also be generated by chipset bugs.
- RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are
+RES, CAL, TLB
+ rescheduling, call and TLB flush interrupts are
sent from one CPU to another per the needs of the OS. Typically,
their statistics are used by kernel developers and interested users to
determine the occurrence of interrupts of the given type.
@@ -756,7 +796,8 @@ IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the
irq subdir is one subdir for each IRQ, and two files; default_smp_affinity and
prof_cpu_mask.
-For example
+For example::
+
> ls /proc/irq/
0 10 12 14 16 18 2 4 6 8 prof_cpu_mask
1 11 13 15 17 19 3 5 7 9 default_smp_affinity
@@ -764,20 +805,20 @@ For example
smp_affinity
smp_affinity is a bitmask, in which you can specify which CPUs can handle the
-IRQ, you can set it by doing:
+IRQ, you can set it by doing::
> echo 1 > /proc/irq/10/smp_affinity
This means that only the first CPU will handle the IRQ, but you can also echo
5 which means that only the first and third CPU can handle the IRQ.
-The contents of each smp_affinity file is the same by default:
+The contents of each smp_affinity file is the same by default::
> cat /proc/irq/0/smp_affinity
ffffffff
There is an alternate interface, smp_affinity_list which allows specifying
-a cpu range instead of a bitmask:
+a cpu range instead of a bitmask::
> cat /proc/irq/0/smp_affinity_list
1024-1031
@@ -810,46 +851,46 @@ Linux uses slab pools for memory management above page level in version 2.2.
Commonly used objects have their own slab pool (such as network buffers,
directory cache, and so on).
-..............................................................................
+::
-> cat /proc/buddyinfo
+ > cat /proc/buddyinfo
-Node 0, zone DMA 0 4 5 4 4 3 ...
-Node 0, zone Normal 1 0 0 1 101 8 ...
-Node 0, zone HighMem 2 0 0 1 1 0 ...
+ Node 0, zone DMA 0 4 5 4 4 3 ...
+ Node 0, zone Normal 1 0 0 1 101 8 ...
+ Node 0, zone HighMem 2 0 0 1 1 0 ...
External fragmentation is a problem under some workloads, and buddyinfo is a
-useful tool for helping diagnose these problems. Buddyinfo will give you a
+useful tool for helping diagnose these problems. Buddyinfo will give you a
clue as to how big an area you can safely allocate, or why a previous
allocation failed.
-Each column represents the number of pages of a certain order which are
-available. In this case, there are 0 chunks of 2^0*PAGE_SIZE available in
-ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE
-available in ZONE_NORMAL, etc...
+Each column represents the number of pages of a certain order which are
+available. In this case, there are 0 chunks of 2^0*PAGE_SIZE available in
+ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE
+available in ZONE_NORMAL, etc...
More information relevant to external fragmentation can be found in
-pagetypeinfo.
-
-> cat /proc/pagetypeinfo
-Page block order: 9
-Pages per block: 512
-
-Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10
-Node 0, zone DMA, type Unmovable 0 0 0 1 1 1 1 1 1 1 0
-Node 0, zone DMA, type Reclaimable 0 0 0 0 0 0 0 0 0 0 0
-Node 0, zone DMA, type Movable 1 1 2 1 2 1 1 0 1 0 2
-Node 0, zone DMA, type Reserve 0 0 0 0 0 0 0 0 0 1 0
-Node 0, zone DMA, type Isolate 0 0 0 0 0 0 0 0 0 0 0
-Node 0, zone DMA32, type Unmovable 103 54 77 1 1 1 11 8 7 1 9
-Node 0, zone DMA32, type Reclaimable 0 0 2 1 0 0 0 0 1 0 0
-Node 0, zone DMA32, type Movable 169 152 113 91 77 54 39 13 6 1 452
-Node 0, zone DMA32, type Reserve 1 2 2 2 2 0 1 1 1 1 0
-Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0
-
-Number of blocks type Unmovable Reclaimable Movable Reserve Isolate
-Node 0, zone DMA 2 0 5 1 0
-Node 0, zone DMA32 41 6 967 2 0
+pagetypeinfo::
+
+ > cat /proc/pagetypeinfo
+ Page block order: 9
+ Pages per block: 512
+
+ Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10
+ Node 0, zone DMA, type Unmovable 0 0 0 1 1 1 1 1 1 1 0
+ Node 0, zone DMA, type Reclaimable 0 0 0 0 0 0 0 0 0 0 0
+ Node 0, zone DMA, type Movable 1 1 2 1 2 1 1 0 1 0 2
+ Node 0, zone DMA, type Reserve 0 0 0 0 0 0 0 0 0 1 0
+ Node 0, zone DMA, type Isolate 0 0 0 0 0 0 0 0 0 0 0
+ Node 0, zone DMA32, type Unmovable 103 54 77 1 1 1 11 8 7 1 9
+ Node 0, zone DMA32, type Reclaimable 0 0 2 1 0 0 0 0 1 0 0
+ Node 0, zone DMA32, type Movable 169 152 113 91 77 54 39 13 6 1 452
+ Node 0, zone DMA32, type Reserve 1 2 2 2 2 0 1 1 1 1 0
+ Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0
+
+ Number of blocks type Unmovable Reclaimable Movable Reserve Isolate
+ Node 0, zone DMA 2 0 5 1 0
+ Node 0, zone DMA32 41 6 967 2 0
Fragmentation avoidance in the kernel works by grouping pages of different
migrate types into the same contiguous regions of memory called page blocks.
@@ -870,59 +911,63 @@ unless memory has been mlock()'d. Some of the Reclaimable blocks should
also be allocatable although a lot of filesystem metadata may have to be
reclaimed to achieve this.
-..............................................................................
-meminfo:
+meminfo
+~~~~~~~
Provides information about distribution and utilization of memory. This
varies by architecture and compile options. The following is from a
16GB PIII, which has highmem enabled. You may not have all of these fields.
-> cat /proc/meminfo
-
-MemTotal: 16344972 kB
-MemFree: 13634064 kB
-MemAvailable: 14836172 kB
-Buffers: 3656 kB
-Cached: 1195708 kB
-SwapCached: 0 kB
-Active: 891636 kB
-Inactive: 1077224 kB
-HighTotal: 15597528 kB
-HighFree: 13629632 kB
-LowTotal: 747444 kB
-LowFree: 4432 kB
-SwapTotal: 0 kB
-SwapFree: 0 kB
-Dirty: 968 kB
-Writeback: 0 kB
-AnonPages: 861800 kB
-Mapped: 280372 kB
-Shmem: 644 kB
-KReclaimable: 168048 kB
-Slab: 284364 kB
-SReclaimable: 159856 kB
-SUnreclaim: 124508 kB
-PageTables: 24448 kB
-NFS_Unstable: 0 kB
-Bounce: 0 kB
-WritebackTmp: 0 kB
-CommitLimit: 7669796 kB
-Committed_AS: 100056 kB
-VmallocTotal: 112216 kB
-VmallocUsed: 428 kB
-VmallocChunk: 111088 kB
-Percpu: 62080 kB
-HardwareCorrupted: 0 kB
-AnonHugePages: 49152 kB
-ShmemHugePages: 0 kB
-ShmemPmdMapped: 0 kB
-
-
- MemTotal: Total usable ram (i.e. physical ram minus a few reserved
+::
+
+ > cat /proc/meminfo
+
+ MemTotal: 16344972 kB
+ MemFree: 13634064 kB
+ MemAvailable: 14836172 kB
+ Buffers: 3656 kB
+ Cached: 1195708 kB
+ SwapCached: 0 kB
+ Active: 891636 kB
+ Inactive: 1077224 kB
+ HighTotal: 15597528 kB
+ HighFree: 13629632 kB
+ LowTotal: 747444 kB
+ LowFree: 4432 kB
+ SwapTotal: 0 kB
+ SwapFree: 0 kB
+ Dirty: 968 kB
+ Writeback: 0 kB
+ AnonPages: 861800 kB
+ Mapped: 280372 kB
+ Shmem: 644 kB
+ KReclaimable: 168048 kB
+ Slab: 284364 kB
+ SReclaimable: 159856 kB
+ SUnreclaim: 124508 kB
+ PageTables: 24448 kB
+ NFS_Unstable: 0 kB
+ Bounce: 0 kB
+ WritebackTmp: 0 kB
+ CommitLimit: 7669796 kB
+ Committed_AS: 100056 kB
+ VmallocTotal: 112216 kB
+ VmallocUsed: 428 kB
+ VmallocChunk: 111088 kB
+ Percpu: 62080 kB
+ HardwareCorrupted: 0 kB
+ AnonHugePages: 49152 kB
+ ShmemHugePages: 0 kB
+ ShmemPmdMapped: 0 kB
+
+MemTotal
+ Total usable ram (i.e. physical ram minus a few reserved
bits and the kernel binary code)
- MemFree: The sum of LowFree+HighFree
-MemAvailable: An estimate of how much memory is available for starting new
+MemFree
+ The sum of LowFree+HighFree
+MemAvailable
+ An estimate of how much memory is available for starting new
applications, without swapping. Calculated from MemFree,
SReclaimable, the size of the file LRU lists, and the low
watermarks in each zone.
@@ -930,69 +975,99 @@ MemAvailable: An estimate of how much memory is available for starting new
page cache to function well, and that not all reclaimable
slab will be reclaimable, due to items being in use. The
impact of those factors will vary from system to system.
- Buffers: Relatively temporary storage for raw disk blocks
+Buffers
+ Relatively temporary storage for raw disk blocks
shouldn't get tremendously large (20MB or so)
- Cached: in-memory cache for files read from the disk (the
+Cached
+ in-memory cache for files read from the disk (the
pagecache). Doesn't include SwapCached
- SwapCached: Memory that once was swapped out, is swapped back in but
+SwapCached
+ Memory that once was swapped out, is swapped back in but
still also is in the swapfile (if memory is needed it
doesn't need to be swapped out AGAIN because it is already
in the swapfile. This saves I/O)
- Active: Memory that has been used more recently and usually not
+Active
+ Memory that has been used more recently and usually not
reclaimed unless absolutely necessary.
- Inactive: Memory which has been less recently used. It is more
+Inactive
+ Memory which has been less recently used. It is more
eligible to be reclaimed for other purposes
- HighTotal:
- HighFree: Highmem is all memory above ~860MB of physical memory
+HighTotal, HighFree
+ Highmem is all memory above ~860MB of physical memory
Highmem areas are for use by userspace programs, or
for the pagecache. The kernel must use tricks to access
this memory, making it slower to access than lowmem.
- LowTotal:
- LowFree: Lowmem is memory which can be used for everything that
+LowTotal, LowFree
+ Lowmem is memory which can be used for everything that
highmem can be used for, but it is also available for the
kernel's use for its own data structures. Among many
other things, it is where everything from the Slab is
allocated. Bad things happen when you're out of lowmem.
- SwapTotal: total amount of swap space available
- SwapFree: Memory which has been evicted from RAM, and is temporarily
+SwapTotal
+ total amount of swap space available
+SwapFree
+ Memory which has been evicted from RAM, and is temporarily
on the disk
- Dirty: Memory which is waiting to get written back to the disk
- Writeback: Memory which is actively being written back to the disk
- AnonPages: Non-file backed pages mapped into userspace page tables
-HardwareCorrupted: The amount of RAM/memory in KB, the kernel identifies as
+Dirty
+ Memory which is waiting to get written back to the disk
+Writeback
+ Memory which is actively being written back to the disk
+AnonPages
+ Non-file backed pages mapped into userspace page tables
+HardwareCorrupted
+ The amount of RAM/memory in KB, the kernel identifies as
corrupted.
-AnonHugePages: Non-file backed huge pages mapped into userspace page tables
- Mapped: files which have been mmaped, such as libraries
- Shmem: Total memory used by shared memory (shmem) and tmpfs
-ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated
+AnonHugePages
+ Non-file backed huge pages mapped into userspace page tables
+Mapped
+ files which have been mmaped, such as libraries
+Shmem
+ Total memory used by shared memory (shmem) and tmpfs
+ShmemHugePages
+ Memory used by shared memory (shmem) and tmpfs allocated
with huge pages
-ShmemPmdMapped: Shared memory mapped into userspace with huge pages
-KReclaimable: Kernel allocations that the kernel will attempt to reclaim
+ShmemPmdMapped
+ Shared memory mapped into userspace with huge pages
+KReclaimable
+ Kernel allocations that the kernel will attempt to reclaim
under memory pressure. Includes SReclaimable (below), and other
direct allocations with a shrinker.
- Slab: in-kernel data structures cache
-SReclaimable: Part of Slab, that might be reclaimed, such as caches
- SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure
- PageTables: amount of memory dedicated to the lowest level of page
+Slab
+ in-kernel data structures cache
+SReclaimable
+ Part of Slab, that might be reclaimed, such as caches
+SUnreclaim
+ Part of Slab, that cannot be reclaimed on memory pressure
+PageTables
+ amount of memory dedicated to the lowest level of page
tables.
-NFS_Unstable: NFS pages sent to the server, but not yet committed to stable
+NFS_Unstable
+ NFS pages sent to the server, but not yet committed to stable
storage
- Bounce: Memory used for block device "bounce buffers"
-WritebackTmp: Memory used by FUSE for temporary writeback buffers
- CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'),
+Bounce
+ Memory used for block device "bounce buffers"
+WritebackTmp
+ Memory used by FUSE for temporary writeback buffers
+CommitLimit
+ Based on the overcommit ratio ('vm.overcommit_ratio'),
this is the total amount of memory currently available to
be allocated on the system. This limit is only adhered to
if strict overcommit accounting is enabled (mode 2 in
'vm.overcommit_memory').
- The CommitLimit is calculated with the following formula:
- CommitLimit = ([total RAM pages] - [total huge TLB pages]) *
- overcommit_ratio / 100 + [total swap pages]
+
+ The CommitLimit is calculated with the following formula::
+
+ CommitLimit = ([total RAM pages] - [total huge TLB pages]) *
+ overcommit_ratio / 100 + [total swap pages]
+
For example, on a system with 1G of physical RAM and 7G
of swap with a `vm.overcommit_ratio` of 30 it would
yield a CommitLimit of 7.3G.
+
For more details, see the memory overcommit documentation
in vm/overcommit-accounting.
-Committed_AS: The amount of memory presently allocated on the system.
+Committed_AS
+ The amount of memory presently allocated on the system.
The committed memory is a sum of all of the memory which
has been allocated by processes, even if it has not been
"used" by them as of yet. A process which malloc()'s 1G
@@ -1005,21 +1080,25 @@ Committed_AS: The amount of memory presently allocated on the system.
This is useful if one needs to guarantee that processes will
not fail due to lack of memory once that memory has been
successfully allocated.
-VmallocTotal: total size of vmalloc memory area
- VmallocUsed: amount of vmalloc area which is used
-VmallocChunk: largest contiguous block of vmalloc area which is free
- Percpu: Memory allocated to the percpu allocator used to back percpu
+VmallocTotal
+ total size of vmalloc memory area
+VmallocUsed
+ amount of vmalloc area which is used
+VmallocChunk
+ largest contiguous block of vmalloc area which is free
+Percpu
+ Memory allocated to the percpu allocator used to back percpu
allocations. This stat excludes the cost of metadata.
-..............................................................................
-
-vmallocinfo:
+vmallocinfo
+~~~~~~~~~~~
Provides information about vmalloced/vmaped areas. One line per area,
containing the virtual address range of the area, size in bytes,
caller information of the creator, and optional information depending
on the kind of area :
+ ========== ===================================================
pages=nr number of pages
phys=addr if a physical address was specified
ioremap I/O mapping (ioremap() and friends)
@@ -1029,49 +1108,54 @@ on the kind of area :
vpages buffer for pages pointers was vmalloced (huge area)
N<node>=nr (Only on NUMA kernels)
Number of pages allocated on memory node <node>
-
-> cat /proc/vmallocinfo
-0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ...
- /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128
-0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ...
- /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
-0xffffc20000302000-0xffffc20000304000 8192 acpi_tb_verify_table+0x21/0x4f...
- phys=7fee8000 ioremap
-0xffffc20000304000-0xffffc20000307000 12288 acpi_tb_verify_table+0x21/0x4f...
- phys=7fee7000 ioremap
-0xffffc2000031d000-0xffffc2000031f000 8192 init_vdso_vars+0x112/0x210
-0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e ...
- /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3
-0xffffc2000033a000-0xffffc2000033d000 12288 sys_swapon+0x640/0xac0 ...
- pages=2 vmalloc N1=2
-0xffffc20000347000-0xffffc2000034c000 20480 xt_alloc_table_info+0xfe ...
- /0x130 [x_tables] pages=4 vmalloc N0=4
-0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 ...
- pages=14 vmalloc N2=14
-0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 ...
- pages=4 vmalloc N1=4
-0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 ...
- pages=2 vmalloc N1=2
-0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ...
- pages=10 vmalloc N0=10
-
-..............................................................................
-
-softirqs:
+ ========== ===================================================
+
+::
+
+ > cat /proc/vmallocinfo
+ 0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ...
+ /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128
+ 0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ...
+ /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
+ 0xffffc20000302000-0xffffc20000304000 8192 acpi_tb_verify_table+0x21/0x4f...
+ phys=7fee8000 ioremap
+ 0xffffc20000304000-0xffffc20000307000 12288 acpi_tb_verify_table+0x21/0x4f...
+ phys=7fee7000 ioremap
+ 0xffffc2000031d000-0xffffc2000031f000 8192 init_vdso_vars+0x112/0x210
+ 0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e ...
+ /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3
+ 0xffffc2000033a000-0xffffc2000033d000 12288 sys_swapon+0x640/0xac0 ...
+ pages=2 vmalloc N1=2
+ 0xffffc20000347000-0xffffc2000034c000 20480 xt_alloc_table_info+0xfe ...
+ /0x130 [x_tables] pages=4 vmalloc N0=4
+ 0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 ...
+ pages=14 vmalloc N2=14
+ 0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 ...
+ pages=4 vmalloc N1=4
+ 0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 ...
+ pages=2 vmalloc N1=2
+ 0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ...
+ pages=10 vmalloc N0=10
+
+
+softirqs
+~~~~~~~~
Provides counts of softirq handlers serviced since boot time, for each cpu.
-> cat /proc/softirqs
- CPU0 CPU1 CPU2 CPU3
- HI: 0 0 0 0
- TIMER: 27166 27120 27097 27034
- NET_TX: 0 0 0 17
- NET_RX: 42 0 0 39
- BLOCK: 0 0 107 1121
- TASKLET: 0 0 0 290
- SCHED: 27035 26983 26971 26746
- HRTIMER: 0 0 0 0
- RCU: 1678 1769 2178 2250
+::
+
+ > cat /proc/softirqs
+ CPU0 CPU1 CPU2 CPU3
+ HI: 0 0 0 0
+ TIMER: 27166 27120 27097 27034
+ NET_TX: 0 0 0 17
+ NET_RX: 42 0 0 39
+ BLOCK: 0 0 107 1121
+ TASKLET: 0 0 0 290
+ SCHED: 27035 26983 26971 26746
+ HRTIMER: 0 0 0 0
+ RCU: 1678 1769 2178 2250
1.3 IDE devices in /proc/ide
@@ -1083,7 +1167,7 @@ file drivers and a link for each IDE device, pointing to the device directory
in the controller specific subtree.
The file drivers contains general information about the drivers used for the
-IDE devices:
+IDE devices::
> cat /proc/ide/drivers
ide-cdrom version 4.53
@@ -1094,57 +1178,61 @@ subdirectories. These are named ide0, ide1 and so on. Each of these
directories contains the files shown in table 1-6.
-Table 1-6: IDE controller info in /proc/ide/ide?
-..............................................................................
- File Content
- channel IDE channel (0 or 1)
- config Configuration (only for PCI/IDE bridge)
- mate Mate name
- model Type/Chipset of IDE controller
-..............................................................................
+.. table:: Table 1-6: IDE controller info in /proc/ide/ide?
+
+ ======= =======================================
+ File Content
+ ======= =======================================
+ channel IDE channel (0 or 1)
+ config Configuration (only for PCI/IDE bridge)
+ mate Mate name
+ model Type/Chipset of IDE controller
+ ======= =======================================
Each device connected to a controller has a separate subdirectory in the
controllers directory. The files listed in table 1-7 are contained in these
directories.
-Table 1-7: IDE device information
-..............................................................................
- File Content
- cache The cache
- capacity Capacity of the medium (in 512Byte blocks)
- driver driver and version
- geometry physical and logical geometry
- identify device identify block
- media media type
- model device identifier
- settings device setup
- smart_thresholds IDE disk management thresholds
- smart_values IDE disk management values
-..............................................................................
-
-The most interesting file is settings. This file contains a nice overview of
-the drive parameters:
-
- # cat /proc/ide/ide0/hda/settings
- name value min max mode
- ---- ----- --- --- ----
- bios_cyl 526 0 65535 rw
- bios_head 255 0 255 rw
- bios_sect 63 0 63 rw
- breada_readahead 4 0 127 rw
- bswap 0 0 1 r
- file_readahead 72 0 2097151 rw
- io_32bit 0 0 3 rw
- keepsettings 0 0 1 rw
- max_kb_per_request 122 1 127 rw
- multcount 0 0 8 rw
- nice1 1 0 1 rw
- nowerr 0 0 1 rw
- pio_mode write-only 0 255 w
- slow 0 0 1 rw
- unmaskirq 0 0 1 rw
- using_dma 0 0 1 rw
+.. table:: Table 1-7: IDE device information
+
+ ================ ==========================================
+ File Content
+ ================ ==========================================
+ cache The cache
+ capacity Capacity of the medium (in 512Byte blocks)
+ driver driver and version
+ geometry physical and logical geometry
+ identify device identify block
+ media media type
+ model device identifier
+ settings device setup
+ smart_thresholds IDE disk management thresholds
+ smart_values IDE disk management values
+ ================ ==========================================
+
+The most interesting file is ``settings``. This file contains a nice
+overview of the drive parameters::
+
+ # cat /proc/ide/ide0/hda/settings
+ name value min max mode
+ ---- ----- --- --- ----
+ bios_cyl 526 0 65535 rw
+ bios_head 255 0 255 rw
+ bios_sect 63 0 63 rw
+ breada_readahead 4 0 127 rw
+ bswap 0 0 1 r
+ file_readahead 72 0 2097151 rw
+ io_32bit 0 0 3 rw
+ keepsettings 0 0 1 rw
+ max_kb_per_request 122 1 127 rw
+ multcount 0 0 8 rw
+ nice1 1 0 1 rw
+ nowerr 0 0 1 rw
+ pio_mode write-only 0 255 w
+ slow 0 0 1 rw
+ unmaskirq 0 0 1 rw
+ using_dma 0 0 1 rw
1.4 Networking info in /proc/net
@@ -1155,67 +1243,70 @@ additional values you get for IP version 6 if you configure the kernel to
support this. Table 1-9 lists the files and their meaning.
-Table 1-8: IPv6 info in /proc/net
-..............................................................................
- File Content
- udp6 UDP sockets (IPv6)
- tcp6 TCP sockets (IPv6)
- raw6 Raw device statistics (IPv6)
- igmp6 IP multicast addresses, which this host joined (IPv6)
- if_inet6 List of IPv6 interface addresses
- ipv6_route Kernel routing table for IPv6
- rt6_stats Global IPv6 routing tables statistics
- sockstat6 Socket statistics (IPv6)
- snmp6 Snmp data (IPv6)
-..............................................................................
-
-
-Table 1-9: Network info in /proc/net
-..............................................................................
- File Content
- arp Kernel ARP table
- dev network devices with statistics
+.. table:: Table 1-8: IPv6 info in /proc/net
+
+ ========== =====================================================
+ File Content
+ ========== =====================================================
+ udp6 UDP sockets (IPv6)
+ tcp6 TCP sockets (IPv6)
+ raw6 Raw device statistics (IPv6)
+ igmp6 IP multicast addresses, which this host joined (IPv6)
+ if_inet6 List of IPv6 interface addresses
+ ipv6_route Kernel routing table for IPv6
+ rt6_stats Global IPv6 routing tables statistics
+ sockstat6 Socket statistics (IPv6)
+ snmp6 Snmp data (IPv6)
+ ========== =====================================================
+
+.. table:: Table 1-9: Network info in /proc/net
+
+ ============= ================================================================
+ File Content
+ ============= ================================================================
+ arp Kernel ARP table
+ dev network devices with statistics
dev_mcast the Layer2 multicast groups a device is listening too
(interface index, label, number of references, number of bound
- addresses).
- dev_stat network device status
- ip_fwchains Firewall chain linkage
- ip_fwnames Firewall chain names
- ip_masq Directory containing the masquerading tables
- ip_masquerade Major masquerading table
- netstat Network statistics
- raw raw device statistics
- route Kernel routing table
- rpc Directory containing rpc info
- rt_cache Routing cache
- snmp SNMP data
- sockstat Socket statistics
- tcp TCP sockets
- udp UDP sockets
- unix UNIX domain sockets
- wireless Wireless interface data (Wavelan etc)
- igmp IP multicast addresses, which this host joined
- psched Global packet scheduler parameters.
- netlink List of PF_NETLINK sockets
- ip_mr_vifs List of multicast virtual interfaces
- ip_mr_cache List of multicast routing cache
-..............................................................................
+ addresses).
+ dev_stat network device status
+ ip_fwchains Firewall chain linkage
+ ip_fwnames Firewall chain names
+ ip_masq Directory containing the masquerading tables
+ ip_masquerade Major masquerading table
+ netstat Network statistics
+ raw raw device statistics
+ route Kernel routing table
+ rpc Directory containing rpc info
+ rt_cache Routing cache
+ snmp SNMP data
+ sockstat Socket statistics
+ tcp TCP sockets
+ udp UDP sockets
+ unix UNIX domain sockets
+ wireless Wireless interface data (Wavelan etc)
+ igmp IP multicast addresses, which this host joined
+ psched Global packet scheduler parameters.
+ netlink List of PF_NETLINK sockets
+ ip_mr_vifs List of multicast virtual interfaces
+ ip_mr_cache List of multicast routing cache
+ ============= ================================================================
You can use this information to see which network devices are available in
-your system and how much traffic was routed over those devices:
-
- > cat /proc/net/dev
- Inter-|Receive |[...
- face |bytes packets errs drop fifo frame compressed multicast|[...
- lo: 908188 5596 0 0 0 0 0 0 [...
- ppp0:15475140 20721 410 0 0 410 0 0 [...
- eth0: 614530 7085 0 0 0 0 0 1 [...
-
- ...] Transmit
- ...] bytes packets errs drop fifo colls carrier compressed
- ...] 908188 5596 0 0 0 0 0 0
- ...] 1375103 17405 0 0 0 0 0 0
- ...] 1703981 5535 0 0 0 3 0 0
+your system and how much traffic was routed over those devices::
+
+ > cat /proc/net/dev
+ Inter-|Receive |[...
+ face |bytes packets errs drop fifo frame compressed multicast|[...
+ lo: 908188 5596 0 0 0 0 0 0 [...
+ ppp0:15475140 20721 410 0 0 410 0 0 [...
+ eth0: 614530 7085 0 0 0 0 0 1 [...
+
+ ...] Transmit
+ ...] bytes packets errs drop fifo colls carrier compressed
+ ...] 908188 5596 0 0 0 0 0 0
+ ...] 1375103 17405 0 0 0 0 0 0
+ ...] 1703981 5535 0 0 0 3 0 0
In addition, each Channel Bond interface has its own directory. For
example, the bond0 device will have a directory called /proc/net/bond0/.
@@ -1228,62 +1319,62 @@ many times the slaves link has failed.
If you have a SCSI host adapter in your system, you'll find a subdirectory
named after the driver for this adapter in /proc/scsi. You'll also see a list
-of all recognized SCSI devices in /proc/scsi:
+of all recognized SCSI devices in /proc/scsi::
- >cat /proc/scsi/scsi
- Attached devices:
- Host: scsi0 Channel: 00 Id: 00 Lun: 00
- Vendor: IBM Model: DGHS09U Rev: 03E0
- Type: Direct-Access ANSI SCSI revision: 03
- Host: scsi0 Channel: 00 Id: 06 Lun: 00
- Vendor: PIONEER Model: CD-ROM DR-U06S Rev: 1.04
- Type: CD-ROM ANSI SCSI revision: 02
+ >cat /proc/scsi/scsi
+ Attached devices:
+ Host: scsi0 Channel: 00 Id: 00 Lun: 00
+ Vendor: IBM Model: DGHS09U Rev: 03E0
+ Type: Direct-Access ANSI SCSI revision: 03
+ Host: scsi0 Channel: 00 Id: 06 Lun: 00
+ Vendor: PIONEER Model: CD-ROM DR-U06S Rev: 1.04
+ Type: CD-ROM ANSI SCSI revision: 02
The directory named after the driver has one file for each adapter found in
the system. These files contain information about the controller, including
the used IRQ and the IO address range. The amount of information shown is
dependent on the adapter you use. The example shows the output for an Adaptec
-AHA-2940 SCSI adapter:
-
- > cat /proc/scsi/aic7xxx/0
-
- Adaptec AIC7xxx driver version: 5.1.19/3.2.4
- Compile Options:
- TCQ Enabled By Default : Disabled
- AIC7XXX_PROC_STATS : Disabled
- AIC7XXX_RESET_DELAY : 5
- Adapter Configuration:
- SCSI Adapter: Adaptec AHA-294X Ultra SCSI host adapter
- Ultra Wide Controller
- PCI MMAPed I/O Base: 0xeb001000
- Adapter SEEPROM Config: SEEPROM found and used.
- Adaptec SCSI BIOS: Enabled
- IRQ: 10
- SCBs: Active 0, Max Active 2,
- Allocated 15, HW 16, Page 255
- Interrupts: 160328
- BIOS Control Word: 0x18b6
- Adapter Control Word: 0x005b
- Extended Translation: Enabled
- Disconnect Enable Flags: 0xffff
- Ultra Enable Flags: 0x0001
- Tag Queue Enable Flags: 0x0000
- Ordered Queue Tag Flags: 0x0000
- Default Tag Queue Depth: 8
- Tagged Queue By Device array for aic7xxx host instance 0:
- {255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}
- Actual queue depth per device for aic7xxx host instance 0:
- {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}
- Statistics:
- (scsi0:0:0:0)
- Device using Wide/Sync transfers at 40.0 MByte/sec, offset 8
- Transinfo settings: current(12/8/1/0), goal(12/8/1/0), user(12/15/1/0)
- Total transfers 160151 (74577 reads and 85574 writes)
- (scsi0:0:6:0)
- Device using Narrow/Sync transfers at 5.0 MByte/sec, offset 15
- Transinfo settings: current(50/15/0/0), goal(50/15/0/0), user(50/15/0/0)
- Total transfers 0 (0 reads and 0 writes)
+AHA-2940 SCSI adapter::
+
+ > cat /proc/scsi/aic7xxx/0
+
+ Adaptec AIC7xxx driver version: 5.1.19/3.2.4
+ Compile Options:
+ TCQ Enabled By Default : Disabled
+ AIC7XXX_PROC_STATS : Disabled
+ AIC7XXX_RESET_DELAY : 5
+ Adapter Configuration:
+ SCSI Adapter: Adaptec AHA-294X Ultra SCSI host adapter
+ Ultra Wide Controller
+ PCI MMAPed I/O Base: 0xeb001000
+ Adapter SEEPROM Config: SEEPROM found and used.
+ Adaptec SCSI BIOS: Enabled
+ IRQ: 10
+ SCBs: Active 0, Max Active 2,
+ Allocated 15, HW 16, Page 255
+ Interrupts: 160328
+ BIOS Control Word: 0x18b6
+ Adapter Control Word: 0x005b
+ Extended Translation: Enabled
+ Disconnect Enable Flags: 0xffff
+ Ultra Enable Flags: 0x0001
+ Tag Queue Enable Flags: 0x0000
+ Ordered Queue Tag Flags: 0x0000
+ Default Tag Queue Depth: 8
+ Tagged Queue By Device array for aic7xxx host instance 0:
+ {255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}
+ Actual queue depth per device for aic7xxx host instance 0:
+ {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}
+ Statistics:
+ (scsi0:0:0:0)
+ Device using Wide/Sync transfers at 40.0 MByte/sec, offset 8
+ Transinfo settings: current(12/8/1/0), goal(12/8/1/0), user(12/15/1/0)
+ Total transfers 160151 (74577 reads and 85574 writes)
+ (scsi0:0:6:0)
+ Device using Narrow/Sync transfers at 5.0 MByte/sec, offset 15
+ Transinfo settings: current(50/15/0/0), goal(50/15/0/0), user(50/15/0/0)
+ Total transfers 0 (0 reads and 0 writes)
1.6 Parallel port info in /proc/parport
@@ -1296,18 +1387,20 @@ number (0,1,2,...).
These directories contain the four files shown in Table 1-10.
-Table 1-10: Files in /proc/parport
-..............................................................................
- File Content
- autoprobe Any IEEE-1284 device ID information that has been acquired.
+.. table:: Table 1-10: Files in /proc/parport
+
+ ========= ====================================================================
+ File Content
+ ========= ====================================================================
+ autoprobe Any IEEE-1284 device ID information that has been acquired.
devices list of the device drivers using that port. A + will appear by the
name of the device currently using the port (it might not appear
- against any).
- hardware Parallel port's base address, IRQ line and DMA channel.
+ against any).
+ hardware Parallel port's base address, IRQ line and DMA channel.
irq IRQ that parport is using for that port. This is in a separate
file to allow you to alter it by writing a new value in (IRQ
- number or none).
-..............................................................................
+ number or none).
+ ========= ====================================================================
1.7 TTY info in /proc/tty
-------------------------
@@ -1317,29 +1410,31 @@ directory /proc/tty.You'll find entries for drivers and line disciplines in
this directory, as shown in Table 1-11.
-Table 1-11: Files in /proc/tty
-..............................................................................
- File Content
- drivers list of drivers and their usage
- ldiscs registered line disciplines
- driver/serial usage statistic and status of single tty lines
-..............................................................................
+.. table:: Table 1-11: Files in /proc/tty
+
+ ============= ==============================================
+ File Content
+ ============= ==============================================
+ drivers list of drivers and their usage
+ ldiscs registered line disciplines
+ driver/serial usage statistic and status of single tty lines
+ ============= ==============================================
To see which tty's are currently in use, you can simply look into the file
-/proc/tty/drivers:
-
- > cat /proc/tty/drivers
- pty_slave /dev/pts 136 0-255 pty:slave
- pty_master /dev/ptm 128 0-255 pty:master
- pty_slave /dev/ttyp 3 0-255 pty:slave
- pty_master /dev/pty 2 0-255 pty:master
- serial /dev/cua 5 64-67 serial:callout
- serial /dev/ttyS 4 64-67 serial
- /dev/tty0 /dev/tty0 4 0 system:vtmaster
- /dev/ptmx /dev/ptmx 5 2 system
- /dev/console /dev/console 5 1 system:console
- /dev/tty /dev/tty 5 0 system:/dev/tty
- unknown /dev/tty 4 1-63 console
+/proc/tty/drivers::
+
+ > cat /proc/tty/drivers
+ pty_slave /dev/pts 136 0-255 pty:slave
+ pty_master /dev/ptm 128 0-255 pty:master
+ pty_slave /dev/ttyp 3 0-255 pty:slave
+ pty_master /dev/pty 2 0-255 pty:master
+ serial /dev/cua 5 64-67 serial:callout
+ serial /dev/ttyS 4 64-67 serial
+ /dev/tty0 /dev/tty0 4 0 system:vtmaster
+ /dev/ptmx /dev/ptmx 5 2 system
+ /dev/console /dev/console 5 1 system:console
+ /dev/tty /dev/tty 5 0 system:/dev/tty
+ unknown /dev/tty 4 1-63 console
1.8 Miscellaneous kernel statistics in /proc/stat
@@ -1347,7 +1442,7 @@ To see which tty's are currently in use, you can simply look into the file
Various pieces of information about kernel activity are available in the
/proc/stat file. All of the numbers reported in this file are aggregates
-since the system first booted. For a quick look, simply cat the file:
+since the system first booted. For a quick look, simply cat the file::
> cat /proc/stat
cpu 2255 34 2290 22625563 6290 127 456 0 0 0
@@ -1372,6 +1467,7 @@ second). The meanings of the columns are as follows, from left to right:
- idle: twiddling thumbs
- iowait: In a word, iowait stands for waiting for I/O to complete. But there
are several problems:
+
1. Cpu will not wait for I/O to complete, iowait is the time that a task is
waiting for I/O to complete. When cpu goes into idle state for
outstanding task io, another task will be scheduled on this CPU.
@@ -1379,6 +1475,7 @@ second). The meanings of the columns are as follows, from left to right:
on any CPU, so the iowait of each CPU is difficult to calculate.
3. The value of iowait field in /proc/stat will decrease in certain
conditions.
+
So, the iowait is not reliable by reading from /proc/stat.
- irq: servicing interrupts
- softirq: servicing softirqs
@@ -1422,18 +1519,19 @@ Information about mounted ext4 file systems can be found in
/proc/fs/ext4/dm-0). The files in each per-device directory are shown
in Table 1-12, below.
-Table 1-12: Files in /proc/fs/ext4/<devname>
-..............................................................................
- File Content
+.. table:: Table 1-12: Files in /proc/fs/ext4/<devname>
+
+ ============== ==========================================================
+ File Content
mb_groups details of multiblock allocator buddy cache of free blocks
-..............................................................................
+ ============== ==========================================================
2.0 /proc/consoles
------------------
Shows registered system console lines.
To see which character device lines are currently used for the system console
-/dev/console, you may simply look into the file /proc/consoles:
+/dev/console, you may simply look into the file /proc/consoles::
> cat /proc/consoles
tty0 -WU (ECp) 4:7
@@ -1441,41 +1539,45 @@ To see which character device lines are currently used for the system console
The columns are:
- device name of the device
- operations R = can do read operations
- W = can do write operations
- U = can do unblank
- flags E = it is enabled
- C = it is preferred console
- B = it is primary boot console
- p = it is used for printk buffer
- b = it is not a TTY but a Braille device
- a = it is safe to use when cpu is offline
- major:minor major and minor number of the device separated by a colon
++--------------------+-------------------------------------------------------+
+| device | name of the device |
++====================+=======================================================+
+| operations | * R = can do read operations |
+| | * W = can do write operations |
+| | * U = can do unblank |
++--------------------+-------------------------------------------------------+
+| flags | * E = it is enabled |
+| | * C = it is preferred console |
+| | * B = it is primary boot console |
+| | * p = it is used for printk buffer |
+| | * b = it is not a TTY but a Braille device |
+| | * a = it is safe to use when cpu is offline |
++--------------------+-------------------------------------------------------+
+| major:minor | major and minor number of the device separated by a |
+| | colon |
++--------------------+-------------------------------------------------------+
-------------------------------------------------------------------------------
Summary
-------------------------------------------------------------------------------
+-------
+
The /proc file system serves information about the running system. It not only
allows access to process data but also allows you to request the kernel status
by reading files in the hierarchy.
The directory structure of /proc reflects the types of information and makes
it easy, if not obvious, where to look for specific data.
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-CHAPTER 2: MODIFYING SYSTEM PARAMETERS
-------------------------------------------------------------------------------
+Chapter 2: Modifying System Parameters
+======================================
-------------------------------------------------------------------------------
In This Chapter
-------------------------------------------------------------------------------
+---------------
+
* Modifying kernel parameters by writing into files found in /proc/sys
* Exploring the files which modify certain parameters
* Review of the /proc/sys file tree
-------------------------------------------------------------------------------
+------------------------------------------------------------------------------
A very interesting part of /proc is the directory /proc/sys. This is not only
a source of information, it also allows you to change parameters within the
@@ -1503,19 +1605,18 @@ kernels, and became part of it in version 2.2.1 of the Linux kernel.
Please see: Documentation/admin-guide/sysctl/ directory for descriptions of these
entries.
-------------------------------------------------------------------------------
Summary
-------------------------------------------------------------------------------
+-------
+
Certain aspects of kernel behavior can be modified at runtime, without the
need to recompile the kernel, or even to reboot the system. The files in the
/proc/sys tree can not only be read, but also modified. You can use the echo
command to write values into these files, thereby changing the default settings
of the kernel.
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-CHAPTER 3: PER-PROCESS PARAMETERS
-------------------------------------------------------------------------------
+
+Chapter 3: Per-process Parameters
+=================================
3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score
--------------------------------------------------------------------------------
@@ -1588,26 +1689,28 @@ process should be killed in an out-of-memory situation.
This file contains IO statistics for each running process
Example
--------
+~~~~~~~
+
+::
-test:/tmp # dd if=/dev/zero of=/tmp/test.dat &
-[1] 3828
+ test:/tmp # dd if=/dev/zero of=/tmp/test.dat &
+ [1] 3828
-test:/tmp # cat /proc/3828/io
-rchar: 323934931
-wchar: 323929600
-syscr: 632687
-syscw: 632675
-read_bytes: 0
-write_bytes: 323932160
-cancelled_write_bytes: 0
+ test:/tmp # cat /proc/3828/io
+ rchar: 323934931
+ wchar: 323929600
+ syscr: 632687
+ syscw: 632675
+ read_bytes: 0
+ write_bytes: 323932160
+ cancelled_write_bytes: 0
Description
------------
+~~~~~~~~~~~
rchar
------
+^^^^^
I/O counter: chars read
The number of bytes which this task has caused to be read from storage. This
@@ -1618,7 +1721,7 @@ pagecache)
wchar
------
+^^^^^
I/O counter: chars written
The number of bytes which this task has caused, or shall cause to be written
@@ -1626,7 +1729,7 @@ to disk. Similar caveats apply here as with rchar.
syscr
------
+^^^^^
I/O counter: read syscalls
Attempt to count the number of read I/O operations, i.e. syscalls like read()
@@ -1634,7 +1737,7 @@ and pread().
syscw
------
+^^^^^
I/O counter: write syscalls
Attempt to count the number of write I/O operations, i.e. syscalls like
@@ -1642,7 +1745,7 @@ write() and pwrite().
read_bytes
-----------
+^^^^^^^^^^
I/O counter: bytes read
Attempt to count the number of bytes which this process really did cause to
@@ -1652,7 +1755,7 @@ CIFS at a later time>
write_bytes
------------
+^^^^^^^^^^^
I/O counter: bytes written
Attempt to count the number of bytes which this process caused to be sent to
@@ -1660,7 +1763,7 @@ the storage layer. This is done at page-dirtying time.
cancelled_write_bytes
----------------------
+^^^^^^^^^^^^^^^^^^^^^
The big inaccuracy here is truncate. If a process writes 1MB to a file and
then deletes the file, it will in fact perform no writeout. But it will have
@@ -1673,12 +1776,11 @@ from the truncating task's write_bytes, but there is information loss in doing
that.
-Note
-----
+.. Note::
-At its current implementation state, this is a bit racy on 32-bit machines: if
-process A reads process B's /proc/pid/io while process B is updating one of
-those 64-bit counters, process A could see an intermediate result.
+ At its current implementation state, this is a bit racy on 32-bit machines:
+ if process A reads process B's /proc/pid/io while process B is updating one
+ of those 64-bit counters, process A could see an intermediate result.
More information about this can be found within the taskstats documentation in
@@ -1698,12 +1800,13 @@ of memory types. If a bit of the bitmask is set, memory segments of the
corresponding memory type are dumped, otherwise they are not dumped.
The following 9 memory types are supported:
+
- (bit 0) anonymous private memory
- (bit 1) anonymous shared memory
- (bit 2) file-backed private memory
- (bit 3) file-backed shared memory
- (bit 4) ELF header pages in file-backed private memory areas (it is
- effective only if the bit 2 is cleared)
+ effective only if the bit 2 is cleared)
- (bit 5) hugetlb private memory
- (bit 6) hugetlb shared memory
- (bit 7) DAX private memory
@@ -1719,13 +1822,13 @@ The default value of coredump_filter is 0x33; this means all anonymous memory
segments, ELF header pages and hugetlb private memory are dumped.
If you don't want to dump all shared memory segments attached to pid 1234,
-write 0x31 to the process's proc file.
+write 0x31 to the process's proc file::
$ echo 0x31 > /proc/1234/coredump_filter
When a new process is created, the process inherits the bitmask status from its
parent. It is useful to set up coredump_filter before the program runs.
-For example:
+For example::
$ echo 0x7 > /proc/self/coredump_filter
$ ./some_program
@@ -1733,35 +1836,37 @@ For example:
3.5 /proc/<pid>/mountinfo - Information about mounts
--------------------------------------------------------
-This file contains lines of the form:
+This file contains lines of the form::
-36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
-(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
+ 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+ (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
-(1) mount ID: unique identifier of the mount (may be reused after umount)
-(2) parent ID: ID of parent (or of self for the top of the mount tree)
-(3) major:minor: value of st_dev for files on filesystem
-(4) root: root of the mount within the filesystem
-(5) mount point: mount point relative to the process's root
-(6) mount options: per mount options
-(7) optional fields: zero or more fields of the form "tag[:value]"
-(8) separator: marks the end of the optional fields
-(9) filesystem type: name of filesystem of the form "type[.subtype]"
-(10) mount source: filesystem specific information or "none"
-(11) super options: per super block options
+ (1) mount ID: unique identifier of the mount (may be reused after umount)
+ (2) parent ID: ID of parent (or of self for the top of the mount tree)
+ (3) major:minor: value of st_dev for files on filesystem
+ (4) root: root of the mount within the filesystem
+ (5) mount point: mount point relative to the process's root
+ (6) mount options: per mount options
+ (7) optional fields: zero or more fields of the form "tag[:value]"
+ (8) separator: marks the end of the optional fields
+ (9) filesystem type: name of filesystem of the form "type[.subtype]"
+ (10) mount source: filesystem specific information or "none"
+ (11) super options: per super block options
Parsers should ignore all unrecognised optional fields. Currently the
possible optional fields are:
-shared:X mount is shared in peer group X
-master:X mount is slave to peer group X
-propagate_from:X mount is slave and receives propagation from peer group X (*)
-unbindable mount is unbindable
+================ ==============================================================
+shared:X mount is shared in peer group X
+master:X mount is slave to peer group X
+propagate_from:X mount is slave and receives propagation from peer group X [#]_
+unbindable mount is unbindable
+================ ==============================================================
-(*) X is the closest dominant peer group under the process's root. If
-X is the immediate master of the mount, or if there's no dominant peer
-group under the same root, then only the "master:X" field is present
-and not the "propagate_from:X" field.
+.. [#] X is the closest dominant peer group under the process's root. If
+ X is the immediate master of the mount, or if there's no dominant peer
+ group under the same root, then only the "master:X" field is present
+ and not the "propagate_from:X" field.
For more information on mount propagation see:
@@ -1804,77 +1909,86 @@ created with [see open(2) for details] and 'mnt_id' represents mount ID of
the file system containing the opened file [see 3.5 /proc/<pid>/mountinfo
for details].
-A typical output is
+A typical output is::
pos: 0
flags: 0100002
mnt_id: 19
-All locks associated with a file descriptor are shown in its fdinfo too.
+All locks associated with a file descriptor are shown in its fdinfo too::
-lock: 1: FLOCK ADVISORY WRITE 359 00:13:11691 0 EOF
+ lock: 1: FLOCK ADVISORY WRITE 359 00:13:11691 0 EOF
The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags
pair provide additional information particular to the objects they represent.
- Eventfd files
- ~~~~~~~~~~~~~
+Eventfd files
+~~~~~~~~~~~~~
+
+::
+
pos: 0
flags: 04002
mnt_id: 9
eventfd-count: 5a
- where 'eventfd-count' is hex value of a counter.
+where 'eventfd-count' is hex value of a counter.
+
+Signalfd files
+~~~~~~~~~~~~~~
+
+::
- Signalfd files
- ~~~~~~~~~~~~~~
pos: 0
flags: 04002
mnt_id: 9
sigmask: 0000000000000200
- where 'sigmask' is hex value of the signal mask associated
- with a file.
+where 'sigmask' is hex value of the signal mask associated
+with a file.
+
+Epoll files
+~~~~~~~~~~~
+
+::
- Epoll files
- ~~~~~~~~~~~
pos: 0
flags: 02
mnt_id: 9
tfd: 5 events: 1d data: ffffffffffffffff pos:0 ino:61af sdev:7
- where 'tfd' is a target file descriptor number in decimal form,
- 'events' is events mask being watched and the 'data' is data
- associated with a target [see epoll(7) for more details].
+where 'tfd' is a target file descriptor number in decimal form,
+'events' is events mask being watched and the 'data' is data
+associated with a target [see epoll(7) for more details].
- The 'pos' is current offset of the target file in decimal form
- [see lseek(2)], 'ino' and 'sdev' are inode and device numbers
- where target file resides, all in hex format.
+The 'pos' is current offset of the target file in decimal form
+[see lseek(2)], 'ino' and 'sdev' are inode and device numbers
+where target file resides, all in hex format.
- Fsnotify files
- ~~~~~~~~~~~~~~
- For inotify files the format is the following
+Fsnotify files
+~~~~~~~~~~~~~~
+For inotify files the format is the following::
pos: 0
flags: 02000000
inotify wd:3 ino:9e7e sdev:800013 mask:800afce ignored_mask:0 fhandle-bytes:8 fhandle-type:1 f_handle:7e9e0000640d1b6d
- where 'wd' is a watch descriptor in decimal form, ie a target file
- descriptor number, 'ino' and 'sdev' are inode and device where the
- target file resides and the 'mask' is the mask of events, all in hex
- form [see inotify(7) for more details].
+where 'wd' is a watch descriptor in decimal form, i.e. a target file
+descriptor number, 'ino' and 'sdev' are inode and device where the
+target file resides and the 'mask' is the mask of events, all in hex
+form [see inotify(7) for more details].
- If the kernel was built with exportfs support, the path to the target
- file is encoded as a file handle. The file handle is provided by three
- fields 'fhandle-bytes', 'fhandle-type' and 'f_handle', all in hex
- format.
+If the kernel was built with exportfs support, the path to the target
+file is encoded as a file handle. The file handle is provided by three
+fields 'fhandle-bytes', 'fhandle-type' and 'f_handle', all in hex
+format.
- If the kernel is built without exportfs support the file handle won't be
- printed out.
+If the kernel is built without exportfs support the file handle won't be
+printed out.
- If there is no inotify mark attached yet the 'inotify' line will be omitted.
+If there is no inotify mark attached yet the 'inotify' line will be omitted.
- For fanotify files the format is
+For fanotify files the format is::
pos: 0
flags: 02
@@ -1883,20 +1997,22 @@ pair provide additional information particular to the objects they represent.
fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003
fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4
- where fanotify 'flags' and 'event-flags' are values used in fanotify_init
- call, 'mnt_id' is the mount point identifier, 'mflags' is the value of
- flags associated with mark which are tracked separately from events
- mask. 'ino', 'sdev' are target inode and device, 'mask' is the events
- mask and 'ignored_mask' is the mask of events which are to be ignored.
- All in hex format. Incorporation of 'mflags', 'mask' and 'ignored_mask'
- does provide information about flags and mask used in fanotify_mark
- call [see fsnotify manpage for details].
+where fanotify 'flags' and 'event-flags' are values used in fanotify_init
+call, 'mnt_id' is the mount point identifier, 'mflags' is the value of
+flags associated with mark which are tracked separately from events
+mask. 'ino', 'sdev' are target inode and device, 'mask' is the events
+mask and 'ignored_mask' is the mask of events which are to be ignored.
+All in hex format. Incorporation of 'mflags', 'mask' and 'ignored_mask'
+does provide information about flags and mask used in fanotify_mark
+call [see fsnotify manpage for details].
+
+While the first three lines are mandatory and always printed, the rest is
+optional and may be omitted if no marks have been created yet.
- While the first three lines are mandatory and always printed, the rest is
- optional and may be omitted if no marks created yet.
+Timerfd files
+~~~~~~~~~~~~~
- Timerfd files
- ~~~~~~~~~~~~~
+::
pos: 0
flags: 02
@@ -1907,18 +2023,18 @@ pair provide additional information particular to the objects they represent.
it_value: (0, 49406829)
it_interval: (1, 0)
- where 'clockid' is the clock type and 'ticks' is the number of the timer expirations
- that have occurred [see timerfd_create(2) for details]. 'settime flags' are
- flags in octal form been used to setup the timer [see timerfd_settime(2) for
- details]. 'it_value' is remaining time until the timer exiration.
- 'it_interval' is the interval for the timer. Note the timer might be set up
- with TIMER_ABSTIME option which will be shown in 'settime flags', but 'it_value'
- still exhibits timer's remaining time.
+where 'clockid' is the clock type and 'ticks' is the number of the timer expirations
+that have occurred [see timerfd_create(2) for details]. 'settime flags' are
+flags in octal form used to set up the timer [see timerfd_settime(2) for
+details]. 'it_value' is the remaining time until the timer expiration.
+'it_interval' is the interval for the timer. Note the timer might be set up
+with TIMER_ABSTIME option which will be shown in 'settime flags', but 'it_value'
+still exhibits timer's remaining time.
3.9 /proc/<pid>/map_files - Information about memory mapped files
---------------------------------------------------------------------
This directory contains symbolic links which represent memory mapped files
-the process is maintaining. Example output:
+the process is maintaining. Example output::
| lr-------- 1 root root 64 Jan 27 11:24 333c600000-333c620000 -> /usr/lib64/ld-2.18.so
| lr-------- 1 root root 64 Jan 27 11:24 333c81f000-333c820000 -> /usr/lib64/ld-2.18.so
@@ -1976,17 +2092,22 @@ When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the
architecture specific status of the task.
Example
--------
+~~~~~~~
+
+::
+
$ cat /proc/6753/arch_status
AVX512_elapsed_ms: 8
Description
------------
+~~~~~~~~~~~
x86 specific entries:
----------------------
- AVX512_elapsed_ms:
- ------------------
+~~~~~~~~~~~~~~~~~~~~~
+
+AVX512_elapsed_ms:
+^^^^^^^^^^^^^^^^^^
+
If AVX512 is supported on the machine, this entry shows the milliseconds
elapsed since the last time AVX512 usage was recorded. The recording
happens on a best effort basis when a task is scheduled out. This means
@@ -2010,17 +2131,18 @@ x86 specific entries:
the task is unlikely an AVX512 user, but depends on the workload and the
scheduling scenario, it also could be a false negative mentioned above.
-------------------------------------------------------------------------------
Configuring procfs
-------------------------------------------------------------------------------
+------------------
4.1 Mount options
---------------------
The following mount options are supported:
+ ========= ========================================================
hidepid= Set /proc/<pid>/ access mode.
gid= Set the group authorized to learn processes information.
+ ========= ========================================================
hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories
(default).
diff --git a/Documentation/filesystems/qnx6.txt b/Documentation/filesystems/qnx6.rst
index 48ea68f15845..b71308314070 100644
--- a/Documentation/filesystems/qnx6.txt
+++ b/Documentation/filesystems/qnx6.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
The QNX6 Filesystem
===================
@@ -14,10 +17,12 @@ Specification
qnx6fs shares many properties with traditional Unix filesystems. It has the
concepts of blocks, inodes and directories.
+
On QNX it is possible to create little endian and big endian qnx6 filesystems.
This feature makes it possible to create and use a different endianness fs
for the target (QNX is used on quite a range of embedded systems) platform
running on a different endianness.
+
The Linux driver handles endianness transparently. (LE and BE)
Blocks
@@ -26,6 +31,7 @@ Blocks
The space in the device or file is split up into blocks. These are a fixed
size of 512, 1024, 2048 or 4096, which is decided when the filesystem is
created.
+
Blockpointers are 32bit, so the maximum space that can be addressed is
2^32 * 4096 bytes or 16TB
@@ -50,6 +56,7 @@ Each of these root nodes holds information like total size of the stored
data and the addressing levels in that specific tree.
If the level value is 0, up to 16 direct blocks can be addressed by each
node.
+
Level 1 adds an additional indirect addressing level where each indirect
addressing block holds up to blocksize / 4 bytes pointers to data blocks.
Level 2 adds an additional indirect addressing block level (so, already up
@@ -57,11 +64,13 @@ to 16 * 256 * 256 = 1048576 blocks that can be addressed by such a tree).
Unused block pointers are always set to ~0 - regardless of root node,
indirect addressing blocks or inodes.
+
Data leaves are always on the lowest level. So no data is stored on upper
tree levels.
The first Superblock is located at 0x2000. (0x2000 is the bootblock size)
The Audi MMI 3G first superblock directly starts at byte 0.
+
Second superblock position can either be calculated from the superblock
information (total number of filesystem blocks) or by taking the highest
device address, zeroing the last 3 bytes and then subtracting 0x1000 from
@@ -84,6 +93,7 @@ Object mode field is POSIX format. (which makes things easier)
There are also pointers to the first 16 blocks, if the object data can be
addressed with 16 direct blocks.
+
For more than 16 blocks an indirect addressing in form of another tree is
used. (scheme is the same as the one used for the superblock root nodes)
@@ -96,13 +106,18 @@ Directories
A directory is a filesystem object and has an inode just like a file.
It is a specially formatted file containing records which associate each
name with an inode number.
+
'.' inode number points to the directory inode
+
'..' inode number points to the parent directory inode
+
Each filename record additionally got a filename length field.
One special case are long filenames or subdirectory names.
+
These got set a filename length field of 0xff in the corresponding directory
record plus the longfile inode number also stored in that record.
+
With that longfilename inode number, the longfilename tree can be walked
starting with the superblock longfilename root node pointers.
@@ -111,6 +126,7 @@ Special files
Symbolic links are also filesystem objects with inodes. They got a specific
bit in the inode mode field identifying them as symbolic link.
+
The directory entry file inode pointer points to the target file inode.
Hard links got an inode, a directory entry, but a specific mode bit set,
@@ -126,9 +142,11 @@ Long filenames
Long filenames are stored in a separate addressing tree. The starting point
is the longfilename root node in the active superblock.
+
Each data block (tree leaves) holds one long filename. That filename is
limited to 510 bytes. The first two starting bytes are used as length field
for the actual filename.
+
If that structure shall fit for all allowed blocksizes, it is clear why there
is a limit of 510 bytes for the actual filename stored.
@@ -138,6 +156,7 @@ Bitmap
The qnx6fs filesystem allocation bitmap is stored in a tree under bitmap
root node in the superblock and each bit in the bitmap represents one
filesystem block.
+
The first block is block 0, which starts 0x1000 after superblock start.
So for a normal qnx6fs 0x3000 (bootblock + superblock) is the physical
address at which block 0 is located.
@@ -149,11 +168,14 @@ Bitmap system area
------------------
The bitmap itself is divided into three parts.
+
First the system area, that is split into two halves.
+
Then userspace.
The requirement for a static, fixed preallocated system area comes from how
qnx6fs deals with writes.
+
Each superblock got its own half of the system area. So superblock #1
always uses blocks from the lower half while superblock #2 just writes to
blocks represented by the upper half bitmap system area bits.
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.rst
index 97d42ccaa92d..6c576e241d86 100644
--- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.rst
@@ -1,5 +1,11 @@
-ramfs, rootfs and initramfs
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Ramfs, rootfs and initramfs
+===========================
+
October 17, 2005
+
Rob Landley <rob@landley.net>
=============================
@@ -99,14 +105,14 @@ out of that.
All this differs from the old initrd in several ways:
- The old initrd was always a separate file, while the initramfs archive is
- linked into the linux kernel image. (The directory linux-*/usr is devoted
- to generating this archive during the build.)
+ linked into the linux kernel image. (The directory ``linux-*/usr`` is
+ devoted to generating this archive during the build.)
- The old initrd file was a gzipped filesystem image (in some file format,
such as ext2, that needed a driver built into the kernel), while the new
initramfs archive is a gzipped cpio archive (like tar only simpler,
- see cpio(1) and Documentation/driver-api/early-userspace/buffer-format.rst). The
- kernel's cpio extraction code is not only extremely small, it's also
+ see cpio(1) and Documentation/driver-api/early-userspace/buffer-format.rst).
+ The kernel's cpio extraction code is not only extremely small, it's also
__init text and data that can be discarded during the boot process.
- The program run by the old initrd (which was called /initrd, not /init) did
@@ -139,7 +145,7 @@ and living in usr/Kconfig) can be used to specify a source for the
initramfs archive, which will automatically be incorporated into the
resulting binary. This option can point to an existing gzipped cpio
archive, a directory containing files to be archived, or a text file
-specification such as the following example:
+specification such as the following example::
dir /dev 755 0 0
nod /dev/console 644 0 0 c 5 1
@@ -175,12 +181,12 @@ or extracting your own preprepared cpio files to feed to the kernel build
(instead of a config file or directory).
The following command line can extract a cpio image (either by the above script
-or by the kernel build) back into its component files:
+or by the kernel build) back into its component files::
cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames
The following shell script can create a prebuilt cpio archive you can
-use in place of the above config file:
+use in place of the above config file::
#!/bin/sh
@@ -202,14 +208,17 @@ use in place of the above config file:
exit 1
fi
-Note: The cpio man page contains some bad advice that will break your initramfs
-archive if you follow it. It says "A typical way to generate the list
-of filenames is with the find command; you should give find the -depth option
-to minimize problems with permissions on directories that are unwritable or not
-searchable." Don't do this when creating initramfs.cpio.gz images, it won't
-work. The Linux kernel cpio extractor won't create files in a directory that
-doesn't exist, so the directory entries must go before the files that go in
-those directories. The above script gets them in the right order.
+.. Note::
+
+ The cpio man page contains some bad advice that will break your initramfs
+ archive if you follow it. It says "A typical way to generate the list
+ of filenames is with the find command; you should give find the -depth
+ option to minimize problems with permissions on directories that are
+ unwritable or not searchable." Don't do this when creating
+ initramfs.cpio.gz images, it won't work. The Linux kernel cpio extractor
+ won't create files in a directory that doesn't exist, so the directory
+ entries must go before the files that go in those directories.
+ The above script gets them in the right order.
External initramfs images:
--------------------------
@@ -236,9 +245,10 @@ An initramfs archive is a complete self-contained root filesystem for Linux.
If you don't already understand what shared libraries, devices, and paths
you need to get a minimal root filesystem up and running, here are some
references:
-http://www.tldp.org/HOWTO/Bootdisk-HOWTO/
-http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html
-http://www.linuxfromscratch.org/lfs/view/stable/
+
+- http://www.tldp.org/HOWTO/Bootdisk-HOWTO/
+- http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html
+- http://www.linuxfromscratch.org/lfs/view/stable/
The "klibc" package (http://www.kernel.org/pub/linux/libs/klibc) is
designed to be a tiny C library to statically link early userspace
@@ -255,7 +265,7 @@ name lookups, even when otherwise statically linked.)
A good first step is to get initramfs to run a statically linked "hello world"
program as init, and test it under an emulator like qemu (www.qemu.org) or
-User Mode Linux, like so:
+User Mode Linux, like so::
cat > hello.c << EOF
#include <stdio.h>
@@ -326,8 +336,8 @@ the above threads) is:
explained his reasoning:
- http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html
- http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html
+ - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html
+ - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html
and, most importantly, designed and implemented the initramfs code.
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.rst
index cd709a94d054..04ad083cfe62 100644
--- a/Documentation/filesystems/relay.txt
+++ b/Documentation/filesystems/relay.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
relay interface (formerly relayfs)
==================================
@@ -108,6 +111,7 @@ The relay interface implements basic file operations for user space
access to relay channel buffer data. Here are the file operations
that are available and some comments regarding their behavior:
+=========== ============================================================
open() enables user to open an _existing_ channel buffer.
mmap() results in channel buffer being mapped into the caller's
@@ -136,13 +140,16 @@ poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are
close() decrements the channel buffer's refcount. When the refcount
reaches 0, i.e. when no process or kernel client has the
buffer open, the channel buffer is freed.
+=========== ============================================================
In order for a user application to make use of relay files, the
-host filesystem must be mounted. For example,
+host filesystem must be mounted. For example::
mount -t debugfs debugfs /sys/kernel/debug
-NOTE: the host filesystem doesn't need to be mounted for kernel
+.. Note::
+
+ the host filesystem doesn't need to be mounted for kernel
clients to create or use channels - it only needs to be
mounted when user space applications need access to the buffer
data.
@@ -154,7 +161,7 @@ The relay interface kernel API
Here's a summary of the API the relay interface provides to in-kernel clients:
TBD(curr. line MT:/API/)
- channel management functions:
+ channel management functions::
relay_open(base_filename, parent, subbuf_size, n_subbufs,
callbacks, private_data)
@@ -162,17 +169,17 @@ TBD(curr. line MT:/API/)
relay_flush(chan)
relay_reset(chan)
- channel management typically called on instigation of userspace:
+ channel management typically called on instigation of userspace::
relay_subbufs_consumed(chan, cpu, subbufs_consumed)
- write functions:
+ write functions::
relay_write(chan, data, length)
__relay_write(chan, data, length)
relay_reserve(chan, length)
- callbacks:
+ callbacks::
subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
buf_mapped(buf, filp)
@@ -180,7 +187,7 @@ TBD(curr. line MT:/API/)
create_buf_file(filename, parent, mode, buf, is_global)
remove_buf_file(dentry)
- helper functions:
+ helper functions::
relay_buf_full(buf)
subbuf_start_reserve(buf, length)
@@ -215,41 +222,41 @@ the file(s) created in create_buf_file() and is called during
relay_close().
Here are some typical definitions for these callbacks, in this case
-using debugfs:
-
-/*
- * create_buf_file() callback. Creates relay file in debugfs.
- */
-static struct dentry *create_buf_file_handler(const char *filename,
- struct dentry *parent,
- umode_t mode,
- struct rchan_buf *buf,
- int *is_global)
-{
- return debugfs_create_file(filename, mode, parent, buf,
- &relay_file_operations);
-}
-
-/*
- * remove_buf_file() callback. Removes relay file from debugfs.
- */
-static int remove_buf_file_handler(struct dentry *dentry)
-{
- debugfs_remove(dentry);
-
- return 0;
-}
-
-/*
- * relay interface callbacks
- */
-static struct rchan_callbacks relay_callbacks =
-{
- .create_buf_file = create_buf_file_handler,
- .remove_buf_file = remove_buf_file_handler,
-};
-
-And an example relay_open() invocation using them:
+using debugfs::
+
+ /*
+ * create_buf_file() callback. Creates relay file in debugfs.
+ */
+ static struct dentry *create_buf_file_handler(const char *filename,
+ struct dentry *parent,
+ umode_t mode,
+ struct rchan_buf *buf,
+ int *is_global)
+ {
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+ }
+
+ /*
+ * remove_buf_file() callback. Removes relay file from debugfs.
+ */
+ static int remove_buf_file_handler(struct dentry *dentry)
+ {
+ debugfs_remove(dentry);
+
+ return 0;
+ }
+
+ /*
+ * relay interface callbacks
+ */
+ static struct rchan_callbacks relay_callbacks =
+ {
+ .create_buf_file = create_buf_file_handler,
+ .remove_buf_file = remove_buf_file_handler,
+ };
+
+And an example relay_open() invocation using them::
chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks, NULL);
@@ -339,23 +346,23 @@ whether or not to actually move on to the next sub-buffer.
To implement 'no-overwrite' mode, the userspace client would provide
an implementation of the subbuf_start() callback something like the
-following:
+following::
-static int subbuf_start(struct rchan_buf *buf,
- void *subbuf,
- void *prev_subbuf,
- unsigned int prev_padding)
-{
- if (prev_subbuf)
- *((unsigned *)prev_subbuf) = prev_padding;
+ static int subbuf_start(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ unsigned int prev_padding)
+ {
+ if (prev_subbuf)
+ *((unsigned *)prev_subbuf) = prev_padding;
- if (relay_buf_full(buf))
- return 0;
+ if (relay_buf_full(buf))
+ return 0;
- subbuf_start_reserve(buf, sizeof(unsigned int));
+ subbuf_start_reserve(buf, sizeof(unsigned int));
- return 1;
-}
+ return 1;
+ }
If the current buffer is full, i.e. all sub-buffers remain unconsumed,
the callback returns 0 to indicate that the buffer switch should not
@@ -370,20 +377,20 @@ ready sub-buffers will relay_buf_full() return 0, in which case the
buffer switch can continue.
The implementation of the subbuf_start() callback for 'overwrite' mode
-would be very similar:
+would be very similar::
-static int subbuf_start(struct rchan_buf *buf,
- void *subbuf,
- void *prev_subbuf,
- size_t prev_padding)
-{
- if (prev_subbuf)
- *((unsigned *)prev_subbuf) = prev_padding;
+ static int subbuf_start(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ size_t prev_padding)
+ {
+ if (prev_subbuf)
+ *((unsigned *)prev_subbuf) = prev_padding;
- subbuf_start_reserve(buf, sizeof(unsigned int));
+ subbuf_start_reserve(buf, sizeof(unsigned int));
- return 1;
-}
+ return 1;
+ }
In this case, the relay_buf_full() check is meaningless and the
callback always returns 1, causing the buffer switch to occur
diff --git a/Documentation/filesystems/romfs.txt b/Documentation/filesystems/romfs.rst
index e2b07cc9120a..465b11efa9be 100644
--- a/Documentation/filesystems/romfs.txt
+++ b/Documentation/filesystems/romfs.rst
@@ -1,4 +1,8 @@
-ROMFS - ROM FILE SYSTEM
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+ROMFS - ROM File System
+=======================
This is a quite dumb, read only filesystem, mainly for initial RAM
disks of installation disks. It has grown up by the need of having
@@ -51,9 +55,9 @@ the 16 byte padding for the name and the contents, also 16+14+15 = 45
bytes. This is quite rare however, since most file names are longer
than 3 bytes, and shorter than 15 bytes.
-The layout of the filesystem is the following:
+The layout of the filesystem is the following::
-offset content
+ offset content
+---+---+---+---+
0 | - | r | o | m | \
@@ -84,9 +88,9 @@ the source. This algorithm was chosen because although it's not quite
reliable, it does not require any tables, and it is very simple.
The following bytes are now part of the file system; each file header
-must begin on a 16 byte boundary.
+must begin on a 16 byte boundary::
-offset content
+ offset content
+---+---+---+---+
0 | next filehdr|X| The offset of the next file header
@@ -114,7 +118,9 @@ file is user and group 0, this should never be a problem for the
intended use. The mapping of the 8 possible values to file types is
the following:
+== =============== ============================================
mapping spec.info means
+== =============== ============================================
0 hard link link destination [file header]
1 directory first file's header
2 regular file unused, must be zero [MBZ]
@@ -123,6 +129,7 @@ the following:
5 char device - " -
6 socket unused, MBZ
7 fifo unused, MBZ
+== =============== ============================================
Note that hard links are specifically marked in this filesystem, but
they will behave as you can expect (i.e. share the inode number).
@@ -158,24 +165,24 @@ to romfs-subscribe@shadow.banki.hu, the content is irrelevant.
Pending issues:
- Permissions and owner information are pretty essential features of a
-Un*x like system, but romfs does not provide the full possibilities.
-I have never found this limiting, but others might.
+ Un*x like system, but romfs does not provide the full possibilities.
+ I have never found this limiting, but others might.
- The file system is read only, so it can be very small, but in case
-one would want to write _anything_ to a file system, he still needs
-a writable file system, thus negating the size advantages. Possible
-solutions: implement write access as a compile-time option, or a new,
-similarly small writable filesystem for RAM disks.
+ one would want to write _anything_ to a file system, he still needs
+ a writable file system, thus negating the size advantages. Possible
+ solutions: implement write access as a compile-time option, or a new,
+ similarly small writable filesystem for RAM disks.
- Since the files are only required to have alignment on a 16 byte
-boundary, it is currently possibly suboptimal to read or execute files
-from the filesystem. It might be resolved by reordering file data to
-have most of it (i.e. except the start and the end) laying at "natural"
-boundaries, thus it would be possible to directly map a big portion of
-the file contents to the mm subsystem.
+ boundary, it is currently possibly suboptimal to read or execute files
+ from the filesystem. It might be resolved by reordering file data to
+ have most of it (i.e. except the start and the end) laying at "natural"
+ boundaries, thus it would be possible to directly map a big portion of
+ the file contents to the mm subsystem.
- Compression might be a useful feature, but memory is quite a
-limiting factor in my eyes.
+ limiting factor in my eyes.
- Where it is used?
@@ -183,4 +190,5 @@ limiting factor in my eyes.
Have fun,
+
Janos Farkas <chexum@shadow.banki.hu>
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.rst
index e5274f84dc56..df42106bae71 100644
--- a/Documentation/filesystems/squashfs.txt
+++ b/Documentation/filesystems/squashfs.rst
@@ -1,7 +1,11 @@
-SQUASHFS 4.0 FILESYSTEM
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Squashfs 4.0 Filesystem
=======================
Squashfs is a compressed read-only filesystem for Linux.
+
It uses zlib, lz4, lzo, or xz compression to compress files, inodes and
directories. Inodes in the system are very small and all blocks are packed to
minimise data overhead. Block sizes greater than 4K are supported up to a
@@ -15,31 +19,33 @@ needed.
Mailing list: squashfs-devel@lists.sourceforge.net
Web site: www.squashfs.org
-1. FILESYSTEM FEATURES
+1. Filesystem Features
----------------------
Squashfs filesystem features versus Cramfs:
+============================== ========= ==========
Squashfs Cramfs
-
-Max filesystem size: 2^64 256 MiB
-Max file size: ~ 2 TiB 16 MiB
-Max files: unlimited unlimited
-Max directories: unlimited unlimited
-Max entries per directory: unlimited unlimited
-Max block size: 1 MiB 4 KiB
-Metadata compression: yes no
-Directory indexes: yes no
-Sparse file support: yes no
-Tail-end packing (fragments): yes no
-Exportable (NFS etc.): yes no
-Hard link support: yes no
-"." and ".." in readdir: yes no
-Real inode numbers: yes no
-32-bit uids/gids: yes no
-File creation time: yes no
-Xattr support: yes no
-ACL support: no no
+============================== ========= ==========
+Max filesystem size 2^64 256 MiB
+Max file size ~ 2 TiB 16 MiB
+Max files unlimited unlimited
+Max directories unlimited unlimited
+Max entries per directory unlimited unlimited
+Max block size 1 MiB 4 KiB
+Metadata compression yes no
+Directory indexes yes no
+Sparse file support yes no
+Tail-end packing (fragments) yes no
+Exportable (NFS etc.) yes no
+Hard link support yes no
+"." and ".." in readdir yes no
+Real inode numbers yes no
+32-bit uids/gids yes no
+File creation time yes no
+Xattr support yes no
+ACL support no no
+============================== ========= ==========
Squashfs compresses data, inodes and directories. In addition, inode and
directory data are highly compacted, and packed on byte boundaries. Each
@@ -47,7 +53,7 @@ compressed inode is on average 8 bytes in length (the exact length varies on
file type, i.e. regular file, directory, symbolic link, and block/char device
inodes have different sizes).
-2. USING SQUASHFS
+2. Using Squashfs
-----------------
As squashfs is a read-only filesystem, the mksquashfs program must be used to
@@ -58,11 +64,11 @@ obtained from this site also.
The squashfs-tools development tree is now located on kernel.org
git://git.kernel.org/pub/scm/fs/squashfs/squashfs-tools.git
-3. SQUASHFS FILESYSTEM DESIGN
+3. Squashfs Filesystem Design
-----------------------------
A squashfs filesystem consists of a maximum of nine parts, packed together on a
-byte alignment:
+byte alignment::
---------------
| superblock |
@@ -229,15 +235,15 @@ location of the xattr list inside each inode, a 32-bit xattr id
is stored. This xattr id is mapped into the location of the xattr
list using a second xattr id lookup table.
-4. TODOS AND OUTSTANDING ISSUES
+4. TODOs and Outstanding Issues
-------------------------------
-4.1 Todo list
+4.1 TODO list
-------------
Implement ACL support.
-4.2 Squashfs internal cache
+4.2 Squashfs Internal Cache
---------------------------
Blocks in Squashfs are compressed. To avoid repeatedly decompressing
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.rst
index ddf15b1b0d5a..290891c3fecb 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.rst
@@ -1,32 +1,36 @@
+.. SPDX-License-Identifier: GPL-2.0
-sysfs - _The_ filesystem for exporting kernel objects.
+=====================================================
+sysfs - _The_ filesystem for exporting kernel objects
+=====================================================
Patrick Mochel <mochel@osdl.org>
+
Mike Murphy <mamurph@cs.clemson.edu>
-Revised: 16 August 2011
-Original: 10 January 2003
+:Revised: 16 August 2011
+:Original: 10 January 2003
What it is:
~~~~~~~~~~~
sysfs is a ram-based filesystem initially based on ramfs. It provides
-a means to export kernel data structures, their attributes, and the
-linkages between them to userspace.
+a means to export kernel data structures, their attributes, and the
+linkages between them to userspace.
sysfs is tied inherently to the kobject infrastructure. Please read
Documentation/kobject.txt for more information concerning the kobject
-interface.
+interface.
Using sysfs
~~~~~~~~~~~
sysfs is always compiled in if CONFIG_SYSFS is defined. You can access
-it by doing:
+it by doing::
- mount -t sysfs sysfs /sys
+ mount -t sysfs sysfs /sys
Directory Creation
@@ -37,7 +41,7 @@ created for it in sysfs. That directory is created as a subdirectory
of the kobject's parent, expressing internal object hierarchies to
userspace. Top-level directories in sysfs represent the common
ancestors of object hierarchies; i.e. the subsystems the objects
-belong to.
+belong to.
Sysfs internally stores a pointer to the kobject that implements a
directory in the kernfs_node object associated with the directory. In
@@ -58,63 +62,63 @@ attributes.
Attributes should be ASCII text files, preferably with only one value
per file. It is noted that it may not be efficient to contain only one
value per file, so it is socially acceptable to express an array of
-values of the same type.
+values of the same type.
Mixing types, expressing multiple lines of data, and doing fancy
formatting of data is heavily frowned upon. Doing these things may get
-you publicly humiliated and your code rewritten without notice.
+you publicly humiliated and your code rewritten without notice.
-An attribute definition is simply:
+An attribute definition is simply::
-struct attribute {
- char * name;
- struct module *owner;
- umode_t mode;
-};
+ struct attribute {
+ char * name;
+ struct module *owner;
+ umode_t mode;
+ };
-int sysfs_create_file(struct kobject * kobj, const struct attribute * attr);
-void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr);
+ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr);
+ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr);
A bare attribute contains no means to read or write the value of the
attribute. Subsystems are encouraged to define their own attribute
structure and wrapper functions for adding and removing attributes for
-a specific object type.
+a specific object type.
-For example, the driver model defines struct device_attribute like:
+For example, the driver model defines struct device_attribute like::
-struct device_attribute {
- struct attribute attr;
- ssize_t (*show)(struct device *dev, struct device_attribute *attr,
- char *buf);
- ssize_t (*store)(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count);
-};
+ struct device_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+ };
-int device_create_file(struct device *, const struct device_attribute *);
-void device_remove_file(struct device *, const struct device_attribute *);
+ int device_create_file(struct device *, const struct device_attribute *);
+ void device_remove_file(struct device *, const struct device_attribute *);
-It also defines this helper for defining device attributes:
+It also defines this helper for defining device attributes::
-#define DEVICE_ATTR(_name, _mode, _show, _store) \
-struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
+ #define DEVICE_ATTR(_name, _mode, _show, _store) \
+ struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
-For example, declaring
+For example, declaring::
-static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo);
+ static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo);
-is equivalent to doing:
+is equivalent to doing::
-static struct device_attribute dev_attr_foo = {
- .attr = {
- .name = "foo",
- .mode = S_IWUSR | S_IRUGO,
- },
- .show = show_foo,
- .store = store_foo,
-};
+ static struct device_attribute dev_attr_foo = {
+ .attr = {
+ .name = "foo",
+ .mode = S_IWUSR | S_IRUGO,
+ },
+ .show = show_foo,
+ .store = store_foo,
+ };
Note as stated in include/linux/kernel.h "OTHER_WRITABLE? Generally
considered a bad idea." so trying to set a sysfs file writable for
@@ -127,15 +131,21 @@ readable. The above case could be shortened to:
static struct device_attribute dev_attr_foo = __ATTR_RW(foo);
the list of helpers available to define your wrapper function is:
-__ATTR_RO(name): assumes default name_show and mode 0444
-__ATTR_WO(name): assumes a name_store only and is restricted to mode
+
+__ATTR_RO(name):
+ assumes default name_show and mode 0444
+__ATTR_WO(name):
+ assumes a name_store only and is restricted to mode
0200 that is root write access only.
-__ATTR_RO_MODE(name, mode): fore more restrictive RO access currently
+__ATTR_RO_MODE(name, mode):
+ for more restrictive RO access currently
only use case is the EFI System Resource Table
(see drivers/firmware/efi/esrt.c)
-__ATTR_RW(name): assumes default name_show, name_store and setting
+__ATTR_RW(name):
+ assumes default name_show, name_store and setting
mode to 0644.
-__ATTR_NULL: which sets the name to NULL and is used as end of list
+__ATTR_NULL:
+ which sets the name to NULL and is used as end of list
indicator (see: kernel/workqueue.c)
Subsystem-Specific Callbacks
@@ -143,12 +153,12 @@ Subsystem-Specific Callbacks
When a subsystem defines a new attribute type, it must implement a
set of sysfs operations for forwarding read and write calls to the
-show and store methods of the attribute owners.
+show and store methods of the attribute owners::
-struct sysfs_ops {
- ssize_t (*show)(struct kobject *, struct attribute *, char *);
- ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
-};
+ struct sysfs_ops {
+ ssize_t (*show)(struct kobject *, struct attribute *, char *);
+ ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
+ };
[ Subsystems should have already defined a struct kobj_type as a
descriptor for this type, which is where the sysfs_ops pointer is
@@ -157,29 +167,29 @@ stored. See the kobject documentation for more information. ]
When a file is read or written, sysfs calls the appropriate method
for the type. The method then translates the generic struct kobject
and struct attribute pointers to the appropriate pointer types, and
-calls the associated methods.
+calls the associated methods.
-To illustrate:
+To illustrate::
-#define to_dev(obj) container_of(obj, struct device, kobj)
-#define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
+ #define to_dev(obj) container_of(obj, struct device, kobj)
+ #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
-static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct device_attribute *dev_attr = to_dev_attr(attr);
- struct device *dev = to_dev(kobj);
- ssize_t ret = -EIO;
+ static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+ {
+ struct device_attribute *dev_attr = to_dev_attr(attr);
+ struct device *dev = to_dev(kobj);
+ ssize_t ret = -EIO;
- if (dev_attr->show)
- ret = dev_attr->show(dev, dev_attr, buf);
- if (ret >= (ssize_t)PAGE_SIZE) {
- printk("dev_attr_show: %pS returned bad count\n",
- dev_attr->show);
- }
- return ret;
-}
+ if (dev_attr->show)
+ ret = dev_attr->show(dev, dev_attr, buf);
+ if (ret >= (ssize_t)PAGE_SIZE) {
+ printk("dev_attr_show: %pS returned bad count\n",
+ dev_attr->show);
+ }
+ return ret;
+ }
@@ -188,11 +198,11 @@ Reading/Writing Attribute Data
To read or write attributes, show() or store() methods must be
specified when declaring the attribute. The method types should be as
-simple as those defined for device attributes:
+simple as those defined for device attributes::
-ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf);
-ssize_t (*store)(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count);
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
IOW, they should take only an object, an attribute, and a buffer as parameters.
@@ -200,11 +210,11 @@ IOW, they should take only an object, an attribute, and a buffer as parameters.
sysfs allocates a buffer of size (PAGE_SIZE) and passes it to the
method. Sysfs will call the method exactly once for each read or
write. This forces the following behavior on the method
-implementations:
+implementations:
-- On read(2), the show() method should fill the entire buffer.
+- On read(2), the show() method should fill the entire buffer.
Recall that an attribute should only be exporting one value, or an
- array of similar values, so this shouldn't be that expensive.
+ array of similar values, so this shouldn't be that expensive.
This allows userspace to do partial reads and forward seeks
arbitrarily over the entire file at will. If userspace seeks back to
@@ -218,10 +228,10 @@ implementations:
When writing sysfs files, userspace processes should first read the
entire file, modify the values it wishes to change, then write the
- entire buffer back.
+ entire buffer back.
Attribute method implementations should operate on an identical
- buffer when reading and writing values.
+ buffer when reading and writing values.
Other notes:
@@ -229,7 +239,7 @@ Other notes:
file position.
- The buffer will always be PAGE_SIZE bytes in length. On i386, this
- is 4096.
+ is 4096.
- show() methods should return the number of bytes printed into the
buffer. This is the return value of scnprintf().
@@ -246,31 +256,31 @@ Other notes:
through, be sure to return an error.
- The object passed to the methods will be pinned in memory via sysfs
- referencing counting its embedded object. However, the physical
- entity (e.g. device) the object represents may not be present. Be
- sure to have a way to check this, if necessary.
+ reference counting its embedded object. However, the physical
+ entity (e.g. device) the object represents may not be present. Be
+ sure to have a way to check this, if necessary.
-A very simple (and naive) implementation of a device attribute is:
+A very simple (and naive) implementation of a device attribute is::
-static ssize_t show_name(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name);
-}
+ static ssize_t show_name(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+ return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name);
+ }
-static ssize_t store_name(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- snprintf(dev->name, sizeof(dev->name), "%.*s",
- (int)min(count, sizeof(dev->name) - 1), buf);
- return count;
-}
+ static ssize_t store_name(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+ snprintf(dev->name, sizeof(dev->name), "%.*s",
+ (int)min(count, sizeof(dev->name) - 1), buf);
+ return count;
+ }
-static DEVICE_ATTR(name, S_IRUGO, show_name, store_name);
+ static DEVICE_ATTR(name, S_IRUGO, show_name, store_name);
-(Note that the real implementation doesn't allow userspace to set the
+(Note that the real implementation doesn't allow userspace to set the
name for a device.)
@@ -278,25 +288,25 @@ Top Level Directory Layout
~~~~~~~~~~~~~~~~~~~~~~~~~~
The sysfs directory arrangement exposes the relationship of kernel
-data structures.
+data structures.
-The top level sysfs directory looks like:
+The top level sysfs directory looks like::
-block/
-bus/
-class/
-dev/
-devices/
-firmware/
-net/
-fs/
+ block/
+ bus/
+ class/
+ dev/
+ devices/
+ firmware/
+ net/
+ fs/
devices/ contains a filesystem representation of the device tree. It maps
directly to the internal kernel device tree, which is a hierarchy of
-struct device.
+struct device.
bus/ contains flat directory layout of the various bus types in the
-kernel. Each bus's directory contains two subdirectories:
+kernel. Each bus's directory contains two subdirectories::
devices/
drivers/
@@ -331,71 +341,71 @@ Current Interfaces
The following interface layers currently exist in sysfs:
-- devices (include/linux/device.h)
-----------------------------------
-Structure:
+devices (include/linux/device.h)
+--------------------------------
+Structure::
-struct device_attribute {
- struct attribute attr;
- ssize_t (*show)(struct device *dev, struct device_attribute *attr,
- char *buf);
- ssize_t (*store)(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count);
-};
+ struct device_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+ };
-Declaring:
+Declaring::
-DEVICE_ATTR(_name, _mode, _show, _store);
+ DEVICE_ATTR(_name, _mode, _show, _store);
-Creation/Removal:
+Creation/Removal::
-int device_create_file(struct device *dev, const struct device_attribute * attr);
-void device_remove_file(struct device *dev, const struct device_attribute * attr);
+ int device_create_file(struct device *dev, const struct device_attribute * attr);
+ void device_remove_file(struct device *dev, const struct device_attribute * attr);
-- bus drivers (include/linux/device.h)
---------------------------------------
-Structure:
+bus drivers (include/linux/device.h)
+------------------------------------
+Structure::
-struct bus_attribute {
- struct attribute attr;
- ssize_t (*show)(struct bus_type *, char * buf);
- ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
-};
+ struct bus_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct bus_type *, char * buf);
+ ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
+ };
-Declaring:
+Declaring::
-static BUS_ATTR_RW(name);
-static BUS_ATTR_RO(name);
-static BUS_ATTR_WO(name);
+ static BUS_ATTR_RW(name);
+ static BUS_ATTR_RO(name);
+ static BUS_ATTR_WO(name);
-Creation/Removal:
+Creation/Removal::
-int bus_create_file(struct bus_type *, struct bus_attribute *);
-void bus_remove_file(struct bus_type *, struct bus_attribute *);
+ int bus_create_file(struct bus_type *, struct bus_attribute *);
+ void bus_remove_file(struct bus_type *, struct bus_attribute *);
-- device drivers (include/linux/device.h)
------------------------------------------
+device drivers (include/linux/device.h)
+---------------------------------------
-Structure:
+Structure::
-struct driver_attribute {
- struct attribute attr;
- ssize_t (*show)(struct device_driver *, char * buf);
- ssize_t (*store)(struct device_driver *, const char * buf,
- size_t count);
-};
+ struct driver_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device_driver *, char * buf);
+ ssize_t (*store)(struct device_driver *, const char * buf,
+ size_t count);
+ };
-Declaring:
+Declaring::
-DRIVER_ATTR_RO(_name)
-DRIVER_ATTR_RW(_name)
+ DRIVER_ATTR_RO(_name)
+ DRIVER_ATTR_RW(_name)
-Creation/Removal:
+Creation/Removal::
-int driver_create_file(struct device_driver *, const struct driver_attribute *);
-void driver_remove_file(struct device_driver *, const struct driver_attribute *);
+ int driver_create_file(struct device_driver *, const struct driver_attribute *);
+ void driver_remove_file(struct device_driver *, const struct driver_attribute *);
Documentation
diff --git a/Documentation/filesystems/sysv-fs.txt b/Documentation/filesystems/sysv-fs.rst
index 253b50d1328e..89e40911ad7c 100644
--- a/Documentation/filesystems/sysv-fs.txt
+++ b/Documentation/filesystems/sysv-fs.rst
@@ -1,25 +1,40 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+SystemV Filesystem
+==================
+
It implements all of
- Xenix FS,
- SystemV/386 FS,
- Coherent FS.
To install:
+
* Answer the 'System V and Coherent filesystem support' question with 'y'
when configuring the kernel.
-* To mount a disk or a partition, use
+* To mount a disk or a partition, use::
+
mount [-r] -t sysv device mountpoint
- The file system type names
+
+ The file system type names::
+
-t sysv
-t xenix
-t coherent
+
may be used interchangeably, but the last two will eventually disappear.
Bugs in the present implementation:
+
- Coherent FS:
+
- The "free list interleave" n:m is currently ignored.
- Only file systems with no filesystem name and no pack name are recognized.
- (See Coherent "man mkfs" for a description of these features.)
+ (See Coherent "man mkfs" for a description of these features.)
+
- SystemV Release 2 FS:
+
The superblock is only searched in the blocks 9, 15, 18, which
corresponds to the beginning of track 1 on floppy disks. No support
for this FS on hard disk yet.
@@ -28,12 +43,14 @@ Bugs in the present implementation:
These filesystems are rather similar. Here is a comparison with Minix FS:
* Linux fdisk reports on partitions
+
- Minix FS 0x81 Linux/Minix
- Xenix FS ??
- SystemV FS ??
- Coherent FS 0x08 AIX bootable
* Size of a block or zone (data allocation unit on disk)
+
- Minix FS 1024
- Xenix FS 1024 (also 512 ??)
- SystemV FS 1024 (also 512 and 2048)
@@ -45,37 +62,51 @@ These filesystems are rather similar. Here is a comparison with Minix FS:
all the block numbers (including the super block) are offset by one track.
* Byte ordering of "short" (16 bit entities) on disk:
+
- Minix FS little endian 0 1
- Xenix FS little endian 0 1
- SystemV FS little endian 0 1
- Coherent FS little endian 0 1
+
Of course, this affects only the file system, not the data of files on it!
* Byte ordering of "long" (32 bit entities) on disk:
+
- Minix FS little endian 0 1 2 3
- Xenix FS little endian 0 1 2 3
- SystemV FS little endian 0 1 2 3
- Coherent FS PDP-11 2 3 0 1
+
Of course, this affects only the file system, not the data of files on it!
* Inode on disk: "short", 0 means non-existent, the root dir ino is:
- - Minix FS 1
- - Xenix FS, SystemV FS, Coherent FS 2
+
+ ================================= ==
+ Minix FS 1
+ Xenix FS, SystemV FS, Coherent FS 2
+ ================================= ==
* Maximum number of hard links to a file:
- - Minix FS 250
- - Xenix FS ??
- - SystemV FS ??
- - Coherent FS >=10000
+
+ =========== =========
+ Minix FS 250
+ Xenix FS ??
+ SystemV FS ??
+ Coherent FS >=10000
+ =========== =========
* Free inode management:
- - Minix FS a bitmap
+
+ - Minix FS
+ a bitmap
- Xenix FS, SystemV FS, Coherent FS
There is a cache of a certain number of free inodes in the super-block.
When it is exhausted, new free inodes are found using a linear search.
* Free block management:
- - Minix FS a bitmap
+
+ - Minix FS
+ a bitmap
- Xenix FS, SystemV FS, Coherent FS
Free blocks are organized in a "free list". Maybe a misleading term,
since it is not true that every free block contains a pointer to
@@ -86,13 +117,18 @@ These filesystems are rather similar. Here is a comparison with Minix FS:
0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS.
* Super-block location:
- - Minix FS block 1 = bytes 1024..2047
- - Xenix FS block 1 = bytes 1024..2047
- - SystemV FS bytes 512..1023
- - Coherent FS block 1 = bytes 512..1023
+
+ =========== ==========================
+ Minix FS block 1 = bytes 1024..2047
+ Xenix FS block 1 = bytes 1024..2047
+ SystemV FS bytes 512..1023
+ Coherent FS block 1 = bytes 512..1023
+ =========== ==========================
* Super-block layout:
- - Minix FS
+
+ - Minix FS::
+
unsigned short s_ninodes;
unsigned short s_nzones;
unsigned short s_imap_blocks;
@@ -101,7 +137,9 @@ These filesystems are rather similar. Here is a comparison with Minix FS:
unsigned short s_log_zone_size;
unsigned long s_max_size;
unsigned short s_magic;
- - Xenix FS, SystemV FS, Coherent FS
+
+ - Xenix FS, SystemV FS, Coherent FS::
+
unsigned short s_firstdatazone;
unsigned long s_nzones;
unsigned short s_fzone_count;
@@ -120,23 +158,33 @@ These filesystems are rather similar. Here is a comparison with Minix FS:
unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only
char s_fname[6];
char s_fpack[6];
+
then they differ considerably:
- Xenix FS
+
+ Xenix FS::
+
char s_clean;
char s_fill[371];
long s_magic;
long s_type;
- SystemV FS
+
+ SystemV FS::
+
long s_fill[12 or 14];
long s_state;
long s_magic;
long s_type;
- Coherent FS
+
+ Coherent FS::
+
unsigned long s_unique;
+
Note that Coherent FS has no magic.
* Inode layout:
- - Minix FS
+
+ - Minix FS::
+
unsigned short i_mode;
unsigned short i_uid;
unsigned long i_size;
@@ -144,7 +192,9 @@ These filesystems are rather similar. Here is a comparison with Minix FS:
unsigned char i_gid;
unsigned char i_nlinks;
unsigned short i_zone[7+1+1];
- - Xenix FS, SystemV FS, Coherent FS
+
+ - Xenix FS, SystemV FS, Coherent FS::
+
unsigned short i_mode;
unsigned short i_nlink;
unsigned short i_uid;
@@ -155,38 +205,55 @@ These filesystems are rather similar. Here is a comparison with Minix FS:
unsigned long i_mtime;
unsigned long i_ctime;
+
* Regular file data blocks are organized as
- - Minix FS
- 7 direct blocks
- 1 indirect block (pointers to blocks)
- 1 double-indirect block (pointer to pointers to blocks)
- - Xenix FS, SystemV FS, Coherent FS
- 10 direct blocks
- 1 indirect block (pointers to blocks)
- 1 double-indirect block (pointer to pointers to blocks)
- 1 triple-indirect block (pointer to pointers to pointers to blocks)
-* Inode size, inodes per block
- - Minix FS 32 32
- - Xenix FS 64 16
- - SystemV FS 64 16
- - Coherent FS 64 8
+ - Minix FS:
+
+ - 7 direct blocks
+ - 1 indirect block (pointers to blocks)
+ - 1 double-indirect block (pointer to pointers to blocks)
+
+ - Xenix FS, SystemV FS, Coherent FS:
+
+ - 10 direct blocks
+ - 1 indirect block (pointers to blocks)
+ - 1 double-indirect block (pointer to pointers to blocks)
+ - 1 triple-indirect block (pointer to pointers to pointers to blocks)
+
+
+ =========== ========== ================
+ Inode size inodes per block
+ =========== ========== ================
+ Minix FS 32 32
+ Xenix FS 64 16
+ SystemV FS 64 16
+ Coherent FS 64 8
+ =========== ========== ================
* Directory entry on disk
- - Minix FS
+
+ - Minix FS::
+
unsigned short inode;
char name[14/30];
- - Xenix FS, SystemV FS, Coherent FS
+
+ - Xenix FS, SystemV FS, Coherent FS::
+
unsigned short inode;
char name[14];
-* Dir entry size, dir entries per block
- - Minix FS 16/32 64/32
- - Xenix FS 16 64
- - SystemV FS 16 64
- - Coherent FS 16 32
+ =========== ============== =====================
+ Dir entry size dir entries per block
+ =========== ============== =====================
+ Minix FS 16/32 64/32
+ Xenix FS 16 64
+ SystemV FS 16 64
+ Coherent FS 16 32
+ =========== ============== =====================
* How to implement symbolic links such that the host fsck doesn't scream:
+
- Minix FS normal
- Xenix FS kludge: as regular files with chmod 1000
- SystemV FS ??
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.rst
index 5ecbc03e6b2f..4e95929301a5 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
+Tmpfs
+=====
+
Tmpfs is a file system which keeps all files in virtual memory.
@@ -14,7 +20,7 @@ If you compare it to ramfs (which was the template to create tmpfs)
you gain swapping and limit checking. Another similar thing is the RAM
disk (/dev/ram*), which simulates a fixed size hard disk in physical
RAM, where you have to create an ordinary filesystem on top. Ramdisks
-cannot swap and you do not have the possibility to resize them.
+cannot swap and you do not have the possibility to resize them.
Since tmpfs lives completely in the page cache and on swap, all tmpfs
pages will be shown as "Shmem" in /proc/meminfo and "Shared" in
@@ -26,7 +32,7 @@ tmpfs has the following uses:
1) There is always a kernel internal mount which you will not see at
all. This is used for shared anonymous mappings and SYSV shared
- memory.
+ memory.
This mount does not depend on CONFIG_TMPFS. If CONFIG_TMPFS is not
set, the user visible part of tmpfs is not build. But the internal
@@ -34,7 +40,7 @@ tmpfs has the following uses:
2) glibc 2.2 and above expects tmpfs to be mounted at /dev/shm for
POSIX shared memory (shm_open, shm_unlink). Adding the following
- line to /etc/fstab should take care of this:
+ line to /etc/fstab should take care of this::
tmpfs /dev/shm tmpfs defaults 0 0
@@ -56,15 +62,17 @@ tmpfs has the following uses:
tmpfs has three mount options for sizing:
-size: The limit of allocated bytes for this tmpfs instance. The
+========= ============================================================
+size The limit of allocated bytes for this tmpfs instance. The
default is half of your physical RAM without swap. If you
oversize your tmpfs instances the machine will deadlock
since the OOM handler will not be able to free that memory.
-nr_blocks: The same as size, but in blocks of PAGE_SIZE.
-nr_inodes: The maximum number of inodes for this instance. The default
+nr_blocks The same as size, but in blocks of PAGE_SIZE.
+nr_inodes The maximum number of inodes for this instance. The default
is half of the number of your physical RAM pages, or (on a
machine with highmem) the number of lowmem RAM pages,
whichever is the lower.
+========= ============================================================
These parameters accept a suffix k, m or g for kilo, mega and giga and
can be changed on remount. The size parameter also accepts a suffix %
@@ -82,6 +90,7 @@ tmpfs has a mount option to set the NUMA memory allocation policy for
all files in that instance (if CONFIG_NUMA is enabled) - which can be
adjusted on the fly via 'mount -o remount ...'
+======================== ==============================================
mpol=default use the process allocation policy
(see set_mempolicy(2))
mpol=prefer:Node prefers to allocate memory from the given Node
@@ -89,6 +98,7 @@ mpol=bind:NodeList allocates memory only from nodes in NodeList
mpol=interleave prefers to allocate from each node in turn
mpol=interleave:NodeList allocates from each node of NodeList in turn
mpol=local prefers to allocate memory from the local node
+======================== ==============================================
NodeList format is a comma-separated list of decimal numbers and ranges,
a range being two hyphen-separated decimal numbers, the smallest and
@@ -98,9 +108,9 @@ A memory policy with a valid NodeList will be saved, as specified, for
use at file creation time. When a task allocates a file in the file
system, the mount option memory policy will be applied with a NodeList,
if any, modified by the calling task's cpuset constraints
-[See Documentation/admin-guide/cgroup-v1/cpusets.rst] and any optional flags, listed
-below. If the resulting NodeLists is the empty set, the effective memory
-policy for the file will revert to "default" policy.
+[See Documentation/admin-guide/cgroup-v1/cpusets.rst] and any optional flags,
+listed below. If the resulting NodeLists is the empty set, the effective
+memory policy for the file will revert to "default" policy.
NUMA memory allocation policies have optional flags that can be used in
conjunction with their modes. These optional flags can be specified
@@ -109,6 +119,8 @@ See Documentation/admin-guide/mm/numa_memory_policy.rst for a list of
all available memory allocation policy mode flags and their effect on
memory policy.
+::
+
=static is equivalent to MPOL_F_STATIC_NODES
=relative is equivalent to MPOL_F_RELATIVE_NODES
@@ -128,9 +140,11 @@ on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'.
To specify the initial root directory you can use the following mount
options:
-mode: The permissions as an octal number
-uid: The user id
-gid: The group id
+==== ==================================
+mode The permissions as an octal number
+uid The user id
+gid The group id
+==== ==================================
These options do not have any effect on remount. You can change these
parameters with chmod(1), chown(1) and chgrp(1) on a mounted filesystem.
@@ -141,9 +155,9 @@ will give you tmpfs instance on /mytmpfs which can allocate 10GB
RAM/SWAP in 10240 inodes and it is only accessible by root.
-Author:
+:Author:
Christoph Rohland <cr@sap.com>, 1.12.01
-Updated:
+:Updated:
Hugh Dickins, 4 June 2007
-Updated:
+:Updated:
KOSAKI Motohiro, 16 Mar 2010
diff --git a/Documentation/filesystems/ubifs-authentication.rst b/Documentation/filesystems/ubifs-authentication.rst
index 6a9584f6ff46..16efd729bf7c 100644
--- a/Documentation/filesystems/ubifs-authentication.rst
+++ b/Documentation/filesystems/ubifs-authentication.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
:orphan:
.. UBIFS Authentication
@@ -92,11 +94,11 @@ UBIFS Index & Tree Node Cache
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Basic on-flash UBIFS entities are called *nodes*. UBIFS knows different types
-of nodes. Eg. data nodes (`struct ubifs_data_node`) which store chunks of file
-contents or inode nodes (`struct ubifs_ino_node`) which represent VFS inodes.
-Almost all types of nodes share a common header (`ubifs_ch`) containing basic
+of nodes. Eg. data nodes (``struct ubifs_data_node``) which store chunks of file
+contents or inode nodes (``struct ubifs_ino_node``) which represent VFS inodes.
+Almost all types of nodes share a common header (``ubifs_ch``) containing basic
information like node type, node length, a sequence number, etc. (see
-`fs/ubifs/ubifs-media.h`in kernel source). Exceptions are entries of the LPT
+``fs/ubifs/ubifs-media.h`` in kernel source). Exceptions are entries of the LPT
and some less important node types like padding nodes which are used to pad
unusable content at the end of LEBs.
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.rst
index acc80442a3bb..e6ee99762534 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.rst
@@ -1,5 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+UBI File System
+===============
+
Introduction
-=============
+============
UBIFS file-system stands for UBI File System. UBI stands for "Unsorted
Block Images". UBIFS is a flash file system, which means it is designed
@@ -79,6 +85,7 @@ Mount options
(*) == default.
+==================== =======================================================
bulk_read read more in one go to take advantage of flash
media that read faster sequentially
no_bulk_read (*) do not bulk-read
@@ -98,6 +105,7 @@ auth_key= specify the key used for authenticating the filesystem.
auth_hash_name= The hash algorithm used for authentication. Used for
both hashing and for creating HMACs. Typical values
include "sha256" or "sha512"
+==================== =======================================================
Quick usage instructions
@@ -107,12 +115,14 @@ The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax,
where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is
UBI volume name.
-Mount volume 0 on UBI device 0 to /mnt/ubifs:
-$ mount -t ubifs ubi0_0 /mnt/ubifs
+Mount volume 0 on UBI device 0 to /mnt/ubifs::
+
+ $ mount -t ubifs ubi0_0 /mnt/ubifs
Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume
-name):
-$ mount -t ubifs ubi0:rootfs /mnt/ubifs
+name)::
+
+ $ mount -t ubifs ubi0:rootfs /mnt/ubifs
The following is an example of the kernel boot arguments to attach mtd0
to UBI and mount volume "rootfs":
@@ -122,5 +132,6 @@ References
==========
UBIFS documentation and FAQ/HOWTO at the MTD web site:
-http://www.linux-mtd.infradead.org/doc/ubifs.html
-http://www.linux-mtd.infradead.org/faq/ubifs.html
+
+- http://www.linux-mtd.infradead.org/doc/ubifs.html
+- http://www.linux-mtd.infradead.org/faq/ubifs.html
diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.rst
index e2f2faf32f18..d9badbf285b2 100644
--- a/Documentation/filesystems/udf.txt
+++ b/Documentation/filesystems/udf.rst
@@ -1,6 +1,8 @@
-*
-* Documentation/filesystems/udf.txt
-*
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+UDF file system
+===============
If you encounter problems with reading UDF discs using this driver,
please report them according to MAINTAINERS file.
@@ -18,8 +20,10 @@ performance due to very poor read-modify-write support supplied internally
by drive firmware.
-------------------------------------------------------------------------------
+
The following mount options are supported:
+ =========== ======================================
gid= Set the default group.
umask= Set the default umask.
mode= Set the default file permissions.
@@ -34,6 +38,7 @@ The following mount options are supported:
longad Use long ad's (default)
nostrict Unset strict conformance
iocharset= Set the NLS character set
+ =========== ======================================
The uid= and gid= options need a bit more explaining. They will accept a
decimal numeric value and all inodes on that mount will then appear as
@@ -47,13 +52,17 @@ the interactive user will always see the files on the disk as belonging to him.
The remaining are for debugging and disaster recovery:
- novrs Skip volume sequence recognition
+ ===== ================================
+ novrs Skip volume sequence recognition
+ ===== ================================
The following expect a offset from 0.
+ ========== =================================================
session= Set the CDROM session (default= last session)
anchor= Override standard anchor location. (default= 256)
lastblock= Set the last block of the filesystem/
+ ========== =================================================
-------------------------------------------------------------------------------
@@ -62,5 +71,5 @@ For the latest version and toolset see:
https://github.com/pali/udftools
Documentation on UDF and ECMA 167 is available FREE from:
- http://www.osta.org/
- http://www.ecma-international.org/
+ - http://www.osta.org/
+ - http://www.ecma-international.org/
diff --git a/Documentation/filesystems/virtiofs.rst b/Documentation/filesystems/virtiofs.rst
index 4f338e3cb3f7..e06e4951cb39 100644
--- a/Documentation/filesystems/virtiofs.rst
+++ b/Documentation/filesystems/virtiofs.rst
@@ -1,5 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
+.. _virtiofs_index:
+
===================================================
virtiofs: virtio-fs host<->guest shared file system
===================================================
diff --git a/Documentation/filesystems/zonefs.txt b/Documentation/filesystems/zonefs.rst
index d54fa98ac158..71d845c6a700 100644
--- a/Documentation/filesystems/zonefs.txt
+++ b/Documentation/filesystems/zonefs.rst
@@ -1,4 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================================
ZoneFS - Zone filesystem for Zoned block devices
+================================================
Introduction
============
@@ -29,6 +33,7 @@ Zoned block devices
Zoned storage devices belong to a class of storage devices with an address
space that is divided into zones. A zone is a group of consecutive LBAs and all
zones are contiguous (there are no LBA gaps). Zones may have different types.
+
* Conventional zones: there are no access constraints to LBAs belonging to
conventional zones. Any read or write access can be executed, similarly to a
regular block device.
@@ -158,6 +163,7 @@ Format options
--------------
Several optional features of zonefs can be enabled at format time.
+
* Conventional zone aggregation: ranges of contiguous conventional zones can be
aggregated into a single larger file instead of the default one file per zone.
* File ownership: The owner UID and GID of zone files is by default 0 (root)
@@ -249,7 +255,7 @@ permissions.
Further action taken by zonefs I/O error recovery can be controlled by the user
with the "errors=xxx" mount option. The table below summarizes the result of
zonefs I/O error processing depending on the mount option and on the zone
-conditions.
+conditions::
+--------------+-----------+-----------------------------------------+
| | | Post error state |
@@ -258,11 +264,11 @@ conditions.
| option | condition | size read write read write |
+--------------+-----------+-----------------------------------------+
| | good | fixed yes no yes yes |
- | remount-ro | read-only | fixed yes no yes no |
+ | remount-ro | read-only | as is yes no yes no |
| (default) | offline | 0 no no no no |
+--------------+-----------+-----------------------------------------+
| | good | fixed yes no yes yes |
- | zone-ro | read-only | fixed yes no yes no |
+ | zone-ro | read-only | as is yes no yes no |
| | offline | 0 no no no no |
+--------------+-----------+-----------------------------------------+
| | good | 0 no no yes yes |
@@ -270,11 +276,12 @@ conditions.
| | offline | 0 no no no no |
+--------------+-----------+-----------------------------------------+
| | good | fixed yes yes yes yes |
- | repair | read-only | fixed yes no yes no |
+ | repair | read-only | as is yes no yes no |
| | offline | 0 no no no no |
+--------------+-----------+-----------------------------------------+
Further notes:
+
* The "errors=remount-ro" mount option is the default behavior of zonefs I/O
error processing if no errors mount option is specified.
* With the "errors=remount-ro" mount option, the change of the file access
@@ -302,13 +309,22 @@ Mount options
zonefs define the "errors=<behavior>" mount option to allow the user to specify
zonefs behavior in response to I/O errors, inode size inconsistencies or zone
condition changes. The defined behaviors are as follow:
+
* remount-ro (default)
* zone-ro
* zone-offline
* repair
-The I/O error actions defined for each behavior are detailed in the previous
-section.
+The run-time I/O error actions defined for each behavior are detailed in the
+previous section. Mount time I/O errors will cause the mount operation to fail.
+The handling of read-only zones also differs between mount-time and run-time.
+If a read-only zone is found at mount time, the zone is always treated in the
+same manner as offline zones, that is, all accesses are disabled and the zone
+file size set to 0. This is necessary as the write pointer of read-only zones
+is defined as invalid by the ZBC and ZAC standards, making it impossible to
+discover the amount of data that has been written to the zone. In the case of a
+read-only zone discovered at run-time, as indicated in the previous section,
+the size of the zone file is left unchanged from its last updated value.
Zonefs User Space Tools
=======================
@@ -325,78 +341,78 @@ Examples
--------
The following formats a 15TB host-managed SMR HDD with 256 MB zones
-with the conventional zones aggregation feature enabled.
+with the conventional zones aggregation feature enabled::
-# mkzonefs -o aggr_cnv /dev/sdX
-# mount -t zonefs /dev/sdX /mnt
-# ls -l /mnt/
-total 0
-dr-xr-xr-x 2 root root 1 Nov 25 13:23 cnv
-dr-xr-xr-x 2 root root 55356 Nov 25 13:23 seq
+ # mkzonefs -o aggr_cnv /dev/sdX
+ # mount -t zonefs /dev/sdX /mnt
+ # ls -l /mnt/
+ total 0
+ dr-xr-xr-x 2 root root 1 Nov 25 13:23 cnv
+ dr-xr-xr-x 2 root root 55356 Nov 25 13:23 seq
The size of the zone files sub-directories indicate the number of files
existing for each type of zones. In this example, there is only one
conventional zone file (all conventional zones are aggregated under a single
-file).
+file)::
-# ls -l /mnt/cnv
-total 137101312
--rw-r----- 1 root root 140391743488 Nov 25 13:23 0
+ # ls -l /mnt/cnv
+ total 137101312
+ -rw-r----- 1 root root 140391743488 Nov 25 13:23 0
-This aggregated conventional zone file can be used as a regular file.
+This aggregated conventional zone file can be used as a regular file::
-# mkfs.ext4 /mnt/cnv/0
-# mount -o loop /mnt/cnv/0 /data
+ # mkfs.ext4 /mnt/cnv/0
+ # mount -o loop /mnt/cnv/0 /data
The "seq" sub-directory grouping files for sequential write zones has in this
-example 55356 zones.
+example 55356 zones::
-# ls -lv /mnt/seq
-total 14511243264
--rw-r----- 1 root root 0 Nov 25 13:23 0
--rw-r----- 1 root root 0 Nov 25 13:23 1
--rw-r----- 1 root root 0 Nov 25 13:23 2
-...
--rw-r----- 1 root root 0 Nov 25 13:23 55354
--rw-r----- 1 root root 0 Nov 25 13:23 55355
+ # ls -lv /mnt/seq
+ total 14511243264
+ -rw-r----- 1 root root 0 Nov 25 13:23 0
+ -rw-r----- 1 root root 0 Nov 25 13:23 1
+ -rw-r----- 1 root root 0 Nov 25 13:23 2
+ ...
+ -rw-r----- 1 root root 0 Nov 25 13:23 55354
+ -rw-r----- 1 root root 0 Nov 25 13:23 55355
For sequential write zone files, the file size changes as data is appended at
-the end of the file, similarly to any regular file system.
+the end of the file, similarly to any regular file system::
-# dd if=/dev/zero of=/mnt/seq/0 bs=4096 count=1 conv=notrunc oflag=direct
-1+0 records in
-1+0 records out
-4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00044121 s, 9.3 MB/s
+ # dd if=/dev/zero of=/mnt/seq/0 bs=4096 count=1 conv=notrunc oflag=direct
+ 1+0 records in
+ 1+0 records out
+ 4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00044121 s, 9.3 MB/s
-# ls -l /mnt/seq/0
--rw-r----- 1 root root 4096 Nov 25 13:23 /mnt/seq/0
+ # ls -l /mnt/seq/0
+ -rw-r----- 1 root root 4096 Nov 25 13:23 /mnt/seq/0
The written file can be truncated to the zone size, preventing any further
-write operation.
+write operation::
-# truncate -s 268435456 /mnt/seq/0
-# ls -l /mnt/seq/0
--rw-r----- 1 root root 268435456 Nov 25 13:49 /mnt/seq/0
+ # truncate -s 268435456 /mnt/seq/0
+ # ls -l /mnt/seq/0
+ -rw-r----- 1 root root 268435456 Nov 25 13:49 /mnt/seq/0
Truncation to 0 size allows freeing the file zone storage space and restart
-append-writes to the file.
+append-writes to the file::
-# truncate -s 0 /mnt/seq/0
-# ls -l /mnt/seq/0
--rw-r----- 1 root root 0 Nov 25 13:49 /mnt/seq/0
+ # truncate -s 0 /mnt/seq/0
+ # ls -l /mnt/seq/0
+ -rw-r----- 1 root root 0 Nov 25 13:49 /mnt/seq/0
Since files are statically mapped to zones on the disk, the number of blocks of
-a file as reported by stat() and fstat() indicates the size of the file zone.
-
-# stat /mnt/seq/0
- File: /mnt/seq/0
- Size: 0 Blocks: 524288 IO Block: 4096 regular empty file
-Device: 870h/2160d Inode: 50431 Links: 1
-Access: (0640/-rw-r-----) Uid: ( 0/ root) Gid: ( 0/ root)
-Access: 2019-11-25 13:23:57.048971997 +0900
-Modify: 2019-11-25 13:52:25.553805765 +0900
-Change: 2019-11-25 13:52:25.553805765 +0900
- Birth: -
+a file as reported by stat() and fstat() indicates the size of the file zone::
+
+ # stat /mnt/seq/0
+ File: /mnt/seq/0
+ Size: 0 Blocks: 524288 IO Block: 4096 regular empty file
+ Device: 870h/2160d Inode: 50431 Links: 1
+ Access: (0640/-rw-r-----) Uid: ( 0/ root) Gid: ( 0/ root)
+ Access: 2019-11-25 13:23:57.048971997 +0900
+ Modify: 2019-11-25 13:52:25.553805765 +0900
+ Change: 2019-11-25 13:52:25.553805765 +0900
+ Birth: -
The number of blocks of the file ("Blocks") in units of 512B blocks gives the
maximum file size of 524288 * 512 B = 256 MB, corresponding to the device zone
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index e539c42a3e78..cc74e24ca3b5 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -207,10 +207,10 @@ DPIO
CSR firmware support for DMC
----------------------------
-.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_csr.c
:doc: csr support for dmc
-.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_csr.c
:internal:
Video BIOS Table (VBT)
diff --git a/Documentation/index.rst b/Documentation/index.rst
index e99d0bd2589d..6fdad61ee443 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -131,7 +131,6 @@ needed).
usb/index
PCI/index
misc-devices/index
- mic/index
scheduler/index
Architecture-agnostic documentation
diff --git a/Documentation/core-api/gcc-plugins.rst b/Documentation/kbuild/gcc-plugins.rst
index 8502f24396fb..4b1c10f88e30 100644
--- a/Documentation/core-api/gcc-plugins.rst
+++ b/Documentation/kbuild/gcc-plugins.rst
@@ -72,6 +72,10 @@ e.g., on Ubuntu for gcc-4.9::
apt-get install gcc-4.9-plugin-dev
+Or on Fedora::
+
+ dnf install gcc-plugin-devel
+
Enable a GCC plugin based feature in the kernel config::
CONFIG_GCC_PLUGIN_CYC_COMPLEXITY = y
diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst
index 0f144fad99a6..82daf2efcb73 100644
--- a/Documentation/kbuild/index.rst
+++ b/Documentation/kbuild/index.rst
@@ -19,6 +19,7 @@ Kernel Build System
issues
reproducible-builds
+ gcc-plugins
.. only:: subproject and html
diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst
index f1e5dce86af7..510f38d7e78a 100644
--- a/Documentation/kbuild/kbuild.rst
+++ b/Documentation/kbuild/kbuild.rst
@@ -237,7 +237,7 @@ This is solely useful to speed up test compiles.
KBUILD_EXTRA_SYMBOLS
--------------------
For modules that use symbols from other modules.
-See more details in modules.txt.
+See more details in modules.rst.
ALLSOURCE_ARCHS
---------------
diff --git a/Documentation/kbuild/kconfig-macro-language.rst b/Documentation/kbuild/kconfig-macro-language.rst
index 35b3263b7e40..8b413ef9603d 100644
--- a/Documentation/kbuild/kconfig-macro-language.rst
+++ b/Documentation/kbuild/kconfig-macro-language.rst
@@ -44,7 +44,7 @@ intermediate::
def_bool y
Then, Kconfig moves onto the evaluation stage to resolve inter-symbol
-dependency as explained in kconfig-language.txt.
+dependency as explained in kconfig-language.rst.
Variables
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index 6bc126a14b3d..04d5c01a2e99 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -924,7 +924,7 @@ When kbuild executes, the following steps are followed (roughly):
$(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that
are used for assembler.
- From commandline AFLAGS_MODULE shall be used (see kbuild.txt).
+ From commandline AFLAGS_MODULE shall be used (see kbuild.rst).
KBUILD_CFLAGS_KERNEL
$(CC) options specific for built-in
@@ -937,7 +937,7 @@ When kbuild executes, the following steps are followed (roughly):
$(KBUILD_CFLAGS_MODULE) is used to add arch-specific options that
are used for $(CC).
- From commandline CFLAGS_MODULE shall be used (see kbuild.txt).
+ From commandline CFLAGS_MODULE shall be used (see kbuild.rst).
KBUILD_LDFLAGS_MODULE
Options for $(LD) when linking modules
@@ -945,7 +945,7 @@ When kbuild executes, the following steps are followed (roughly):
$(KBUILD_LDFLAGS_MODULE) is used to add arch-specific options
used when linking modules. This is often a linker script.
- From commandline LDFLAGS_MODULE shall be used (see kbuild.txt).
+ From commandline LDFLAGS_MODULE shall be used (see kbuild.rst).
KBUILD_LDS
diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst
index 69fa48ee93d6..e0b45a257f21 100644
--- a/Documentation/kbuild/modules.rst
+++ b/Documentation/kbuild/modules.rst
@@ -470,9 +470,9 @@ build.
The syntax of the Module.symvers file is::
- <CRC> <Symbol> <Namespace> <Module> <Export Type>
+ <CRC> <Symbol> <Module> <Export Type> <Namespace>
- 0xe1cc2a05 usb_stor_suspend USB_STORAGE drivers/usb/storage/usb-storage EXPORT_SYMBOL_GPL
+ 0xe1cc2a05 usb_stor_suspend drivers/usb/storage/usb-storage EXPORT_SYMBOL_GPL USB_STORAGE
The fields are separated by tabs and values may be empty (e.g.
if no namespace is defined for an exported symbol).
diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst
index d62aacb2822a..eed2136d847f 100644
--- a/Documentation/kernel-hacking/hacking.rst
+++ b/Documentation/kernel-hacking/hacking.rst
@@ -601,7 +601,7 @@ Defined in ``include/linux/export.h``
This is the variant of `EXPORT_SYMBOL()` that allows specifying a symbol
namespace. Symbol Namespaces are documented in
-``Documentation/core-api/symbol-namespaces.rst``.
+:doc:`../core-api/symbol-namespaces`
:c:func:`EXPORT_SYMBOL_NS_GPL()`
--------------------------------
@@ -610,7 +610,7 @@ Defined in ``include/linux/export.h``
This is the variant of `EXPORT_SYMBOL_GPL()` that allows specifying a symbol
namespace. Symbol Namespaces are documented in
-``Documentation/core-api/symbol-namespaces.rst``.
+:doc:`../core-api/symbol-namespaces`
Routines and Conventions
========================
diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index a8518ac0d31d..6ed806e6061b 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -150,17 +150,17 @@ Locking Only In User Context
If you have a data structure which is only ever accessed from user
context, then you can use a simple mutex (``include/linux/mutex.h``) to
protect it. This is the most trivial case: you initialize the mutex.
-Then you can call :c:func:`mutex_lock_interruptible()` to grab the
-mutex, and :c:func:`mutex_unlock()` to release it. There is also a
-:c:func:`mutex_lock()`, which should be avoided, because it will
+Then you can call mutex_lock_interruptible() to grab the
+mutex, and mutex_unlock() to release it. There is also a
+mutex_lock(), which should be avoided, because it will
not return if a signal is received.
Example: ``net/netfilter/nf_sockopt.c`` allows registration of new
-:c:func:`setsockopt()` and :c:func:`getsockopt()` calls, with
-:c:func:`nf_register_sockopt()`. Registration and de-registration
+setsockopt() and getsockopt() calls, with
+nf_register_sockopt(). Registration and de-registration
are only done on module load and unload (and boot time, where there is
no concurrency), and the list of registrations is only consulted for an
-unknown :c:func:`setsockopt()` or :c:func:`getsockopt()` system
+unknown setsockopt() or getsockopt() system
call. The ``nf_sockopt_mutex`` is perfect to protect this, especially
since the setsockopt and getsockopt calls may well sleep.
@@ -170,19 +170,19 @@ Locking Between User Context and Softirqs
If a softirq shares data with user context, you have two problems.
Firstly, the current user context can be interrupted by a softirq, and
secondly, the critical region could be entered from another CPU. This is
-where :c:func:`spin_lock_bh()` (``include/linux/spinlock.h``) is
+where spin_lock_bh() (``include/linux/spinlock.h``) is
used. It disables softirqs on that CPU, then grabs the lock.
-:c:func:`spin_unlock_bh()` does the reverse. (The '_bh' suffix is
+spin_unlock_bh() does the reverse. (The '_bh' suffix is
a historical reference to "Bottom Halves", the old name for software
interrupts. It should really be called spin_lock_softirq()' in a
perfect world).
-Note that you can also use :c:func:`spin_lock_irq()` or
-:c:func:`spin_lock_irqsave()` here, which stop hardware interrupts
+Note that you can also use spin_lock_irq() or
+spin_lock_irqsave() here, which stop hardware interrupts
as well: see `Hard IRQ Context <#hard-irq-context>`__.
This works perfectly for UP as well: the spin lock vanishes, and this
-macro simply becomes :c:func:`local_bh_disable()`
+macro simply becomes local_bh_disable()
(``include/linux/interrupt.h``), which protects you from the softirq
being run.
@@ -216,8 +216,8 @@ Different Tasklets/Timers
~~~~~~~~~~~~~~~~~~~~~~~~~
If another tasklet/timer wants to share data with your tasklet or timer
-, you will both need to use :c:func:`spin_lock()` and
-:c:func:`spin_unlock()` calls. :c:func:`spin_lock_bh()` is
+, you will both need to use spin_lock() and
+spin_unlock() calls. spin_lock_bh() is
unnecessary here, as you are already in a tasklet, and none will be run
on the same CPU.
@@ -234,14 +234,14 @@ The same softirq can run on the other CPUs: you can use a per-CPU array
going so far as to use a softirq, you probably care about scalable
performance enough to justify the extra complexity.
-You'll need to use :c:func:`spin_lock()` and
-:c:func:`spin_unlock()` for shared data.
+You'll need to use spin_lock() and
+spin_unlock() for shared data.
Different Softirqs
~~~~~~~~~~~~~~~~~~
-You'll need to use :c:func:`spin_lock()` and
-:c:func:`spin_unlock()` for shared data, whether it be a timer,
+You'll need to use spin_lock() and
+spin_unlock() for shared data, whether it be a timer,
tasklet, different softirq or the same or another softirq: any of them
could be running on a different CPU.
@@ -259,38 +259,38 @@ If a hardware irq handler shares data with a softirq, you have two
concerns. Firstly, the softirq processing can be interrupted by a
hardware interrupt, and secondly, the critical region could be entered
by a hardware interrupt on another CPU. This is where
-:c:func:`spin_lock_irq()` is used. It is defined to disable
+spin_lock_irq() is used. It is defined to disable
interrupts on that cpu, then grab the lock.
-:c:func:`spin_unlock_irq()` does the reverse.
+spin_unlock_irq() does the reverse.
-The irq handler does not to use :c:func:`spin_lock_irq()`, because
+The irq handler does not need to use spin_lock_irq(), because
the softirq cannot run while the irq handler is running: it can use
-:c:func:`spin_lock()`, which is slightly faster. The only exception
+spin_lock(), which is slightly faster. The only exception
would be if a different hardware irq handler uses the same lock:
-:c:func:`spin_lock_irq()` will stop that from interrupting us.
+spin_lock_irq() will stop that from interrupting us.
This works perfectly for UP as well: the spin lock vanishes, and this
-macro simply becomes :c:func:`local_irq_disable()`
+macro simply becomes local_irq_disable()
(``include/asm/smp.h``), which protects you from the softirq/tasklet/BH
being run.
-:c:func:`spin_lock_irqsave()` (``include/linux/spinlock.h``) is a
+spin_lock_irqsave() (``include/linux/spinlock.h``) is a
variant which saves whether interrupts were on or off in a flags word,
-which is passed to :c:func:`spin_unlock_irqrestore()`. This means
+which is passed to spin_unlock_irqrestore(). This means
that the same code can be used inside an hard irq handler (where
interrupts are already off) and in softirqs (where the irq disabling is
required).
Note that softirqs (and hence tasklets and timers) are run on return
-from hardware interrupts, so :c:func:`spin_lock_irq()` also stops
-these. In that sense, :c:func:`spin_lock_irqsave()` is the most
+from hardware interrupts, so spin_lock_irq() also stops
+these. In that sense, spin_lock_irqsave() is the most
general and powerful locking function.
Locking Between Two Hard IRQ Handlers
-------------------------------------
It is rare to have to share data between two IRQ handlers, but if you
-do, :c:func:`spin_lock_irqsave()` should be used: it is
+do, spin_lock_irqsave() should be used: it is
architecture-specific whether all interrupts are disabled inside irq
handlers themselves.
@@ -304,11 +304,11 @@ Pete Zaitcev gives the following summary:
(``copy_from_user*(`` or ``kmalloc(x,GFP_KERNEL)``).
- Otherwise (== data can be touched in an interrupt), use
- :c:func:`spin_lock_irqsave()` and
- :c:func:`spin_unlock_irqrestore()`.
+ spin_lock_irqsave() and
+ spin_unlock_irqrestore().
- Avoid holding spinlock for more than 5 lines of code and across any
- function call (except accessors like :c:func:`readb()`).
+ function call (except accessors like readb()).
Table of Minimum Requirements
-----------------------------
@@ -320,7 +320,7 @@ particular thread can only run on one CPU at a time, but if it needs
shares data with another thread, locking is required).
Remember the advice above: you can always use
-:c:func:`spin_lock_irqsave()`, which is a superset of all other
+spin_lock_irqsave(), which is a superset of all other
spinlock primitives.
============== ============= ============= ========= ========= ========= ========= ======= ======= ============== ==============
@@ -363,13 +363,13 @@ They can be used if you need no access to the data protected with the
lock when some other thread is holding the lock. You should acquire the
lock later if you then need access to the data protected with the lock.
-:c:func:`spin_trylock()` does not spin but returns non-zero if it
+spin_trylock() does not spin but returns non-zero if it
acquires the spinlock on the first try or 0 if not. This function can be
-used in all contexts like :c:func:`spin_lock()`: you must have
+used in all contexts like spin_lock(): you must have
disabled the contexts that might interrupt you and acquire the spin
lock.
-:c:func:`mutex_trylock()` does not suspend your task but returns
+mutex_trylock() does not suspend your task but returns
non-zero if it could lock the mutex on the first try or 0 if not. This
function cannot be safely used in hardware or software interrupt
contexts despite not sleeping.
@@ -490,14 +490,14 @@ easy, since we copy the data for the user, and never let them access the
objects directly.
There is a slight (and common) optimization here: in
-:c:func:`cache_add()` we set up the fields of the object before
+cache_add() we set up the fields of the object before
grabbing the lock. This is safe, as no-one else can access it until we
put it in cache.
Accessing From Interrupt Context
--------------------------------
-Now consider the case where :c:func:`cache_find()` can be called
+Now consider the case where cache_find() can be called
from interrupt context: either a hardware interrupt or a softirq. An
example would be a timer which deletes object from the cache.
@@ -566,16 +566,16 @@ which are taken away, and the ``+`` are lines which are added.
return ret;
}
-Note that the :c:func:`spin_lock_irqsave()` will turn off
+Note that the spin_lock_irqsave() will turn off
interrupts if they are on, otherwise does nothing (if we are already in
an interrupt handler), hence these functions are safe to call from any
context.
-Unfortunately, :c:func:`cache_add()` calls :c:func:`kmalloc()`
+Unfortunately, cache_add() calls kmalloc()
with the ``GFP_KERNEL`` flag, which is only legal in user context. I
-have assumed that :c:func:`cache_add()` is still only called in
+have assumed that cache_add() is still only called in
user context, otherwise this should become a parameter to
-:c:func:`cache_add()`.
+cache_add().
Exposing Objects Outside This File
----------------------------------
@@ -592,7 +592,7 @@ This makes locking trickier, as it is no longer all in one place.
The second problem is the lifetime problem: if another structure keeps a
pointer to an object, it presumably expects that pointer to remain
valid. Unfortunately, this is only guaranteed while you hold the lock,
-otherwise someone might call :c:func:`cache_delete()` and even
+otherwise someone might call cache_delete() and even
worse, add another object, re-using the same address.
As there is only one lock, you can't hold it forever: no-one else would
@@ -693,8 +693,8 @@ Here is the code::
We encapsulate the reference counting in the standard 'get' and 'put'
functions. Now we can return the object itself from
-:c:func:`cache_find()` which has the advantage that the user can
-now sleep holding the object (eg. to :c:func:`copy_to_user()` to
+cache_find() which has the advantage that the user can
+now sleep holding the object (eg. to copy_to_user() to
name to userspace).
The other point to note is that I said a reference should be held for
@@ -710,7 +710,7 @@ number of atomic operations defined in ``include/asm/atomic.h``: these
are guaranteed to be seen atomically from all CPUs in the system, so no
lock is required. In this case, it is simpler than using spinlocks,
although for anything non-trivial using spinlocks is clearer. The
-:c:func:`atomic_inc()` and :c:func:`atomic_dec_and_test()`
+atomic_inc() and atomic_dec_and_test()
are used instead of the standard increment and decrement operators, and
the lock is no longer used to protect the reference count itself.
@@ -802,7 +802,7 @@ name to change, there are three possibilities:
- You can make ``cache_lock`` non-static, and tell people to grab that
lock before changing the name in any object.
-- You can provide a :c:func:`cache_obj_rename()` which grabs this
+- You can provide a cache_obj_rename() which grabs this
lock and changes the name for the caller, and tell everyone to use
that function.
@@ -861,11 +861,11 @@ Note that I decide that the popularity count should be protected by the
``cache_lock`` rather than the per-object lock: this is because it (like
the :c:type:`struct list_head <list_head>` inside the object)
is logically part of the infrastructure. This way, I don't need to grab
-the lock of every object in :c:func:`__cache_add()` when seeking
+the lock of every object in __cache_add() when seeking
the least popular.
I also decided that the id member is unchangeable, so I don't need to
-grab each object lock in :c:func:`__cache_find()` to examine the
+grab each object lock in __cache_find() to examine the
id: the object lock is only used by a caller who wants to read or write
the name field.
@@ -887,7 +887,7 @@ trivial to diagnose: not a
stay-up-five-nights-talk-to-fluffy-code-bunnies kind of problem.
For a slightly more complex case, imagine you have a region shared by a
-softirq and user context. If you use a :c:func:`spin_lock()` call
+softirq and user context. If you use a spin_lock() call
to protect it, it is possible that the user context will be interrupted
by the softirq while it holds the lock, and the softirq will then spin
forever trying to get the same lock.
@@ -985,12 +985,12 @@ you might do the following::
Sooner or later, this will crash on SMP, because a timer can have just
-gone off before the :c:func:`spin_lock_bh()`, and it will only get
-the lock after we :c:func:`spin_unlock_bh()`, and then try to free
+gone off before the spin_lock_bh(), and it will only get
+the lock after we spin_unlock_bh(), and then try to free
the element (which has already been freed!).
This can be avoided by checking the result of
-:c:func:`del_timer()`: if it returns 1, the timer has been deleted.
+del_timer(): if it returns 1, the timer has been deleted.
If 0, it means (in this case) that it is currently running, so we can
do::
@@ -1012,9 +1012,9 @@ do::
Another common problem is deleting timers which restart themselves (by
-calling :c:func:`add_timer()` at the end of their timer function).
+calling add_timer() at the end of their timer function).
Because this is a fairly common case which is prone to races, you should
-use :c:func:`del_timer_sync()` (``include/linux/timer.h``) to
+use del_timer_sync() (``include/linux/timer.h``) to
handle this case. It returns the number of times the timer had to be
deleted before we finally stopped it from adding itself back in.
@@ -1086,7 +1086,7 @@ adding ``new`` to a single linked list called ``list``::
list->next = new;
-The :c:func:`wmb()` is a write memory barrier. It ensures that the
+The wmb() is a write memory barrier. It ensures that the
first operation (setting the new element's ``next`` pointer) is complete
and will be seen by all CPUs, before the second operation is (putting
the new element into the list). This is important, since modern
@@ -1097,7 +1097,7 @@ rest of the list.
Fortunately, there is a function to do this for standard
:c:type:`struct list_head <list_head>` lists:
-:c:func:`list_add_rcu()` (``include/linux/list.h``).
+list_add_rcu() (``include/linux/list.h``).
Removing an element from the list is even simpler: we replace the
pointer to the old element with a pointer to its successor, and readers
@@ -1108,7 +1108,7 @@ will either see it, or skip over it.
list->next = old->next;
-There is :c:func:`list_del_rcu()` (``include/linux/list.h``) which
+There is list_del_rcu() (``include/linux/list.h``) which
does this (the normal version poisons the old object, which we don't
want).
@@ -1116,9 +1116,9 @@ The reader must also be careful: some CPUs can look through the ``next``
pointer to start reading the contents of the next element early, but
don't realize that the pre-fetched contents is wrong when the ``next``
pointer changes underneath them. Once again, there is a
-:c:func:`list_for_each_entry_rcu()` (``include/linux/list.h``)
+list_for_each_entry_rcu() (``include/linux/list.h``)
to help you. Of course, writers can just use
-:c:func:`list_for_each_entry()`, since there cannot be two
+list_for_each_entry(), since there cannot be two
simultaneous writers.
Our final dilemma is this: when can we actually destroy the removed
@@ -1127,14 +1127,14 @@ the list right now: if we free this element and the ``next`` pointer
changes, the reader will jump off into garbage and crash. We need to
wait until we know that all the readers who were traversing the list
when we deleted the element are finished. We use
-:c:func:`call_rcu()` to register a callback which will actually
+call_rcu() to register a callback which will actually
destroy the object once all pre-existing readers are finished.
-Alternatively, :c:func:`synchronize_rcu()` may be used to block
+Alternatively, synchronize_rcu() may be used to block
until all pre-existing are finished.
But how does Read Copy Update know when the readers are finished? The
method is this: firstly, the readers always traverse the list inside
-:c:func:`rcu_read_lock()`/:c:func:`rcu_read_unlock()` pairs:
+rcu_read_lock()/rcu_read_unlock() pairs:
these simply disable preemption so the reader won't go to sleep while
reading the list.
@@ -1223,12 +1223,12 @@ this is the fundamental idea.
}
Note that the reader will alter the popularity member in
-:c:func:`__cache_find()`, and now it doesn't hold a lock. One
+__cache_find(), and now it doesn't hold a lock. One
solution would be to make it an ``atomic_t``, but for this usage, we
don't really care about races: an approximate result is good enough, so
I didn't change it.
-The result is that :c:func:`cache_find()` requires no
+The result is that cache_find() requires no
synchronization with any other functions, so is almost as fast on SMP as
it would be on UP.
@@ -1240,9 +1240,9 @@ and put the reference count.
Now, because the 'read lock' in RCU is simply disabling preemption, a
caller which always has preemption disabled between calling
-:c:func:`cache_find()` and :c:func:`object_put()` does not
+cache_find() and object_put() does not
need to actually get and put the reference count: we could expose
-:c:func:`__cache_find()` by making it non-static, and such
+__cache_find() by making it non-static, and such
callers could simply call that.
The benefit here is that the reference count is not written to: the
@@ -1260,11 +1260,11 @@ counter. Nice and simple.
If that was too slow (it's usually not, but if you've got a really big
machine to test on and can show that it is), you could instead use a
counter for each CPU, then none of them need an exclusive lock. See
-:c:func:`DEFINE_PER_CPU()`, :c:func:`get_cpu_var()` and
-:c:func:`put_cpu_var()` (``include/linux/percpu.h``).
+DEFINE_PER_CPU(), get_cpu_var() and
+put_cpu_var() (``include/linux/percpu.h``).
Of particular use for simple per-cpu counters is the ``local_t`` type,
-and the :c:func:`cpu_local_inc()` and related functions, which are
+and the cpu_local_inc() and related functions, which are
more efficient than simple code on some architectures
(``include/asm/local.h``).
@@ -1289,10 +1289,10 @@ irq handler doesn't use a lock, and all other accesses are done as so::
enable_irq(irq);
spin_unlock(&lock);
-The :c:func:`disable_irq()` prevents the irq handler from running
+The disable_irq() prevents the irq handler from running
(and waits for it to finish if it's currently running on other CPUs).
The spinlock prevents any other accesses happening at the same time.
-Naturally, this is slower than just a :c:func:`spin_lock_irq()`
+Naturally, this is slower than just a spin_lock_irq()
call, so it only makes sense if this type of access happens extremely
rarely.
@@ -1315,22 +1315,22 @@ from user context, and can sleep.
- Accesses to userspace:
- - :c:func:`copy_from_user()`
+ - copy_from_user()
- - :c:func:`copy_to_user()`
+ - copy_to_user()
- - :c:func:`get_user()`
+ - get_user()
- - :c:func:`put_user()`
+ - put_user()
-- :c:func:`kmalloc(GFP_KERNEL) <kmalloc>`
+- kmalloc(GFP_KERNEL)
-- :c:func:`mutex_lock_interruptible()` and
- :c:func:`mutex_lock()`
+- mutex_lock_interruptible() and
+ mutex_lock()
- There is a :c:func:`mutex_trylock()` which does not sleep.
+ There is a mutex_trylock() which does not sleep.
Still, it must not be used inside interrupt context since its
- implementation is not safe for that. :c:func:`mutex_unlock()`
+ implementation is not safe for that. mutex_unlock()
will also never sleep. It cannot be used in interrupt context either
since a mutex must be released by the same task that acquired it.
@@ -1340,11 +1340,11 @@ Some Functions Which Don't Sleep
Some functions are safe to call from any context, or holding almost any
lock.
-- :c:func:`printk()`
+- printk()
-- :c:func:`kfree()`
+- kfree()
-- :c:func:`add_timer()` and :c:func:`del_timer()`
+- add_timer() and del_timer()
Mutex API reference
===================
@@ -1400,26 +1400,26 @@ preemption
bh
Bottom Half: for historical reasons, functions with '_bh' in them often
- now refer to any software interrupt, e.g. :c:func:`spin_lock_bh()`
+ now refer to any software interrupt, e.g. spin_lock_bh()
blocks any software interrupt on the current CPU. Bottom halves are
deprecated, and will eventually be replaced by tasklets. Only one bottom
half will be running at any time.
Hardware Interrupt / Hardware IRQ
- Hardware interrupt request. :c:func:`in_irq()` returns true in a
+ Hardware interrupt request. in_irq() returns true in a
hardware interrupt handler.
Interrupt Context
Not user context: processing a hardware irq or software irq. Indicated
- by the :c:func:`in_interrupt()` macro returning true.
+ by the in_interrupt() macro returning true.
SMP
Symmetric Multi-Processor: kernels compiled for multiple-CPU machines.
(``CONFIG_SMP=y``).
Software Interrupt / softirq
- Software interrupt handler. :c:func:`in_irq()` returns false;
- :c:func:`in_softirq()` returns true. Tasklets and softirqs both
+ Software interrupt handler. in_irq() returns false;
+ in_softirq() returns true. Tasklets and softirqs both
fall into the category of 'software interrupts'.
Strictly speaking a softirq is one of up to 32 enumerated software
diff --git a/Documentation/kref.txt b/Documentation/kref.txt
index 3af384156d7e..c61eea6f1bf2 100644
--- a/Documentation/kref.txt
+++ b/Documentation/kref.txt
@@ -128,6 +128,10 @@ since we already have a valid pointer that we own a refcount for. The
put needs no lock because nothing tries to get the data without
already holding a pointer.
+In the above example, kref_put() will be called 2 times in both success
+and error paths. This is necessary because the reference count got
+incremented 2 times by kref_init() and kref_get().
+
Note that the "before" in rule 1 is very important. You should never
do something like::
diff --git a/Documentation/media/kapi/v4l2-controls.rst b/Documentation/media/kapi/v4l2-controls.rst
index b20800cae3f2..5129019afb49 100644
--- a/Documentation/media/kapi/v4l2-controls.rst
+++ b/Documentation/media/kapi/v4l2-controls.rst
@@ -291,8 +291,8 @@ and QUERYMENU. And G/S_CTRL as well as G/TRY/S_EXT_CTRLS are automatically suppo
In practice the basic usage as described above is sufficient for most drivers.
-Inheriting Controls
--------------------
+Inheriting Sub-device Controls
+------------------------------
When a sub-device is registered with a V4L2 driver by calling
v4l2_device_register_subdev() and the ctrl_handler fields of both v4l2_subdev
@@ -757,8 +757,8 @@ attempting to find another control from the same handler will deadlock.
It is recommended not to use this function from inside the control ops.
-Inheriting Controls
--------------------
+Preventing Controls inheritance
+-------------------------------
When one control handler is added to another using v4l2_ctrl_add_handler, then
by default all controls from one are merged to the other. But a subdev might
diff --git a/Documentation/misc-devices/index.rst b/Documentation/misc-devices/index.rst
index f11c5daeada5..c1dcd2628911 100644
--- a/Documentation/misc-devices/index.rst
+++ b/Documentation/misc-devices/index.rst
@@ -20,4 +20,5 @@ fit into other categories.
isl29003
lis3lv02d
max6875
+ mic/index
xilinx_sdfec
diff --git a/Documentation/mic/index.rst b/Documentation/misc-devices/mic/index.rst
index 3a8d06367ef1..3a8d06367ef1 100644
--- a/Documentation/mic/index.rst
+++ b/Documentation/misc-devices/mic/index.rst
diff --git a/Documentation/mic/mic_overview.rst b/Documentation/misc-devices/mic/mic_overview.rst
index 17d956bdaf7c..17d956bdaf7c 100644
--- a/Documentation/mic/mic_overview.rst
+++ b/Documentation/misc-devices/mic/mic_overview.rst
diff --git a/Documentation/mic/scif_overview.rst b/Documentation/misc-devices/mic/scif_overview.rst
index 4c8ad9e43706..4c8ad9e43706 100644
--- a/Documentation/mic/scif_overview.rst
+++ b/Documentation/misc-devices/mic/scif_overview.rst
diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
index 1a7683e7acb2..8b46e8591fe0 100644
--- a/Documentation/networking/devlink/devlink-region.rst
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -40,9 +40,6 @@ example usage
# Delete a snapshot using:
$ devlink region del pci/0000:00:05.0/cr-space snapshot 1
- # Trigger (request) a snapshot be taken:
- $ devlink region trigger pci/0000:00:05.0/cr-space
-
# Dump a snapshot:
$ devlink region dump pci/0000:00:05.0/fw-health snapshot 1
0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30
diff --git a/Documentation/networking/net_failover.rst b/Documentation/networking/net_failover.rst
index 06c97dcb57ca..e143ab79a960 100644
--- a/Documentation/networking/net_failover.rst
+++ b/Documentation/networking/net_failover.rst
@@ -8,9 +8,9 @@ Overview
========
The net_failover driver provides an automated failover mechanism via APIs
-to create and destroy a failover master netdev and mananges a primary and
+to create and destroy a failover master netdev and manages a primary and
standby slave netdevs that get registered via the generic failover
-infrastructrure.
+infrastructure.
The failover netdev acts a master device and controls 2 slave devices. The
original paravirtual interface is registered as 'standby' slave netdev and
@@ -29,7 +29,7 @@ virtio-net accelerated datapath: STANDBY mode
=============================================
net_failover enables hypervisor controlled accelerated datapath to virtio-net
-enabled VMs in a transparent manner with no/minimal guest userspace chanages.
+enabled VMs in a transparent manner with no/minimal guest userspace changes.
To support this, the hypervisor needs to enable VIRTIO_NET_F_STANDBY
feature on the virtio-net interface and assign the same MAC address to both
diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt
index f2a0147c933d..eec61694e894 100644
--- a/Documentation/networking/rds.txt
+++ b/Documentation/networking/rds.txt
@@ -159,7 +159,7 @@ Socket Interface
set SO_RDS_TRANSPORT on a socket for which the transport has
been previously attached explicitly (by SO_RDS_TRANSPORT) or
implicitly (via bind(2)) will return an error of EOPNOTSUPP.
- An attempt to set SO_RDS_TRANSPPORT to RDS_TRANS_NONE will
+ An attempt to set SO_RDS_TRANSPORT to RDS_TRANS_NONE will
always return EINVAL.
RDMA for RDS
diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst
index 38a4edc4522b..10e11099e74a 100644
--- a/Documentation/networking/snmp_counter.rst
+++ b/Documentation/networking/snmp_counter.rst
@@ -908,8 +908,8 @@ A TLP probe packet is sent.
A packet loss is detected and recovered by TLP.
-TCP Fast Open
-=============
+TCP Fast Open description
+=========================
TCP Fast Open is a technology which allows data transfer before the
3-way handshake complete. Please refer the `TCP Fast Open wiki`_ for a
general description.
diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst
index 363736d7fd36..df136c8f91fa 100644
--- a/Documentation/powerpc/ultravisor.rst
+++ b/Documentation/powerpc/ultravisor.rst
@@ -8,8 +8,8 @@ Protected Execution Facility
.. contents::
:depth: 3
-Protected Execution Facility
-############################
+Introduction
+############
Protected Execution Facility (PEF) is an architectural change for
POWER 9 that enables Secure Virtual Machines (SVMs). DD2.3 chips
diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst
index ae020d84d7c4..b21b5b245d13 100644
--- a/Documentation/process/2.Process.rst
+++ b/Documentation/process/2.Process.rst
@@ -18,18 +18,18 @@ major kernel release happening every two or three months. The recent
release history looks like this:
====== =================
- 4.11 April 30, 2017
- 4.12 July 2, 2017
- 4.13 September 3, 2017
- 4.14 November 12, 2017
- 4.15 January 28, 2018
- 4.16 April 1, 2018
+ 5.0 March 3, 2019
+ 5.1 May 5, 2019
+ 5.2 July 7, 2019
+ 5.3 September 15, 2019
+ 5.4 November 24, 2019
+ 5.5 January 6, 2020
====== =================
-Every 4.x release is a major kernel release with new features, internal
-API changes, and more. A typical 4.x release contain about 13,000
-changesets with changes to several hundred thousand lines of code. 4.x is
-thus the leading edge of Linux kernel development; the kernel uses a
+Every 5.x release is a major kernel release with new features, internal
+API changes, and more. A typical release can contain about 13,000
+changesets with changes to several hundred thousand lines of code. 5.x is
+the leading edge of Linux kernel development; the kernel uses a
rolling development model which is continually integrating major changes.
A relatively straightforward discipline is followed with regard to the
@@ -48,9 +48,9 @@ detail later on).
The merge window lasts for approximately two weeks. At the end of this
time, Linus Torvalds will declare that the window is closed and release the
-first of the "rc" kernels. For the kernel which is destined to be 2.6.40,
+first of the "rc" kernels. For the kernel which is destined to be 5.6,
for example, the release which happens at the end of the merge window will
-be called 2.6.40-rc1. The -rc1 release is the signal that the time to
+be called 5.6-rc1. The -rc1 release is the signal that the time to
merge new features has passed, and that the time to stabilize the next
kernel has begun.
@@ -67,22 +67,23 @@ add at any time).
As fixes make their way into the mainline, the patch rate will slow over
time. Linus releases new -rc kernels about once a week; a normal series
will get up to somewhere between -rc6 and -rc9 before the kernel is
-considered to be sufficiently stable and the final 2.6.x release is made.
+considered to be sufficiently stable and the final release is made.
At that point the whole process starts over again.
-As an example, here is how the 4.16 development cycle went (all dates in
-2018):
+As an example, here is how the 5.4 development cycle went (all dates in
+2019):
============== ===============================
- January 28 4.15 stable release
- February 11 4.16-rc1, merge window closes
- February 18 4.16-rc2
- February 25 4.16-rc3
- March 4 4.16-rc4
- March 11 4.16-rc5
- March 18 4.16-rc6
- March 25 4.16-rc7
- April 1 4.16 stable release
+ September 15 5.3 stable release
+ September 30 5.4-rc1, merge window closes
+ October 6 5.4-rc2
+ October 13 5.4-rc3
+ October 20 5.4-rc4
+ October 27 5.4-rc5
+ November 3 5.4-rc6
+ November 10 5.4-rc7
+ November 17 5.4-rc8
+ November 24 5.4 stable release
============== ===============================
How do the developers decide when to close the development cycle and create
@@ -98,43 +99,44 @@ release is made. In the real world, this kind of perfection is hard to
achieve; there are just too many variables in a project of this size.
There comes a point where delaying the final release just makes the problem
worse; the pile of changes waiting for the next merge window will grow
-larger, creating even more regressions the next time around. So most 4.x
+larger, creating even more regressions the next time around. So most 5.x
kernels go out with a handful of known regressions though, hopefully, none
of them are serious.
Once a stable release is made, its ongoing maintenance is passed off to the
-"stable team," currently consisting of Greg Kroah-Hartman. The stable team
-will release occasional updates to the stable release using the 4.x.y
-numbering scheme. To be considered for an update release, a patch must (1)
-fix a significant bug, and (2) already be merged into the mainline for the
-next development kernel. Kernels will typically receive stable updates for
-a little more than one development cycle past their initial release. So,
-for example, the 4.13 kernel's history looked like:
+"stable team," currently Greg Kroah-Hartman. The stable team will release
+occasional updates to the stable release using the 5.x.y numbering scheme.
+To be considered for an update release, a patch must (1) fix a significant
+bug, and (2) already be merged into the mainline for the next development
+kernel. Kernels will typically receive stable updates for a little more
+than one development cycle past their initial release. So, for example, the
+5.2 kernel's history looked like this (all dates in 2019):
============== ===============================
- September 3 4.13 stable release
- September 13 4.13.1
- September 20 4.13.2
- September 27 4.13.3
- October 5 4.13.4
- October 12 4.13.5
+ July 7 5.2 stable release
+ July 14 5.2.1
+ July 21 5.2.2
+ July 26 5.2.3
+ July 28 5.2.4
+ July 31 5.2.5
... ...
- November 24 4.13.16
+ October 11 5.2.21
============== ===============================
-4.13.16 was the final stable update of the 4.13 release.
+5.2.21 was the final stable update of the 5.2 release.
Some kernels are designated "long term" kernels; they will receive support
for a longer period. As of this writing, the current long term kernels
and their maintainers are:
- ====== ====================== ==============================
- 3.16 Ben Hutchings (very long-term stable kernel)
- 4.1 Sasha Levin
- 4.4 Greg Kroah-Hartman (very long-term stable kernel)
- 4.9 Greg Kroah-Hartman
- 4.14 Greg Kroah-Hartman
- ====== ====================== ==============================
+ ====== ================================ =======================
+ 3.16 Ben Hutchings (very long-term kernel)
+ 4.4 Greg Kroah-Hartman & Sasha Levin (very long-term kernel)
+ 4.9 Greg Kroah-Hartman & Sasha Levin
+ 4.14 Greg Kroah-Hartman & Sasha Levin
+ 4.19 Greg Kroah-Hartman & Sasha Levin
+ 5.4 Greg Kroah-Hartman & Sasha Levin
+ ====== ================================ =======================
The selection of a kernel for long-term support is purely a matter of a
maintainer having the need and the time to maintain that release. There
@@ -215,12 +217,12 @@ How patches get into the Kernel
-------------------------------
There is exactly one person who can merge patches into the mainline kernel
-repository: Linus Torvalds. But, of the over 9,500 patches which went
-into the 2.6.38 kernel, only 112 (around 1.3%) were directly chosen by Linus
-himself. The kernel project has long since grown to a size where no single
-developer could possibly inspect and select every patch unassisted. The
-way the kernel developers have addressed this growth is through the use of
-a lieutenant system built around a chain of trust.
+repository: Linus Torvalds. But, for example, of the over 9,500 patches
+which went into the 2.6.38 kernel, only 112 (around 1.3%) were directly
+chosen by Linus himself. The kernel project has long since grown to a size
+where no single developer could possibly inspect and select every patch
+unassisted. The way the kernel developers have addressed this growth is
+through the use of a lieutenant system built around a chain of trust.
The kernel code base is logically broken down into a set of subsystems:
networking, specific architecture support, memory management, video
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index edb296c52f61..acb2f1b36350 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -284,9 +284,9 @@ context lines.
4) Naming
---------
-C is a Spartan language, and so should your naming be. Unlike Modula-2
-and Pascal programmers, C programmers do not use cute names like
-ThisVariableIsATemporaryCounter. A C programmer would call that
+C is a Spartan language, and your naming conventions should follow suit.
+Unlike Modula-2 and Pascal programmers, C programmers do not use cute
+names like ThisVariableIsATemporaryCounter. A C programmer would call that
variable ``tmp``, which is much easier to write, and not the least more
difficult to understand.
@@ -300,9 +300,9 @@ that counts the number of active users, you should call that
``count_active_users()`` or similar, you should **not** call it ``cntusr()``.
Encoding the type of a function into the name (so-called Hungarian
-notation) is brain damaged - the compiler knows the types anyway and can
-check those, and it only confuses the programmer. No wonder MicroSoft
-makes buggy programs.
+notation) is asinine - the compiler knows the types anyway and can check
+those, and it only confuses the programmer. No wonder Microsoft makes buggy
+programs.
LOCAL variable names should be short, and to the point. If you have
some random integer loop counter, it should probably be called ``i``.
@@ -806,9 +806,9 @@ covers RTL which is used frequently with assembly language in the kernel.
----------------------------
Kernel developers like to be seen as literate. Do mind the spelling
-of kernel messages to make a good impression. Do not use crippled
-words like ``dont``; use ``do not`` or ``don't`` instead. Make the messages
-concise, clear, and unambiguous.
+of kernel messages to make a good impression. Do not use incorrect
+contractions like ``dont``; use ``do not`` or ``don't`` instead. Make the
+messages concise, clear, and unambiguous.
Kernel messages do not have to be terminated with a period.
diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
index 179f2a5625a0..652e2aa02a66 100644
--- a/Documentation/process/deprecated.rst
+++ b/Documentation/process/deprecated.rst
@@ -29,6 +29,28 @@ a header file, it isn't the full solution. Such interfaces must either
be fully removed from the kernel, or added to this file to discourage
others from using them in the future.
+BUG() and BUG_ON()
+------------------
+Use WARN() and WARN_ON() instead, and handle the "impossible"
+error condition as gracefully as possible. While the BUG()-family
+of APIs were originally designed to act as an "impossible situation"
+assert and to kill a kernel thread "safely", they turn out to just be
+too risky. (e.g. "In what order do locks need to be released? Have
+various states been restored?") Very commonly, using BUG() will
+destabilize a system or entirely break it, which makes it impossible
+to debug or even get viable crash reports. Linus has `very strong
+<https://lore.kernel.org/lkml/CA+55aFy6jNLsywVYdGp83AMrXBo_P-pkjkphPGrO=82SPKCpLQ@mail.gmail.com/>`_
+feelings `about this
+<https://lore.kernel.org/lkml/CAHk-=whDHsbK3HTOpTF=ue_o04onRwTEaK_ZoJp_fjbqq4+=Jw@mail.gmail.com/>`_.
+
+Note that the WARN()-family should only be used for "expected to
+be unreachable" situations. If you want to warn about "reachable
+but undesirable" situations, please use the pr_warn()-family of
+functions. System owners may have set the *panic_on_warn* sysctl,
+to make sure their systems do not continue running in the face of
+"unreachable" conditions. (For example, see commits like `this one
+<https://git.kernel.org/linus/d4689846881d160a4d12a514e991a740bcb5d65a>`_.)
+
open-coded arithmetic in allocator arguments
--------------------------------------------
Dynamic size calculations (especially multiplication) should not be
@@ -63,51 +85,73 @@ Instead, use the helper::
header = kzalloc(struct_size(header, item, count), GFP_KERNEL);
-See :c:func:`array_size`, :c:func:`array3_size`, and :c:func:`struct_size`,
-for more details as well as the related :c:func:`check_add_overflow` and
-:c:func:`check_mul_overflow` family of functions.
+See array_size(), array3_size(), and struct_size(),
+for more details as well as the related check_add_overflow() and
+check_mul_overflow() family of functions.
simple_strtol(), simple_strtoll(), simple_strtoul(), simple_strtoull()
----------------------------------------------------------------------
-The :c:func:`simple_strtol`, :c:func:`simple_strtoll`,
-:c:func:`simple_strtoul`, and :c:func:`simple_strtoull` functions
+The simple_strtol(), simple_strtoll(),
+simple_strtoul(), and simple_strtoull() functions
explicitly ignore overflows, which may lead to unexpected results
-in callers. The respective :c:func:`kstrtol`, :c:func:`kstrtoll`,
-:c:func:`kstrtoul`, and :c:func:`kstrtoull` functions tend to be the
+in callers. The respective kstrtol(), kstrtoll(),
+kstrtoul(), and kstrtoull() functions tend to be the
correct replacements, though note that those require the string to be
NUL or newline terminated.
strcpy()
--------
-:c:func:`strcpy` performs no bounds checking on the destination
+strcpy() performs no bounds checking on the destination
buffer. This could result in linear overflows beyond the
end of the buffer, leading to all kinds of misbehaviors. While
`CONFIG_FORTIFY_SOURCE=y` and various compiler flags help reduce the
risk of using this function, there is no good reason to add new uses of
-this function. The safe replacement is :c:func:`strscpy`.
+this function. The safe replacement is strscpy().
strncpy() on NUL-terminated strings
-----------------------------------
-Use of :c:func:`strncpy` does not guarantee that the destination buffer
+Use of strncpy() does not guarantee that the destination buffer
will be NUL terminated. This can lead to various linear read overflows
and other misbehavior due to the missing termination. It also NUL-pads the
destination buffer if the source contents are shorter than the destination
buffer size, which may be a needless performance penalty for callers using
-only NUL-terminated strings. The safe replacement is :c:func:`strscpy`.
-(Users of :c:func:`strscpy` still needing NUL-padding will need an
-explicit :c:func:`memset` added.)
+only NUL-terminated strings. The safe replacement is strscpy().
+(Users of strscpy() still needing NUL-padding should instead
+use strscpy_pad().)
-If a caller is using non-NUL-terminated strings, :c:func:`strncpy()` can
+If a caller is using non-NUL-terminated strings, strncpy() can
still be used, but destinations should be marked with the `__nonstring
<https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html>`_
attribute to avoid future compiler warnings.
strlcpy()
---------
-:c:func:`strlcpy` reads the entire source buffer first, possibly exceeding
+strlcpy() reads the entire source buffer first, possibly exceeding
the given limit of bytes to copy. This is inefficient and can lead to
linear read overflows if a source string is not NUL-terminated. The
-safe replacement is :c:func:`strscpy`.
+safe replacement is strscpy().
+
+%p format specifier
+-------------------
+Traditionally, using "%p" in format strings would lead to regular address
+exposure flaws in dmesg, proc, sysfs, etc. Instead of leaving these to
+be exploitable, all "%p" uses in the kernel are being printed as a hashed
+value, rendering them unusable for addressing. New uses of "%p" should not
+be added to the kernel. For text addresses, using "%pS" is likely better,
+as it produces the more useful symbol name instead. For nearly everything
+else, just do not add "%p" at all.
+
+Paraphrasing Linus's current `guidance <https://lore.kernel.org/lkml/CA+55aFwQEd_d40g4mUCSsVRZzrFPUJt74vc6PPpb675hYNXcKw@mail.gmail.com/>`_:
+
+- If the hashed "%p" value is pointless, ask yourself whether the pointer
+ itself is important. Maybe it should be removed entirely?
+- If you really think the true pointer value is important, why is some
+ system state or user privilege level considered "special"? If you think
+ you can justify it (in comments and commit log) well enough to stand
+ up to Linus's scrutiny, maybe you can use "%px", along with making sure
+ you have sensible permissions.
+
+And finally, know that a toggle for "%p" hashing will `not be accepted <https://lore.kernel.org/lkml/CA+55aFwieC1-nAs+NFq9RTwaR8ef9hWa4MjNBWL41F-8wM49eA@mail.gmail.com/>`_.
Variable Length Arrays (VLAs)
-----------------------------
@@ -122,27 +166,37 @@ memory adjacent to the stack (when built without `CONFIG_VMAP_STACK=y`)
Implicit switch case fall-through
---------------------------------
-The C language allows switch cases to "fall-through" when a "break" statement
-is missing at the end of a case. This, however, introduces ambiguity in the
-code, as it's not always clear if the missing break is intentional or a bug.
+The C language allows switch cases to fall through to the next case
+when a "break" statement is missing at the end of a case. This, however,
+introduces ambiguity in the code, as it's not always clear if the missing
+break is intentional or a bug. For example, it's not obvious just from
+looking at the code if `STATE_ONE` is intentionally designed to fall
+through into `STATE_TWO`::
+
+ switch (value) {
+ case STATE_ONE:
+ do_something();
+ case STATE_TWO:
+ do_other();
+ break;
+ default:
+ WARN(1, "unknown state");
+ }
As there have been a long list of flaws `due to missing "break" statements
<https://cwe.mitre.org/data/definitions/484.html>`_, we no longer allow
-"implicit fall-through".
-
-In order to identify intentional fall-through cases, we have adopted a
-pseudo-keyword macro 'fallthrough' which expands to gcc's extension
-__attribute__((__fallthrough__)). `Statement Attributes
-<https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html>`_
-
-When the C17/C18 [[fallthrough]] syntax is more commonly supported by
+implicit fall-through. In order to identify intentional fall-through
+cases, we have adopted a pseudo-keyword macro "fallthrough" which
+expands to gcc's extension `__attribute__((__fallthrough__))
+<https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html>`_.
+(When the C17/C18 `[[fallthrough]]` syntax is more commonly supported by
C compilers, static analyzers, and IDEs, we can switch to using that syntax
-for the macro pseudo-keyword.
+for the macro pseudo-keyword.)
All switch/case blocks must end in one of:
- break;
- fallthrough;
- continue;
- goto <label>;
- return [expression];
+* break;
+* fallthrough;
+* continue;
+* goto <label>;
+* return [expression];
diff --git a/Documentation/process/email-clients.rst b/Documentation/process/email-clients.rst
index 5273d06c8ff6..c9e4ce2613c0 100644
--- a/Documentation/process/email-clients.rst
+++ b/Documentation/process/email-clients.rst
@@ -237,9 +237,9 @@ using Mutt to send patches through Gmail::
The Mutt docs have lots more information:
- http://dev.mutt.org/trac/wiki/UseCases/Gmail
+ https://gitlab.com/muttmua/mutt/-/wikis/UseCases/Gmail
- http://dev.mutt.org/doc/manual.html
+ http://www.mutt.org/doc/manual/
Pine (TUI)
**********
diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst
index b6f5a379ad6c..70791e153de1 100644
--- a/Documentation/process/howto.rst
+++ b/Documentation/process/howto.rst
@@ -243,10 +243,10 @@ branches. These different branches are:
Mainline tree
~~~~~~~~~~~~~
-Mainline tree are maintained by Linus Torvalds, and can be found at
+The mainline tree is maintained by Linus Torvalds, and can be found at
https://kernel.org or in the repo. Its development process is as follows:
- - As soon as a new kernel is released a two weeks window is open,
+ - As soon as a new kernel is released a two-week window is open,
during this period of time maintainers can submit big diffs to
Linus, usually the patches that have already been included in the
linux-next for a few weeks. The preferred way to submit big changes
@@ -281,8 +281,9 @@ Various stable trees with multiple major numbers
Kernels with 3-part versions are -stable kernels. They contain
relatively small and critical fixes for security problems or significant
-regressions discovered in a given major mainline release, with the first
-2-part of version number are the same correspondingly.
+regressions discovered in a given major mainline release. Each release
+in a major stable series increments the third part of the version
+number, keeping the first two parts the same.
This is the recommended branch for users who want the most recent stable
kernel and are not interested in helping test development/experimental
@@ -359,10 +360,10 @@ Managing bug reports
One of the best ways to put into practice your hacking skills is by fixing
bugs reported by other people. Not only you will help to make the kernel
-more stable, you'll learn to fix real world problems and you will improve
-your skills, and other developers will be aware of your presence. Fixing
-bugs is one of the best ways to get merits among other developers, because
-not many people like wasting time fixing other people's bugs.
+more stable, but you'll also learn to fix real world problems and you will
+improve your skills, and other developers will be aware of your presence.
+Fixing bugs is one of the best ways to get merits among other developers,
+because not many people like wasting time fixing other people's bugs.
To work in the already reported bug reports, go to https://bugzilla.kernel.org.
diff --git a/Documentation/process/kernel-docs.rst b/Documentation/process/kernel-docs.rst
index 7a45a8e36ea7..9d6d0ac4fca9 100644
--- a/Documentation/process/kernel-docs.rst
+++ b/Documentation/process/kernel-docs.rst
@@ -313,7 +313,7 @@ On-line docs
:URL: http://www.linuxjournal.com/article.php?sid=2391
:Date: 1997
:Keywords: RAID, MD driver.
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *A description of the implementation of the RAID-1,
RAID-4 and RAID-5 personalities of the MD device driver in the
Linux kernel, providing users with high performance and reliable,
@@ -338,7 +338,7 @@ On-line docs
:Date: 1996
:Keywords: device driver, module, loading/unloading modules,
allocating resources.
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *This is the first of a series of four articles
co-authored by Alessandro Rubini and Georg Zezchwitz which present
a practical approach to writing Linux device drivers as kernel
@@ -354,7 +354,7 @@ On-line docs
:Keywords: character driver, init_module, clean_up module,
autodetection, mayor number, minor number, file operations,
open(), close().
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *This article, the second of four, introduces part of
the actual code to create custom module implementing a character
device driver. It describes the code for module initialization and
@@ -367,7 +367,7 @@ On-line docs
:Date: 1996
:Keywords: read(), write(), select(), ioctl(), blocking/non
blocking mode, interrupt handler.
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *This article, the third of four on writing character
device drivers, introduces concepts of reading, writing, and using
ioctl-calls*.
@@ -378,7 +378,7 @@ On-line docs
:URL: http://www.linuxjournal.com/article.php?sid=1222
:Date: 1996
:Keywords: interrupts, irqs, DMA, bottom halves, task queues.
- :Description: Linux Journal Kernel Korner article. Here is its
+ :Description: Linux Journal Kernel Korner article.
:Abstract: *This is the fourth in a series of articles about
writing character device drivers as loadable kernel modules. This
month, we further investigate the field of interrupt handling.
diff --git a/Documentation/process/management-style.rst b/Documentation/process/management-style.rst
index 186753ff3d2d..dfbc69bf49d4 100644
--- a/Documentation/process/management-style.rst
+++ b/Documentation/process/management-style.rst
@@ -227,7 +227,7 @@ incompetence will grudgingly admit that you at least didn't try to weasel
out of it.
Then make the developer who really screwed up (if you can find them) know
-**in_private** that they screwed up. Not just so they can avoid it in the
+**in private** that they screwed up. Not just so they can avoid it in the
future, but so that they know they owe you one. And, perhaps even more
importantly, they're also likely the person who can fix it. Because, let's
face it, it sure ain't you.
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index 2a4be1c3e6db..537f04728487 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -299,7 +299,6 @@ Summary:
scsi_host_alloc - return a new scsi_host instance whose refcount==1
scsi_host_get - increments Scsi_Host instance's refcount
scsi_host_put - decrements Scsi_Host instance's refcount (free if 0)
- scsi_partsize - parse partition table into cylinders, heads + sectors
scsi_register - create and register a scsi host adapter instance.
scsi_remove_device - detach and remove a SCSI device
scsi_remove_host - detach and remove all SCSI devices owned by host
@@ -473,26 +472,6 @@ void scsi_host_put(struct Scsi_Host *shost)
/**
- * scsi_partsize - parse partition table into cylinders, heads + sectors
- * @buf: pointer to partition table
- * @capacity: size of (total) disk in 512 byte sectors
- * @cyls: outputs number of cylinders calculated via this pointer
- * @hds: outputs number of heads calculated via this pointer
- * @secs: outputs number of sectors calculated via this pointer
- *
- * Returns 0 on success, -1 on failure
- *
- * Might block: no
- *
- * Notes: Caller owns memory returned (free with kfree() )
- *
- * Defined in: drivers/scsi/scsicam.c
- **/
-int scsi_partsize(unsigned char *buf, unsigned long capacity,
- unsigned int *cyls, unsigned int *hds, unsigned int *secs)
-
-
-/**
* scsi_register - create and register a scsi host adapter instance.
* @sht: pointer to scsi host template
* @privsize: extra bytes to allocate in hostdata array (which is the
diff --git a/Documentation/security/siphash.rst b/Documentation/security/siphash.rst
index 9965821ab333..4eba68cdf0a1 100644
--- a/Documentation/security/siphash.rst
+++ b/Documentation/security/siphash.rst
@@ -128,8 +128,8 @@ then when you can be absolutely certain that the outputs will never be
transmitted out of the kernel. This is only remotely useful over `jhash` as a
means of mitigating hashtable flooding denial of service attacks.
-Generating a key
-================
+Generating a HalfSipHash key
+============================
Keys should always be generated from a cryptographically secure source of
random numbers, either using get_random_bytes or get_random_once:
@@ -139,8 +139,8 @@ get_random_bytes(&key, sizeof(key));
If you're not deriving your key from here, you're doing it wrong.
-Using the functions
-===================
+Using the HalfSipHash functions
+===============================
There are two variants of the function, one that takes a list of integers, and
one that takes a buffer::
diff --git a/Documentation/target/tcmu-design.rst b/Documentation/target/tcmu-design.rst
index a7b426707bf6..e47047e32e27 100644
--- a/Documentation/target/tcmu-design.rst
+++ b/Documentation/target/tcmu-design.rst
@@ -5,7 +5,7 @@ TCM Userspace Design
.. Contents:
- 1) TCM Userspace Design
+ 1) Design
a) Background
b) Benefits
c) Design constraints
@@ -23,8 +23,8 @@ TCM Userspace Design
3) A final note
-TCM Userspace Design
-====================
+Design
+======
TCM is another name for LIO, an in-kernel iSCSI target (server).
Existing TCM targets run in the kernel. TCMU (TCM in Userspace)
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index ed79b220bd07..4a2ebe0bd19b 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -342,7 +342,8 @@ section of Documentation/trace/ftrace.rst), but there are major
differences and the implementation isn't currently tied to it in any
way, so beware about making generalizations between the two.
-Note: Writing into trace_marker (See Documentation/trace/ftrace.rst)
+.. Note::
+ Writing into trace_marker (See Documentation/trace/ftrace.rst)
can also enable triggers that are written into
/sys/kernel/tracing/events/ftrace/print/trigger
@@ -569,14 +570,14 @@ The first creates the event in one step, using synth_event_create().
In this method, the name of the event to create and an array defining
the fields is supplied to synth_event_create(). If successful, a
synthetic event with that name and fields will exist following that
-call. For example, to create a new "schedtest" synthetic event:
+call. For example, to create a new "schedtest" synthetic event::
ret = synth_event_create("schedtest", sched_fields,
ARRAY_SIZE(sched_fields), THIS_MODULE);
The sched_fields param in this example points to an array of struct
synth_field_desc, each of which describes an event field by type and
-name:
+name::
static struct synth_field_desc sched_fields[] = {
{ .type = "pid_t", .name = "next_pid_field" },
@@ -615,7 +616,7 @@ synth_event_gen_cmd_array_start(), the user should create and
initialize a dynevent_cmd object using synth_event_cmd_init().
For example, to create a new "schedtest" synthetic event with two
-fields:
+fields::
struct dynevent_cmd cmd;
char *buf;
@@ -631,7 +632,7 @@ fields:
"u64", "ts_ns");
Alternatively, using an array of struct synth_field_desc fields
-containing the same information:
+containing the same information::
ret = synth_event_gen_cmd_array_start(&cmd, "schedtest", THIS_MODULE,
fields, n_fields);
@@ -640,7 +641,7 @@ Once the synthetic event object has been created, it can then be
populated with more fields. Fields are added one by one using
synth_event_add_field(), supplying the dynevent_cmd object, a field
type, and a field name. For example, to add a new int field named
-"intfield", the following call should be made:
+"intfield", the following call should be made::
ret = synth_event_add_field(&cmd, "int", "intfield");
@@ -649,7 +650,7 @@ the field is considered to be an array.
A group of fields can also be added all at once using an array of
synth_field_desc with add_synth_fields(). For example, this would add
-just the first four sched_fields:
+just the first four sched_fields::
ret = synth_event_add_fields(&cmd, sched_fields, 4);
@@ -658,7 +659,7 @@ synth_event_add_field_str() can be used to add it as-is; it will
also automatically append a ';' to the string.
Once all the fields have been added, the event should be finalized and
-registered by calling the synth_event_gen_cmd_end() function:
+registered by calling the synth_event_gen_cmd_end() function::
ret = synth_event_gen_cmd_end(&cmd);
@@ -691,7 +692,7 @@ trace array)), along with an variable number of u64 args, one for each
synthetic event field, and the number of values being passed.
So, to trace an event corresponding to the synthetic event definition
-above, code like the following could be used:
+above, code like the following could be used::
ret = synth_event_trace(create_synth_test, 7, /* number of values */
444, /* next_pid_field */
@@ -715,7 +716,7 @@ trace array)), along with an array of u64, one for each synthetic
event field.
To trace an event corresponding to the synthetic event definition
-above, code like the following could be used:
+above, code like the following could be used::
u64 vals[7];
@@ -739,7 +740,7 @@ In order to trace a synthetic event, a pointer to the trace event file
is needed. The trace_get_event_file() function can be used to get
it - it will find the file in the given trace instance (in this case
NULL since the top trace array is being used) while at the same time
-preventing the instance containing it from going away:
+preventing the instance containing it from going away::
schedtest_event_file = trace_get_event_file(NULL, "synthetic",
"schedtest");
@@ -751,31 +752,31 @@ To enable a synthetic event from the kernel, trace_array_set_clr_event()
can be used (which is not specific to synthetic events, so does need
the "synthetic" system name to be specified explicitly).
-To enable the event, pass 'true' to it:
+To enable the event, pass 'true' to it::
trace_array_set_clr_event(schedtest_event_file->tr,
"synthetic", "schedtest", true);
-To disable it pass false:
+To disable it pass false::
trace_array_set_clr_event(schedtest_event_file->tr,
"synthetic", "schedtest", false);
Finally, synth_event_trace_array() can be used to actually trace the
-event, which should be visible in the trace buffer afterwards:
+event, which should be visible in the trace buffer afterwards::
ret = synth_event_trace_array(schedtest_event_file, vals,
ARRAY_SIZE(vals));
To remove the synthetic event, the event should be disabled, and the
-trace instance should be 'put' back using trace_put_event_file():
+trace instance should be 'put' back using trace_put_event_file()::
trace_array_set_clr_event(schedtest_event_file->tr,
"synthetic", "schedtest", false);
trace_put_event_file(schedtest_event_file);
If those have been successful, synth_event_delete() can be called to
-remove the event:
+remove the event::
ret = synth_event_delete("schedtest");
@@ -784,7 +785,7 @@ remove the event:
To trace a synthetic using the piecewise method described above, the
synth_event_trace_start() function is used to 'open' the synthetic
-event trace:
+event trace::
struct synth_trace_state trace_state;
@@ -809,7 +810,7 @@ along with the value to set the next field in the event. After each
field is set, the 'cursor' points to the next field, which will be set
by the subsequent call, continuing until all the fields have been set
in order. The same sequence of calls as in the above examples using
-this method would be (without error-handling code):
+this method would be (without error-handling code)::
/* next_pid_field */
ret = synth_event_add_next_val(777, &trace_state);
@@ -837,7 +838,7 @@ used. Each call is passed the same synth_trace_state object used in
the synth_event_trace_start(), along with the field name of the field
to set and the value to set it to. The same sequence of calls as in
the above examples using this method would be (without error-handling
-code):
+code)::
ret = synth_event_add_val("next_pid_field", 777, &trace_state);
ret = synth_event_add_val("next_comm_field", (u64)"silly putty",
@@ -855,7 +856,7 @@ can be used but not both at the same time.
Finally, the event won't be actually traced until it's 'closed',
which is done using synth_event_trace_end(), which takes only the
-struct synth_trace_state object used in the previous calls:
+struct synth_trace_state object used in the previous calls::
ret = synth_event_trace_end(&trace_state);
@@ -878,7 +879,7 @@ function. Before calling kprobe_event_gen_cmd_start(), the user
should create and initialize a dynevent_cmd object using
kprobe_event_cmd_init().
-For example, to create a new "schedtest" kprobe event with two fields:
+For example, to create a new "schedtest" kprobe event with two fields::
struct dynevent_cmd cmd;
char *buf;
@@ -900,18 +901,18 @@ Once the kprobe event object has been created, it can then be
populated with more fields. Fields can be added using
kprobe_event_add_fields(), supplying the dynevent_cmd object along
with a variable arg list of probe fields. For example, to add a
-couple additional fields, the following call could be made:
+couple additional fields, the following call could be made::
ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)");
Once all the fields have been added, the event should be finalized and
registered by calling the kprobe_event_gen_cmd_end() or
kretprobe_event_gen_cmd_end() functions, depending on whether a kprobe
-or kretprobe command was started:
+or kretprobe command was started::
ret = kprobe_event_gen_cmd_end(&cmd);
-or
+or::
ret = kretprobe_event_gen_cmd_end(&cmd);
@@ -920,13 +921,13 @@ events.
Similarly, a kretprobe event can be created using
kretprobe_event_gen_cmd_start() with a probe name and location and
-additional params such as $retval:
+additional params such as $retval::
ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test",
"do_sys_open", "$retval");
Similar to the synthetic event case, code like the following can be
-used to enable the newly created kprobe event:
+used to enable the newly created kprobe event::
gen_kprobe_test = trace_get_event_file(NULL, "kprobes", "gen_kprobe_test");
@@ -934,7 +935,7 @@ used to enable the newly created kprobe event:
"kprobes", "gen_kprobe_test", true);
Finally, also similar to synthetic events, the following code can be
-used to give the kprobe event file back and delete the event:
+used to give the kprobe event file back and delete the event::
trace_put_event_file(gen_kprobe_test);
@@ -963,7 +964,7 @@ are described below.
The first step in building a new command string is to create and
initialize an instance of a dynevent_cmd. Here, for instance, we
-create a dynevent_cmd on the stack and initialize it:
+create a dynevent_cmd on the stack and initialize it::
struct dynevent_cmd cmd;
char *buf;
@@ -989,7 +990,7 @@ calls to argument-adding functions.
To add a single argument, define and initialize a struct dynevent_arg
or struct dynevent_arg_pair object. Here's an example of the simplest
possible arg addition, which is simply to append the given string as
-a whitespace-separated argument to the command:
+a whitespace-separated argument to the command::
struct dynevent_arg arg;
@@ -1007,7 +1008,7 @@ the arg.
Here's another more complicated example using an 'arg pair', which is
used to create an argument that consists of a couple components added
together as a unit, for example, a 'type field_name;' arg or a simple
-expression arg e.g. 'flags=%cx':
+expression arg e.g. 'flags=%cx'::
struct dynevent_arg_pair arg_pair;
@@ -1031,7 +1032,7 @@ Any number of dynevent_*_add() calls can be made to build up the string
(until its length surpasses cmd->maxlen). When all the arguments have
been added and the command string is complete, the only thing left to
do is run the command, which happens by simply calling
-dynevent_create():
+dynevent_create()::
ret = dynevent_create(&cmd);
diff --git a/Documentation/translations/it_IT/networking/netdev-FAQ.rst b/Documentation/translations/it_IT/networking/netdev-FAQ.rst
index 8489ead7cff1..7e2456bb7d92 100644
--- a/Documentation/translations/it_IT/networking/netdev-FAQ.rst
+++ b/Documentation/translations/it_IT/networking/netdev-FAQ.rst
@@ -1,6 +1,6 @@
.. include:: ../disclaimer-ita.rst
-:Original: :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
+:Original: :ref:`Documentation/networking/netdev-FAQ.rst <netdev-FAQ>`
.. _it_netdev-FAQ:
diff --git a/Documentation/translations/it_IT/process/programming-language.rst b/Documentation/translations/it_IT/process/programming-language.rst
index f4b006395849..c4fc9d394c29 100644
--- a/Documentation/translations/it_IT/process/programming-language.rst
+++ b/Documentation/translations/it_IT/process/programming-language.rst
@@ -8,26 +8,26 @@
Linguaggio di programmazione
============================
-Il kernel è scritto nel linguaggio di programmazione C [c-language]_.
-Più precisamente, il kernel viene compilato con ``gcc`` [gcc]_ usando
-l'opzione ``-std=gnu89`` [gcc-c-dialect-options]_: il dialetto GNU
+Il kernel è scritto nel linguaggio di programmazione C [it-c-language]_.
+Più precisamente, il kernel viene compilato con ``gcc`` [it-gcc]_ usando
+l'opzione ``-std=gnu89`` [it-gcc-c-dialect-options]_: il dialetto GNU
dello standard ISO C90 (con l'aggiunta di alcune funzionalità da C99)
-Questo dialetto contiene diverse estensioni al linguaggio [gnu-extensions]_,
+Questo dialetto contiene diverse estensioni al linguaggio [it-gnu-extensions]_,
e molte di queste vengono usate sistematicamente dal kernel.
Il kernel offre un certo livello di supporto per la compilazione con ``clang``
-[clang]_ e ``icc`` [icc]_ su diverse architetture, tuttavia in questo momento
+[it-clang]_ e ``icc`` [it-icc]_ su diverse architetture, tuttavia in questo momento
il supporto non è completo e richiede delle patch aggiuntive.
Attributi
---------
Una delle estensioni più comuni e usate nel kernel sono gli attributi
-[gcc-attribute-syntax]_. Gli attributi permettono di aggiungere una semantica,
+[it-gcc-attribute-syntax]_. Gli attributi permettono di aggiungere una semantica,
definita dell'implementazione, alle entità del linguaggio (come le variabili,
le funzioni o i tipi) senza dover fare importanti modifiche sintattiche al
-linguaggio stesso (come l'aggiunta di nuove parole chiave) [n2049]_.
+linguaggio stesso (come l'aggiunta di nuove parole chiave) [it-n2049]_.
In alcuni casi, gli attributi sono opzionali (ovvero un compilatore che non
dovesse supportarli dovrebbe produrre comunque codice corretto, anche se
@@ -41,11 +41,11 @@ possono usare e/o per accorciare il codice.
Per maggiori informazioni consultate il file d'intestazione
``include/linux/compiler_attributes.h``.
-.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
-.. [gcc] https://gcc.gnu.org
-.. [clang] https://clang.llvm.org
-.. [icc] https://software.intel.com/en-us/c-compilers
-.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
-.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
-.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
-.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
+.. [it-c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
+.. [it-gcc] https://gcc.gnu.org
+.. [it-clang] https://clang.llvm.org
+.. [it-icc] https://software.intel.com/en-us/c-compilers
+.. [it-gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+.. [it-gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+.. [it-gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+.. [it-n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
diff --git a/Documentation/translations/zh_CN/filesystems/index.rst b/Documentation/translations/zh_CN/filesystems/index.rst
new file mode 100644
index 000000000000..14f155edaf69
--- /dev/null
+++ b/Documentation/translations/zh_CN/filesystems/index.rst
@@ -0,0 +1,27 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: :ref:`Documentation/filesystems/index.rst <filesystems_index>`
+:Translator: Wang Wenhu <wenhu.wang@vivo.com>
+
+.. _cn_filesystems_index:
+
+========================
+Linux Kernel中的文件系统
+========================
+
+这份正在开发的手册或许在未来某个辉煌的日子里以易懂的形式将Linux虚拟\
+文件系统(VFS)层以及基于其上的各种文件系统如何工作呈现给大家。当前\
+可以看到下面的内容。
+
+文件系统
+========
+
+文件系统实现文档。
+
+.. toctree::
+ :maxdepth: 2
+
+ virtiofs
+
diff --git a/Documentation/translations/zh_CN/filesystems/virtiofs.rst b/Documentation/translations/zh_CN/filesystems/virtiofs.rst
new file mode 100644
index 000000000000..09bc9e012e2a
--- /dev/null
+++ b/Documentation/translations/zh_CN/filesystems/virtiofs.rst
@@ -0,0 +1,58 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: :ref:`Documentation/filesystems/virtiofs.rst <virtiofs_index>`
+
+译者
+::
+
+ 中文版维护者: 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+ 中文版翻译者: 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+ 中文版校译者: 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+
+===========================================
+virtiofs: virtio-fs 主机<->客机共享文件系统
+===========================================
+
+- Copyright (C) 2020 Vivo Communication Technology Co. Ltd.
+
+介绍
+====
+Linux的virtiofs文件系统实现了一个半虚拟化VIRTIO类型“virtio-fs”设备的驱动,通过该\
+类型设备实现客机<->主机文件系统共享。它允许客机挂载一个已经导出到主机的目录。
+
+客机通常需要访问主机或者远程系统上的文件。使用场景包括:在新客机安装时让文件对其\
+可见;从主机上的根文件系统启动;对无状态或临时客机提供持久存储和在客机之间共享目录。
+
+尽管在某些任务可能通过使用已有的网络文件系统完成,但是却需要非常难以自动化的配置\
+步骤,且将存储网络暴露给客机。而virtio-fs设备通过提供不经过网络的文件系统访问文件\
+的设计方式解决了这些问题。
+
+另外,virtio-fs设备发挥了主客机共存的优点提高了性能,并且提供了网络文件系统所不具备\
+的一些语义功能。
+
+用法
+====
+以``myfs``标签将文件系统挂载到``/mnt``:
+
+.. code-block:: sh
+
+ guest# mount -t virtiofs myfs /mnt
+
+请查阅 https://virtio-fs.gitlab.io/ 了解配置QEMU和virtiofsd守护程序的详细信息。
+
+内幕
+====
+由于virtio-fs设备将FUSE协议用于文件系统请求,因此Linux的virtiofs文件系统与FUSE文\
+件系统客户端紧密集成在一起。客机充当FUSE客户端而主机充当FUSE服务器,内核与用户空\
+间之间的/dev/fuse接口由virtio-fs设备接口代替。
+
+FUSE请求被置于虚拟队列中由主机处理。主机填充缓冲区中的响应部分,而客机处理请求的完成部分。
+
+将/dev/fuse映射到虚拟队列需要解决/dev/fuse和虚拟队列之间语义上的差异。每次读取\
+/dev/fuse设备时,FUSE客户端都可以选择要传输的请求,从而可以使某些请求优先于其他\
+请求。虚拟队列有其队列语义,无法更改已入队请求的顺序。在虚拟队列已满的情况下尤\
+其关键,因为此时不可能加入高优先级的请求。为了解决此差异,virtio-fs设备采用“hiprio”\
+(高优先级)虚拟队列,专门用于有别于普通请求的高优先级请求。
+
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index d3165535ec9e..76850a5dd982 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -14,6 +14,7 @@
:maxdepth: 2
process/index
+ filesystems/index
目录和表格
----------
diff --git a/Documentation/translations/zh_CN/io_ordering.txt b/Documentation/translations/zh_CN/io_ordering.txt
index 1f8127bdd415..7bb3086227ae 100644
--- a/Documentation/translations/zh_CN/io_ordering.txt
+++ b/Documentation/translations/zh_CN/io_ordering.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/io_ordering.txt
+Chinese translated version of Documentation/driver-api/io_ordering.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -8,7 +8,7 @@ or if there is a problem with the translation.
Chinese maintainer: Lin Yongting <linyongting@gmail.com>
---------------------------------------------------------------------
-Documentation/io_ordering.txt 的中文翻译
+Documentation/driver-api/io_ordering.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/process/5.Posting.rst b/Documentation/translations/zh_CN/process/5.Posting.rst
index 41aba21ff050..9ff9945f918c 100644
--- a/Documentation/translations/zh_CN/process/5.Posting.rst
+++ b/Documentation/translations/zh_CN/process/5.Posting.rst
@@ -5,7 +5,7 @@
.. _cn_development_posting:
-发送补丁
+发布补丁
========
迟早,当您的工作准备好提交给社区进行审查,并最终包含到主线内核中时。不出所料,
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 2e91370dc159..f759edafd938 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -266,7 +266,6 @@ Code Seq# Include File Comments
'o' 01-A1 `linux/dvb/*.h` DVB
'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this)
'p' 00-1F linux/rtc.h conflict!
-'p' 00-3F linux/mc146818rtc.h conflict!
'p' 40-7F linux/nvram.h
'p' 80-9F linux/ppdev.h user-space parport
<mailto:tim@cyberelk.net>
diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst
index d18c97b4e140..c3129b9ba5cb 100644
--- a/Documentation/virt/kvm/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
@@ -53,6 +53,29 @@ key management interface to perform common hypervisor activities such as
encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
information, see the SEV Key Management spec [api-spec]_
+The main ioctl to access SEV is KVM_MEM_ENCRYPT_OP. If the argument
+to KVM_MEM_ENCRYPT_OP is NULL, the ioctl returns 0 if SEV is enabled
+and ``ENOTTY`` if it is disabled (on some older versions of Linux,
+the ioctl runs normally even with a NULL argument, and therefore will
+likely return ``EFAULT``). If non-NULL, the argument to KVM_MEM_ENCRYPT_OP
+must be a struct kvm_sev_cmd::
+
+ struct kvm_sev_cmd {
+ __u32 id;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+ };
+
+
+The ``id`` field contains the subcommand, and the ``data`` field points to
+another struct containing arguments specific to the command. The ``sev_fd``
+should point to a file descriptor that is opened on the ``/dev/sev``
+device, if needed (see individual commands).
+
+On output, ``error`` is zero on success, or an error code. Error codes
+are defined in ``<linux/psp-dev.h>``.
+
KVM implements the following commands to support common lifecycle events of SEV
guests, such as launching, running, snapshotting, migrating and decommissioning.
@@ -90,6 +113,8 @@ Returns: 0 on success, -negative on error
On success, the 'handle' field contains a new handle and on error, a negative value.
+KVM_SEV_LAUNCH_START requires the ``sev_fd`` field to be valid.
+
For more details, see SEV spec Section 6.2.
3. KVM_SEV_LAUNCH_UPDATE_DATA
diff --git a/Documentation/x86/exception-tables.rst b/Documentation/x86/exception-tables.rst
index ed6d4b0cf62c..81a393867f10 100644
--- a/Documentation/x86/exception-tables.rst
+++ b/Documentation/x86/exception-tables.rst
@@ -257,6 +257,9 @@ the fault, in our case the actual value is c0199ff5:
the original assembly code: > 3: movl $-14,%eax
and linked in vmlinux : > c0199ff5 <.fixup+10b5> movl $0xfffffff2,%eax
+If the fixup was able to handle the exception, control flow may be returned
+to the instruction after the one that triggered the fault, i.e. local label 2b.
+
The assembly code::
> .section __ex_table,"a"
@@ -344,3 +347,14 @@ pointer which points to one of:
it as special.
More functions can easily be added.
+
+CONFIG_BUILDTIME_TABLE_SORT allows the __ex_table section to be sorted post
+link of the kernel image, via a host utility scripts/sorttable. It will set the
+symbol main_extable_sort_needed to 0, avoiding sorting the __ex_table section
+at boot time. With the exception table sorted, at runtime when an exception
+occurs we can quickly lookup the __ex_table entry via binary search.
+
+This is not just a boot time optimization; some architectures require this
+table to be sorted in order to handle exceptions relatively early in the boot
+process. For example, i386 makes use of this form of exception handling before
+paging support is even enabled!
diff --git a/Documentation/x86/intel-iommu.rst b/Documentation/x86/intel-iommu.rst
index 9dae6b47e398..099f13d51d5f 100644
--- a/Documentation/x86/intel-iommu.rst
+++ b/Documentation/x86/intel-iommu.rst
@@ -95,9 +95,10 @@ and any RMRR's processed::
When DMAR is enabled for use, you will notice..
PCI-DMA: Using DMAR IOMMU
+-------------------------
Fault reporting
----------------
+^^^^^^^^^^^^^^^
::
diff --git a/MAINTAINERS b/MAINTAINERS
index 89682b3c76ca..7ec848ae301c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -214,7 +214,7 @@ Q: http://patchwork.kernel.org/project/v9fs-devel/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git
T: git git://github.com/martinetd/linux.git
S: Maintained
-F: Documentation/filesystems/9p.txt
+F: Documentation/filesystems/9p.rst
F: fs/9p/
F: net/9p/
F: include/net/9p/
@@ -584,7 +584,7 @@ AFFS FILE SYSTEM
M: David Sterba <dsterba@suse.com>
L: linux-fsdevel@vger.kernel.org
S: Odd Fixes
-F: Documentation/filesystems/affs.txt
+F: Documentation/filesystems/affs.rst
F: fs/affs/
AFS FILESYSTEM
@@ -593,7 +593,7 @@ L: linux-afs@lists.infradead.org
S: Supported
F: fs/afs/
F: include/trace/events/afs.h
-F: Documentation/filesystems/afs.txt
+F: Documentation/filesystems/afs.rst
W: https://www.infradead.org/~dhowells/kafs/
AGPGART DRIVER
@@ -3071,7 +3071,7 @@ M: Luis de Bethencourt <luisbg@kernel.org>
M: Salah Triki <salah.triki@gmail.com>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/luisbg/linux-befs.git
-F: Documentation/filesystems/befs.txt
+F: Documentation/filesystems/befs.rst
F: fs/befs/
BFQ I/O SCHEDULER
@@ -3085,7 +3085,7 @@ F: Documentation/block/bfq-iosched.rst
BFS FILE SYSTEM
M: "Tigran A. Aivazian" <aivazian.tigran@gmail.com>
S: Maintained
-F: Documentation/filesystems/bfs.txt
+F: Documentation/filesystems/bfs.rst
F: fs/bfs/
F: include/uapi/linux/bfs_fs.h
@@ -3618,7 +3618,7 @@ W: http://btrfs.wiki.kernel.org/
Q: http://patchwork.kernel.org/project/linux-btrfs/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git
S: Maintained
-F: Documentation/filesystems/btrfs.txt
+F: Documentation/filesystems/btrfs.rst
F: fs/btrfs/
F: include/linux/btrfs*
F: include/uapi/linux/btrfs*
@@ -3915,7 +3915,7 @@ W: http://ceph.com/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
T: git git://github.com/ceph/ceph-client.git
S: Supported
-F: Documentation/filesystems/ceph.txt
+F: Documentation/filesystems/ceph.rst
F: fs/ceph/
CERTIFICATE HANDLING
@@ -4081,7 +4081,6 @@ F: drivers/scsi/snic/
CISCO VIC ETHERNET NIC DRIVER
M: Christian Benvenuti <benve@cisco.com>
M: Govindarajulu Varadarajan <_govind@gmx.com>
-M: Parvi Kaustubhi <pkaustub@cisco.com>
S: Supported
F: drivers/net/ethernet/cisco/enic/
@@ -4432,7 +4431,7 @@ F: include/linux/cpuidle.h
CRAMFS FILESYSTEM
M: Nicolas Pitre <nico@fluxnic.net>
S: Maintained
-F: Documentation/filesystems/cramfs.txt
+F: Documentation/filesystems/cramfs.rst
F: fs/cramfs/
CREATIVE SB0540
@@ -4580,7 +4579,7 @@ F: drivers/infiniband/hw/cxgb4/
F: include/uapi/rdma/cxgb4-abi.h
CXGB4VF ETHERNET DRIVER (CXGB4VF)
-M: Casey Leedom <leedom@chelsio.com>
+M: Vishal Kulkarni <vishal@gmail.com>
L: netdev@vger.kernel.org
W: http://www.chelsio.com
S: Supported
@@ -5210,7 +5209,7 @@ M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
R: "Rafael J. Wysocki" <rafael@kernel.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
S: Supported
-F: Documentation/kobject.txt
+F: Documentation/core-api/kobject.rst
F: drivers/base/
F: fs/debugfs/
F: fs/sysfs/
@@ -5947,7 +5946,7 @@ W: http://ecryptfs.org
W: https://launchpad.net/ecryptfs
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git
S: Odd Fixes
-F: Documentation/filesystems/ecryptfs.txt
+F: Documentation/filesystems/ecryptfs.rst
F: fs/ecryptfs/
EDAC-AMD64
@@ -6007,6 +6006,12 @@ F: Documentation/driver-api/edac.rst
F: drivers/edac/
F: include/linux/edac.h
+EDAC-DMC520
+M: Lei Wang <lewan@microsoft.com>
+L: linux-edac@vger.kernel.org
+S: Supported
+F: drivers/edac/dmc520_edac.c
+
EDAC-E752X
M: Mark Gross <mark.gross@intel.com>
L: linux-edac@vger.kernel.org
@@ -6206,7 +6211,6 @@ S: Supported
F: drivers/scsi/be2iscsi/
Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER (be2net)
-M: Sathya Perla <sathya.perla@broadcom.com>
M: Ajit Khaparde <ajit.khaparde@broadcom.com>
M: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
M: Somnath Kotur <somnath.kotur@broadcom.com>
@@ -6258,12 +6262,12 @@ F: drivers/video/fbdev/s1d13xxxfb.c
F: include/video/s1d13xxxfb.h
EROFS FILE SYSTEM
-M: Gao Xiang <gaoxiang25@huawei.com>
+M: Gao Xiang <xiang@kernel.org>
M: Chao Yu <yuchao0@huawei.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
-F: Documentation/filesystems/erofs.txt
+F: Documentation/filesystems/erofs.rst
F: fs/erofs/
F: include/trace/events/erofs.h
@@ -6324,7 +6328,7 @@ EXT2 FILE SYSTEM
M: Jan Kara <jack@suse.com>
L: linux-ext4@vger.kernel.org
S: Maintained
-F: Documentation/filesystems/ext2.txt
+F: Documentation/filesystems/ext2.rst
F: fs/ext2/
F: include/linux/ext2*
@@ -6398,7 +6402,7 @@ L: linux-f2fs-devel@lists.sourceforge.net
W: https://f2fs.wiki.kernel.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
S: Maintained
-F: Documentation/filesystems/f2fs.txt
+F: Documentation/filesystems/f2fs.rst
F: Documentation/ABI/testing/sysfs-fs-f2fs
F: fs/f2fs/
F: include/linux/f2fs_fs.h
@@ -6943,7 +6947,7 @@ S: Maintained
F: scripts/gcc-plugins/
F: scripts/gcc-plugin.sh
F: scripts/Makefile.gcc-plugins
-F: Documentation/core-api/gcc-plugins.rst
+F: Documentation/kbuild/gcc-plugins.rst
GASKET DRIVER FRAMEWORK
M: Rob Springer <rspringer@google.com>
@@ -7440,13 +7444,13 @@ F: drivers/infiniband/hw/hfi1
HFS FILESYSTEM
L: linux-fsdevel@vger.kernel.org
S: Orphan
-F: Documentation/filesystems/hfs.txt
+F: Documentation/filesystems/hfs.rst
F: fs/hfs/
HFSPLUS FILESYSTEM
L: linux-fsdevel@vger.kernel.org
S: Orphan
-F: Documentation/filesystems/hfsplus.txt
+F: Documentation/filesystems/hfsplus.rst
F: fs/hfsplus/
HGA FRAMEBUFFER DRIVER
@@ -7526,6 +7530,12 @@ F: include/uapi/linux/if_hippi.h
F: net/802/hippi.c
F: drivers/net/hippi/
+HISILICON DMA DRIVER
+M: Zhou Wang <wangzhou1@hisilicon.com>
+L: dmaengine@vger.kernel.org
+S: Maintained
+F: drivers/dma/hisi_dma.c
+
HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
M: Zaibo Xu <xuzaibo@huawei.com>
L: linux-crypto@vger.kernel.org
@@ -7583,7 +7593,8 @@ F: Documentation/admin-guide/perf/hisi-pmu.rst
HISILICON ROCE DRIVER
M: Lijun Ou <oulijun@huawei.com>
-M: Wei Hu(Xavier) <xavier.huwei@huawei.com>
+M: Wei Hu(Xavier) <huwei87@hisilicon.com>
+M: Weihang Li <liweihang@huawei.com>
L: linux-rdma@vger.kernel.org
S: Maintained
F: drivers/infiniband/hw/hns/
@@ -8317,7 +8328,7 @@ M: Jan Kara <jack@suse.cz>
R: Amir Goldstein <amir73il@gmail.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
-F: Documentation/filesystems/inotify.txt
+F: Documentation/filesystems/inotify.rst
F: fs/notify/inotify/
F: include/linux/inotify.h
F: include/uapi/linux/inotify.h
@@ -8486,7 +8497,6 @@ L: dmaengine@vger.kernel.org
S: Supported
F: drivers/dma/idxd/*
F: include/uapi/linux/idxd.h
-F: include/linux/idxd.h
INTEL IDLE DRIVER
M: Jacob Pan <jacob.jun.pan@linux.intel.com>
@@ -8578,15 +8588,15 @@ M: Ashutosh Dixit <ashutosh.dixit@intel.com>
S: Supported
W: https://github.com/sudeepdutt/mic
W: http://software.intel.com/en-us/mic-developer
+F: Documentation/misc-devices/mic/
+F: drivers/dma/mic_x100_dma.c
+F: drivers/dma/mic_x100_dma.h
+F: drivers/misc/mic/
F: include/linux/mic_bus.h
F: include/linux/scif.h
F: include/uapi/linux/mic_common.h
F: include/uapi/linux/mic_ioctl.h
F: include/uapi/linux/scif_ioctl.h
-F: drivers/misc/mic/
-F: drivers/dma/mic_x100_dma.c
-F: drivers/dma/mic_x100_dma.h
-F: Documentation/mic/
INTEL PMC CORE DRIVER
M: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
@@ -8693,7 +8703,7 @@ M: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
M: Luca Coelho <luciano.coelho@intel.com>
M: Intel Linux Wireless <linuxwifi@intel.com>
L: linux-wireless@vger.kernel.org
-W: http://intellinuxwireless.org
+W: https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi
T: git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git
S: Supported
F: drivers/net/wireless/intel/iwlwifi/
@@ -9284,8 +9294,8 @@ L: keyrings@vger.kernel.org
S: Supported
F: Documentation/security/keys/trusted-encrypted.rst
F: include/keys/trusted-type.h
-F: security/keys/trusted.c
-F: include/keys/trusted.h
+F: include/keys/trusted_tpm.h
+F: security/keys/trusted-keys/
KEYS/KEYRINGS
M: David Howells <dhowells@redhat.com>
@@ -11127,7 +11137,7 @@ M: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
L: linux-mips@vger.kernel.org
W: http://www.linux-mips.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git
-Q: http://patchwork.linux-mips.org/project/linux-mips/list/
+Q: https://patchwork.kernel.org/project/linux-mips/list/
S: Maintained
F: Documentation/devicetree/bindings/mips/
F: Documentation/mips/
@@ -11798,7 +11808,7 @@ W: https://nilfs.sourceforge.io/
W: https://nilfs.osdn.jp/
T: git git://github.com/konis/nilfs2.git
S: Supported
-F: Documentation/filesystems/nilfs2.txt
+F: Documentation/filesystems/nilfs2.rst
F: fs/nilfs2/
F: include/trace/events/nilfs2.h
F: include/uapi/linux/nilfs2_api.h
@@ -11907,7 +11917,7 @@ L: linux-ntfs-dev@lists.sourceforge.net
W: http://www.tuxera.com/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/aia21/ntfs.git
S: Supported
-F: Documentation/filesystems/ntfs.txt
+F: Documentation/filesystems/ntfs.rst
F: fs/ntfs/
NUBUS SUBSYSTEM
@@ -12253,7 +12263,7 @@ OMFS FILESYSTEM
M: Bob Copeland <me@bobcopeland.com>
L: linux-karma-devel@lists.sourceforge.net
S: Maintained
-F: Documentation/filesystems/omfs.txt
+F: Documentation/filesystems/omfs.rst
F: fs/omfs/
OMNIKEY CARDMAN 4000 DRIVER
@@ -12502,8 +12512,8 @@ M: Joseph Qi <joseph.qi@linux.alibaba.com>
L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
W: http://ocfs2.wiki.kernel.org
S: Supported
-F: Documentation/filesystems/ocfs2.txt
-F: Documentation/filesystems/dlmfs.txt
+F: Documentation/filesystems/ocfs2.rst
+F: Documentation/filesystems/dlmfs.rst
F: fs/ocfs2/
ORANGEFS FILESYSTEM
@@ -12513,7 +12523,7 @@ L: devel@lists.orangefs.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux.git
S: Supported
F: fs/orangefs/
-F: Documentation/filesystems/orangefs.txt
+F: Documentation/filesystems/orangefs.rst
ORINOCO DRIVER
L: linux-wireless@vger.kernel.org
@@ -13475,7 +13485,7 @@ S: Maintained
F: fs/proc/
F: include/linux/proc_fs.h
F: tools/testing/selftests/proc/
-F: Documentation/filesystems/proc.txt
+F: Documentation/filesystems/proc.rst
PROC SYSCTL
M: Luis Chamberlain <mcgrof@kernel.org>
@@ -15426,11 +15436,9 @@ F: drivers/infiniband/sw/siw/
F: include/uapi/rdma/siw-abi.h
SOFT-ROCE DRIVER (rxe)
-M: Moni Shoua <monis@mellanox.com>
+M: Zhu Yanjun <yanjunz@mellanox.com>
L: linux-rdma@vger.kernel.org
S: Supported
-W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
-Q: http://patchwork.kernel.org/project/linux-rdma/list/
F: drivers/infiniband/sw/rxe/
F: include/uapi/rdma/rdma_user_rxe.h
@@ -15744,7 +15752,7 @@ L: squashfs-devel@lists.sourceforge.net (subscribers-only)
W: http://squashfs.org.uk
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pkl/squashfs-next.git
S: Maintained
-F: Documentation/filesystems/squashfs.txt
+F: Documentation/filesystems/squashfs.rst
F: fs/squashfs/
SRM (Alpha) environment access
@@ -16189,7 +16197,7 @@ F: drivers/platform/x86/system76_acpi.c
SYSV FILESYSTEM
M: Christoph Hellwig <hch@infradead.org>
S: Maintained
-F: Documentation/filesystems/sysv-fs.txt
+F: Documentation/filesystems/sysv-fs.rst
F: fs/sysv/
F: include/linux/sysv_fs.h
@@ -16760,7 +16768,7 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/
S: Maintained
F: drivers/media/platform/ti-vpe/
F: Documentation/devicetree/bindings/media/ti,vpe.yaml
- Documentation/devicetree/bindings/media/ti,cal.yaml
+F: Documentation/devicetree/bindings/media/ti,cal.yaml
TI WILINK WIRELESS DRIVERS
L: linux-wireless@vger.kernel.org
@@ -17054,7 +17062,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git next
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
W: http://www.linux-mtd.infradead.org/doc/ubifs.html
S: Supported
-F: Documentation/filesystems/ubifs.txt
+F: Documentation/filesystems/ubifs.rst
F: fs/ubifs/
UCLINUX (M68KNOMMU AND COLDFIRE)
@@ -17073,7 +17081,7 @@ F: arch/m68k/include/asm/*_no.*
UDF FILESYSTEM
M: Jan Kara <jack@suse.com>
S: Maintained
-F: Documentation/filesystems/udf.txt
+F: Documentation/filesystems/udf.rst
F: fs/udf/
UDRAW TABLET
@@ -18512,7 +18520,7 @@ L: linux-fsdevel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git
S: Maintained
F: fs/zonefs/
-F: Documentation/filesystems/zonefs.txt
+F: Documentation/filesystems/zonefs.rst
ZPOOL COMPRESSED PAGE STORAGE API
M: Dan Streetman <ddstreet@ieee.org>
diff --git a/Makefile b/Makefile
index e25db579ce74..4d0711f54047 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 6
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION =
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*
@@ -1804,7 +1804,7 @@ existing-targets := $(wildcard $(sort $(targets)))
-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd)
-endif # config-targets
+endif # config-build
endif # mixed-build
endif # need-sub-make
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index ff2a393b635c..7124ab82dfa3 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -154,7 +154,7 @@ config ARC_CPU_HS
help
Support for ARC HS38x Cores based on ARCv2 ISA
The notable features are:
- - SMP configurations of upto 4 core with coherency
+ - SMP configurations of up to 4 cores with coherency
- Optional L2 Cache and IO-Coherency
- Revised Interrupt Architecture (multiple priorites, reg banks,
auto stack switch, auto regfile save/restore)
@@ -192,7 +192,7 @@ config ARC_SMP_HALT_ON_RESET
help
In SMP configuration cores can be configured as Halt-on-reset
or they could all start at same time. For Halt-on-reset, non
- masters are parked until Master kicks them so they can start of
+ masters are parked until Master kicks them so they can start off
at designated entry point. For other case, all jump to common
entry point and spin wait for Master's signal.
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
index 07f26ed39f02..f7a978dfdf1d 100644
--- a/arch/arc/configs/nps_defconfig
+++ b/arch/arc/configs/nps_defconfig
@@ -21,8 +21,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_PLAT_EZNPS=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4096
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 5dd470b6609e..bf39a0091679 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -20,8 +20,6 @@ CONFIG_ISA_ARCOMPACT=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
# CONFIG_COMPACTION is not set
CONFIG_NET=y
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 3532e86f7bff..7121bd71c543 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -19,8 +19,6 @@ CONFIG_PERF_EVENTS=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ISA_ARCV2=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs"
# CONFIG_COMPACTION is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index d90448bee064..f9863b294a70 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -14,8 +14,6 @@ CONFIG_PERF_EVENTS=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_ISA_ARCV2=y
CONFIG_SMP=y
# CONFIG_ARC_TIMERS_64BIT is not set
diff --git a/arch/arc/include/asm/fpu.h b/arch/arc/include/asm/fpu.h
index 64347250fdf5..006bcf88a7a5 100644
--- a/arch/arc/include/asm/fpu.h
+++ b/arch/arc/include/asm/fpu.h
@@ -43,6 +43,8 @@ extern void fpu_init_task(struct pt_regs *regs);
#endif /* !CONFIG_ISA_ARCOMPACT */
+struct task_struct;
+
extern void fpu_save_restore(struct task_struct *p, struct task_struct *n);
#else /* !CONFIG_ARC_FPU_SAVE_RESTORE */
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index d9ee43c6b7db..fe19f1d412e7 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -29,6 +29,8 @@
.endm
#define ASM_NL ` /* use '`' to mark new line in macro */
+#define __ALIGN .align 4
+#define __ALIGN_STR __stringify(__ALIGN)
/* annotation for data we want in DCCM - if enabled in .config */
.macro ARCFP_DATA nm
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index e1c647490f00..aa41af6ef4ac 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -8,11 +8,11 @@
#include <linux/delay.h>
#include <linux/root_dev.h>
#include <linux/clk.h>
-#include <linux/clk-provider.h>
#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/module.h>
#include <linux/cpu.h>
+#include <linux/of_clk.h>
#include <linux/of_fdt.h>
#include <linux/of.h>
#include <linux/cache.h>
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index b79886a6cec8..d2999503fb8a 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -104,8 +104,7 @@ static void show_faulting_vma(unsigned long address)
if (IS_ERR(nm))
nm = "?";
}
- pr_info(" @off 0x%lx in [%s]\n"
- " VMA: 0x%08lx to 0x%08lx\n",
+ pr_info(" @off 0x%lx in [%s] VMA: 0x%08lx to 0x%08lx\n",
vma->vm_start < TASK_UNMAPPED_BASE ?
address : address - vma->vm_start,
nm, vma->vm_start, vma->vm_end);
@@ -120,8 +119,6 @@ static void show_ecr_verbose(struct pt_regs *regs)
unsigned int vec, cause_code;
unsigned long address;
- pr_info("\n[ECR ]: 0x%08lx => ", regs->event);
-
/* For Data fault, this is data address not instruction addr */
address = current->thread.fault_address;
@@ -130,10 +127,10 @@ static void show_ecr_verbose(struct pt_regs *regs)
/* For DTLB Miss or ProtV, display the memory involved too */
if (vec == ECR_V_DTLB_MISS) {
- pr_cont("Invalid %s @ 0x%08lx by insn @ 0x%08lx\n",
+ pr_cont("Invalid %s @ 0x%08lx by insn @ %pS\n",
(cause_code == 0x01) ? "Read" :
((cause_code == 0x02) ? "Write" : "EX"),
- address, regs->ret);
+ address, (void *)regs->ret);
} else if (vec == ECR_V_ITLB_MISS) {
pr_cont("Insn could not be fetched\n");
} else if (vec == ECR_V_MACH_CHK) {
@@ -191,31 +188,31 @@ void show_regs(struct pt_regs *regs)
show_ecr_verbose(regs);
- pr_info("[EFA ]: 0x%08lx\n[BLINK ]: %pS\n[ERET ]: %pS\n",
- current->thread.fault_address,
- (void *)regs->blink, (void *)regs->ret);
-
if (user_mode(regs))
show_faulting_vma(regs->ret); /* faulting code, not data */
- pr_info("[STAT32]: 0x%08lx", regs->status32);
+ pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\n",
+ regs->event, current->thread.fault_address, regs->ret);
+
+ pr_info("STAT32: 0x%08lx", regs->status32);
#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
#ifdef CONFIG_ISA_ARCOMPACT
- pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n",
+ pr_cont(" [%2s%2s%2s%2s%2s%2s%2s]",
(regs->status32 & STATUS_U_MASK) ? "U " : "K ",
STS_BIT(regs, DE), STS_BIT(regs, AE),
STS_BIT(regs, A2), STS_BIT(regs, A1),
STS_BIT(regs, E2), STS_BIT(regs, E1));
#else
- pr_cont(" : %2s%2s%2s%2s\n",
+ pr_cont(" [%2s%2s%2s%2s]",
STS_BIT(regs, IE),
(regs->status32 & STATUS_U_MASK) ? "U " : "K ",
STS_BIT(regs, DE), STS_BIT(regs, AE));
#endif
- pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n",
- regs->bta, regs->sp, regs->fp);
+ pr_cont(" BTA: 0x%08lx\n", regs->bta);
+ pr_info("BLK: %pS\n SP: 0x%08lx FP: 0x%08lx\n",
+ (void *)regs->blink, regs->sp, regs->fp);
pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
regs->lp_start, regs->lp_end, regs->lp_count);
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index db857d07114f..1fc32b611f8a 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -307,13 +307,15 @@ endif
ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
prepare: stack_protector_prepare
stack_protector_prepare: prepare0
- $(eval KBUILD_CFLAGS += \
+ $(eval SSP_PLUGIN_CFLAGS := \
-fplugin-arg-arm_ssp_per_task_plugin-tso=$(shell \
awk '{if ($$2 == "THREAD_SZ_ORDER") print $$3;}'\
include/generated/asm-offsets.h) \
-fplugin-arg-arm_ssp_per_task_plugin-offset=$(shell \
awk '{if ($$2 == "TI_STACK_CANARY") print $$3;}'\
include/generated/asm-offsets.h))
+ $(eval KBUILD_CFLAGS += $(SSP_PLUGIN_CFLAGS))
+ $(eval GCC_PLUGINS_CFLAGS += $(SSP_PLUGIN_CFLAGS))
endif
all: $(notdir $(KBUILD_IMAGE))
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index da599c3a1193..9c11e7490292 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -101,7 +101,6 @@ clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S \
$(libfdt) $(libfdt_hdrs) hyp-stub.S
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS += $(DISABLE_ARM_SSP_PER_TASK_PLUGIN)
ifeq ($(CONFIG_FUNCTION_TRACER),y)
ORIG_CFLAGS := $(KBUILD_CFLAGS)
@@ -117,7 +116,8 @@ CFLAGS_fdt_ro.o := $(nossp-flags-y)
CFLAGS_fdt_rw.o := $(nossp-flags-y)
CFLAGS_fdt_wip.o := $(nossp-flags-y)
-ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj)
+ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \
+ -I$(obj) $(DISABLE_ARM_SSP_PER_TASK_PLUGIN)
asflags-y := -DZIMAGE
# Supply kernel BSS size to the decompressor via a linker symbol.
diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
index b75af21069f9..4c3f606e5b8d 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
+++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
@@ -112,6 +112,7 @@
&sdhci {
#address-cells = <1>;
#size-cells = <0>;
+ pinctrl-names = "default";
pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>;
bus-width = <4>;
mmc-pwrseq = <&wifi_pwrseq>;
diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi
index 394c8a71b13b..fd2c766e0f71 100644
--- a/arch/arm/boot/dts/bcm2835-rpi.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi
@@ -15,6 +15,7 @@
firmware: firmware {
compatible = "raspberrypi,bcm2835-firmware", "simple-bus";
mboxes = <&mailbox>;
+ dma-ranges;
};
power: power {
diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts
index 3931fb068ff0..91d1018ab75f 100644
--- a/arch/arm/boot/dts/dm8148-evm.dts
+++ b/arch/arm/boot/dts/dm8148-evm.dts
@@ -24,12 +24,12 @@
&cpsw_emac0 {
phy-handle = <&ethphy0>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&cpsw_emac1 {
phy-handle = <&ethphy1>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&davinci_mdio {
diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts
index 9e43d5ec0bb2..79ccdd4470f4 100644
--- a/arch/arm/boot/dts/dm8148-t410.dts
+++ b/arch/arm/boot/dts/dm8148-t410.dts
@@ -33,12 +33,12 @@
&cpsw_emac0 {
phy-handle = <&ethphy0>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&cpsw_emac1 {
phy-handle = <&ethphy1>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&davinci_mdio {
diff --git a/arch/arm/boot/dts/dra62x-j5eco-evm.dts b/arch/arm/boot/dts/dra62x-j5eco-evm.dts
index 861ab90a3f3a..c16e183822be 100644
--- a/arch/arm/boot/dts/dra62x-j5eco-evm.dts
+++ b/arch/arm/boot/dts/dra62x-j5eco-evm.dts
@@ -24,12 +24,12 @@
&cpsw_emac0 {
phy-handle = <&ethphy0>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&cpsw_emac1 {
phy-handle = <&ethphy1>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
};
&davinci_mdio {
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 4305051bb769..5f5ee16f07a3 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -148,6 +148,7 @@
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x0 0x0 0x0 0xc0000000>;
+ dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>;
ti,hwmods = "l3_main_1", "l3_main_2";
reg = <0x0 0x44000000 0x0 0x1000000>,
<0x0 0x45000000 0x0 0x1000>;
diff --git a/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi b/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi
index 31719c079d67..44f97546dd0a 100644
--- a/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi
+++ b/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi
@@ -33,7 +33,7 @@
};
};
- lcd_vdd3_reg: voltage-regulator-6 {
+ lcd_vdd3_reg: voltage-regulator-7 {
compatible = "regulator-fixed";
regulator-name = "LCD_VDD_2.2V";
regulator-min-microvolt = <2200000>;
@@ -42,7 +42,7 @@
enable-active-high;
};
- ps_als_reg: voltage-regulator-7 {
+ ps_als_reg: voltage-regulator-8 {
compatible = "regulator-fixed";
regulator-name = "LED_A_3.0V";
regulator-min-microvolt = <3000000>;
diff --git a/arch/arm/boot/dts/exynos4412-n710x.dts b/arch/arm/boot/dts/exynos4412-n710x.dts
index 98cd1284cd90..4189e1fb204c 100644
--- a/arch/arm/boot/dts/exynos4412-n710x.dts
+++ b/arch/arm/boot/dts/exynos4412-n710x.dts
@@ -13,7 +13,7 @@
/* bootargs are passed in by bootloader */
- cam_vdda_reg: voltage-regulator-6 {
+ cam_vdda_reg: voltage-regulator-7 {
compatible = "regulator-fixed";
regulator-name = "CAM_SENSOR_CORE_1.2V";
regulator-min-microvolt = <1200000>;
diff --git a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi
index 4d18952658f8..77d871340eb7 100644
--- a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi
@@ -112,7 +112,7 @@
regulators {
vdd_arm: buck1 {
regulator-name = "vdd_arm";
- regulator-min-microvolt = <730000>;
+ regulator-min-microvolt = <925000>;
regulator-max-microvolt = <1380000>;
regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>;
regulator-always-on;
@@ -120,7 +120,7 @@
vdd_soc: buck2 {
regulator-name = "vdd_soc";
- regulator-min-microvolt = <730000>;
+ regulator-min-microvolt = <1150000>;
regulator-max-microvolt = <1380000>;
regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>;
regulator-always-on;
diff --git a/arch/arm/boot/dts/motorola-mapphone-common.dtsi b/arch/arm/boot/dts/motorola-mapphone-common.dtsi
index b6e82b165f5c..9067e0ef4240 100644
--- a/arch/arm/boot/dts/motorola-mapphone-common.dtsi
+++ b/arch/arm/boot/dts/motorola-mapphone-common.dtsi
@@ -429,7 +429,7 @@
reset-gpios = <&gpio6 13 GPIO_ACTIVE_HIGH>; /* gpio173 */
/* gpio_183 with sys_nirq2 pad as wakeup */
- interrupts-extended = <&gpio6 23 IRQ_TYPE_EDGE_FALLING>,
+ interrupts-extended = <&gpio6 23 IRQ_TYPE_LEVEL_LOW>,
<&omap4_pmx_core 0x160>;
interrupt-names = "irq", "wakeup";
wakeup-source;
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index c3c6d7d04a76..4089d97405c9 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -854,34 +854,46 @@
compatible = "ti,omap2-onenand";
reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */
+ /*
+ * These timings are based on CONFIG_OMAP_GPMC_DEBUG=y reported
+ * bootloader set values when booted with v5.1
+ * (OneNAND Manufacturer: Samsung):
+ *
+ * cs0 GPMC_CS_CONFIG1: 0xfb001202
+ * cs0 GPMC_CS_CONFIG2: 0x00111100
+ * cs0 GPMC_CS_CONFIG3: 0x00020200
+ * cs0 GPMC_CS_CONFIG4: 0x11001102
+ * cs0 GPMC_CS_CONFIG5: 0x03101616
+ * cs0 GPMC_CS_CONFIG6: 0x90060000
+ */
gpmc,sync-read;
gpmc,sync-write;
gpmc,burst-length = <16>;
gpmc,burst-read;
gpmc,burst-wrap;
gpmc,burst-write;
- gpmc,device-width = <2>; /* GPMC_DEVWIDTH_16BIT */
- gpmc,mux-add-data = <2>; /* GPMC_MUX_AD */
+ gpmc,device-width = <2>;
+ gpmc,mux-add-data = <2>;
gpmc,cs-on-ns = <0>;
- gpmc,cs-rd-off-ns = <87>;
- gpmc,cs-wr-off-ns = <87>;
+ gpmc,cs-rd-off-ns = <102>;
+ gpmc,cs-wr-off-ns = <102>;
gpmc,adv-on-ns = <0>;
- gpmc,adv-rd-off-ns = <10>;
- gpmc,adv-wr-off-ns = <10>;
- gpmc,oe-on-ns = <15>;
- gpmc,oe-off-ns = <87>;
+ gpmc,adv-rd-off-ns = <12>;
+ gpmc,adv-wr-off-ns = <12>;
+ gpmc,oe-on-ns = <12>;
+ gpmc,oe-off-ns = <102>;
gpmc,we-on-ns = <0>;
- gpmc,we-off-ns = <87>;
- gpmc,rd-cycle-ns = <112>;
- gpmc,wr-cycle-ns = <112>;
- gpmc,access-ns = <81>;
- gpmc,page-burst-access-ns = <15>;
+ gpmc,we-off-ns = <102>;
+ gpmc,rd-cycle-ns = <132>;
+ gpmc,wr-cycle-ns = <132>;
+ gpmc,access-ns = <96>;
+ gpmc,page-burst-access-ns = <18>;
gpmc,bus-turnaround-ns = <0>;
gpmc,cycle2cycle-delay-ns = <0>;
gpmc,wait-monitoring-ns = <0>;
- gpmc,clk-activation-ns = <5>;
- gpmc,wr-data-mux-bus-ns = <30>;
- gpmc,wr-access-ns = <81>;
+ gpmc,clk-activation-ns = <6>;
+ gpmc,wr-data-mux-bus-ns = <36>;
+ gpmc,wr-access-ns = <96>;
gpmc,sync-clk-ps = <15000>;
/*
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index d0ecf54d5a23..a7562d3deb1a 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -143,6 +143,7 @@
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0 0 0xc0000000>;
+ dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>;
ti,hwmods = "l3_main_1", "l3_main_2", "l3_main_3";
reg = <0 0x44000000 0 0x2000>,
<0 0x44800000 0 0x3000>,
diff --git a/arch/arm/boot/dts/ox810se.dtsi b/arch/arm/boot/dts/ox810se.dtsi
index 9f6c2b660ed3..0755e5864c4a 100644
--- a/arch/arm/boot/dts/ox810se.dtsi
+++ b/arch/arm/boot/dts/ox810se.dtsi
@@ -323,8 +323,8 @@
interrupt-controller;
reg = <0 0x200>;
#interrupt-cells = <1>;
- valid-mask = <0xFFFFFFFF>;
- clear-mask = <0>;
+ valid-mask = <0xffffffff>;
+ clear-mask = <0xffffffff>;
};
timer0: timer@200 {
diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi
index c9b327732063..90846a7655b4 100644
--- a/arch/arm/boot/dts/ox820.dtsi
+++ b/arch/arm/boot/dts/ox820.dtsi
@@ -240,8 +240,8 @@
reg = <0 0x200>;
interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
#interrupt-cells = <1>;
- valid-mask = <0xFFFFFFFF>;
- clear-mask = <0>;
+ valid-mask = <0xffffffff>;
+ clear-mask = <0xffffffff>;
};
timer0: timer@200 {
diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi
index 1532a0e59af4..a2c37adacf77 100644
--- a/arch/arm/boot/dts/sun8i-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a33.dtsi
@@ -215,7 +215,7 @@
};
crypto: crypto-engine@1c15000 {
- compatible = "allwinner,sun4i-a10-crypto";
+ compatible = "allwinner,sun8i-a33-crypto";
reg = <0x01c15000 0x1000>;
interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_SS>, <&ccu CLK_SS>;
diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
index 2fd31a0a0b34..e8b3669e0e5d 100644
--- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
@@ -374,8 +374,8 @@
};
&reg_dldo3 {
- regulator-min-microvolt = <2800000>;
- regulator-max-microvolt = <2800000>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
regulator-name = "vdd-csi";
};
@@ -498,7 +498,8 @@
};
&usbphy {
- usb0_id_det-gpios = <&pio 7 11 GPIO_ACTIVE_HIGH>; /* PH11 */
+ usb0_id_det-gpios = <&pio 7 11 (GPIO_ACTIVE_HIGH | GPIO_PULL_UP)>; /* PH11 */
+ usb0_vbus_power-supply = <&usb_power_supply>;
usb0_vbus-supply = <&reg_drivevbus>;
usb1_vbus-supply = <&reg_vmain>;
usb2_vbus-supply = <&reg_vmain>;
diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi
index 74ac7ee9383c..e7b9bef1be6b 100644
--- a/arch/arm/boot/dts/sun8i-a83t.dtsi
+++ b/arch/arm/boot/dts/sun8i-a83t.dtsi
@@ -1006,10 +1006,10 @@
reg = <0x01c30000 0x104>;
interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "macirq";
- resets = <&ccu CLK_BUS_EMAC>;
- reset-names = "stmmaceth";
- clocks = <&ccu RST_BUS_EMAC>;
+ clocks = <&ccu CLK_BUS_EMAC>;
clock-names = "stmmaceth";
+ resets = <&ccu RST_BUS_EMAC>;
+ reset-names = "stmmaceth";
status = "disabled";
mdio: mdio {
diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi
index 8f09a24b36ec..a9d5d6ddbd71 100644
--- a/arch/arm/boot/dts/sun8i-r40.dtsi
+++ b/arch/arm/boot/dts/sun8i-r40.dtsi
@@ -181,6 +181,32 @@
interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
};
+ spi0: spi@1c05000 {
+ compatible = "allwinner,sun8i-r40-spi",
+ "allwinner,sun8i-h3-spi";
+ reg = <0x01c05000 0x1000>;
+ interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI0>, <&ccu CLK_SPI0>;
+ clock-names = "ahb", "mod";
+ resets = <&ccu RST_BUS_SPI0>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ spi1: spi@1c06000 {
+ compatible = "allwinner,sun8i-r40-spi",
+ "allwinner,sun8i-h3-spi";
+ reg = <0x01c06000 0x1000>;
+ interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI1>, <&ccu CLK_SPI1>;
+ clock-names = "ahb", "mod";
+ resets = <&ccu RST_BUS_SPI1>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
csi0: csi@1c09000 {
compatible = "allwinner,sun8i-r40-csi0",
"allwinner,sun7i-a20-csi0";
@@ -290,6 +316,29 @@
resets = <&ccu RST_BUS_CE>;
};
+ spi2: spi@1c17000 {
+ compatible = "allwinner,sun8i-r40-spi",
+ "allwinner,sun8i-h3-spi";
+ reg = <0x01c17000 0x1000>;
+ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI2>, <&ccu CLK_SPI2>;
+ clock-names = "ahb", "mod";
+ resets = <&ccu RST_BUS_SPI2>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ ahci: sata@1c18000 {
+ compatible = "allwinner,sun8i-r40-ahci";
+ reg = <0x01c18000 0x1000>;
+ interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>;
+ resets = <&ccu RST_BUS_SATA>;
+ reset-names = "ahci";
+ status = "disabled";
+ };
+
ehci1: usb@1c19000 {
compatible = "allwinner,sun8i-r40-ehci", "generic-ehci";
reg = <0x01c19000 0x100>;
@@ -336,6 +385,19 @@
status = "disabled";
};
+ spi3: spi@1c1f000 {
+ compatible = "allwinner,sun8i-r40-spi",
+ "allwinner,sun8i-h3-spi";
+ reg = <0x01c1f000 0x1000>;
+ interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI3>, <&ccu CLK_SPI3>;
+ clock-names = "ahb", "mod";
+ resets = <&ccu RST_BUS_SPI3>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
ccu: clock@1c20000 {
compatible = "allwinner,sun8i-r40-ccu";
reg = <0x01c20000 0x400>;
@@ -653,69 +715,6 @@
#size-cells = <0>;
};
- spi0: spi@1c05000 {
- compatible = "allwinner,sun8i-r40-spi",
- "allwinner,sun8i-h3-spi";
- reg = <0x01c05000 0x1000>;
- interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SPI0>, <&ccu CLK_SPI0>;
- clock-names = "ahb", "mod";
- resets = <&ccu RST_BUS_SPI0>;
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
- spi1: spi@1c06000 {
- compatible = "allwinner,sun8i-r40-spi",
- "allwinner,sun8i-h3-spi";
- reg = <0x01c06000 0x1000>;
- interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SPI1>, <&ccu CLK_SPI1>;
- clock-names = "ahb", "mod";
- resets = <&ccu RST_BUS_SPI1>;
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
- spi2: spi@1c07000 {
- compatible = "allwinner,sun8i-r40-spi",
- "allwinner,sun8i-h3-spi";
- reg = <0x01c07000 0x1000>;
- interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SPI2>, <&ccu CLK_SPI2>;
- clock-names = "ahb", "mod";
- resets = <&ccu RST_BUS_SPI2>;
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
- spi3: spi@1c0f000 {
- compatible = "allwinner,sun8i-r40-spi",
- "allwinner,sun8i-h3-spi";
- reg = <0x01c0f000 0x1000>;
- interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SPI3>, <&ccu CLK_SPI3>;
- clock-names = "ahb", "mod";
- resets = <&ccu RST_BUS_SPI3>;
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
- ahci: sata@1c18000 {
- compatible = "allwinner,sun8i-r40-ahci";
- reg = <0x01c18000 0x1000>;
- interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>;
- resets = <&ccu RST_BUS_SATA>;
- reset-names = "ahci";
- status = "disabled";
-
- };
-
gmac: ethernet@1c50000 {
compatible = "allwinner,sun8i-r40-gmac";
syscon = <&ccu>;
diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h
index f4fe4d02cef2..79fa327238e8 100644
--- a/arch/arm/include/asm/floppy.h
+++ b/arch/arm/include/asm/floppy.h
@@ -8,16 +8,18 @@
*/
#ifndef __ASM_ARM_FLOPPY_H
#define __ASM_ARM_FLOPPY_H
-#if 0
-#include <mach/floppy.h>
-#endif
-#define fd_outb(val,port) \
- do { \
- if ((port) == (u32)FD_DOR) \
- fd_setdor((val)); \
- else \
- outb((val),(port)); \
+#define fd_outb(val,port) \
+ do { \
+ int new_val = (val); \
+ if (((port) & 7) == FD_DOR) { \
+ if (new_val & 0xf0) \
+ new_val = (new_val & 0x0c) | \
+ floppy_selects[new_val & 3]; \
+ else \
+ new_val &= 0x0c; \
+ } \
+ outb(new_val, (port)); \
} while(0)
#define fd_inb(port) inb((port))
@@ -53,69 +55,7 @@ static inline int fd_dma_setup(void *data, unsigned int length,
* to a non-zero track, and then restoring it to track 0. If an error occurs,
* then there is no floppy drive present. [to be put back in again]
*/
-static unsigned char floppy_selects[2][4] =
-{
- { 0x10, 0x21, 0x23, 0x33 },
- { 0x10, 0x21, 0x23, 0x33 }
-};
-
-#define fd_setdor(dor) \
-do { \
- int new_dor = (dor); \
- if (new_dor & 0xf0) \
- new_dor = (new_dor & 0x0c) | floppy_selects[fdc][new_dor & 3]; \
- else \
- new_dor &= 0x0c; \
- outb(new_dor, FD_DOR); \
-} while (0)
-
-/*
- * Someday, we'll automatically detect which drives are present...
- */
-static inline void fd_scandrives (void)
-{
-#if 0
- int floppy, drive_count;
-
- fd_disable_irq();
- raw_cmd = &default_raw_cmd;
- raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_SEEK;
- raw_cmd->track = 0;
- raw_cmd->rate = ?;
- drive_count = 0;
- for (floppy = 0; floppy < 4; floppy ++) {
- current_drive = drive_count;
- /*
- * Turn on floppy motor
- */
- if (start_motor(redo_fd_request))
- continue;
- /*
- * Set up FDC
- */
- fdc_specify();
- /*
- * Tell FDC to recalibrate
- */
- output_byte(FD_RECALIBRATE);
- LAST_OUT(UNIT(floppy));
- /* wait for command to complete */
- if (!successful) {
- int i;
- for (i = drive_count; i < 3; i--)
- floppy_selects[fdc][i] = floppy_selects[fdc][i + 1];
- floppy_selects[fdc][3] = 0;
- floppy -= 1;
- } else
- drive_count++;
- }
-#else
- floppy_selects[0][0] = 0x10;
- floppy_selects[0][1] = 0x21;
- floppy_selects[0][2] = 0x23;
- floppy_selects[0][3] = 0x33;
-#endif
-}
+static unsigned char floppy_selects[4] = { 0x10, 0x21, 0x23, 0x33 };
#define FDC1 (0x3f0)
@@ -135,9 +75,7 @@ static inline void fd_scandrives (void)
*/
static void driveswap(int *ints, int dummy, int dummy2)
{
- floppy_selects[0][0] ^= floppy_selects[0][1];
- floppy_selects[0][1] ^= floppy_selects[0][0];
- floppy_selects[0][0] ^= floppy_selects[0][1];
+ swap(floppy_selects[0], floppy_selects[1]);
}
#define EXTRA_FLOPPY_PARAMS ,{ "driveswap", &driveswap, NULL, 0, 0 }
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index c89ac1b9d28b..e0330a25e1c6 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -95,6 +95,8 @@ static bool __init cntvct_functional(void)
*/
np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
if (!np)
+ np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer");
+ if (!np)
goto out_put;
if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 95b2e1ce559c..f8016e3db65d 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -118,7 +118,7 @@ ENTRY(arm_copy_from_user)
ENDPROC(arm_copy_from_user)
- .pushsection .fixup,"ax"
+ .pushsection .text.fixup,"ax"
.align 0
copy_abort_preamble
ldmfd sp!, {r1, r2, r3}
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 0bf375ec959b..55b71bb4baf8 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -53,7 +53,7 @@
* PSCI node is not added default, U-boot will add missing
* parts if it determines to use PSCI.
*/
- entry-method = "arm,psci";
+ entry-method = "psci";
CPU_PW20: cpu-pw20 {
compatible = "arm,idle-state";
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi
index 6082ae022136..d237162a8744 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi
@@ -20,6 +20,8 @@
};
&fman0 {
+ fsl,erratum-a050385;
+
/* these aliases provide the FMan ports mapping */
enet0: ethernet@e0000 {
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
index 4223a2352d45..dde50c88f5e3 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
@@ -119,12 +119,12 @@
ethernet@e4000 {
phy-handle = <&rgmii_phy1>;
- phy-connection-type = "rgmii-txid";
+ phy-connection-type = "rgmii-id";
};
ethernet@e6000 {
phy-handle = <&rgmii_phy2>;
- phy-connection-type = "rgmii-txid";
+ phy-connection-type = "rgmii-id";
};
ethernet@e8000 {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
index dbc23d6cd3b4..d53ccc56bb63 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
@@ -131,12 +131,12 @@
&fman0 {
ethernet@e4000 {
phy-handle = <&rgmii_phy1>;
- phy-connection-type = "rgmii";
+ phy-connection-type = "rgmii-id";
};
ethernet@e6000 {
phy-handle = <&rgmii_phy2>;
- phy-connection-type = "rgmii";
+ phy-connection-type = "rgmii-id";
};
ethernet@e8000 {
diff --git a/arch/arm64/boot/dts/sprd/sc9863a.dtsi b/arch/arm64/boot/dts/sprd/sc9863a.dtsi
index cd80756c888d..2c590ca1d079 100644
--- a/arch/arm64/boot/dts/sprd/sc9863a.dtsi
+++ b/arch/arm64/boot/dts/sprd/sc9863a.dtsi
@@ -108,7 +108,7 @@
};
idle-states {
- entry-method = "arm,psci";
+ entry-method = "psci";
CORE_PD: core-pd {
compatible = "arm,idle-state";
entry-latency-us = <4000>;
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index c1f9660d104c..37ca3e889848 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -55,10 +55,10 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
break;
}
chacha_4block_xor_neon(state, dst, src, nrounds, l);
- bytes -= CHACHA_BLOCK_SIZE * 5;
- src += CHACHA_BLOCK_SIZE * 5;
- dst += CHACHA_BLOCK_SIZE * 5;
- state[12] += 5;
+ bytes -= l;
+ src += l;
+ dst += l;
+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
}
}
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 324e7d5ab37e..5e5dc05d63a0 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -221,7 +221,7 @@ alternative_endif
.macro user_alt, label, oldinstr, newinstr, cond
9999: alternative_insn "\oldinstr", "\newinstr", \cond
- _ASM_EXTABLE 9999b, \label
+ _asm_extable 9999b, \label
.endm
/*
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index e4d862420bb4..d79ce6df9e12 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -29,11 +29,9 @@ typedef struct {
*/
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
-extern bool arm64_use_ng_mappings;
-
static inline bool arm64_kernel_unmapped_at_el0(void)
{
- return arm64_use_ng_mappings;
+ return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
}
typedef void (*bp_hardening_cb_t)(void);
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 6f87839f0249..1305e28225fc 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -23,11 +23,13 @@
#include <asm/pgtable-types.h>
+extern bool arm64_use_ng_mappings;
+
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0)
-#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0)
+#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
+#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 1dd22da1c3a9..803039d504de 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -25,8 +25,8 @@
#define __NR_compat_gettimeofday 78
#define __NR_compat_sigreturn 119
#define __NR_compat_rt_sigreturn 173
-#define __NR_compat_clock_getres 247
#define __NR_compat_clock_gettime 263
+#define __NR_compat_clock_getres 264
#define __NR_compat_clock_gettime64 403
#define __NR_compat_clock_getres_time64 406
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d4ed9a19d8fe..5407bf5d98ac 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -958,11 +958,22 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
+/*
+ * The number of CPUs online, not counting this CPU (which may not be
+ * fully online and so not counted in num_online_cpus()).
+ */
+static inline unsigned int num_other_online_cpus(void)
+{
+ unsigned int this_cpu_online = cpu_online(smp_processor_id());
+
+ return num_online_cpus() - this_cpu_online;
+}
+
void smp_send_stop(void)
{
unsigned long timeout;
- if (num_online_cpus() > 1) {
+ if (num_other_online_cpus()) {
cpumask_t mask;
cpumask_copy(&mask, cpu_online_mask);
@@ -975,10 +986,10 @@ void smp_send_stop(void)
/* Wait up to one second for other CPUs to stop */
timeout = USEC_PER_SEC;
- while (num_online_cpus() > 1 && timeout--)
+ while (num_other_online_cpus() && timeout--)
udelay(1);
- if (num_online_cpus() > 1)
+ if (num_other_online_cpus())
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(cpu_online_mask));
@@ -1001,7 +1012,11 @@ void crash_smp_send_stop(void)
cpus_stopped = 1;
- if (num_online_cpus() == 1) {
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0) {
sdei_mask_local_cpu();
return;
}
@@ -1009,7 +1024,7 @@ void crash_smp_send_stop(void)
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
pr_crit("SMP: stopping secondary CPUs\n");
smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 40712e49381b..c3a630440512 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -118,12 +118,11 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
dev->bsize = bsize;
dev->bshift = ffs(bsize) - 10;
- dev->queue = blk_alloc_queue(GFP_KERNEL);
+ dev->queue = blk_alloc_queue(nfhd_make_request, NUMA_NO_NODE);
if (dev->queue == NULL)
goto free_dev;
dev->queue->queuedata = dev;
- blk_queue_make_request(dev->queue, nfhd_make_request);
blk_queue_logical_block_size(dev->queue, bsize);
dev->disk = alloc_disk(16);
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 37b93166bf22..c340f947baa0 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -4,6 +4,8 @@
#include "jz4780.dtsi"
#include <dt-bindings/clock/ingenic,tcu.h>
#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/regulator/active-semi,8865-regulator.h>
/ {
compatible = "img,ci20", "ingenic,jz4780";
@@ -163,63 +165,71 @@
regulators {
vddcore: SUDCDC1 {
- regulator-name = "VDDCORE";
+ regulator-name = "DCDC_REG1";
regulator-min-microvolt = <1100000>;
regulator-max-microvolt = <1100000>;
regulator-always-on;
};
vddmem: SUDCDC2 {
- regulator-name = "VDDMEM";
+ regulator-name = "DCDC_REG2";
regulator-min-microvolt = <1500000>;
regulator-max-microvolt = <1500000>;
regulator-always-on;
};
vcc_33: SUDCDC3 {
- regulator-name = "VCC33";
+ regulator-name = "DCDC_REG3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
regulator-always-on;
};
vcc_50: SUDCDC4 {
- regulator-name = "VCC50";
+ regulator-name = "SUDCDC_REG4";
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
regulator-always-on;
};
vcc_25: LDO_REG5 {
- regulator-name = "VCC25";
+ regulator-name = "LDO_REG5";
regulator-min-microvolt = <2500000>;
regulator-max-microvolt = <2500000>;
regulator-always-on;
};
wifi_io: LDO_REG6 {
- regulator-name = "WIFIIO";
+ regulator-name = "LDO_REG6";
regulator-min-microvolt = <2500000>;
regulator-max-microvolt = <2500000>;
regulator-always-on;
};
vcc_28: LDO_REG7 {
- regulator-name = "VCC28";
+ regulator-name = "LDO_REG7";
regulator-min-microvolt = <2800000>;
regulator-max-microvolt = <2800000>;
regulator-always-on;
};
vcc_15: LDO_REG8 {
- regulator-name = "VCC15";
+ regulator-name = "LDO_REG8";
regulator-min-microvolt = <1500000>;
regulator-max-microvolt = <1500000>;
regulator-always-on;
};
- vcc_18: LDO_REG9 {
- regulator-name = "VCC18";
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
+ vrtc_18: LDO_REG9 {
+ regulator-name = "LDO_REG9";
+ /* Despite the datasheet stating 3.3V
+ * for REG9 and the driver expecting that,
+ * REG9 outputs 1.8V.
+ * Likely the CI20 uses a proprietary
+ * factory programmed chip variant.
+ * Since this is a simple on/off LDO the
+ * exact values do not matter.
+ */
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
regulator-always-on;
};
vcc_11: LDO_REG10 {
- regulator-name = "VCC11";
- regulator-min-microvolt = <1100000>;
- regulator-max-microvolt = <1100000>;
+ regulator-name = "LDO_REG10";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
regulator-always-on;
};
};
@@ -261,7 +271,9 @@
rtc@51 {
compatible = "nxp,pcf8563";
reg = <0x51>;
- interrupts = <110>;
+
+ interrupt-parent = <&gpf>;
+ interrupts = <30 IRQ_TYPE_LEVEL_LOW>;
};
};
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 1ac2752fb791..a7b469d89e2c 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -605,7 +605,8 @@ static void __init bootcmdline_init(char **cmdline_p)
* If we're configured to take boot arguments from DT, look for those
* now.
*/
- if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB))
+ if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB) ||
+ IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND))
of_scan_flat_dt(bootcmdline_scan_chosen, &dt_bootargs);
#endif
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 71034b54d74e..3801a2ef9bca 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -79,6 +79,11 @@ config MMU
config STACK_GROWSUP
def_bool y
+config ARCH_DEFCONFIG
+ string
+ default "arch/parisc/configs/generic-32bit_defconfig" if !64BIT
+ default "arch/parisc/configs/generic-64bit_defconfig" if 64BIT
+
config GENERIC_LOCKBREAK
bool
default y
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index dca8f2de8cf5..628cd8bb7ad8 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -34,6 +34,13 @@ CC_ARCHES = hppa hppa2.0 hppa1.1
LD_BFD := elf32-hppa-linux
endif
+# select defconfig based on actual architecture
+ifeq ($(shell uname -m),parisc64)
+ KBUILD_DEFCONFIG := generic-64bit_defconfig
+else
+ KBUILD_DEFCONFIG := generic-32bit_defconfig
+endif
+
export LD_BFD
ifneq ($(SUBARCH),$(UTS_MACHINE))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 729a0f12a752..db3a87319642 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1817,6 +1817,7 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ kvmppc_mmu_destroy_pr(vcpu);
free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1af96fb5dc6f..302e9dccdd6d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -759,7 +759,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
return 0;
out_vcpu_uninit:
- kvmppc_mmu_destroy(vcpu);
kvmppc_subarch_vcpu_uninit(vcpu);
return err;
}
@@ -792,7 +791,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvmppc_core_vcpu_free(vcpu);
- kvmppc_mmu_destroy(vcpu);
kvmppc_subarch_vcpu_uninit(vcpu);
}
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index db5664dde5ff..d2bed3fcb719 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -120,12 +120,6 @@ static void __init kasan_unmap_early_shadow_vmalloc(void)
unsigned long k_cur;
phys_addr_t pa = __pa(kasan_early_shadow_page);
- if (!early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
- int ret = kasan_init_shadow_page_tables(k_start, k_end);
-
- if (ret)
- panic("kasan: kasan_init_shadow_page_tables() failed");
- }
for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
pte_t *ptep = pte_offset_kernel(pmd, k_cur);
@@ -143,7 +137,8 @@ void __init kasan_mmu_init(void)
int ret;
struct memblock_region *reg;
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
+ IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
if (ret)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 1a3b5a5276be..cd5db57bfd41 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -50,7 +50,6 @@ config RISCV
select PCI_DOMAINS_GENERIC if PCI
select PCI_MSI if PCI
select RISCV_TIMER
- select UACCESS_MEMCPY if !MMU
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_ARCH_TOPOLOGY if SMP
select ARCH_HAS_PTE_SPECIAL
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 3078b2de0b2d..a131174a0a77 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -12,20 +12,6 @@ config SOC_SIFIVE
config SOC_VIRT
bool "QEMU Virt Machine"
- select VIRTIO_PCI
- select VIRTIO_BALLOON
- select VIRTIO_MMIO
- select VIRTIO_CONSOLE
- select VIRTIO_NET
- select NET_9P_VIRTIO
- select VIRTIO_BLK
- select SCSI_VIRTIO
- select DRM_VIRTIO_GPU
- select HW_RANDOM_VIRTIO
- select RPMSG_CHAR
- select RPMSG_VIRTIO
- select CRYPTO_DEV_VIRTIO
- select VIRTIO_INPUT
select POWER_RESET_SYSCON
select POWER_RESET_SYSCON_POWEROFF
select GOLDFISH
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index c8f084203067..2557c5372a25 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -31,6 +31,7 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_NETLINK_DIAG=y
CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
CONFIG_PCI=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_HOST_GENERIC=y
@@ -38,12 +39,15 @@ CONFIG_PCIE_XILINX=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
+CONFIG_SCSI_VIRTIO=y
CONFIG_ATA=y
CONFIG_SATA_AHCI=y
CONFIG_SATA_AHCI_PLATFORM=y
CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
CONFIG_MACB=y
CONFIG_E1000E=y
CONFIG_R8169=y
@@ -54,13 +58,16 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
CONFIG_HVC_RISCV_SBI=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_SPI=y
CONFIG_SPI_SIFIVE=y
# CONFIG_PTP_1588_CLOCK is not set
CONFIG_POWER_RESET=y
CONFIG_DRM=y
CONFIG_DRM_RADEON=y
+CONFIG_DRM_VIRTIO_GPU=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
@@ -74,6 +81,12 @@ CONFIG_USB_UAS=y
CONFIG_MMC=y
CONFIG_MMC_SPI=y
CONFIG_RTC_CLASS=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_VIRTIO=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_AUTOFS4_FS=y
@@ -88,16 +101,17 @@ CONFIG_NFS_V4_2=y
CONFIG_ROOT_NFS=y
CONFIG_9P_FS=y
CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_SPINLOCK=y
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index a844920a261f..0292879a9690 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -31,6 +31,7 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_NETLINK_DIAG=y
CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
CONFIG_PCI=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_HOST_GENERIC=y
@@ -38,12 +39,15 @@ CONFIG_PCIE_XILINX=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
+CONFIG_SCSI_VIRTIO=y
CONFIG_ATA=y
CONFIG_SATA_AHCI=y
CONFIG_SATA_AHCI_PLATFORM=y
CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
CONFIG_MACB=y
CONFIG_E1000E=y
CONFIG_R8169=y
@@ -54,11 +58,14 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
CONFIG_HVC_RISCV_SBI=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_VIRTIO=y
# CONFIG_PTP_1588_CLOCK is not set
CONFIG_POWER_RESET=y
CONFIG_DRM=y
CONFIG_DRM_RADEON=y
+CONFIG_DRM_VIRTIO_GPU=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
@@ -70,6 +77,12 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
CONFIG_RTC_CLASS=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_VIRTIO=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_AUTOFS4_FS=y
@@ -84,16 +97,17 @@ CONFIG_NFS_V4_2=y
CONFIG_ROOT_NFS=y
CONFIG_9P_FS=y
CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_SPINLOCK=y
diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h
index 6eaa2eedd694..a279b17a6aad 100644
--- a/arch/riscv/include/asm/clint.h
+++ b/arch/riscv/include/asm/clint.h
@@ -15,12 +15,12 @@ static inline void clint_send_ipi_single(unsigned long hartid)
writel(1, clint_ipi_base + hartid);
}
-static inline void clint_send_ipi_mask(const struct cpumask *hartid_mask)
+static inline void clint_send_ipi_mask(const struct cpumask *mask)
{
- int hartid;
+ int cpu;
- for_each_cpu(hartid, hartid_mask)
- clint_send_ipi_single(hartid);
+ for_each_cpu(cpu, mask)
+ clint_send_ipi_single(cpuid_to_hartid_map(cpu));
}
static inline void clint_clear_ipi(unsigned long hartid)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index e43041519edd..393f2014dfee 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -19,6 +19,47 @@
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
+#ifdef CONFIG_MMU
+
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END (PAGE_OFFSET - 1)
+#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
+
+#define BPF_JIT_REGION_SIZE (SZ_128M)
+#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END (VMALLOC_END)
+
+/*
+ * Roughly size the vmemmap space to be large enough to fit enough
+ * struct pages to map half the virtual address space. Then
+ * position vmemmap directly below the VMALLOC region.
+ */
+#define VMEMMAP_SHIFT \
+ (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
+#define VMEMMAP_END (VMALLOC_START - 1)
+#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
+
+/*
+ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
+ * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
+ */
+#define vmemmap ((struct page *)VMEMMAP_START)
+
+#define PCI_IO_SIZE SZ_16M
+#define PCI_IO_END VMEMMAP_START
+#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
+
+#define FIXADDR_TOP PCI_IO_START
+#ifdef CONFIG_64BIT
+#define FIXADDR_SIZE PMD_SIZE
+#else
+#define FIXADDR_SIZE PGDIR_SIZE
+#endif
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+#endif
+
#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>
#else
@@ -90,31 +131,6 @@ extern pgd_t swapper_pg_dir[];
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END (PAGE_OFFSET - 1)
-#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-
-#define BPF_JIT_REGION_SIZE (SZ_128M)
-#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
-#define BPF_JIT_REGION_END (VMALLOC_END)
-
-/*
- * Roughly size the vmemmap space to be large enough to fit enough
- * struct pages to map half the virtual address space. Then
- * position vmemmap directly below the VMALLOC region.
- */
-#define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
-#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
-#define VMEMMAP_END (VMALLOC_START - 1)
-#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
-
-/*
- * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
- * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
- */
-#define vmemmap ((struct page *)VMEMMAP_START)
-
static inline int pmd_present(pmd_t pmd)
{
return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
@@ -432,18 +448,6 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define PCI_IO_SIZE SZ_16M
-#define PCI_IO_END VMEMMAP_START
-#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
-
-#define FIXADDR_TOP PCI_IO_START
-#ifdef CONFIG_64BIT
-#define FIXADDR_SIZE PMD_SIZE
-#else
-#define FIXADDR_SIZE PGDIR_SIZE
-#endif
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-
/*
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index f462a183a9c2..8ce9d607b53d 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -11,6 +11,24 @@
/*
* User space memory access functions
*/
+
+extern unsigned long __must_check __asm_copy_to_user(void __user *to,
+ const void *from, unsigned long n);
+extern unsigned long __must_check __asm_copy_from_user(void *to,
+ const void __user *from, unsigned long n);
+
+static inline unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ return __asm_copy_from_user(to, from, n);
+}
+
+static inline unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ return __asm_copy_to_user(to, from, n);
+}
+
#ifdef CONFIG_MMU
#include <linux/errno.h>
#include <linux/compiler.h>
@@ -367,24 +385,6 @@ do { \
-EFAULT; \
})
-
-extern unsigned long __must_check __asm_copy_to_user(void __user *to,
- const void *from, unsigned long n);
-extern unsigned long __must_check __asm_copy_from_user(void *to,
- const void __user *from, unsigned long n);
-
-static inline unsigned long
-raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- return __asm_copy_from_user(to, from, n);
-}
-
-static inline unsigned long
-raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- return __asm_copy_to_user(to, from, n);
-}
-
extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern long __must_check strlen_user(const char __user *str);
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index eb878abcaaf8..e0a6293093f1 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -96,7 +96,7 @@ static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op)
if (IS_ENABLED(CONFIG_RISCV_SBI))
sbi_send_ipi(cpumask_bits(&hartid_mask));
else
- clint_send_ipi_mask(&hartid_mask);
+ clint_send_ipi_mask(mask);
}
static void send_ipi_single(int cpu, enum ipi_message_type op)
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 47e7a8204460..0d0db80800c4 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -2,5 +2,5 @@
lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
-lib-$(CONFIG_MMU) += uaccess.o
+lib-y += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d7ff30e45589..c2e6d4ba4e23 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3268,7 +3268,10 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
/* Initial reset is a superset of the normal reset */
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
- /* this equals initial cpu reset in pop, but we don't switch to ESA */
+ /*
+ * This equals initial cpu reset in pop, but we don't switch to ESA.
+ * We reset not only the internal data, but also ...
+ */
vcpu->arch.sie_block->gpsw.mask = 0;
vcpu->arch.sie_block->gpsw.addr = 0;
kvm_s390_set_prefix(vcpu, 0);
@@ -3278,6 +3281,19 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
+
+ /* ... the data in sync regs */
+ memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
+ vcpu->run->s.regs.ckc = 0;
+ vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
+ vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
+ vcpu->run->psw_addr = 0;
+ vcpu->run->psw_mask = 0;
+ vcpu->run->s.regs.todpr = 0;
+ vcpu->run->s.regs.cputm = 0;
+ vcpu->run->s.regs.ckc = 0;
+ vcpu->run->s.regs.pp = 0;
+ vcpu->run->s.regs.gbea = 1;
vcpu->run->s.regs.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
diff --git a/arch/unicore32/include/asm/io.h b/arch/unicore32/include/asm/io.h
index 3ca74e1cde7d..bd4e7c332f85 100644
--- a/arch/unicore32/include/asm/io.h
+++ b/arch/unicore32/include/asm/io.h
@@ -27,7 +27,7 @@ extern void __uc32_iounmap(volatile void __iomem *addr);
* ioremap and friends.
*
* ioremap takes a PCI memory address, as specified in
- * Documentation/io-mapping.txt.
+ * Documentation/driver-api/io-mapping.rst.
*
*/
#define ioremap(cookie, size) __uc32_ioremap(cookie, size)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 94df0868804b..513a55562d75 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -194,9 +194,10 @@ avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
+adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1)
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b69e00bf20b8..8c2e9eadee8a 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -11,6 +11,7 @@ avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no)
sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
+adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no)
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -39,7 +40,11 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
-obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+
+# These modules require the assembler to support ADX.
+ifeq ($(adx_supported),yes)
+ obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+endif
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index a6ea07f2aa84..4d867a752f0e 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -190,15 +190,12 @@ static int amd_uncore_event_init(struct perf_event *event)
/*
* NB and Last level cache counters (MSRs) are shared across all cores
- * that share the same NB / Last level cache. Interrupts can be directed
- * to a single target core, however, event counts generated by processes
- * running on other cores cannot be masked out. So we do not support
- * sampling and per-thread events.
+ * that share the same NB / Last level cache. On family 16h and below,
+ * interrupts can be directed to a single target core, however, event
+ * counts generated by processes running on other cores cannot be masked
+ * out. So we do not support sampling and per-thread events via
+ * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
*/
- if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
- return -EINVAL;
-
- /* and we do not enable counter overflow interrupts */
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;
@@ -306,7 +303,7 @@ static struct pmu amd_nb_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
};
static struct pmu amd_llc_pmu = {
@@ -317,7 +314,7 @@ static struct pmu amd_llc_pmu = {
.start = amd_uncore_start,
.stop = amd_uncore_stop,
.read = amd_uncore_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
};
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f3327cb56edf..4b263ffb793b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -299,6 +299,7 @@
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 2a8f2bd2e5cf..c06e8353efd3 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -360,7 +360,6 @@ struct x86_emulate_ctxt {
u64 d;
unsigned long _eip;
struct operand memop;
- /* Fields above regs are cleared together. */
unsigned long _regs[NR_VCPU_REGS];
struct operand *memopp;
struct fetch_cache fetch;
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4359b955e0b7..9d5b09913ef3 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -102,7 +102,7 @@
#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
-#define MCE_LOG_LEN 32
+#define MCE_LOG_MIN_LEN 32U
#define MCE_LOG_SIGNATURE "MACHINECHECK"
/* AMD Scalable MCA */
@@ -135,11 +135,11 @@
*/
struct mce_log_buffer {
char signature[12]; /* "MACHINECHECK" */
- unsigned len; /* = MCE_LOG_LEN */
+ unsigned len; /* = elements in .entry[] */
unsigned next;
unsigned flags;
unsigned recordlen; /* length of struct mce */
- struct mce entry[MCE_LOG_LEN];
+ struct mce entry[];
};
enum mce_notifier_prios {
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 2c5676b0a6e7..48293d15f1e1 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -838,13 +838,15 @@ static void free_moved_vector(struct apic_chip_data *apicd)
bool managed = apicd->is_managed;
/*
- * This should never happen. Managed interrupts are not
- * migrated except on CPU down, which does not involve the
- * cleanup vector. But try to keep the accounting correct
- * nevertheless.
+ * Managed interrupts are usually not migrated away
+ * from an online CPU, but CPU isolation 'managed_irq'
+ * can make that happen.
+ * 1) Activation does not take the isolation into account
+ * to keep the code simple
+ * 2) Migration away from an isolated CPU can happen when
+ * a non-isolated CPU which is in the calculated
+ * affinity mask comes online.
*/
- WARN_ON_ONCE(managed);
-
trace_vector_free_moved(apicd->irq, cpu, vector, managed);
irq_matrix_free(vector_matrix, cpu, vector, managed);
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1f875fbe1384..aef06c37d338 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -394,6 +394,35 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
}
+static void amd_detect_ppin(struct cpuinfo_x86 *c)
+{
+ unsigned long long val;
+
+ if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
+ return;
+
+ /* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */
+ if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val))
+ goto clear_ppin;
+
+ /* PPIN is locked in disabled mode, clear feature bit */
+ if ((val & 3UL) == 1UL)
+ goto clear_ppin;
+
+ /* If PPIN is disabled, try to enable it */
+ if (!(val & 2UL)) {
+ wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL);
+ rdmsrl_safe(MSR_AMD_PPIN_CTL, &val);
+ }
+
+ /* If PPIN_EN bit is 1, return from here; otherwise fall through */
+ if (val & 2UL)
+ return;
+
+clear_ppin:
+ clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
+}
+
u16 amd_get_nb_id(int cpu)
{
return per_cpu(cpu_llc_id, cpu);
@@ -941,6 +970,7 @@ static void init_amd(struct cpuinfo_x86 *c)
amd_detect_cmp(c);
amd_get_topology(c);
srat_detect_node(c);
+ amd_detect_ppin(c);
init_amd_cacheinfo(c);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c4f949611e4..dd06fce537fc 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -142,6 +142,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
+ else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
+ rdmsrl(MSR_AMD_PPIN, m->ppin);
m->microcode = boot_cpu_data.microcode;
}
@@ -1877,6 +1879,8 @@ bool filter_mce(struct mce *m)
{
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
return amd_filter_mce(m);
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ return intel_filter_mce(m);
return false;
}
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 7c8958dee103..d089567a9ce8 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -29,11 +29,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL };
* separate MCEs from kernel messages to avoid bogus bug reports.
*/
-static struct mce_log_buffer mcelog = {
- .signature = MCE_LOG_SIGNATURE,
- .len = MCE_LOG_LEN,
- .recordlen = sizeof(struct mce),
-};
+static struct mce_log_buffer *mcelog;
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
@@ -45,21 +41,21 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
mutex_lock(&mce_chrdev_read_mutex);
- entry = mcelog.next;
+ entry = mcelog->next;
/*
* When the buffer fills up discard new entries. Assume that the
* earlier errors are the more interesting ones:
*/
- if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+ if (entry >= mcelog->len) {
+ set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags);
goto unlock;
}
- mcelog.next = entry + 1;
+ mcelog->next = entry + 1;
- memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
- mcelog.entry[entry].finished = 1;
+ memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
+ mcelog->entry[entry].finished = 1;
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
@@ -214,21 +210,21 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
/* Only supports full reads right now */
err = -EINVAL;
- if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
+ if (*off != 0 || usize < mcelog->len * sizeof(struct mce))
goto out;
- next = mcelog.next;
+ next = mcelog->next;
err = 0;
for (i = 0; i < next; i++) {
- struct mce *m = &mcelog.entry[i];
+ struct mce *m = &mcelog->entry[i];
err |= copy_to_user(buf, m, sizeof(*m));
buf += sizeof(*m);
}
- memset(mcelog.entry, 0, next * sizeof(struct mce));
- mcelog.next = 0;
+ memset(mcelog->entry, 0, next * sizeof(struct mce));
+ mcelog->next = 0;
if (err)
err = -EFAULT;
@@ -242,7 +238,7 @@ out:
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
- if (READ_ONCE(mcelog.next))
+ if (READ_ONCE(mcelog->next))
return EPOLLIN | EPOLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
return EPOLLIN | EPOLLRDNORM;
@@ -261,13 +257,13 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
case MCE_GET_RECORD_LEN:
return put_user(sizeof(struct mce), p);
case MCE_GET_LOG_LEN:
- return put_user(MCE_LOG_LEN, p);
+ return put_user(mcelog->len, p);
case MCE_GETCLEAR_FLAGS: {
unsigned flags;
do {
- flags = mcelog.flags;
- } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+ flags = mcelog->flags;
+ } while (cmpxchg(&mcelog->flags, flags, 0) != flags);
return put_user(flags, p);
}
@@ -339,8 +335,18 @@ static struct miscdevice mce_chrdev_device = {
static __init int dev_mcelog_init_device(void)
{
+ int mce_log_len;
int err;
+ mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
+ mcelog = kzalloc(sizeof(*mcelog) + mce_log_len * sizeof(struct mce), GFP_KERNEL);
+ if (!mcelog)
+ return -ENOMEM;
+
+ strncpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
+ mcelog->len = mce_log_len;
+ mcelog->recordlen = sizeof(struct mce);
+
/* register character device /dev/mcelog */
err = misc_register(&mce_chrdev_device);
if (err) {
@@ -350,6 +356,7 @@ static __init int dev_mcelog_init_device(void)
else
pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
+ kfree(mcelog);
return err;
}
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 5627b1091b85..d8f9230d2034 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -493,17 +493,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
return;
if ((val & 3UL) == 1UL) {
- /* PPIN available but disabled: */
+ /* PPIN locked in disabled mode */
return;
}
- /* If PPIN is disabled, but not locked, try to enable: */
- if (!(val & 3UL)) {
+ /* If PPIN is disabled, try to enable */
+ if (!(val & 2UL)) {
wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
rdmsrl_safe(MSR_PPIN_CTL, &val);
}
- if ((val & 3UL) == 2UL)
+ /* Is the enable bit set? */
+ if (val & 2UL)
set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
}
}
@@ -520,3 +521,20 @@ void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
}
+
+bool intel_filter_mce(struct mce *m)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ /* MCE errata HSD131, HSM142, HSW131, and BDM48 */
+ if ((c->x86 == 6) &&
+ ((c->x86_model == INTEL_FAM6_HASWELL) ||
+ (c->x86_model == INTEL_FAM6_HASWELL_L) ||
+ (c->x86_model == INTEL_FAM6_BROADWELL) ||
+ (c->x86_model == INTEL_FAM6_HASWELL_G)) &&
+ (m->bank == 0) &&
+ ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
+ return true;
+
+ return false;
+}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b785c0d0b590..97db18441d2c 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -48,6 +48,7 @@ void cmci_disable_bank(int bank);
void intel_init_cmci(void);
void intel_init_lmce(void);
void intel_clear_lmce(void);
+bool intel_filter_mce(struct mce *m);
#else
# define cmci_intel_adjust_timer mce_adjust_timer_default
static inline bool mce_intel_cmci_poll(void) { return false; }
@@ -56,6 +57,7 @@ static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { }
static inline void intel_clear_lmce(void) { }
+static inline bool intel_filter_mce(struct mce *m) { return false; };
#endif
void mce_timer_kick(unsigned long interval);
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index 58b4ee3cda77..f36dc0742085 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -486,9 +486,14 @@ static int thermal_throttle_offline(unsigned int cpu)
{
struct thermal_state *state = &per_cpu(thermal_state, cpu);
struct device *dev = get_cpu_device(cpu);
+ u32 l;
+
+ /* Mask the thermal vector before draining evtl. pending work */
+ l = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
- cancel_delayed_work(&state->package_throttle.therm_work);
- cancel_delayed_work(&state->core_throttle.therm_work);
+ cancel_delayed_work_sync(&state->package_throttle.therm_work);
+ cancel_delayed_work_sync(&state->core_throttle.therm_work);
state->package_throttle.rate_control_active = false;
state->core_throttle.rate_control_active = false;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 1bb4927030af..9fea0757db92 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -68,7 +68,7 @@ config KVM_WERROR
depends on (X86_64 && !KASAN) || !COMPILE_TEST
depends on EXPERT
help
- Add -Werror to the build flags for (and only for) i915.ko.
+ Add -Werror to the build flags for KVM.
If in doubt, say "N".
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index dd19fb3539e0..bc00642e5d3b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5173,6 +5173,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
ctxt->fetch.ptr = ctxt->fetch.data;
ctxt->fetch.end = ctxt->fetch.data + insn_len;
ctxt->opcode_len = 1;
+ ctxt->intercept = x86_intercept_none;
if (insn_len > 0)
memcpy(ctxt->fetch.data, insn, insn_len);
else {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 7668fed1ce65..750ff0b29404 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -378,12 +378,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (e->fields.delivery_mode == APIC_DM_FIXED) {
struct kvm_lapic_irq irq;
- irq.shorthand = APIC_DEST_NOSHORT;
irq.vector = e->fields.vector;
irq.delivery_mode = e->fields.delivery_mode << 8;
- irq.dest_id = e->fields.dest_id;
irq.dest_mode =
kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
+ irq.level = false;
+ irq.trig_mode = e->fields.trig_mode;
+ irq.shorthand = APIC_DEST_NOSHORT;
+ irq.dest_id = e->fields.dest_id;
+ irq.msi_redir_hint = false;
bitmap_zero(&vcpu_bitmap, 16);
kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
&vcpu_bitmap);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e3099c642fec..7356a56e6282 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1445,6 +1445,8 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
}
}
+static void cancel_hv_timer(struct kvm_lapic *apic);
+
static void apic_update_lvtt(struct kvm_lapic *apic)
{
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1454,6 +1456,10 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
APIC_LVT_TIMER_TSCDEADLINE)) {
hrtimer_cancel(&apic->lapic_timer.timer);
+ preempt_disable();
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ preempt_enable();
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
apic->lapic_timer.period = 0;
apic->lapic_timer.tscdeadline = 0;
@@ -1715,7 +1721,7 @@ static void start_sw_period(struct kvm_lapic *apic)
hrtimer_start(&apic->lapic_timer.timer,
apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_HARD);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 24c0b2ba8fb9..50d1ebafe0b3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1933,14 +1933,6 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
static void __unregister_enc_region_locked(struct kvm *kvm,
struct enc_region *region)
{
- /*
- * The guest may change the memory encryption attribute from C=0 -> C=1
- * or vice versa for this memory range. Lets make sure caches are
- * flushed to ensure that guest data gets written into memory with
- * correct C-bit.
- */
- sev_clflush_pages(region->pages, region->npages);
-
sev_unpin_memory(kvm, region->pages, region->npages);
list_del(&region->list);
kfree(region);
@@ -1971,6 +1963,13 @@ static void sev_vm_destroy(struct kvm *kvm)
mutex_lock(&kvm->lock);
/*
+ * Ensure that all guest tagged cache entries are flushed before
+ * releasing the pages back to the system for use. CLFLUSH will
+ * not do this, so issue a WBINVD.
+ */
+ wbinvd_on_all_cpus();
+
+ /*
* if userspace was terminated before unregistering the memory regions
* then lets unpin all the registered memory.
*/
@@ -6312,7 +6311,8 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion *exit_fastpath)
{
if (!is_guest_mode(vcpu) &&
- to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE)
+ to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+ to_svm(vcpu)->vmcb->control.exit_info_1)
*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
}
@@ -7157,6 +7157,9 @@ static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
if (!svm_sev_enabled())
return -ENOTTY;
+ if (!argp)
+ return 0;
+
if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
return -EFAULT;
@@ -7284,6 +7287,13 @@ static int svm_unregister_enc_region(struct kvm *kvm,
goto failed;
}
+ /*
+ * Ensure that all guest tagged cache entries are flushed before
+ * releasing the pages back to the system for use. CLFLUSH will
+ * not do this, so issue a WBINVD.
+ */
+ wbinvd_on_all_cpus();
+
__unregister_enc_region_locked(kvm, region);
mutex_unlock(&kvm->lock);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e920d7834d73..9750e590c89d 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -224,7 +224,7 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
return;
kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
- vmx->nested.hv_evmcs_vmptr = -1ull;
+ vmx->nested.hv_evmcs_vmptr = 0;
vmx->nested.hv_evmcs = NULL;
}
@@ -1923,7 +1923,8 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
return 1;
- if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
+ if (unlikely(!vmx->nested.hv_evmcs ||
+ evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
if (!vmx->nested.hv_evmcs)
vmx->nested.current_vmptr = -1ull;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 40b1e6138cd5..079d9fbf278e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2338,6 +2338,17 @@ static void hardware_disable(void)
kvm_cpu_vmxoff();
}
+/*
+ * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID
+ * directly instead of going through cpu_has(), to ensure KVM is trapping
+ * ENCLS whenever it's supported in hardware. It does not matter whether
+ * the host OS supports or has enabled SGX.
+ */
+static bool cpu_has_sgx(void)
+{
+ return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0));
+}
+
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
u32 msr, u32 *result)
{
@@ -2418,8 +2429,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_PT_USE_GPA |
SECONDARY_EXEC_PT_CONCEAL_VMX |
- SECONDARY_EXEC_ENABLE_VMFUNC |
- SECONDARY_EXEC_ENCLS_EXITING;
+ SECONDARY_EXEC_ENABLE_VMFUNC;
+ if (cpu_has_sgx())
+ opt2 |= SECONDARY_EXEC_ENCLS_EXITING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -6275,7 +6287,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
#endif
ASM_CALL_CONSTRAINT
:
- THUNK_TARGET(entry),
+ [thunk_target]"r"(entry),
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5de200663f51..cf95c36cb4f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1554,7 +1554,10 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
*/
static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
{
- if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
+ if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
+ return 1;
+
+ if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
@@ -2444,7 +2447,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
- WARN_ON((s64)vcpu->hv_clock.system_time < 0);
/* If the host uses TSC clocksource, then it is stable */
pvclock_flags = 0;
@@ -7195,10 +7197,12 @@ static void kvm_timer_init(void)
cpu = get_cpu();
policy = cpufreq_cpu_get(cpu);
- if (policy && policy->cpuinfo.max_freq)
- max_tsc_khz = policy->cpuinfo.max_freq;
+ if (policy) {
+ if (policy->cpuinfo.max_freq)
+ max_tsc_khz = policy->cpuinfo.max_freq;
+ cpufreq_cpu_put(policy);
+ }
put_cpu();
- cpufreq_cpu_put(policy);
#endif
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fa4ea09593ab..629fdf13f846 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,7 +190,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
-void vmalloc_sync_all(void)
+static void vmalloc_sync(void)
{
unsigned long address;
@@ -217,6 +217,16 @@ void vmalloc_sync_all(void)
}
}
+void vmalloc_sync_mappings(void)
+{
+ vmalloc_sync();
+}
+
+void vmalloc_sync_unmappings(void)
+{
+ vmalloc_sync();
+}
+
/*
* 32-bit:
*
@@ -319,11 +329,23 @@ out:
#else /* CONFIG_X86_64: */
-void vmalloc_sync_all(void)
+void vmalloc_sync_mappings(void)
{
+ /*
+ * 64-bit mappings might allocate new p4d/pud pages
+ * that need to be propagated to all tasks' PGDs.
+ */
sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
}
+void vmalloc_sync_unmappings(void)
+{
+ /*
+ * Unmappings never allocate or free p4d/pud pages.
+ * No work is required here.
+ */
+}
+
/*
* 64-bit:
*
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 44e4beb4239f..18c637c0dc6f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -106,6 +106,22 @@ static unsigned int __ioremap_check_encrypted(struct resource *res)
return 0;
}
+/*
+ * The EFI runtime services data area is not covered by walk_mem_res(), but must
+ * be mapped encrypted when SEV is active.
+ */
+static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
+{
+ if (!sev_active())
+ return;
+
+ if (!IS_ENABLED(CONFIG_EFI))
+ return;
+
+ if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
+ desc->flags |= IORES_MAP_ENCRYPTED;
+}
+
static int __ioremap_collect_map_flags(struct resource *res, void *arg)
{
struct ioremap_desc *desc = arg;
@@ -124,6 +140,9 @@ static int __ioremap_collect_map_flags(struct resource *res, void *arg)
* To avoid multiple resource walks, this function walks resources marked as
* IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a
* resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
+ *
+ * After that, deal with misc other ranges in __ioremap_check_other() which do
+ * not fall into the above category.
*/
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
struct ioremap_desc *desc)
@@ -135,6 +154,8 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
memset(desc, 0, sizeof(struct ioremap_desc));
walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
+
+ __ioremap_check_other(addr, desc);
}
/*
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 393d251798c0..4d2a7a764602 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2039,10 +2039,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
}
/* and dreg_lo,sreg_lo */
EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
- /* and dreg_hi,sreg_hi */
- EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
- /* or dreg_lo,dreg_hi */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ if (is_jmp64) {
+ /* and dreg_hi,sreg_hi */
+ EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+ /* or dreg_lo,dreg_hi */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ }
goto emit_cond_jmp;
}
case BPF_JMP | BPF_JSET | BPF_K:
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 833109880165..49322b66cda9 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -267,13 +267,12 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
spin_lock_init(&dev->lock);
dev->users = 0;
- dev->queue = blk_alloc_queue(GFP_KERNEL);
+ dev->queue = blk_alloc_queue(simdisk_make_request, NUMA_NO_NODE);
if (dev->queue == NULL) {
pr_err("blk_alloc_queue failed\n");
goto out_alloc_queue;
}
- blk_queue_make_request(dev->queue, simdisk_make_request);
dev->queue->queuedata = dev;
dev->gd = alloc_disk(SIMDISK_MINORS);
diff --git a/block/Makefile b/block/Makefile
index 1a43750f4b01..206b96e9387f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,8 +8,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
- genhd.o partition-generic.o ioprio.o \
- badblocks.o partitions/ blk-rq-qos.o
+ genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index f0ff6654af28..68882b9b8f11 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -642,6 +642,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
{
struct bfq_entity *entity = &bfqq->entity;
+ /*
+ * Get extra reference to prevent bfqq from being freed in
+ * next possible expire or deactivate.
+ */
+ bfqq->ref++;
+
/* If bfqq is empty, then bfq_bfqq_expire also invokes
* bfq_del_bfqq_busy, thereby removing bfqq and its entity
* from data structures related to current group. Otherwise we
@@ -652,12 +658,6 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
false, BFQQE_PREEMPTED);
- /*
- * get extra reference to prevent bfqq from being freed in
- * next possible deactivate
- */
- bfqq->ref++;
-
if (bfq_bfqq_busy(bfqq))
bfq_deactivate_bfqq(bfqd, bfqq, false, false);
else if (entity->on_st_or_in_serv)
@@ -677,7 +677,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
bfq_schedule_dispatch(bfqd);
- /* release extra ref taken above */
+ /* release extra ref taken above, bfqq may happen to be freed now */
bfq_put_queue(bfqq);
}
@@ -714,10 +714,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
if (entity->sched_data != &bfqg->sched_data) {
bic_set_bfqq(bic, NULL, 0);
- bfq_log_bfqq(bfqd, async_bfqq,
- "bic_change_group: %p %d",
- async_bfqq, async_bfqq->ref);
- bfq_put_queue(async_bfqq);
+ bfq_release_process_ref(bfqd, async_bfqq);
}
}
@@ -818,39 +815,53 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
/**
* bfq_reparent_leaf_entity - move leaf entity to the root_group.
* @bfqd: the device data structure with the root group.
- * @entity: the entity to move.
+ * @entity: the entity to move, if entity is a leaf; or the parent entity
+ * of an active leaf entity to move, if entity is not a leaf.
*/
static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
- struct bfq_entity *entity)
+ struct bfq_entity *entity,
+ int ioprio_class)
{
- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+ struct bfq_queue *bfqq;
+ struct bfq_entity *child_entity = entity;
+ while (child_entity->my_sched_data) { /* leaf not reached yet */
+ struct bfq_sched_data *child_sd = child_entity->my_sched_data;
+ struct bfq_service_tree *child_st = child_sd->service_tree +
+ ioprio_class;
+ struct rb_root *child_active = &child_st->active;
+
+ child_entity = bfq_entity_of(rb_first(child_active));
+
+ if (!child_entity)
+ child_entity = child_sd->in_service_entity;
+ }
+
+ bfqq = bfq_entity_to_bfqq(child_entity);
bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}
/**
- * bfq_reparent_active_entities - move to the root group all active
- * entities.
+ * bfq_reparent_active_queues - move to the root group all active queues.
* @bfqd: the device data structure with the root group.
* @bfqg: the group to move from.
- * @st: the service tree with the entities.
+ * @st: the service tree to start the search from.
*/
-static void bfq_reparent_active_entities(struct bfq_data *bfqd,
- struct bfq_group *bfqg,
- struct bfq_service_tree *st)
+static void bfq_reparent_active_queues(struct bfq_data *bfqd,
+ struct bfq_group *bfqg,
+ struct bfq_service_tree *st,
+ int ioprio_class)
{
struct rb_root *active = &st->active;
- struct bfq_entity *entity = NULL;
-
- if (!RB_EMPTY_ROOT(&st->active))
- entity = bfq_entity_of(rb_first(active));
+ struct bfq_entity *entity;
- for (; entity ; entity = bfq_entity_of(rb_first(active)))
- bfq_reparent_leaf_entity(bfqd, entity);
+ while ((entity = bfq_entity_of(rb_first(active))))
+ bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);
if (bfqg->sched_data.in_service_entity)
bfq_reparent_leaf_entity(bfqd,
- bfqg->sched_data.in_service_entity);
+ bfqg->sched_data.in_service_entity,
+ ioprio_class);
}
/**
@@ -883,13 +894,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
st = bfqg->sched_data.service_tree + i;
/*
- * The idle tree may still contain bfq_queues belonging
- * to exited task because they never migrated to a different
- * cgroup from the one being destroyed now.
- */
- bfq_flush_idle_tree(st);
-
- /*
* It may happen that some queues are still active
* (busy) upon group destruction (if the corresponding
* processes have been forced to terminate). We move
@@ -901,7 +905,20 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
* There is no need to put the sync queues, as the
* scheduler has taken no reference.
*/
- bfq_reparent_active_entities(bfqd, bfqg, st);
+ bfq_reparent_active_queues(bfqd, bfqg, st, i);
+
+ /*
+ * The idle tree may still contain bfq_queues
+ * belonging to exited task because they never
+ * migrated to a different cgroup from the one being
+ * destroyed now. In addition, even
+ * bfq_reparent_active_queues() may happen to add some
+ * entities to the idle tree. It happens if, in some
+ * of the calls to bfq_bfqq_move() performed by
+ * bfq_reparent_active_queues(), the queue to move is
+ * empty and gets expired.
+ */
+ bfq_flush_idle_tree(st);
}
__bfq_deactivate_entity(entity, false);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 8c436abfaf14..78ba57efd16b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2716,8 +2716,6 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
}
}
-
-static
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
/*
@@ -6215,20 +6213,28 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
return bfqq;
}
-static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
+static void
+bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
- struct bfq_data *bfqd = bfqq->bfqd;
enum bfqq_expiration reason;
unsigned long flags;
spin_lock_irqsave(&bfqd->lock, flags);
- bfq_clear_bfqq_wait_request(bfqq);
+ /*
+ * Considering that bfqq may be in race, we should firstly check
+ * whether bfqq is in service before doing something on it. If
+ * the bfqq in race is not in service, it has already been expired
+ * through __bfq_bfqq_expire func and its wait_request flags has
+ * been cleared in __bfq_bfqd_reset_in_service func.
+ */
if (bfqq != bfqd->in_service_queue) {
spin_unlock_irqrestore(&bfqd->lock, flags);
return;
}
+ bfq_clear_bfqq_wait_request(bfqq);
+
if (bfq_bfqq_budget_timeout(bfqq))
/*
* Also here the queue can be safely expired
@@ -6273,7 +6279,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer)
* early.
*/
if (bfqq)
- bfq_idle_slice_timer_body(bfqq);
+ bfq_idle_slice_timer_body(bfqd, bfqq);
return HRTIMER_NORESTART;
}
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index d1233af9c684..cd224aaf9f52 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -955,6 +955,7 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
+void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
diff --git a/block/bio.c b/block/bio.c
index 94d697217887..21cbaa6a1c20 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -17,6 +17,7 @@
#include <linux/cgroup.h>
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
+#include <linux/sched/sysctl.h>
#include <trace/events/block.h>
#include "blk.h"
@@ -588,6 +589,49 @@ void bio_truncate(struct bio *bio, unsigned new_size)
}
/**
+ * guard_bio_eod - truncate a BIO to fit the block device
+ * @bio: bio to truncate
+ *
+ * This allows us to do IO even on the odd last sectors of a device, even if the
+ * block size is some multiple of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device, and clear the end of
+ * the buffer head manually. Truly out-of-range accesses will turn into actual
+ * I/O errors, this only handles the "we need to be able to do I/O at the final
+ * sector" case.
+ */
+void guard_bio_eod(struct bio *bio)
+{
+ sector_t maxsector;
+ struct hd_struct *part;
+
+ rcu_read_lock();
+ part = __disk_get_part(bio->bi_disk, bio->bi_partno);
+ if (part)
+ maxsector = part_nr_sects_read(part);
+ else
+ maxsector = get_capacity(bio->bi_disk);
+ rcu_read_unlock();
+
+ if (!maxsector)
+ return;
+
+ /*
+ * If the *whole* IO is past the end of the device,
+ * let it through, and the IO layer will turn it into
+ * an EIO.
+ */
+ if (unlikely(bio->bi_iter.bi_sector >= maxsector))
+ return;
+
+ maxsector -= bio->bi_iter.bi_sector;
+ if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
+ return;
+
+ bio_truncate(bio, maxsector << 9);
+}
+
+/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
*
@@ -679,6 +723,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
}
EXPORT_SYMBOL(bio_clone_fast);
+const char *bio_devname(struct bio *bio, char *buf)
+{
+ return disk_name(bio->bi_disk, bio->bi_partno, buf);
+}
+EXPORT_SYMBOL(bio_devname);
+
static inline bool page_is_mergeable(const struct bio_vec *bv,
struct page *page, unsigned int len, unsigned int off,
bool *same_page)
@@ -730,7 +780,7 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
*
* This should only be used by passthrough bios.
*/
-static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
bool *same_page)
{
@@ -1019,12 +1069,21 @@ static void submit_bio_wait_endio(struct bio *bio)
int submit_bio_wait(struct bio *bio)
{
DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
+ unsigned long hang_check;
bio->bi_private = &done;
bio->bi_end_io = submit_bio_wait_endio;
bio->bi_opf |= REQ_SYNC;
submit_bio(bio);
- wait_for_completion_io(&done);
+
+ /* Prevent hang_check timer from firing at us during very long I/O */
+ hang_check = sysctl_hung_task_timeout_secs;
+ if (hang_check)
+ while (!wait_for_completion_io_timeout(&done,
+ hang_check * (HZ/2)))
+ ;
+ else
+ wait_for_completion_io(&done);
return blk_status_to_errno(bio->bi_status);
}
@@ -1135,90 +1194,6 @@ void bio_list_copy_data(struct bio *dst, struct bio *src)
}
EXPORT_SYMBOL(bio_list_copy_data);
-struct bio_map_data {
- int is_our_pages;
- struct iov_iter iter;
- struct iovec iov[];
-};
-
-static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
- gfp_t gfp_mask)
-{
- struct bio_map_data *bmd;
- if (data->nr_segs > UIO_MAXIOV)
- return NULL;
-
- bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
- if (!bmd)
- return NULL;
- memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
- bmd->iter = *data;
- bmd->iter.iov = bmd->iov;
- return bmd;
-}
-
-/**
- * bio_copy_from_iter - copy all pages from iov_iter to bio
- * @bio: The &struct bio which describes the I/O as destination
- * @iter: iov_iter as source
- *
- * Copy all pages from iov_iter to bio.
- * Returns 0 on success, or error on failure.
- */
-static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
-{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- ssize_t ret;
-
- ret = copy_page_from_iter(bvec->bv_page,
- bvec->bv_offset,
- bvec->bv_len,
- iter);
-
- if (!iov_iter_count(iter))
- break;
-
- if (ret < bvec->bv_len)
- return -EFAULT;
- }
-
- return 0;
-}
-
-/**
- * bio_copy_to_iter - copy all pages from bio to iov_iter
- * @bio: The &struct bio which describes the I/O as source
- * @iter: iov_iter as destination
- *
- * Copy all pages from bio to iov_iter.
- * Returns 0 on success, or error on failure.
- */
-static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
-{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- ssize_t ret;
-
- ret = copy_page_to_iter(bvec->bv_page,
- bvec->bv_offset,
- bvec->bv_len,
- &iter);
-
- if (!iov_iter_count(&iter))
- break;
-
- if (ret < bvec->bv_len)
- return -EFAULT;
- }
-
- return 0;
-}
-
void bio_free_pages(struct bio *bio)
{
struct bio_vec *bvec;
@@ -1229,430 +1204,6 @@ void bio_free_pages(struct bio *bio)
}
EXPORT_SYMBOL(bio_free_pages);
-/**
- * bio_uncopy_user - finish previously mapped bio
- * @bio: bio being terminated
- *
- * Free pages allocated from bio_copy_user_iov() and write back data
- * to user space in case of a read.
- */
-int bio_uncopy_user(struct bio *bio)
-{
- struct bio_map_data *bmd = bio->bi_private;
- int ret = 0;
-
- if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
- /*
- * if we're in a workqueue, the request is orphaned, so
- * don't copy into a random user address space, just free
- * and return -EINTR so user space doesn't expect any data.
- */
- if (!current->mm)
- ret = -EINTR;
- else if (bio_data_dir(bio) == READ)
- ret = bio_copy_to_iter(bio, bmd->iter);
- if (bmd->is_our_pages)
- bio_free_pages(bio);
- }
- kfree(bmd);
- bio_put(bio);
- return ret;
-}
-
-/**
- * bio_copy_user_iov - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iter: iovec iterator
- * @gfp_mask: memory allocation flags
- *
- * Prepares and returns a bio for indirect user io, bouncing data
- * to/from kernel pages as necessary. Must be paired with
- * call bio_uncopy_user() on io completion.
- */
-struct bio *bio_copy_user_iov(struct request_queue *q,
- struct rq_map_data *map_data,
- struct iov_iter *iter,
- gfp_t gfp_mask)
-{
- struct bio_map_data *bmd;
- struct page *page;
- struct bio *bio;
- int i = 0, ret;
- int nr_pages;
- unsigned int len = iter->count;
- unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
-
- bmd = bio_alloc_map_data(iter, gfp_mask);
- if (!bmd)
- return ERR_PTR(-ENOMEM);
-
- /*
- * We need to do a deep copy of the iov_iter including the iovecs.
- * The caller provided iov might point to an on-stack or otherwise
- * shortlived one.
- */
- bmd->is_our_pages = map_data ? 0 : 1;
-
- nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
- if (nr_pages > BIO_MAX_PAGES)
- nr_pages = BIO_MAX_PAGES;
-
- ret = -ENOMEM;
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- goto out_bmd;
-
- ret = 0;
-
- if (map_data) {
- nr_pages = 1 << map_data->page_order;
- i = map_data->offset / PAGE_SIZE;
- }
- while (len) {
- unsigned int bytes = PAGE_SIZE;
-
- bytes -= offset;
-
- if (bytes > len)
- bytes = len;
-
- if (map_data) {
- if (i == map_data->nr_entries * nr_pages) {
- ret = -ENOMEM;
- break;
- }
-
- page = map_data->pages[i / nr_pages];
- page += (i % nr_pages);
-
- i++;
- } else {
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page) {
- ret = -ENOMEM;
- break;
- }
- }
-
- if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
- if (!map_data)
- __free_page(page);
- break;
- }
-
- len -= bytes;
- offset = 0;
- }
-
- if (ret)
- goto cleanup;
-
- if (map_data)
- map_data->offset += bio->bi_iter.bi_size;
-
- /*
- * success
- */
- if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) ||
- (map_data && map_data->from_user)) {
- ret = bio_copy_from_iter(bio, iter);
- if (ret)
- goto cleanup;
- } else {
- if (bmd->is_our_pages)
- zero_fill_bio(bio);
- iov_iter_advance(iter, bio->bi_iter.bi_size);
- }
-
- bio->bi_private = bmd;
- if (map_data && map_data->null_mapped)
- bio_set_flag(bio, BIO_NULL_MAPPED);
- return bio;
-cleanup:
- if (!map_data)
- bio_free_pages(bio);
- bio_put(bio);
-out_bmd:
- kfree(bmd);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_map_user_iov - map user iovec into bio
- * @q: the struct request_queue for the bio
- * @iter: iovec iterator
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user_iov(struct request_queue *q,
- struct iov_iter *iter,
- gfp_t gfp_mask)
-{
- int j;
- struct bio *bio;
- int ret;
-
- if (!iov_iter_count(iter))
- return ERR_PTR(-EINVAL);
-
- bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- while (iov_iter_count(iter)) {
- struct page **pages;
- ssize_t bytes;
- size_t offs, added = 0;
- int npages;
-
- bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
- if (unlikely(bytes <= 0)) {
- ret = bytes ? bytes : -EFAULT;
- goto out_unmap;
- }
-
- npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
-
- if (unlikely(offs & queue_dma_alignment(q))) {
- ret = -EINVAL;
- j = 0;
- } else {
- for (j = 0; j < npages; j++) {
- struct page *page = pages[j];
- unsigned int n = PAGE_SIZE - offs;
- bool same_page = false;
-
- if (n > bytes)
- n = bytes;
-
- if (!__bio_add_pc_page(q, bio, page, n, offs,
- &same_page)) {
- if (same_page)
- put_page(page);
- break;
- }
-
- added += n;
- bytes -= n;
- offs = 0;
- }
- iov_iter_advance(iter, added);
- }
- /*
- * release the pages we didn't map into the bio, if any
- */
- while (j < npages)
- put_page(pages[j++]);
- kvfree(pages);
- /* couldn't stuff something into bio? */
- if (bytes)
- break;
- }
-
- bio_set_flag(bio, BIO_USER_MAPPED);
-
- /*
- * subtle -- if bio_map_user_iov() ended up bouncing a bio,
- * it would normally disappear when its bi_end_io is run.
- * however, we need it for the unmap, so grab an extra
- * reference to it
- */
- bio_get(bio);
- return bio;
-
- out_unmap:
- bio_release_pages(bio, false);
- bio_put(bio);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_unmap_user - unmap a bio
- * @bio: the bio being unmapped
- *
- * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
- * process context.
- *
- * bio_unmap_user() may sleep.
- */
-void bio_unmap_user(struct bio *bio)
-{
- bio_release_pages(bio, bio_data_dir(bio) == READ);
- bio_put(bio);
- bio_put(bio);
-}
-
-static void bio_invalidate_vmalloc_pages(struct bio *bio)
-{
-#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
- if (bio->bi_private && !op_is_write(bio_op(bio))) {
- unsigned long i, len = 0;
-
- for (i = 0; i < bio->bi_vcnt; i++)
- len += bio->bi_io_vec[i].bv_len;
- invalidate_kernel_vmap_range(bio->bi_private, len);
- }
-#endif
-}
-
-static void bio_map_kern_endio(struct bio *bio)
-{
- bio_invalidate_vmalloc_pages(bio);
- bio_put(bio);
-}
-
-/**
- * bio_map_kern - map kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to map
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
- bool is_vmalloc = is_vmalloc_addr(data);
- struct page *page;
- int offset, i;
- struct bio *bio;
-
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- if (is_vmalloc) {
- flush_kernel_vmap_range(data, len);
- bio->bi_private = data;
- }
-
- offset = offset_in_page(kaddr);
- for (i = 0; i < nr_pages; i++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- if (!is_vmalloc)
- page = virt_to_page(data);
- else
- page = vmalloc_to_page(data);
- if (bio_add_pc_page(q, bio, page, bytes,
- offset) < bytes) {
- /* we don't support partial mappings */
- bio_put(bio);
- return ERR_PTR(-EINVAL);
- }
-
- data += bytes;
- len -= bytes;
- offset = 0;
- }
-
- bio->bi_end_io = bio_map_kern_endio;
- return bio;
-}
-
-static void bio_copy_kern_endio(struct bio *bio)
-{
- bio_free_pages(bio);
- bio_put(bio);
-}
-
-static void bio_copy_kern_endio_read(struct bio *bio)
-{
- char *p = bio->bi_private;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
- p += bvec->bv_len;
- }
-
- bio_copy_kern_endio(bio);
-}
-
-/**
- * bio_copy_kern - copy kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to copy
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio and page allocation
- * @reading: data direction is READ
- *
- * copy the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask, int reading)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- struct bio *bio;
- void *p = data;
- int nr_pages = 0;
-
- /*
- * Overflow, abort
- */
- if (end < start)
- return ERR_PTR(-EINVAL);
-
- nr_pages = end - start;
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- while (len) {
- struct page *page;
- unsigned int bytes = PAGE_SIZE;
-
- if (bytes > len)
- bytes = len;
-
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page)
- goto cleanup;
-
- if (!reading)
- memcpy(page_address(page), p, bytes);
-
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
- break;
-
- len -= bytes;
- p += bytes;
- }
-
- if (reading) {
- bio->bi_end_io = bio_copy_kern_endio_read;
- bio->bi_private = data;
- } else {
- bio->bi_end_io = bio_copy_kern_endio;
- }
-
- return bio;
-
-cleanup:
- bio_free_pages(bio);
- bio_put(bio);
- return ERR_PTR(-ENOMEM);
-}
-
/*
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
* for performing direct-IO in BIOs.
@@ -1752,14 +1303,14 @@ defer:
schedule_work(&bio_dirty_work);
}
-void update_io_ticks(struct hd_struct *part, unsigned long now)
+void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
{
unsigned long stamp;
again:
stamp = READ_ONCE(part->stamp);
if (unlikely(stamp != now)) {
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
- __part_stat_add(part, io_ticks, 1);
+ __part_stat_add(part, io_ticks, end ? now - stamp : 1);
}
}
if (part->partno) {
@@ -1775,7 +1326,7 @@ void generic_start_io_acct(struct request_queue *q, int op,
part_stat_lock();
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, false);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, sectors[sgrp], sectors);
part_inc_in_flight(q, part, op_is_write(op));
@@ -1793,9 +1344,8 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
part_stat_lock();
- update_io_ticks(part, now);
+ update_io_ticks(part, now, true);
part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_stat_add(part, time_in_queue, duration);
part_dec_in_flight(q, part, op_is_write(req_op));
part_stat_unlock();
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index a229b94d5390..c15a26096038 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1010,7 +1010,7 @@ unlock:
* blkcg_init_queue - initialize blkcg part of request queue
* @q: request_queue to initialize
*
- * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
+ * Called from __blk_alloc_queue(). Responsible for initializing blkcg
* part of new request_queue @q.
*
* RETURNS:
diff --git a/block/blk-core.c b/block/blk-core.c
index 60dc9552ef8d..7e4a1da0715e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -346,7 +346,6 @@ void blk_cleanup_queue(struct request_queue *q)
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
@@ -389,12 +388,6 @@ void blk_cleanup_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_cleanup_queue);
-struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
-{
- return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
-}
-EXPORT_SYMBOL(blk_alloc_queue);
-
/**
* blk_queue_enter() - try to increase q->q_usage_counter
* @q: request queue pointer
@@ -471,24 +464,19 @@ static void blk_timeout_work(struct work_struct *work)
{
}
-/**
- * blk_alloc_queue_node - allocate a request queue
- * @gfp_mask: memory allocation flags
- * @node_id: NUMA node to allocate memory from
- */
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
+struct request_queue *__blk_alloc_queue(int node_id)
{
struct request_queue *q;
int ret;
q = kmem_cache_alloc_node(blk_requestq_cachep,
- gfp_mask | __GFP_ZERO, node_id);
+ GFP_KERNEL | __GFP_ZERO, node_id);
if (!q)
return NULL;
q->last_merge = NULL;
- q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
+ q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
if (q->id < 0)
goto fail_q;
@@ -496,7 +484,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (ret)
goto fail_id;
- q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
+ q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id);
if (!q->backing_dev_info)
goto fail_split;
@@ -542,6 +530,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (blkcg_init_queue(q))
goto fail_ref;
+ blk_queue_dma_alignment(q, 511);
+ blk_set_default_limits(&q->limits);
+
return q;
fail_ref:
@@ -558,7 +549,22 @@ fail_q:
kmem_cache_free(blk_requestq_cachep, q);
return NULL;
}
-EXPORT_SYMBOL(blk_alloc_queue_node);
+
+/**
+ * blk_alloc_queue - allocate a request queue with a make_request handler
+ * @make_request: handler stored in the queue's ->make_request_fn; required
+ * @node_id: node id handed through to __blk_alloc_queue()
+ *
+ * Returns the new queue with ->nr_requests preset to BLKDEV_MAX_RQ, or NULL
+ * when @make_request is missing (a one-time warning is emitted) or when
+ * __blk_alloc_queue() fails.
+ */
+struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id)
+{
+	struct request_queue *queue = NULL;
+
+	if (!WARN_ON_ONCE(!make_request))
+		queue = __blk_alloc_queue(node_id);
+
+	if (queue) {
+		queue->make_request_fn = make_request;
+		queue->nr_requests = BLKDEV_MAX_RQ;
+	}
+	return queue;
+}
+EXPORT_SYMBOL(blk_alloc_queue);
bool blk_get_queue(struct request_queue *q)
{
@@ -1121,10 +1127,9 @@ blk_qc_t direct_make_request(struct bio *bio)
if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
if (nowait && !blk_queue_dying(q))
- bio->bi_status = BLK_STS_AGAIN;
+ bio_wouldblock_error(bio);
else
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
+ bio_io_error(bio);
return BLK_QC_T_NONE;
}
@@ -1203,7 +1208,7 @@ EXPORT_SYMBOL(submit_bio);
/**
* blk_cloned_rq_check_limits - Helper function to check a cloned request
- * for new the queue limits
+ * for the new queue limits
* @q: the queue
* @rq: the request being checked
*
@@ -1339,10 +1344,9 @@ void blk_account_io_done(struct request *req, u64 now)
part_stat_lock();
part = req->part;
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, true);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
- part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
@@ -1381,7 +1385,7 @@ void blk_account_io_start(struct request *rq, bool new_io)
rq->part = part;
}
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, false);
part_stat_unlock();
}
@@ -1583,23 +1587,6 @@ void blk_rq_unprep_clone(struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
-/*
- * Copy attributes of the original request to the clone request.
- * The actual data parts (e.g. ->cmd, ->sense) are not copied.
- */
-static void __blk_rq_prep_clone(struct request *dst, struct request *src)
-{
- dst->__sector = blk_rq_pos(src);
- dst->__data_len = blk_rq_bytes(src);
- if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
- dst->rq_flags |= RQF_SPECIAL_PAYLOAD;
- dst->special_vec = src->special_vec;
- }
- dst->nr_phys_segments = src->nr_phys_segments;
- dst->ioprio = src->ioprio;
- dst->extra_len = src->extra_len;
-}
-
/**
* blk_rq_prep_clone - Helper function to setup clone request
* @rq: the request to be setup
@@ -1612,8 +1599,6 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src)
*
* Description:
* Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
- * The actual data parts of @rq_src (e.g. ->cmd, ->sense)
- * are not copied, and copying such parts is the caller's responsibility.
* Also, pages which the original bios are pointing to are not copied
* and the cloned bios just point same pages.
* So cloned bios must be completed before original bios, which means
@@ -1644,7 +1629,16 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
rq->bio = rq->biotail = bio;
}
- __blk_rq_prep_clone(rq, rq_src);
+ /* Copy attributes of the original request to the clone request. */
+ rq->__sector = blk_rq_pos(rq_src);
+ rq->__data_len = blk_rq_bytes(rq_src);
+ if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+ rq->special_vec = rq_src->special_vec;
+ }
+ rq->nr_phys_segments = rq_src->nr_phys_segments;
+ rq->ioprio = rq_src->ioprio;
+ rq->extra_len = rq_src->extra_len;
return 0;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 5cc775bdb06a..c7f396e3d5e2 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -160,9 +160,6 @@ static void blk_account_io_flush(struct request *rq)
*
* CONTEXT:
* spin_lock_irq(fq->mq_flush_lock)
- *
- * RETURNS:
- * %true if requests were added to the dispatch queue, %false otherwise.
*/
static void blk_flush_complete_seq(struct request *rq,
struct blk_flush_queue *fq,
@@ -457,15 +454,6 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
if (!q)
return -ENXIO;
- /*
- * some block devices may not have their queue correctly set up here
- * (e.g. loop device without a backing file) and so issuing a flush
- * here will panic. Ensure there is a request function before issuing
- * the flush.
- */
- if (!q->make_request_fn)
- return -ENXIO;
-
bio = bio_alloc(gfp_mask, 0);
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
@@ -485,8 +473,8 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
}
EXPORT_SYMBOL(blkdev_issue_flush);
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
- int node, int cmd_size, gfp_t flags)
+struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
+ gfp_t flags)
{
struct blk_flush_queue *fq;
int rq_sz = sizeof(struct request);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 5ed59ac6ae58..9df50fb507ca 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -84,6 +84,7 @@ static void ioc_destroy_icq(struct io_cq *icq)
* making it impossible to determine icq_cache. Record it in @icq.
*/
icq->__rcu_icq_cache = et->icq_cache;
+ icq->flags |= ICQ_DESTROYED;
call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}
@@ -212,15 +213,21 @@ static void __ioc_clear_queue(struct list_head *icq_list)
{
unsigned long flags;
+ rcu_read_lock();
while (!list_empty(icq_list)) {
struct io_cq *icq = list_entry(icq_list->next,
struct io_cq, q_node);
struct io_context *ioc = icq->ioc;
spin_lock_irqsave(&ioc->lock, flags);
+ if (icq->flags & ICQ_DESTROYED) {
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ continue;
+ }
ioc_destroy_icq(icq);
spin_unlock_irqrestore(&ioc->lock, flags);
}
+ rcu_read_unlock();
}
/**
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 27ca68621137..db35ee682294 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -46,9 +46,6 @@
* If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
* device-specific coefficients.
*
- * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
- * device-specific coefficients.
- *
* 2. Control Strategy
*
* The device virtual time (vtime) is used as the primary control metric.
@@ -1318,7 +1315,7 @@ static bool iocg_is_idle(struct ioc_gq *iocg)
return false;
/* is something in flight? */
- if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime))
+ if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime))
return false;
return true;
diff --git a/block/blk-map.c b/block/blk-map.c
index b0790268ed9d..b72c361911a4 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -11,6 +11,514 @@
#include "blk.h"
+/*
+ * Per-bio state for the copy (bounce) path: bio_copy_user_iov() stores it in
+ * bio->bi_private and bio_uncopy_user() reads it back and frees it.
+ */
+struct bio_map_data {
+ /* non-zero when bio_copy_user_iov() allocated the pages itself (no map_data) */
+ int is_our_pages;
+ /* private copy of the caller's iterator; its ->iov points at iov[] below */
+ struct iov_iter iter;
+ /* copy of the caller's iovec array (data->nr_segs entries) */
+ struct iovec iov[];
+};
+
+/*
+ * Allocate a bio_map_data holding a deep copy of @data, iovec array included,
+ * with the copied iterator re-pointed at the private iovec array.
+ *
+ * Returns NULL when @data has more than UIO_MAXIOV segments or when the
+ * allocation fails.  ->is_our_pages is not initialized here; the caller sets it.
+ */
+static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
+					       gfp_t gfp_mask)
+{
+	struct bio_map_data *map = NULL;
+
+	if (data->nr_segs <= UIO_MAXIOV)
+		map = kmalloc(struct_size(map, iov, data->nr_segs), gfp_mask);
+	if (!map)
+		return NULL;
+
+	map->iter = *data;
+	map->iter.iov = map->iov;
+	memcpy(map->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
+	return map;
+}
+
+/**
+ * bio_copy_from_iter - fill the pages of a bio from an iov_iter
+ * @bio: destination; every segment of this &struct bio is visited in order
+ * @iter: source iterator, advanced as data is consumed
+ *
+ * Copying stops successfully as soon as @iter is drained, even if the
+ * current segment was only partly filled.
+ * Returns 0 on success, or -EFAULT when a segment comes up short while
+ * @iter still has data left.
+ */
+static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
+{
+	struct bvec_iter_all iter_all;
+	struct bio_vec *bvec;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		ssize_t copied = copy_page_from_iter(bvec->bv_page,
+						     bvec->bv_offset,
+						     bvec->bv_len, iter);
+
+		/* source exhausted: nothing more to copy */
+		if (iov_iter_count(iter) == 0)
+			break;
+
+		if (copied < bvec->bv_len)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * bio_copy_to_iter - drain the pages of a bio into an iov_iter
+ * @bio: source; every segment of this &struct bio is visited in order
+ * @iter: destination iterator, taken by value so the caller's copy is
+ *        left untouched
+ *
+ * Copying stops successfully as soon as @iter has no room left.
+ * Returns 0 on success, or -EFAULT when a segment comes up short while
+ * @iter still has room.
+ */
+static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
+{
+	struct bvec_iter_all iter_all;
+	struct bio_vec *bvec;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		ssize_t copied = copy_page_to_iter(bvec->bv_page,
+						   bvec->bv_offset,
+						   bvec->bv_len, &iter);
+
+		/* destination full: done */
+		if (iov_iter_count(&iter) == 0)
+			break;
+
+		if (copied < bvec->bv_len)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * bio_uncopy_user - tear down a bio set up by bio_copy_user_iov()
+ * @bio: bio being terminated
+ *
+ * For a read, copies the data back to the saved user iterator; then frees
+ * the pages if bio_copy_user_iov() allocated them, the bio_map_data and the
+ * bio itself.  Bios flagged BIO_NULL_MAPPED skip both the copy and the page
+ * free.
+ *
+ * Returns 0, the result of the copy-back, or -EINTR when there is no
+ * current->mm to copy into.
+ */
+static int bio_uncopy_user(struct bio *bio)
+{
+	struct bio_map_data *map = bio->bi_private;
+	int err = 0;
+
+	if (bio_flagged(bio, BIO_NULL_MAPPED))
+		goto out_free;
+
+	if (!current->mm) {
+		/*
+		 * if we're in a workqueue, the request is orphaned, so
+		 * don't copy into a random user address space, just free
+		 * and return -EINTR so user space doesn't expect any data.
+		 */
+		err = -EINTR;
+	} else if (bio_data_dir(bio) == READ) {
+		err = bio_copy_to_iter(bio, map->iter);
+	}
+
+	if (map->is_our_pages)
+		bio_free_pages(bio);
+
+out_free:
+	kfree(map);
+	bio_put(bio);
+	return err;
+}
+
+/**
+ * bio_copy_user_iov - copy user data to bio
+ * @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
+ *
+ * Prepares and returns a bio for indirect user io, bouncing data
+ * to/from kernel pages as necessary. Must be paired with
+ * call bio_uncopy_user() on io completion.
+ *
+ * Pages come from @map_data when it is given, otherwise they are allocated
+ * here. The returned bio may cover less than the whole of @iter (the page
+ * count is capped at BIO_MAX_PAGES and adding a page may be refused); @iter
+ * is advanced past what the bio covers.
+ *
+ * Returns the bio, or an ERR_PTR() on failure.
+ */
+static struct bio *bio_copy_user_iov(struct request_queue *q,
+ struct rq_map_data *map_data, struct iov_iter *iter,
+ gfp_t gfp_mask)
+{
+ struct bio_map_data *bmd;
+ struct page *page;
+ struct bio *bio;
+ int i = 0, ret;
+ int nr_pages;
+ unsigned int len = iter->count;
+ unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
+
+ /* also fails (reported as -ENOMEM) when @iter has too many segments */
+ bmd = bio_alloc_map_data(iter, gfp_mask);
+ if (!bmd)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * bio_alloc_map_data() above took a deep copy of the iov_iter
+ * including the iovecs, because the caller provided iov might point
+ * to an on-stack or otherwise shortlived one.
+ *
+ * Record whether the data pages are allocated here (no map_data) so
+ * that bio_uncopy_user() knows whether to free them.
+ */
+ bmd->is_our_pages = map_data ? 0 : 1;
+
+ nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+ if (nr_pages > BIO_MAX_PAGES)
+ nr_pages = BIO_MAX_PAGES;
+
+ ret = -ENOMEM;
+ bio = bio_kmalloc(gfp_mask, nr_pages);
+ if (!bio)
+ goto out_bmd;
+
+ ret = 0;
+
+ /*
+ * nr_pages changes meaning here: with map_data it becomes the number
+ * of pages per map_data->pages[] entry, and i counts single pages
+ * across those entries, starting at the page holding map_data->offset.
+ */
+ if (map_data) {
+ nr_pages = 1 << map_data->page_order;
+ i = map_data->offset / PAGE_SIZE;
+ }
+ while (len) {
+ unsigned int bytes = PAGE_SIZE;
+
+ /* only the first page can start at a non-zero offset */
+ bytes -= offset;
+
+ if (bytes > len)
+ bytes = len;
+
+ if (map_data) {
+ /* ran out of caller-supplied pages */
+ if (i == map_data->nr_entries * nr_pages) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ page = map_data->pages[i / nr_pages];
+ page += (i % nr_pages);
+
+ i++;
+ } else {
+ page = alloc_page(q->bounce_gfp | gfp_mask);
+ if (!page) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ /*
+ * The page was not accepted: free it if it was allocated here
+ * and stop. ret is still 0, so the bio built so far is used.
+ */
+ if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
+ if (!map_data)
+ __free_page(page);
+ break;
+ }
+
+ len -= bytes;
+ offset = 0;
+ }
+
+ if (ret)
+ goto cleanup;
+
+ /* remember how far into the caller's pages this bio reached */
+ if (map_data)
+ map_data->offset += bio->bi_iter.bi_size;
+
+ /*
+ * success
+ *
+ * Copy the data in now for a WRITE iterator (unless null-mapped) or
+ * when map_data->from_user is set. Otherwise zero pages allocated
+ * here and just step @iter past the range this bio covers.
+ */
+ if ((iov_iter_rw(iter) == WRITE &&
+ (!map_data || !map_data->null_mapped)) ||
+ (map_data && map_data->from_user)) {
+ ret = bio_copy_from_iter(bio, iter);
+ if (ret)
+ goto cleanup;
+ } else {
+ if (bmd->is_our_pages)
+ zero_fill_bio(bio);
+ iov_iter_advance(iter, bio->bi_iter.bi_size);
+ }
+
+ bio->bi_private = bmd;
+ if (map_data && map_data->null_mapped)
+ bio_set_flag(bio, BIO_NULL_MAPPED);
+ return bio;
+cleanup:
+ /* pages supplied through map_data stay with the caller */
+ if (!map_data)
+ bio_free_pages(bio);
+ bio_put(bio);
+out_bmd:
+ kfree(bmd);
+ return ERR_PTR(ret);
+}
+
+/**
+ * bio_map_user_iov - map user iovec into bio
+ * @q: the struct request_queue for the bio
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
+ *
+ * Map the user space address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ *
+ * @iter is advanced by the amount actually added to the bio, which can be
+ * less than its full length. On success the bio carries BIO_USER_MAPPED and
+ * one extra reference, both undone by bio_unmap_user().
+ */
+static struct bio *bio_map_user_iov(struct request_queue *q,
+ struct iov_iter *iter, gfp_t gfp_mask)
+{
+ int j;
+ struct bio *bio;
+ int ret;
+
+ if (!iov_iter_count(iter))
+ return ERR_PTR(-EINVAL);
+
+ bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
+
+ while (iov_iter_count(iter)) {
+ struct page **pages;
+ ssize_t bytes;
+ size_t offs, added = 0;
+ int npages;
+
+ /*
+ * Fetch the pages behind the next stretch of @iter. The pages[]
+ * array is released with kvfree() below; each page not handed
+ * to the bio is released with put_page().
+ */
+ bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
+ if (unlikely(bytes <= 0)) {
+ ret = bytes ? bytes : -EFAULT;
+ goto out_unmap;
+ }
+
+ npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
+
+ /*
+ * Start offset conflicts with the queue's DMA alignment mask:
+ * add nothing from this batch; j = 0 makes the release loop
+ * below drop every page.
+ * NOTE(review): ret is set here, but this path leaves through
+ * the "if (bytes) break" below rather than out_unmap, so the
+ * -EINVAL does not appear to reach the caller - confirm.
+ */
+ if (unlikely(offs & queue_dma_alignment(q))) {
+ ret = -EINVAL;
+ j = 0;
+ } else {
+ for (j = 0; j < npages; j++) {
+ struct page *page = pages[j];
+ unsigned int n = PAGE_SIZE - offs;
+ bool same_page = false;
+
+ if (n > bytes)
+ n = bytes;
+
+ /* stop at the first page the bio will not take */
+ if (!__bio_add_pc_page(q, bio, page, n, offs,
+ &same_page)) {
+ if (same_page)
+ put_page(page);
+ break;
+ }
+
+ added += n;
+ bytes -= n;
+ offs = 0;
+ }
+ iov_iter_advance(iter, added);
+ }
+ /*
+ * release the pages we didn't map into the bio, if any
+ */
+ while (j < npages)
+ put_page(pages[j++]);
+ kvfree(pages);
+ /* couldn't stuff something into bio? */
+ if (bytes)
+ break;
+ }
+
+ bio_set_flag(bio, BIO_USER_MAPPED);
+
+ /*
+ * subtle -- if bio_map_user_iov() ended up bouncing a bio,
+ * it would normally disappear when its bi_end_io is run.
+ * however, we need it for the unmap, so grab an extra
+ * reference to it
+ */
+ bio_get(bio);
+ return bio;
+
+ out_unmap:
+ /* drop the pages already attached to the bio, then the bio itself */
+ bio_release_pages(bio, false);
+ bio_put(bio);
+ return ERR_PTR(ret);
+}
+
+/**
+ * bio_unmap_user - undo bio_map_user_iov()
+ * @bio: the bio being unmapped
+ *
+ * Releases the pages attached to @bio and drops two references on it.
+ * Must be called from process context.
+ *
+ * bio_unmap_user() may sleep.
+ */
+static void bio_unmap_user(struct bio *bio)
+{
+	bool was_read = (bio_data_dir(bio) == READ);
+
+	bio_release_pages(bio, was_read);
+
+	/* one put for the extra bio_get() taken at map time, one for the bio */
+	bio_put(bio);
+	bio_put(bio);
+}
+
+/*
+ * For a non-write bio whose ->bi_private is set (bio_map_kern() stores the
+ * vmalloc address there), invalidate the kernel vmap range spanning the sum
+ * of all bvec lengths.  Compiles to nothing unless
+ * ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE is defined.
+ */
+static void bio_invalidate_vmalloc_pages(struct bio *bio)
+{
+#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+	unsigned long idx, total = 0;
+
+	if (!bio->bi_private || op_is_write(bio_op(bio)))
+		return;
+
+	for (idx = 0; idx < bio->bi_vcnt; idx++)
+		total += bio->bi_io_vec[idx].bv_len;
+
+	invalidate_kernel_vmap_range(bio->bi_private, total);
+#endif
+}
+
+/*
+ * ->bi_end_io handler installed by bio_map_kern(): invalidate the vmalloc
+ * mapping where that applies, then drop the bio.
+ */
+static void bio_map_kern_endio(struct bio *bio)
+{
+ bio_invalidate_vmalloc_pages(bio);
+ bio_put(bio);
+}
+
+/**
+ * bio_map_kern - build a bio directly on top of a kernel buffer
+ * @q: the struct request_queue for the bio
+ * @data: start of the kernel buffer (may be a vmalloc address)
+ * @len: length of the buffer in bytes
+ * @gfp_mask: allocation flags for the bio
+ *
+ * The buffer's own pages are added to the bio page by page; nothing is
+ * copied.  For a vmalloc buffer the range is flushed first and @data is kept
+ * in ->bi_private for the completion handler.
+ *
+ * Returns the bio, ERR_PTR(-ENOMEM) if the bio cannot be allocated, or
+ * ERR_PTR(-EINVAL) if a page cannot be added in full.
+ */
+static struct bio *bio_map_kern(struct request_queue *q, void *data,
+		unsigned int len, gfp_t gfp_mask)
+{
+	const unsigned long kaddr = (unsigned long)data;
+	const unsigned long last = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	const unsigned long first = kaddr >> PAGE_SHIFT;
+	const int nr_pages = last - first;
+	const bool is_vmalloc = is_vmalloc_addr(data);
+	struct bio *bio;
+	int offset, i;
+
+	bio = bio_kmalloc(gfp_mask, nr_pages);
+	if (!bio)
+		return ERR_PTR(-ENOMEM);
+
+	if (is_vmalloc) {
+		flush_kernel_vmap_range(data, len);
+		bio->bi_private = data;
+	}
+
+	/* only the first page can start part-way in */
+	offset = offset_in_page(kaddr);
+	for (i = 0; i < nr_pages && len; i++) {
+		unsigned int bytes = PAGE_SIZE - offset;
+		struct page *page;
+
+		if (bytes > len)
+			bytes = len;
+
+		page = is_vmalloc ? vmalloc_to_page(data) : virt_to_page(data);
+
+		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
+			/* we don't support partial mappings */
+			bio_put(bio);
+			return ERR_PTR(-EINVAL);
+		}
+
+		data += bytes;
+		len -= bytes;
+		offset = 0;
+	}
+
+	bio->bi_end_io = bio_map_kern_endio;
+	return bio;
+}
+
+/*
+ * ->bi_end_io handler bio_copy_kern() installs when not reading: free the
+ * pages attached to the bio, then drop the bio.
+ */
+static void bio_copy_kern_endio(struct bio *bio)
+{
+ bio_free_pages(bio);
+ bio_put(bio);
+}
+
+/*
+ * ->bi_end_io handler bio_copy_kern() installs for reads: copy every segment
+ * back, in order, into the buffer saved in ->bi_private, then free the pages
+ * and the bio through bio_copy_kern_endio().
+ */
+static void bio_copy_kern_endio_read(struct bio *bio)
+{
+	char *dst = bio->bi_private;
+	struct bvec_iter_all iter_all;
+	struct bio_vec *bvec;
+	size_t done = 0;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		memcpy(dst + done, page_address(bvec->bv_page), bvec->bv_len);
+		done += bvec->bv_len;
+	}
+
+	bio_copy_kern_endio(bio);
+}
+
+/**
+ * bio_copy_kern - copy kernel address into bio
+ * @q: the struct request_queue for the bio
+ * @data: pointer to buffer to copy
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio and page allocation
+ * @reading: data direction is READ
+ *
+ * Builds a bio out of freshly allocated pages. For a write the contents of
+ * @data are copied into those pages here; for a read the completion handler
+ * copies the pages back into @data. In both cases the completion handler
+ * frees the pages and the bio.
+ *
+ * Returns the bio, ERR_PTR(-EINVAL) if @data + @len overflows, or
+ * ERR_PTR(-ENOMEM) if the bio or a page cannot be allocated.
+ */
+static struct bio *bio_copy_kern(struct request_queue *q, void *data,
+		unsigned int len, gfp_t gfp_mask, int reading)
+{
+	unsigned long kaddr = (unsigned long)data;
+	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned long start = kaddr >> PAGE_SHIFT;
+	struct bio *bio;
+	void *p = data;
+	int nr_pages;
+
+	/*
+	 * Overflow, abort
+	 */
+	if (end < start)
+		return ERR_PTR(-EINVAL);
+
+	nr_pages = end - start;
+	bio = bio_kmalloc(gfp_mask, nr_pages);
+	if (!bio)
+		return ERR_PTR(-ENOMEM);
+
+	while (len) {
+		struct page *page;
+		unsigned int bytes = PAGE_SIZE;
+
+		if (bytes > len)
+			bytes = len;
+
+		page = alloc_page(q->bounce_gfp | gfp_mask);
+		if (!page)
+			goto cleanup;
+
+		if (!reading)
+			memcpy(page_address(page), p, bytes);
+
+		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
+			/*
+			 * The page never made it into the bio, so nothing
+			 * else will free it; release it here, as
+			 * bio_copy_user_iov() does, instead of leaking it.
+			 * NOTE(review): the bio built so far is still
+			 * returned, i.e. it may cover less than @len -
+			 * confirm that callers expect this.
+			 */
+			__free_page(page);
+			break;
+		}
+
+		len -= bytes;
+		p += bytes;
+	}
+
+	if (reading) {
+		bio->bi_end_io = bio_copy_kern_endio_read;
+		bio->bi_private = data;
+	} else {
+		bio->bi_end_io = bio_copy_kern_endio;
+	}
+
+	return bio;
+
+cleanup:
+	bio_free_pages(bio);
+	bio_put(bio);
+	return ERR_PTR(-ENOMEM);
+}
+
/*
* Append a bio to a passthrough request. Only works if the bio can be merged
* into the request based on the driver constraints.
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 856356b1619e..74cedea56034 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -398,6 +398,28 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
WARN_ON(e && (rq->tag != -1));
if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+ /*
+ * Firstly normal IO request is inserted to scheduler queue or
+ * sw queue, meantime we add flush request to dispatch queue(
+ * hctx->dispatch) directly and there is at most one in-flight
+ * flush request for each hw queue, so it doesn't matter to add
+ * flush request to tail or front of the dispatch queue.
+ *
+ * Secondly in case of NCQ, flush request belongs to non-NCQ
+ * command, and queueing it will fail when there is any
+ * in-flight normal IO request(NCQ command). When adding flush
+ * rq to the front of hctx->dispatch, it is easier to introduce
+ * extra time to flush rq's latency because of S_SCHED_RESTART
+ * compared with adding to the tail of dispatch queue, then
+ * chance of flush merge is increased, and less flush requests
+ * will be issued to controller. It is observed that ~10% time
+ * is saved in blktests block/004 on disk attached to AHCI/NCQ
+ * drive when adding flush rq to the front of hctx->dispatch.
+ *
+ * Simply queue flush rq to the front of hctx->dispatch so that
+ * intensive flush workloads can benefit in case of NCQ HW.
+ */
+ at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
blk_mq_request_bypass_insert(rq, at_head, false);
goto run;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d92088dec6c3..f6291ceedee4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1178,6 +1178,23 @@ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
#define BLK_MQ_RESOURCE_DELAY 3 /* ms units */
+/*
+ * Called when ->queue_rq() returned BLK_STS_RESOURCE or
+ * BLK_STS_DEV_RESOURCE for @rq: put @rq back onto @list and hand it to
+ * __blk_mq_requeue_request().
+ */
+static void blk_mq_handle_dev_resource(struct request *rq,
+				       struct list_head *list)
+{
+	/*
+	 * If an I/O scheduler has been configured and we got a driver tag for
+	 * the next request already, free it.
+	 */
+	if (!list_empty(list))
+		blk_mq_put_driver_tag(list_first_entry(list, struct request,
+						       queuelist));
+
+	list_add(&rq->queuelist, list);
+	__blk_mq_requeue_request(rq);
+}
+
/*
* Returns true if we did some work AND can potentially do more.
*/
@@ -1245,17 +1262,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
ret = q->mq_ops->queue_rq(hctx, &bd);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
- /*
- * If an I/O scheduler has been configured and we got a
- * driver tag for the next request already, free it
- * again.
- */
- if (!list_empty(list)) {
- nxt = list_first_entry(list, struct request, queuelist);
- blk_mq_put_driver_tag(nxt);
- }
- list_add(&rq->queuelist, list);
- __blk_mq_requeue_request(rq);
+ blk_mq_handle_dev_resource(rq, list);
break;
}
@@ -2409,8 +2416,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
- hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size,
- gfp);
+ hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp);
if (!hctx->fq)
goto free_bitmap;
@@ -2718,13 +2724,15 @@ void blk_mq_release(struct request_queue *q)
blk_mq_sysfs_deinit(q);
}
-struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
+struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
+ void *queuedata)
{
struct request_queue *uninit_q, *q;
- uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+ uninit_q = __blk_alloc_queue(set->numa_node);
if (!uninit_q)
return ERR_PTR(-ENOMEM);
+ uninit_q->queuedata = queuedata;
/*
* Initialize the queue without an elevator. device_add_disk() will do
@@ -2736,6 +2744,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
return q;
}
+EXPORT_SYMBOL_GPL(blk_mq_init_queue_data);
+
+/* Same as blk_mq_init_queue_data() with no queuedata (NULL). */
+struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
+{
+ return blk_mq_init_queue_data(set, NULL);
+}
EXPORT_SYMBOL(blk_mq_init_queue);
/*
@@ -2824,7 +2838,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
memcpy(new_hctxs, hctxs, q->nr_hw_queues *
sizeof(*hctxs));
q->queue_hw_ctx = new_hctxs;
- q->nr_hw_queues = set->nr_hw_queues;
kfree(hctxs);
hctxs = new_hctxs;
}
@@ -2926,11 +2939,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
INIT_LIST_HEAD(&q->requeue_list);
spin_lock_init(&q->requeue_lock);
- blk_queue_make_request(q, blk_mq_make_request);
-
- /*
- * Do this after blk_queue_make_request() overrides it...
- */
+ q->make_request_fn = blk_mq_make_request;
q->nr_requests = set->queue_depth;
/*
@@ -3023,6 +3032,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
{
+ /*
+ * blk_mq_map_queues() and multiple .map_queues() implementations
+ * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the
+ * number of hardware queues.
+ */
+ if (set->nr_maps == 1)
+ set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
+
if (set->ops->map_queues && !is_kdump_kernel()) {
int i;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c8eda2e7b91e..14397b4c4b53 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -87,42 +87,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
EXPORT_SYMBOL(blk_set_stacking_limits);
/**
- * blk_queue_make_request - define an alternate make_request function for a device
- * @q: the request queue for the device to be affected
- * @mfn: the alternate make_request function
- *
- * Description:
- * The normal way for &struct bios to be passed to a device
- * driver is for them to be collected into requests on a request
- * queue, and then to allow the device driver to select requests
- * off that queue when it is ready. This works well for many block
- * devices. However some block devices (typically virtual devices
- * such as md or lvm) do not benefit from the processing on the
- * request queue, and are served best by having the requests passed
- * directly to them. This can be achieved by providing a function
- * to blk_queue_make_request().
- *
- * Caveat:
- * The driver that does this *must* be able to deal appropriately
- * with buffers in "highmemory". This can be accomplished by either calling
- * kmap_atomic() to get a temporary kernel mapping, or by calling
- * blk_queue_bounce() to create a buffer in normal memory.
- **/
-void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
-{
- /*
- * set defaults
- */
- q->nr_requests = BLKDEV_MAX_RQ;
-
- q->make_request_fn = mfn;
- blk_queue_dma_alignment(q, 511);
-
- blk_set_default_limits(&q->limits);
-}
-EXPORT_SYMBOL(blk_queue_make_request);
-
-/**
* blk_queue_bounce_limit - set bounce buffer limit for queue
* @q: the request queue for the device
* @max_addr: the maximum address the device can handle
@@ -664,6 +628,9 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
top, bottom);
}
+
+ t->backing_dev_info->io_pages =
+ t->limits.max_sectors >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL(disk_stack_limits);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 05741c6f618b..f87956e0dcaf 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -20,6 +20,38 @@
#include "blk.h"
+/*
+ * Table mapping BLK_ZONE_COND_<name> to the string "<name>". Entries use
+ * designated initializers, so any index not listed below is left NULL.
+ */
+#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
+static const char *const zone_cond_name[] = {
+ ZONE_COND_NAME(NOT_WP),
+ ZONE_COND_NAME(EMPTY),
+ ZONE_COND_NAME(IMP_OPEN),
+ ZONE_COND_NAME(EXP_OPEN),
+ ZONE_COND_NAME(CLOSED),
+ ZONE_COND_NAME(READONLY),
+ ZONE_COND_NAME(FULL),
+ ZONE_COND_NAME(OFFLINE),
+};
+#undef ZONE_COND_NAME
+
+/**
+ * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
+ * @zone_cond: BLK_ZONE_COND_XXX.
+ *
+ * Description: Centralized block layer function to convert BLK_ZONE_COND_XXX
+ * into string format. Useful for debugging and tracing zone conditions. For
+ * an invalid BLK_ZONE_COND_XXX it returns the string "UNKNOWN".
+ */
+const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
+{
+	/*
+	 * Return straight from the table. Keeping the result in a static
+	 * local would let a previous valid lookup leak into a later invalid
+	 * one (and be shared between concurrent callers).
+	 */
+	if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
+		return zone_cond_name[zone_cond];
+
+	return "UNKNOWN";
+}
+EXPORT_SYMBOL_GPL(blk_zone_cond_str);
+
static inline sector_t blk_zone_start(struct request_queue *q,
sector_t sector)
{
@@ -173,7 +205,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
if (!op_is_zone_mgmt(op))
return -EOPNOTSUPP;
- if (!nr_sectors || end_sector > capacity)
+ if (end_sector <= sector || end_sector > capacity)
/* Out of range */
return -EINVAL;
diff --git a/block/blk.h b/block/blk.h
index 0b8884353f6b..0a94ec68af32 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -4,6 +4,7 @@
#include <linux/idr.h>
#include <linux/blk-mq.h>
+#include <linux/part_stat.h>
#include <xen/xen.h>
#include "blk-mq.h"
#include "blk-mq-sched.h"
@@ -55,8 +56,8 @@ is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
return hctx->fq->flush_rq == req;
}
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
- int node, int cmd_size, gfp_t flags);
+struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
+ gfp_t flags);
void blk_free_flush_queue(struct blk_flush_queue *q);
void blk_freeze_queue(struct request_queue *q);
@@ -149,6 +150,9 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
bip_next->bip_vec[0].bv_offset);
}
+
+void blk_integrity_add(struct gendisk *);
+void blk_integrity_del(struct gendisk *);
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline bool integrity_req_gap_back_merge(struct request *req,
struct bio *next)
@@ -171,6 +175,12 @@ static inline bool bio_integrity_endio(struct bio *bio)
static inline void bio_integrity_free(struct bio *bio)
{
}
+/* Stub for builds without CONFIG_BLK_DEV_INTEGRITY: does nothing. */
+static inline void blk_integrity_add(struct gendisk *disk)
+{
+}
+/* Stub for builds without CONFIG_BLK_DEV_INTEGRITY: does nothing. */
+static inline void blk_integrity_del(struct gendisk *disk)
+{
+}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
unsigned long blk_rq_timeout(unsigned long timeout);
@@ -214,6 +224,17 @@ static inline void elevator_exit(struct request_queue *q,
struct hd_struct *__disk_get_part(struct gendisk *disk, int partno);
+ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+
#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
@@ -354,4 +375,117 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q);
static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
#endif
+void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
+ int rw);
+void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
+ int rw);
+void update_io_ticks(struct hd_struct *part, unsigned long now, bool end);
+struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);
+
+int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
+void blk_free_devt(dev_t devt);
+void blk_invalidate_devt(dev_t devt);
+char *disk_name(struct gendisk *hd, int partno, char *buf);
+#define ADDPART_FLAG_NONE 0
+#define ADDPART_FLAG_RAID 1
+#define ADDPART_FLAG_WHOLEDISK 2
+struct hd_struct *__must_check add_partition(struct gendisk *disk, int partno,
+ sector_t start, sector_t len, int flags,
+ struct partition_meta_info *info);
+void __delete_partition(struct percpu_ref *ref);
+void delete_partition(struct gendisk *disk, int partno);
+int disk_expand_part_tbl(struct gendisk *disk, int target);
+
+static inline int hd_ref_init(struct hd_struct *part)
+{
+ if (percpu_ref_init(&part->ref, __delete_partition, 0,
+ GFP_KERNEL))
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void hd_struct_get(struct hd_struct *part)
+{
+ percpu_ref_get(&part->ref);
+}
+
+static inline int hd_struct_try_get(struct hd_struct *part)
+{
+ return percpu_ref_tryget_live(&part->ref);
+}
+
+static inline void hd_struct_put(struct hd_struct *part)
+{
+ percpu_ref_put(&part->ref);
+}
+
+static inline void hd_struct_kill(struct hd_struct *part)
+{
+ percpu_ref_kill(&part->ref);
+}
+
+static inline void hd_free_part(struct hd_struct *part)
+{
+ free_part_stats(part);
+ kfree(part->info);
+ percpu_ref_exit(&part->ref);
+}
+
+/*
+ * Any access of part->nr_sects which is not protected by partition
+ * bd_mutex or gendisk bdev bd_mutex, should be done using this
+ * accessor function.
+ *
+ * Code written along the lines of i_size_read() and i_size_write().
+ * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption
+ * on.
+ */
+static inline sector_t part_nr_sects_read(struct hd_struct *part)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ sector_t nr_sects;
+ unsigned seq;
+ do {
+ seq = read_seqcount_begin(&part->nr_sects_seq);
+ nr_sects = part->nr_sects;
+ } while (read_seqcount_retry(&part->nr_sects_seq, seq));
+ return nr_sects;
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ sector_t nr_sects;
+
+ preempt_disable();
+ nr_sects = part->nr_sects;
+ preempt_enable();
+ return nr_sects;
+#else
+ return part->nr_sects;
+#endif
+}
+
+/*
+ * Should be called with mutex lock held (typically bd_mutex) of partition
+ * to provide mutual exclusion among writers otherwise seqcount might be
+ * left in wrong state leaving the readers spinning infinitely.
+ */
+static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_begin(&part->nr_sects_seq);
+ part->nr_sects = size;
+ write_seqcount_end(&part->nr_sects_seq);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ part->nr_sects = size;
+ preempt_enable();
+#else
+ part->nr_sects = size;
+#endif
+}
+
+struct request_queue *__blk_alloc_queue(int node_id);
+
+int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+ struct page *page, unsigned int len, unsigned int offset,
+ bool *same_page);
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/genhd.c b/block/genhd.c
index ff6268970ddc..06b642b23a07 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -4,6 +4,7 @@
*/
#include <linux/module.h>
+#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kdev_t.h>
@@ -26,7 +27,7 @@
#include "blk.h"
static DEFINE_MUTEX(block_class_lock);
-struct kobject *block_depr;
+static struct kobject *block_depr;
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
@@ -46,6 +47,78 @@ static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
+/*
+ * Set disk capacity and notify if the size changes, is not currently
+ * zero and will not be set to zero
+ */
+void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size,
+ bool revalidate)
+{
+ sector_t capacity = get_capacity(disk);
+
+ set_capacity(disk, size);
+
+ if (revalidate)
+ revalidate_disk(disk);
+
+ if (capacity != size && capacity != 0 && size != 0) {
+ char *envp[] = { "RESIZE=1", NULL };
+
+ kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+ }
+}
+
+EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify);
+
+/*
+ * Format the device name of the indicated disk into the supplied buffer and
+ * return a pointer to that same buffer for convenience.
+ */
+char *disk_name(struct gendisk *hd, int partno, char *buf)
+{
+ if (!partno)
+ snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
+ else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
+ snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
+ else
+ snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
+
+ return buf;
+}
+
+const char *bdevname(struct block_device *bdev, char *buf)
+{
+ return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
+}
+EXPORT_SYMBOL(bdevname);
+
+#ifdef CONFIG_SMP
+static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+{
+ int cpu;
+
+ memset(stat, 0, sizeof(struct disk_stats));
+ for_each_possible_cpu(cpu) {
+ struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu);
+ int group;
+
+ for (group = 0; group < NR_STAT_GROUPS; group++) {
+ stat->nsecs[group] += ptr->nsecs[group];
+ stat->sectors[group] += ptr->sectors[group];
+ stat->ios[group] += ptr->ios[group];
+ stat->merges[group] += ptr->merges[group];
+ }
+
+ stat->io_ticks += ptr->io_ticks;
+ }
+}
+#else /* CONFIG_SMP */
+static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+{
+ memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
+}
+#endif /* CONFIG_SMP */
+
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
{
if (queue_is_mq(q))
@@ -66,7 +139,8 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
part_stat_local_dec(&part_to_disk(part)->part0, in_flight[rw]);
}
-unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
+static unsigned int part_in_flight(struct request_queue *q,
+ struct hd_struct *part)
{
int cpu;
unsigned int inflight;
@@ -86,8 +160,8 @@ unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
return inflight;
}
-void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2])
+static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2])
{
int cpu;
@@ -143,7 +217,6 @@ struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
return part;
}
-EXPORT_SYMBOL_GPL(disk_get_part);
/**
* disk_part_iter_init - initialize partition iterator
@@ -299,7 +372,42 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
}
return &disk->part0;
}
-EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
+
+/**
+ * disk_has_partitions
+ * @disk: gendisk of interest
+ *
+ * Walk through the partition table and check if valid partition exists.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * True if the gendisk has at least one valid non-zero size partition.
+ * Otherwise false.
+ */
+bool disk_has_partitions(struct gendisk *disk)
+{
+ struct disk_part_tbl *ptbl;
+ int i;
+ bool ret = false;
+
+ rcu_read_lock();
+ ptbl = rcu_dereference(disk->part_tbl);
+
+ /* Iterate partitions skipping the whole device at index 0 */
+ for (i = 1; i < ptbl->len; i++) {
+ if (rcu_dereference(ptbl->part[i])) {
+ ret = true;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(disk_has_partitions);
/*
* Can be deleted altogether. Later.
@@ -908,7 +1016,6 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
}
return disk;
}
-EXPORT_SYMBOL(get_gendisk);
/**
* bdget_disk - do bdget() by gendisk and partition number
@@ -1154,6 +1261,67 @@ static ssize_t disk_ro_show(struct device *dev,
return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
}
+ssize_t part_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)part_nr_sects_read(p));
+}
+
+ssize_t part_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q = part_to_disk(p)->queue;
+ struct disk_stats stat;
+ unsigned int inflight;
+
+ part_stat_read_all(p, &stat);
+ inflight = part_in_flight(q, p);
+
+ return sprintf(buf,
+ "%8lu %8lu %8llu %8u "
+ "%8lu %8lu %8llu %8u "
+ "%8u %8u %8u "
+ "%8lu %8lu %8llu %8u "
+ "%8lu %8u"
+ "\n",
+ stat.ios[STAT_READ],
+ stat.merges[STAT_READ],
+ (unsigned long long)stat.sectors[STAT_READ],
+ (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
+ stat.ios[STAT_WRITE],
+ stat.merges[STAT_WRITE],
+ (unsigned long long)stat.sectors[STAT_WRITE],
+ (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
+ inflight,
+ jiffies_to_msecs(stat.io_ticks),
+ (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+ stat.nsecs[STAT_WRITE] +
+ stat.nsecs[STAT_DISCARD] +
+ stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_DISCARD],
+ stat.merges[STAT_DISCARD],
+ (unsigned long long)stat.sectors[STAT_DISCARD],
+ (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
+ stat.ios[STAT_FLUSH],
+ (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
+}
+
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q = part_to_disk(p)->queue;
+ unsigned int inflight[2];
+
+ part_in_flight_rw(q, p, inflight);
+ return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
+}
+
static ssize_t disk_capability_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1192,10 +1360,33 @@ static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
+
#ifdef CONFIG_FAIL_MAKE_REQUEST
+ssize_t part_fail_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+
+ return sprintf(buf, "%d\n", p->make_it_fail);
+}
+
+ssize_t part_fail_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ int i;
+
+ if (count > 0 && sscanf(buf, "%d", &i) > 0)
+ p->make_it_fail = (i == 0) ? 0 : 1;
+
+ return count;
+}
+
static struct device_attribute dev_attr_fail =
__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
-#endif
+#endif /* CONFIG_FAIL_MAKE_REQUEST */
+
#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
@@ -1342,8 +1533,8 @@ static char *block_devnode(struct device *dev, umode_t *mode,
{
struct gendisk *disk = dev_to_disk(dev);
- if (disk->devnode)
- return disk->devnode(disk, mode);
+ if (disk->fops->devnode)
+ return disk->fops->devnode(disk, mode);
return NULL;
}
@@ -1369,6 +1560,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
struct hd_struct *hd;
char buf[BDEVNAME_SIZE];
unsigned int inflight;
+ struct disk_stats stat;
/*
if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
@@ -1380,7 +1572,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
+ part_stat_read_all(hd, &stat);
inflight = part_in_flight(gp->queue, hd);
+
seq_printf(seqf, "%4d %7d %s "
"%lu %lu %lu %u "
"%lu %lu %lu %u "
@@ -1390,23 +1584,31 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
- part_stat_read(hd, ios[STAT_READ]),
- part_stat_read(hd, merges[STAT_READ]),
- part_stat_read(hd, sectors[STAT_READ]),
- (unsigned int)part_stat_read_msecs(hd, STAT_READ),
- part_stat_read(hd, ios[STAT_WRITE]),
- part_stat_read(hd, merges[STAT_WRITE]),
- part_stat_read(hd, sectors[STAT_WRITE]),
- (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
+ stat.ios[STAT_READ],
+ stat.merges[STAT_READ],
+ stat.sectors[STAT_READ],
+ (unsigned int)div_u64(stat.nsecs[STAT_READ],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_WRITE],
+ stat.merges[STAT_WRITE],
+ stat.sectors[STAT_WRITE],
+ (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
+ NSEC_PER_MSEC),
inflight,
- jiffies_to_msecs(part_stat_read(hd, io_ticks)),
- jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
- part_stat_read(hd, ios[STAT_DISCARD]),
- part_stat_read(hd, merges[STAT_DISCARD]),
- part_stat_read(hd, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
- part_stat_read(hd, ios[STAT_FLUSH]),
- (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
+ jiffies_to_msecs(stat.io_ticks),
+ (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+ stat.nsecs[STAT_WRITE] +
+ stat.nsecs[STAT_DISCARD] +
+ stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_DISCARD],
+ stat.merges[STAT_DISCARD],
+ stat.sectors[STAT_DISCARD],
+ (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_FLUSH],
+ (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC)
);
}
disk_part_iter_exit(&piter);
@@ -1463,7 +1665,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
class_dev_iter_exit(&iter);
return devt;
}
-EXPORT_SYMBOL(blk_lookup_devt);
struct gendisk *__alloc_disk_node(int minors, int node_id)
{
diff --git a/block/ioctl.c b/block/ioctl.c
index 127194b9f9bd..6e827de1a4c4 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -11,6 +11,7 @@
#include <linux/blktrace_api.h>
#include <linux/pr.h>
#include <linux/uaccess.h>
+#include "blk.h"
static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition __user *upart, int op)
diff --git a/block/opal_proto.h b/block/opal_proto.h
index 325cbba2465f..b486b3ec7dc4 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -36,6 +36,7 @@ enum opal_response_token {
#define DTAERROR_NO_METHOD_STATUS 0x89
#define GENERIC_HOST_SESSION_NUM 0x41
+#define FIRST_TPER_SESSION_NUM 4096
#define TPER_SYNC_SUPPORTED 0x01
#define MBR_ENABLED_MASK 0x10
diff --git a/block/partitions/Makefile b/block/partitions/Makefile
index 2f276b677c81..a7f05cdb02a8 100644
--- a/block/partitions/Makefile
+++ b/block/partitions/Makefile
@@ -3,8 +3,7 @@
# Makefile for the linux kernel.
#
-obj-$(CONFIG_BLOCK) := check.o
-
+obj-$(CONFIG_BLOCK) += core.o
obj-$(CONFIG_ACORN_PARTITION) += acorn.o
obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
obj-$(CONFIG_ATARI_PARTITION) += atari.o
diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c
index 7587700fad4a..c64c57b958bf 100644
--- a/block/partitions/acorn.c
+++ b/block/partitions/acorn.c
@@ -11,7 +11,6 @@
#include <linux/adfs_fs.h>
#include "check.h"
-#include "acorn.h"
/*
* Partition types. (Oh for reusability)
diff --git a/block/partitions/acorn.h b/block/partitions/acorn.h
deleted file mode 100644
index 67b06601ca4c..000000000000
--- a/block/partitions/acorn.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/fs/partitions/acorn.h
- *
- * Copyright (C) 1996-2001 Russell King.
- *
- * I _hate_ this partitioning mess - why can't we have one defined
- * format, and everyone stick to it?
- */
-
-int adfspart_check_CUMANA(struct parsed_partitions *state);
-int adfspart_check_ADFS(struct parsed_partitions *state);
-int adfspart_check_ICS(struct parsed_partitions *state);
-int adfspart_check_POWERTEC(struct parsed_partitions *state);
-int adfspart_check_EESOX(struct parsed_partitions *state);
diff --git a/block/partitions/aix.c b/block/partitions/aix.c
index 903f3ed175d0..c7b4fd1a4a97 100644
--- a/block/partitions/aix.c
+++ b/block/partitions/aix.c
@@ -6,7 +6,6 @@
*/
#include "check.h"
-#include "aix.h"
struct lvm_rec {
char lvm_id[4]; /* "_LVM" */
diff --git a/block/partitions/aix.h b/block/partitions/aix.h
deleted file mode 100644
index b4449f0b9f2b..000000000000
--- a/block/partitions/aix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-extern int aix_partition(struct parsed_partitions *state);
diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c
index 560936617d9c..9526491d9aed 100644
--- a/block/partitions/amiga.c
+++ b/block/partitions/amiga.c
@@ -14,7 +14,6 @@
#include <linux/affs_hardblocks.h>
#include "check.h"
-#include "amiga.h"
static __inline__ u32
checksum_block(__be32 *m, int size)
@@ -42,9 +41,8 @@ int amiga_partition(struct parsed_partitions *state)
goto rdb_done;
data = read_part_sector(state, blk, &sect);
if (!data) {
- if (warn_no_part)
- pr_err("Dev %s: unable to read RDB block %d\n",
- bdevname(state->bdev, b), blk);
+ pr_err("Dev %s: unable to read RDB block %d\n",
+ bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
}
@@ -85,9 +83,8 @@ int amiga_partition(struct parsed_partitions *state)
blk *= blksize; /* Read in terms partition table understands */
data = read_part_sector(state, blk, &sect);
if (!data) {
- if (warn_no_part)
- pr_err("Dev %s: unable to read partition block %d\n",
- bdevname(state->bdev, b), blk);
+ pr_err("Dev %s: unable to read partition block %d\n",
+ bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
}
diff --git a/block/partitions/amiga.h b/block/partitions/amiga.h
deleted file mode 100644
index 7e63f4d9d969..000000000000
--- a/block/partitions/amiga.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/amiga.h
- */
-
-int amiga_partition(struct parsed_partitions *state);
-
diff --git a/block/partitions/atari.h b/block/partitions/atari.h
index 01c2b9457394..678202442fd3 100644
--- a/block/partitions/atari.h
+++ b/block/partitions/atari.h
@@ -34,4 +34,3 @@ struct rootsector
u16 checksum; /* checksum for bootable disks */
} __packed;
-int atari_partition(struct parsed_partitions *state);
diff --git a/block/partitions/check.c b/block/partitions/check.c
deleted file mode 100644
index ffe408fead0c..000000000000
--- a/block/partitions/check.c
+++ /dev/null
@@ -1,198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * fs/partitions/check.c
- *
- * Code extracted from drivers/block/genhd.c
- * Copyright (C) 1991-1998 Linus Torvalds
- * Re-organised Feb 1998 Russell King
- *
- * We now have independent partition support from the
- * block drivers, which allows all the partition code to
- * be grouped in one location, and it to be mostly self
- * contained.
- *
- * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
- */
-
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/ctype.h>
-#include <linux/genhd.h>
-
-#include "check.h"
-
-#include "acorn.h"
-#include "amiga.h"
-#include "atari.h"
-#include "ldm.h"
-#include "mac.h"
-#include "msdos.h"
-#include "osf.h"
-#include "sgi.h"
-#include "sun.h"
-#include "ibm.h"
-#include "ultrix.h"
-#include "efi.h"
-#include "karma.h"
-#include "sysv68.h"
-#include "cmdline.h"
-
-int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
-
-static int (*check_part[])(struct parsed_partitions *) = {
- /*
- * Probe partition formats with tables at disk address 0
- * that also have an ADFS boot block at 0xdc0.
- */
-#ifdef CONFIG_ACORN_PARTITION_ICS
- adfspart_check_ICS,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_POWERTEC
- adfspart_check_POWERTEC,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_EESOX
- adfspart_check_EESOX,
-#endif
-
- /*
- * Now move on to formats that only have partition info at
- * disk address 0xdc0. Since these may also have stale
- * PC/BIOS partition tables, they need to come before
- * the msdos entry.
- */
-#ifdef CONFIG_ACORN_PARTITION_CUMANA
- adfspart_check_CUMANA,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_ADFS
- adfspart_check_ADFS,
-#endif
-
-#ifdef CONFIG_CMDLINE_PARTITION
- cmdline_partition,
-#endif
-#ifdef CONFIG_EFI_PARTITION
- efi_partition, /* this must come before msdos */
-#endif
-#ifdef CONFIG_SGI_PARTITION
- sgi_partition,
-#endif
-#ifdef CONFIG_LDM_PARTITION
- ldm_partition, /* this must come before msdos */
-#endif
-#ifdef CONFIG_MSDOS_PARTITION
- msdos_partition,
-#endif
-#ifdef CONFIG_OSF_PARTITION
- osf_partition,
-#endif
-#ifdef CONFIG_SUN_PARTITION
- sun_partition,
-#endif
-#ifdef CONFIG_AMIGA_PARTITION
- amiga_partition,
-#endif
-#ifdef CONFIG_ATARI_PARTITION
- atari_partition,
-#endif
-#ifdef CONFIG_MAC_PARTITION
- mac_partition,
-#endif
-#ifdef CONFIG_ULTRIX_PARTITION
- ultrix_partition,
-#endif
-#ifdef CONFIG_IBM_PARTITION
- ibm_partition,
-#endif
-#ifdef CONFIG_KARMA_PARTITION
- karma_partition,
-#endif
-#ifdef CONFIG_SYSV68_PARTITION
- sysv68_partition,
-#endif
- NULL
-};
-
-static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
-{
- struct parsed_partitions *state;
- int nr;
-
- state = kzalloc(sizeof(*state), GFP_KERNEL);
- if (!state)
- return NULL;
-
- nr = disk_max_parts(hd);
- state->parts = vzalloc(array_size(nr, sizeof(state->parts[0])));
- if (!state->parts) {
- kfree(state);
- return NULL;
- }
-
- state->limit = nr;
-
- return state;
-}
-
-void free_partitions(struct parsed_partitions *state)
-{
- vfree(state->parts);
- kfree(state);
-}
-
-struct parsed_partitions *
-check_partition(struct gendisk *hd, struct block_device *bdev)
-{
- struct parsed_partitions *state;
- int i, res, err;
-
- state = allocate_partitions(hd);
- if (!state)
- return NULL;
- state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
- if (!state->pp_buf) {
- free_partitions(state);
- return NULL;
- }
- state->pp_buf[0] = '\0';
-
- state->bdev = bdev;
- disk_name(hd, 0, state->name);
- snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
- if (isdigit(state->name[strlen(state->name)-1]))
- sprintf(state->name, "p");
-
- i = res = err = 0;
- while (!res && check_part[i]) {
- memset(state->parts, 0, state->limit * sizeof(state->parts[0]));
- res = check_part[i++](state);
- if (res < 0) {
- /* We have hit an I/O error which we don't report now.
- * But record it, and let the others do their job.
- */
- err = res;
- res = 0;
- }
-
- }
- if (res > 0) {
- printk(KERN_INFO "%s", state->pp_buf);
-
- free_page((unsigned long)state->pp_buf);
- return state;
- }
- if (state->access_beyond_eod)
- err = -ENOSPC;
- if (err)
- /* The partition is unrecognized. So report I/O errors if there were any */
- res = err;
- if (res) {
- if (warn_no_part)
- strlcat(state->pp_buf,
- " unable to read partition table\n", PAGE_SIZE);
- printk(KERN_INFO "%s", state->pp_buf);
- }
-
- free_page((unsigned long)state->pp_buf);
- free_partitions(state);
- return ERR_PTR(res);
-}
diff --git a/block/partitions/check.h b/block/partitions/check.h
index 6042f769471a..c577e9ee67f0 100644
--- a/block/partitions/check.h
+++ b/block/partitions/check.h
@@ -2,6 +2,7 @@
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
+#include "../blk.h"
/*
* add_gd_partition adds a partitions details to the devices partition
@@ -23,19 +24,14 @@ struct parsed_partitions {
char *pp_buf;
};
-void free_partitions(struct parsed_partitions *state);
+typedef struct {
+ struct page *v;
+} Sector;
-struct parsed_partitions *
-check_partition(struct gendisk *, struct block_device *);
-
-static inline void *read_part_sector(struct parsed_partitions *state,
- sector_t n, Sector *p)
+void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p);
+static inline void put_dev_sector(Sector p)
{
- if (n >= get_capacity(state->bdev->bd_disk)) {
- state->access_beyond_eod = true;
- return NULL;
- }
- return read_dev_sector(state->bdev, n, p);
+ put_page(p.v);
}
static inline void
@@ -51,5 +47,24 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
}
}
-extern int warn_no_part;
-
+/* detection routines go here in alphabetical order: */
+int adfspart_check_ADFS(struct parsed_partitions *state);
+int adfspart_check_CUMANA(struct parsed_partitions *state);
+int adfspart_check_EESOX(struct parsed_partitions *state);
+int adfspart_check_ICS(struct parsed_partitions *state);
+int adfspart_check_POWERTEC(struct parsed_partitions *state);
+int aix_partition(struct parsed_partitions *state);
+int amiga_partition(struct parsed_partitions *state);
+int atari_partition(struct parsed_partitions *state);
+int cmdline_partition(struct parsed_partitions *state);
+int efi_partition(struct parsed_partitions *state);
+int ibm_partition(struct parsed_partitions *);
+int karma_partition(struct parsed_partitions *state);
+int ldm_partition(struct parsed_partitions *state);
+int mac_partition(struct parsed_partitions *state);
+int msdos_partition(struct parsed_partitions *state);
+int osf_partition(struct parsed_partitions *state);
+int sgi_partition(struct parsed_partitions *state);
+int sun_partition(struct parsed_partitions *state);
+int sysv68_partition(struct parsed_partitions *state);
+int ultrix_partition(struct parsed_partitions *state);
diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c
index f1edd5452249..8f545c36cde4 100644
--- a/block/partitions/cmdline.c
+++ b/block/partitions/cmdline.c
@@ -18,7 +18,6 @@
#include <linux/cmdline-parser.h>
#include "check.h"
-#include "cmdline.h"
static char *cmdline;
static struct cmdline_parts *bdev_parts;
diff --git a/block/partitions/cmdline.h b/block/partitions/cmdline.h
deleted file mode 100644
index e64a31636a1f..000000000000
--- a/block/partitions/cmdline.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-int cmdline_partition(struct parsed_partitions *state);
diff --git a/block/partition-generic.c b/block/partitions/core.c
index 564fae77711d..b79c4513629b 100644
--- a/block/partition-generic.c
+++ b/block/partitions/core.c
@@ -1,75 +1,176 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Code extracted from drivers/block/genhd.c
- * Copyright (C) 1991-1998 Linus Torvalds
- * Re-organised Feb 1998 Russell King
- *
- * We now have independent partition support from the
- * block drivers, which allows all the partition code to
- * be grouped in one location, and it to be mostly self
- * contained.
+ * Copyright (C) 1991-1998 Linus Torvalds
+ * Re-organised Feb 1998 Russell King
*/
-
-#include <linux/init.h>
-#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
+#include <linux/vmalloc.h>
#include <linux/blktrace_api.h>
+#include <linux/raid/detect.h>
+#include "check.h"
-#include "partitions/check.h"
+static int (*check_part[])(struct parsed_partitions *) = {
+ /*
+ * Probe partition formats with tables at disk address 0
+ * that also have an ADFS boot block at 0xdc0.
+ */
+#ifdef CONFIG_ACORN_PARTITION_ICS
+ adfspart_check_ICS,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_POWERTEC
+ adfspart_check_POWERTEC,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_EESOX
+ adfspart_check_EESOX,
+#endif
-#ifdef CONFIG_BLK_DEV_MD
-extern void md_autodetect_dev(dev_t dev);
+ /*
+ * Now move on to formats that only have partition info at
+ * disk address 0xdc0. Since these may also have stale
+ * PC/BIOS partition tables, they need to come before
+ * the msdos entry.
+ */
+#ifdef CONFIG_ACORN_PARTITION_CUMANA
+ adfspart_check_CUMANA,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_ADFS
+ adfspart_check_ADFS,
#endif
-
-/*
- * disk_name() is used by partition check code and the genhd driver.
- * It formats the devicename of the indicated disk into
- * the supplied buffer (of size at least 32), and returns
- * a pointer to that same buffer (for convenience).
- */
-char *disk_name(struct gendisk *hd, int partno, char *buf)
+#ifdef CONFIG_CMDLINE_PARTITION
+ cmdline_partition,
+#endif
+#ifdef CONFIG_EFI_PARTITION
+ efi_partition, /* this must come before msdos */
+#endif
+#ifdef CONFIG_SGI_PARTITION
+ sgi_partition,
+#endif
+#ifdef CONFIG_LDM_PARTITION
+ ldm_partition, /* this must come before msdos */
+#endif
+#ifdef CONFIG_MSDOS_PARTITION
+ msdos_partition,
+#endif
+#ifdef CONFIG_OSF_PARTITION
+ osf_partition,
+#endif
+#ifdef CONFIG_SUN_PARTITION
+ sun_partition,
+#endif
+#ifdef CONFIG_AMIGA_PARTITION
+ amiga_partition,
+#endif
+#ifdef CONFIG_ATARI_PARTITION
+ atari_partition,
+#endif
+#ifdef CONFIG_MAC_PARTITION
+ mac_partition,
+#endif
+#ifdef CONFIG_ULTRIX_PARTITION
+ ultrix_partition,
+#endif
+#ifdef CONFIG_IBM_PARTITION
+ ibm_partition,
+#endif
+#ifdef CONFIG_KARMA_PARTITION
+ karma_partition,
+#endif
+#ifdef CONFIG_SYSV68_PARTITION
+ sysv68_partition,
+#endif
+ NULL
+};
+
+static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
{
- if (!partno)
- snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
- else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
- snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
- else
- snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
+ struct parsed_partitions *state;
+ int nr;
- return buf;
-}
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return NULL;
-const char *bdevname(struct block_device *bdev, char *buf)
-{
- return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
-}
+ nr = disk_max_parts(hd);
+ state->parts = vzalloc(array_size(nr, sizeof(state->parts[0])));
+ if (!state->parts) {
+ kfree(state);
+ return NULL;
+ }
-EXPORT_SYMBOL(bdevname);
+ state->limit = nr;
-const char *bio_devname(struct bio *bio, char *buf)
-{
- return disk_name(bio->bi_disk, bio->bi_partno, buf);
+ return state;
}
-EXPORT_SYMBOL(bio_devname);
-/*
- * There's very little reason to use this, you should really
- * have a struct block_device just about everywhere and use
- * bdevname() instead.
- */
-const char *__bdevname(dev_t dev, char *buffer)
+static void free_partitions(struct parsed_partitions *state)
{
- scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
- MAJOR(dev), MINOR(dev));
- return buffer;
+ vfree(state->parts);
+ kfree(state);
}
-EXPORT_SYMBOL(__bdevname);
+static struct parsed_partitions *check_partition(struct gendisk *hd,
+ struct block_device *bdev)
+{
+ struct parsed_partitions *state;
+ int i, res, err;
+
+ state = allocate_partitions(hd);
+ if (!state)
+ return NULL;
+ state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!state->pp_buf) {
+ free_partitions(state);
+ return NULL;
+ }
+ state->pp_buf[0] = '\0';
+
+ state->bdev = bdev;
+ disk_name(hd, 0, state->name);
+ snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
+ if (isdigit(state->name[strlen(state->name)-1]))
+ sprintf(state->name, "p");
+
+ i = res = err = 0;
+ while (!res && check_part[i]) {
+ memset(state->parts, 0, state->limit * sizeof(state->parts[0]));
+ res = check_part[i++](state);
+ if (res < 0) {
+ /*
+ * We have hit an I/O error which we don't report now.
+ * But record it, and let the others do their job.
+ */
+ err = res;
+ res = 0;
+ }
+
+ }
+ if (res > 0) {
+ printk(KERN_INFO "%s", state->pp_buf);
+
+ free_page((unsigned long)state->pp_buf);
+ return state;
+ }
+ if (state->access_beyond_eod)
+ err = -ENOSPC;
+ /*
+ * The partition is unrecognized. So report I/O errors if there were any
+ */
+ if (err)
+ res = err;
+ if (res) {
+ strlcat(state->pp_buf,
+ " unable to read partition table\n", PAGE_SIZE);
+ printk(KERN_INFO "%s", state->pp_buf);
+ }
+
+ free_page((unsigned long)state->pp_buf);
+ free_partitions(state);
+ return ERR_PTR(res);
+}
static ssize_t part_partition_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -87,13 +188,6 @@ static ssize_t part_start_show(struct device *dev,
return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
}
-ssize_t part_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
-}
-
static ssize_t part_ro_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -115,74 +209,6 @@ static ssize_t part_discard_alignment_show(struct device *dev,
return sprintf(buf, "%u\n", p->discard_alignment);
}
-ssize_t part_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct request_queue *q = part_to_disk(p)->queue;
- unsigned int inflight;
-
- inflight = part_in_flight(q, p);
- return sprintf(buf,
- "%8lu %8lu %8llu %8u "
- "%8lu %8lu %8llu %8u "
- "%8u %8u %8u "
- "%8lu %8lu %8llu %8u "
- "%8lu %8u"
- "\n",
- part_stat_read(p, ios[STAT_READ]),
- part_stat_read(p, merges[STAT_READ]),
- (unsigned long long)part_stat_read(p, sectors[STAT_READ]),
- (unsigned int)part_stat_read_msecs(p, STAT_READ),
- part_stat_read(p, ios[STAT_WRITE]),
- part_stat_read(p, merges[STAT_WRITE]),
- (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
- (unsigned int)part_stat_read_msecs(p, STAT_WRITE),
- inflight,
- jiffies_to_msecs(part_stat_read(p, io_ticks)),
- jiffies_to_msecs(part_stat_read(p, time_in_queue)),
- part_stat_read(p, ios[STAT_DISCARD]),
- part_stat_read(p, merges[STAT_DISCARD]),
- (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
- part_stat_read(p, ios[STAT_FLUSH]),
- (unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
-}
-
-ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct request_queue *q = part_to_disk(p)->queue;
- unsigned int inflight[2];
-
- part_in_flight_rw(q, p, inflight);
- return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
-}
-
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-ssize_t part_fail_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
-
- return sprintf(buf, "%d\n", p->make_it_fail);
-}
-
-ssize_t part_fail_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct hd_struct *p = dev_to_part(dev);
- int i;
-
- if (count > 0 && sscanf(buf, "%d", &i) > 0)
- p->make_it_fail = (i == 0) ? 0 : 1;
-
- return count;
-}
-#endif
-
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
static DEVICE_ATTR(start, 0444, part_start_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
@@ -369,7 +395,9 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
p->policy = get_disk_ro(disk);
if (info) {
- struct partition_meta_info *pinfo = alloc_part_info(disk);
+ struct partition_meta_info *pinfo;
+
+ pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id);
if (!pinfo) {
err = -ENOMEM;
goto out_free_stats;
@@ -428,7 +456,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
return p;
out_free_info:
- free_part_info(p);
+ kfree(p->info);
out_free_stats:
free_part_stats(p);
out_free:
@@ -525,10 +553,10 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
return true;
}
-#ifdef CONFIG_BLK_DEV_MD
- if (state->parts[p].flags & ADDPART_FLAG_RAID)
+ if (IS_BUILTIN(CONFIG_BLK_DEV_MD) &&
+ (state->parts[p].flags & ADDPART_FLAG_RAID))
md_autodetect_dev(part_to_dev(part)->devt);
-#endif
+
return true;
}
@@ -602,22 +630,29 @@ out_free_state:
return ret;
}
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
{
- struct address_space *mapping = bdev->bd_inode->i_mapping;
+ struct address_space *mapping = state->bdev->bd_inode->i_mapping;
struct page *page;
- page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL);
- if (!IS_ERR(page)) {
- if (PageError(page))
- goto fail;
- p->v = page;
- return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
-fail:
- put_page(page);
+ if (n >= get_capacity(state->bdev->bd_disk)) {
+ state->access_beyond_eod = true;
+ return NULL;
}
+
+ page = read_mapping_page(mapping,
+ (pgoff_t)(n >> (PAGE_SHIFT - 9)), NULL);
+ if (IS_ERR(page))
+ goto out;
+ if (PageError(page))
+ goto out_put_page;
+
+ p->v = page;
+ return (unsigned char *)page_address(page) +
+ ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << SECTOR_SHIFT);
+out_put_page:
+ put_page(page);
+out:
p->v = NULL;
return NULL;
}
-
-EXPORT_SYMBOL(read_dev_sector);
diff --git a/block/partitions/efi.h b/block/partitions/efi.h
index 3e8576157575..907bac5ce8f7 100644
--- a/block/partitions/efi.h
+++ b/block/partitions/efi.h
@@ -113,7 +113,4 @@ typedef struct _legacy_mbr {
__le16 signature;
} __packed legacy_mbr;
-/* Functions */
-extern int efi_partition(struct parsed_partitions *state);
-
#endif
diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c
index a5d480f807f3..073faa6a69b8 100644
--- a/block/partitions/ibm.c
+++ b/block/partitions/ibm.c
@@ -15,7 +15,6 @@
#include <asm/vtoc.h>
#include "check.h"
-#include "ibm.h"
union label_t {
diff --git a/block/partitions/ibm.h b/block/partitions/ibm.h
deleted file mode 100644
index 8bf13febb2b6..000000000000
--- a/block/partitions/ibm.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-int ibm_partition(struct parsed_partitions *);
diff --git a/block/partitions/karma.c b/block/partitions/karma.c
index 59812d705c3d..4d93512f4bd4 100644
--- a/block/partitions/karma.c
+++ b/block/partitions/karma.c
@@ -8,9 +8,10 @@
*/
#include "check.h"
-#include "karma.h"
#include <linux/compiler.h>
+#define KARMA_LABEL_MAGIC 0xAB56
+
int karma_partition(struct parsed_partitions *state)
{
int i;
diff --git a/block/partitions/karma.h b/block/partitions/karma.h
deleted file mode 100644
index 48e074d417fb..000000000000
--- a/block/partitions/karma.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/karma.h
- */
-
-#define KARMA_LABEL_MAGIC 0xAB56
-
-int karma_partition(struct parsed_partitions *state);
-
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index a2d97ee1908c..6fdfcb40c537 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -14,10 +14,10 @@
#include <linux/stringify.h>
#include <linux/kernel.h>
#include <linux/uuid.h>
+#include <linux/msdos_partition.h>
#include "ldm.h"
#include "check.h"
-#include "msdos.h"
/*
* ldm_debug/info/error/crit - Output an error message
@@ -493,7 +493,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
{
Sector sect;
u8 *data;
- struct partition *p;
+ struct msdos_partition *p;
int i;
bool result = false;
@@ -508,7 +508,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
if (*(__le16*) (data + 0x01FE) != cpu_to_le16 (MSDOS_LABEL_MAGIC))
goto out;
- p = (struct partition*)(data + 0x01BE);
+ p = (struct msdos_partition *)(data + 0x01BE);
for (i = 0; i < 4; i++, p++)
if (SYS_IND (p) == LDM_PARTITION) {
result = true;
diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h
index 1ca63e97bccc..841580af7f9b 100644
--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -193,7 +193,5 @@ struct ldmdb { /* Cache of the database */
struct list_head v_part;
};
-int ldm_partition(struct parsed_partitions *state);
-
#endif /* _FS_PT_LDM_H_ */
diff --git a/block/partitions/mac.h b/block/partitions/mac.h
index 453ed2964804..0e41c9da7532 100644
--- a/block/partitions/mac.h
+++ b/block/partitions/mac.h
@@ -42,4 +42,3 @@ struct mac_driver_desc {
/* ... more stuff */
};
-int mac_partition(struct parsed_partitions *state);
diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 82c44f7df911..8f2fcc080264 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -18,13 +18,18 @@
* Check partition table on IDE disks for common CHS translations
*
* Re-organised Feb 1998 Russell King
+ *
+ * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il>
+ * updated by Marc Espie <Marc.Espie@openbsd.org>
+ *
+ * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
+ * and Krzysztof G. Baranowski <kgb@knm.org.pl>
*/
#include <linux/msdos_fs.h>
+#include <linux/msdos_partition.h>
#include "check.h"
-#include "msdos.h"
#include "efi.h"
-#include "aix.h"
/*
* Many architectures don't like unaligned accesses, while
@@ -35,17 +40,17 @@
#define SYS_IND(p) get_unaligned(&p->sys_ind)
-static inline sector_t nr_sects(struct partition *p)
+static inline sector_t nr_sects(struct msdos_partition *p)
{
return (sector_t)get_unaligned_le32(&p->nr_sects);
}
-static inline sector_t start_sect(struct partition *p)
+static inline sector_t start_sect(struct msdos_partition *p)
{
return (sector_t)get_unaligned_le32(&p->start_sect);
}
-static inline int is_extended_partition(struct partition *p)
+static inline int is_extended_partition(struct msdos_partition *p)
{
return (SYS_IND(p) == DOS_EXTENDED_PARTITION ||
SYS_IND(p) == WIN98_EXTENDED_PARTITION ||
@@ -68,7 +73,7 @@ msdos_magic_present(unsigned char *p)
#define AIX_LABEL_MAGIC4 0xC1
static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
{
- struct partition *pt = (struct partition *) (p + 0x1be);
+ struct msdos_partition *pt = (struct msdos_partition *) (p + 0x1be);
Sector sect;
unsigned char *d;
int slot, ret = 0;
@@ -78,13 +83,19 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
p[2] == AIX_LABEL_MAGIC3 &&
p[3] == AIX_LABEL_MAGIC4))
return 0;
- /* Assume the partition table is valid if Linux partitions exists */
+
+ /*
+ * Assume the partition table is valid if Linux partitions exists.
+ * Note that old Solaris/x86 partitions use the same indicator as
+ * Linux swap partitions, so we consider that a Linux partition as
+ * well.
+ */
for (slot = 1; slot <= 4; slot++, pt++) {
- if (pt->sys_ind == LINUX_SWAP_PARTITION ||
- pt->sys_ind == LINUX_RAID_PARTITION ||
- pt->sys_ind == LINUX_DATA_PARTITION ||
- pt->sys_ind == LINUX_LVM_PARTITION ||
- is_extended_partition(pt))
+ if (pt->sys_ind == SOLARIS_X86_PARTITION ||
+ pt->sys_ind == LINUX_RAID_PARTITION ||
+ pt->sys_ind == LINUX_DATA_PARTITION ||
+ pt->sys_ind == LINUX_LVM_PARTITION ||
+ is_extended_partition(pt))
return 0;
}
d = read_part_sector(state, 7, &sect);
@@ -122,7 +133,7 @@ static void parse_extended(struct parsed_partitions *state,
sector_t first_sector, sector_t first_size,
u32 disksig)
{
- struct partition *p;
+ struct msdos_partition *p;
Sector sect;
unsigned char *data;
sector_t this_sector, this_size;
@@ -146,7 +157,7 @@ static void parse_extended(struct parsed_partitions *state,
if (!msdos_magic_present(data + 510))
goto done;
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
/*
* Usually, the first entry is the real data partition,
@@ -210,6 +221,30 @@ done:
put_dev_sector(sect);
}
+#define SOLARIS_X86_NUMSLICE 16
+#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL)
+
+struct solaris_x86_slice {
+ __le16 s_tag; /* ID tag of partition */
+ __le16 s_flag; /* permission flags */
+ __le32 s_start; /* start sector no of partition */
+ __le32 s_size; /* # of blocks in partition */
+};
+
+struct solaris_x86_vtoc {
+ unsigned int v_bootinfo[3]; /* info needed by mboot */
+ __le32 v_sanity; /* to verify vtoc sanity */
+ __le32 v_version; /* layout version */
+ char v_volume[8]; /* volume name */
+ __le16 v_sectorsz; /* sector size in bytes */
+ __le16 v_nparts; /* number of partitions */
+ unsigned int v_reserved[10]; /* free space */
+ struct solaris_x86_slice
+ v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */
+ unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp */
+ char v_asciilabel[128]; /* for compatibility */
+};
+
/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also
indicates linux swap. Be careful before believing this is Solaris. */
@@ -265,6 +300,54 @@ static void parse_solaris_x86(struct parsed_partitions *state,
#endif
}
+/* check against BSD src/sys/sys/disklabel.h for consistency */
+#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */
+#define BSD_MAXPARTITIONS 16
+#define OPENBSD_MAXPARTITIONS 16
+#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */
+struct bsd_disklabel {
+ __le32 d_magic; /* the magic number */
+ __s16 d_type; /* drive type */
+ __s16 d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ char d_packname[16]; /* pack identifier */
+ __u32 d_secsize; /* # of bytes per sector */
+ __u32 d_nsectors; /* # of data sectors per track */
+ __u32 d_ntracks; /* # of tracks per cylinder */
+ __u32 d_ncylinders; /* # of data cylinders per unit */
+ __u32 d_secpercyl; /* # of data sectors per cylinder */
+ __u32 d_secperunit; /* # of data sectors per unit */
+ __u16 d_sparespertrack; /* # of spare sectors per track */
+ __u16 d_sparespercyl; /* # of spare sectors per cylinder */
+ __u32 d_acylinders; /* # of alt. cylinders per unit */
+ __u16 d_rpm; /* rotational speed */
+ __u16 d_interleave; /* hardware sector interleave */
+ __u16 d_trackskew; /* sector 0 skew, per track */
+ __u16 d_cylskew; /* sector 0 skew, per cylinder */
+ __u32 d_headswitch; /* head switch time, usec */
+ __u32 d_trkseek; /* track-to-track seek, usec */
+ __u32 d_flags; /* generic flags */
+#define NDDATA 5
+ __u32 d_drivedata[NDDATA]; /* drive-type specific information */
+#define NSPARE 5
+ __u32 d_spare[NSPARE]; /* reserved for future use */
+ __le32 d_magic2; /* the magic number (again) */
+ __le16 d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ __le16 d_npartitions; /* number of partitions in following */
+ __le32 d_bbsize; /* size of boot area at sn0, bytes */
+ __le32 d_sbsize; /* max size of fs superblock, bytes */
+ struct bsd_partition { /* the partition table */
+ __le32 p_size; /* number of sectors in partition */
+ __le32 p_offset; /* starting sector */
+ __le32 p_fsize; /* filesystem basic fragment size */
+ __u8 p_fstype; /* filesystem type, see below */
+ __u8 p_frag; /* filesystem fragments per block */
+ __le16 p_cpg; /* filesystem cylinders per group */
+ } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */
+};
+
#if defined(CONFIG_BSD_DISKLABEL)
/*
* Create devices for BSD partitions listed in a disklabel, under a
@@ -349,6 +432,51 @@ static void parse_openbsd(struct parsed_partitions *state,
#endif
}
+#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */
+#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */
+#define UNIXWARE_NUMSLICE 16
+#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */
+
+struct unixware_slice {
+ __le16 s_label; /* label */
+ __le16 s_flags; /* permission flags */
+ __le32 start_sect; /* starting sector */
+ __le32 nr_sects; /* number of sectors in slice */
+};
+
+struct unixware_disklabel {
+ __le32 d_type; /* drive type */
+ __le32 d_magic; /* the magic number */
+ __le32 d_version; /* version number */
+ char d_serial[12]; /* serial number of the device */
+ __le32 d_ncylinders; /* # of data cylinders per device */
+ __le32 d_ntracks; /* # of tracks per cylinder */
+ __le32 d_nsectors; /* # of data sectors per track */
+ __le32 d_secsize; /* # of bytes per sector */
+ __le32 d_part_start; /* # of first sector of this partition*/
+ __le32 d_unknown1[12]; /* ? */
+ __le32 d_alt_tbl; /* byte offset of alternate table */
+ __le32 d_alt_len; /* byte length of alternate table */
+ __le32 d_phys_cyl; /* # of physical cylinders per device */
+ __le32 d_phys_trk; /* # of physical tracks per cylinder */
+ __le32 d_phys_sec; /* # of physical sectors per track */
+ __le32 d_phys_bytes; /* # of physical bytes per sector */
+ __le32 d_unknown2; /* ? */
+ __le32 d_unknown3; /* ? */
+ __le32 d_pad[8]; /* pad */
+
+ struct unixware_vtoc {
+ __le32 v_magic; /* the magic number */
+ __le32 v_version; /* version number */
+ char v_name[8]; /* volume name */
+ __le16 v_nslices; /* # of slices */
+ __le16 v_unknown1; /* ? */
+ __le32 v_reserved[10]; /* reserved */
+ struct unixware_slice
+ v_slice[UNIXWARE_NUMSLICE]; /* slice headers */
+ } vtoc;
+}; /* 408 */
+
/*
* Create devices for Unixware partitions listed in a disklabel, under a
* dos-like partition. See parse_extended() for more information.
@@ -392,6 +520,8 @@ static void parse_unixware(struct parsed_partitions *state,
#endif
}
+#define MINIX_NR_SUBPARTITIONS 4
+
/*
* Minix 2.0.0/2.0.2 subpartition support.
* Anand Krishnamurthy <anandk@wiproge.med.ge.com>
@@ -403,14 +533,14 @@ static void parse_minix(struct parsed_partitions *state,
#ifdef CONFIG_MINIX_SUBPARTITION
Sector sect;
unsigned char *data;
- struct partition *p;
+ struct msdos_partition *p;
int i;
data = read_part_sector(state, offset, &sect);
if (!data)
return;
- p = (struct partition *)(data + 0x1be);
+ p = (struct msdos_partition *)(data + 0x1be);
/* The first sector of a Minix partition can have either
* a secondary MBR describing its subpartitions, or
@@ -454,7 +584,7 @@ int msdos_partition(struct parsed_partitions *state)
sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
Sector sect;
unsigned char *data;
- struct partition *p;
+ struct msdos_partition *p;
struct fat_boot_sector *fb;
int slot;
u32 disksig;
@@ -488,7 +618,7 @@ int msdos_partition(struct parsed_partitions *state)
* partition table. Reject this in case the boot indicator
* is not 0 or 0x80.
*/
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
for (slot = 1; slot <= 4; slot++, p++) {
if (p->boot_ind != 0 && p->boot_ind != 0x80) {
/*
@@ -510,7 +640,7 @@ int msdos_partition(struct parsed_partitions *state)
}
#ifdef CONFIG_EFI_PARTITION
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
for (slot = 1 ; slot <= 4 ; slot++, p++) {
/* If this is an EFI GPT disk, msdos should ignore it. */
if (SYS_IND(p) == EFI_PMBR_OSTYPE_EFI_GPT) {
@@ -519,7 +649,7 @@ int msdos_partition(struct parsed_partitions *state)
}
}
#endif
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
disksig = le32_to_cpup((__le32 *)(data + 0x1b8));
@@ -566,7 +696,7 @@ int msdos_partition(struct parsed_partitions *state)
strlcat(state->pp_buf, "\n", PAGE_SIZE);
/* second pass - output for each on a separate line */
- p = (struct partition *) (0x1be + data);
+ p = (struct msdos_partition *) (0x1be + data);
for (slot = 1 ; slot <= 4 ; slot++, p++) {
unsigned char id = SYS_IND(p);
int n;
diff --git a/block/partitions/msdos.h b/block/partitions/msdos.h
deleted file mode 100644
index fcacfc486092..000000000000
--- a/block/partitions/msdos.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/msdos.h
- */
-
-#define MSDOS_LABEL_MAGIC 0xAA55
-
-int msdos_partition(struct parsed_partitions *state);
-
diff --git a/block/partitions/osf.c b/block/partitions/osf.c
index 4b873973d6c0..84560d0765ed 100644
--- a/block/partitions/osf.c
+++ b/block/partitions/osf.c
@@ -9,9 +9,9 @@
*/
#include "check.h"
-#include "osf.h"
#define MAX_OSF_PARTITIONS 18
+#define DISKLABELMAGIC (0x82564557UL)
int osf_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/osf.h b/block/partitions/osf.h
deleted file mode 100644
index 4d8088e7ea8c..000000000000
--- a/block/partitions/osf.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/osf.h
- */
-
-#define DISKLABELMAGIC (0x82564557UL)
-
-int osf_partition(struct parsed_partitions *state);
diff --git a/block/partitions/sgi.c b/block/partitions/sgi.c
index d7b421c6e530..4273f1bb0515 100644
--- a/block/partitions/sgi.c
+++ b/block/partitions/sgi.c
@@ -6,7 +6,12 @@
*/
#include "check.h"
-#include "sgi.h"
+
+#define SGI_LABEL_MAGIC 0x0be5a941
+
+enum {
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+};
struct sgi_disklabel {
__be32 magic_mushroom; /* Big fat spliff... */
diff --git a/block/partitions/sgi.h b/block/partitions/sgi.h
deleted file mode 100644
index a5b77c3987cf..000000000000
--- a/block/partitions/sgi.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/sgi.h
- */
-
-extern int sgi_partition(struct parsed_partitions *state);
-
-#define SGI_LABEL_MAGIC 0x0be5a941
-
diff --git a/block/partitions/sun.c b/block/partitions/sun.c
index 90f36724e796..47dc53eccf77 100644
--- a/block/partitions/sun.c
+++ b/block/partitions/sun.c
@@ -9,7 +9,14 @@
*/
#include "check.h"
-#include "sun.h"
+
+#define SUN_LABEL_MAGIC 0xDABE
+#define SUN_VTOC_SANITY 0x600DDEEE
+
+enum {
+ SUN_WHOLE_DISK = 5,
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+};
int sun_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/sun.h b/block/partitions/sun.h
deleted file mode 100644
index ae1b9eed3fd7..000000000000
--- a/block/partitions/sun.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/sun.h
- */
-
-#define SUN_LABEL_MAGIC 0xDABE
-#define SUN_VTOC_SANITY 0x600DDEEE
-
-int sun_partition(struct parsed_partitions *state);
diff --git a/block/partitions/sysv68.c b/block/partitions/sysv68.c
index 92e810826b01..6f6257fd4eb4 100644
--- a/block/partitions/sysv68.c
+++ b/block/partitions/sysv68.c
@@ -6,7 +6,6 @@
*/
#include "check.h"
-#include "sysv68.h"
/*
* Volume ID structure: on first 256-bytes sector of disk
diff --git a/block/partitions/sysv68.h b/block/partitions/sysv68.h
deleted file mode 100644
index 4fb6b8ec78ae..000000000000
--- a/block/partitions/sysv68.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-extern int sysv68_partition(struct parsed_partitions *state);
diff --git a/block/partitions/ultrix.c b/block/partitions/ultrix.c
index ecd0d7346c3d..4aaa81043ca0 100644
--- a/block/partitions/ultrix.c
+++ b/block/partitions/ultrix.c
@@ -8,7 +8,6 @@
*/
#include "check.h"
-#include "ultrix.h"
int ultrix_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/ultrix.h b/block/partitions/ultrix.h
deleted file mode 100644
index 9f676cead222..000000000000
--- a/block/partitions/ultrix.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/ultrix.h
- */
-
-int ultrix_partition(struct parsed_partitions *state);
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 880cc57a5f6b..daafadbb88ca 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -1056,7 +1056,7 @@ static int start_opal_session_cont(struct opal_dev *dev)
hsn = response_get_u64(&dev->parsed, 4);
tsn = response_get_u64(&dev->parsed, 5);
- if (hsn == 0 && tsn == 0) {
+ if (hsn != GENERIC_HOST_SESSION_NUM || tsn < FIRST_TPER_SESSION_NUM) {
pr_debug("Couldn't authenticate session\n");
return -EPERM;
}
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 103acbbfcf9a..24c9642e8fc7 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes)
* New allocation must be visible in all pgd before it can be found by
* an NMI allocating from the pool.
*/
- vmalloc_sync_all();
+ vmalloc_sync_mappings();
rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
if (rc)
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index 110e41f920c2..f303106b3362 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -448,6 +448,7 @@ static int binderfs_binder_ctl_create(struct super_block *sb)
inode->i_uid = info->root_uid;
inode->i_gid = info->root_gid;
+ refcount_set(&device->ref, 1);
device->binderfs_inode = inode;
device->miscdev.minor = minor;
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index a6beb2c5a692..05ecdce1b702 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -34,6 +34,12 @@ if ATA
config ATA_NONSTANDARD
bool
+config SATA_HOST
+ bool
+
+config PATA_TIMINGS
+ bool
+
config ATA_VERBOSE_ERROR
bool "Verbose ATA error reporting"
default y
@@ -45,9 +51,26 @@ config ATA_VERBOSE_ERROR
If unsure, say Y.
+config ATA_FORCE
+ bool "\"libata.force=\" kernel parameter support" if EXPERT
+ default y
+ help
+ This option adds support for "libata.force=" kernel parameter for
+ forcing configuration settings.
+
+ For further information, please read
+ <file:Documentation/admin-guide/kernel-parameters.txt>.
+
+ This option will enlarge the kernel by approx. 3KB. Disable it if
+ kernel size is more important than ability to override the default
+ configuration settings.
+
+ If unsure, say Y.
+
config ATA_ACPI
bool "ATA ACPI Support"
depends on ACPI
+ select PATA_TIMINGS
default y
help
This option adds support for ATA-related ACPI objects.
@@ -73,6 +96,7 @@ config SATA_ZPODD
config SATA_PMP
bool "SATA Port Multiplier support"
+ depends on SATA_HOST
default y
help
This option adds support for SATA Port Multipliers
@@ -85,6 +109,7 @@ comment "Controllers with non-SFF native interface"
config SATA_AHCI
tristate "AHCI SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for AHCI Serial ATA.
@@ -111,6 +136,7 @@ config SATA_MOBILE_LPM_POLICY
config SATA_AHCI_PLATFORM
tristate "Platform AHCI SATA support"
+ select SATA_HOST
help
This option enables support for Platform AHCI Serial ATA
controllers.
@@ -121,6 +147,7 @@ config AHCI_BRCM
tristate "Broadcom AHCI SATA support"
depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP || \
ARCH_BCM_63XX
+ select SATA_HOST
help
This option enables support for the AHCI SATA3 controller found on
Broadcom SoC's.
@@ -130,6 +157,7 @@ config AHCI_BRCM
config AHCI_DA850
tristate "DaVinci DA850 AHCI SATA support"
depends on ARCH_DAVINCI_DA850
+ select SATA_HOST
help
This option enables support for the DaVinci DA850 SoC's
onboard AHCI SATA.
@@ -139,6 +167,7 @@ config AHCI_DA850
config AHCI_DM816
tristate "DaVinci DM816 AHCI SATA support"
depends on ARCH_OMAP2PLUS
+ select SATA_HOST
help
This option enables support for the DaVinci DM816 SoC's
onboard AHCI SATA controller.
@@ -148,6 +177,7 @@ config AHCI_DM816
config AHCI_ST
tristate "ST AHCI SATA support"
depends on ARCH_STI
+ select SATA_HOST
help
This option enables support for ST AHCI SATA controller.
@@ -157,6 +187,7 @@ config AHCI_IMX
tristate "Freescale i.MX AHCI SATA support"
depends on MFD_SYSCON && (ARCH_MXC || COMPILE_TEST)
depends on (HWMON && (THERMAL || !THERMAL_OF)) || !HWMON
+ select SATA_HOST
help
This option enables support for the Freescale i.MX SoC's
onboard AHCI SATA.
@@ -166,6 +197,7 @@ config AHCI_IMX
config AHCI_CEVA
tristate "CEVA AHCI SATA support"
depends on OF
+ select SATA_HOST
help
This option enables support for the CEVA AHCI SATA.
It can be found on the Xilinx Zynq UltraScale+ MPSoC.
@@ -176,6 +208,7 @@ config AHCI_MTK
tristate "MediaTek AHCI SATA support"
depends on ARCH_MEDIATEK
select MFD_SYSCON
+ select SATA_HOST
help
This option enables support for the MediaTek SoC's
onboard AHCI SATA controller.
@@ -185,6 +218,7 @@ config AHCI_MTK
config AHCI_MVEBU
tristate "Marvell EBU AHCI SATA support"
depends on ARCH_MVEBU
+ select SATA_HOST
help
This option enables support for the Marvebu EBU SoC's
onboard AHCI SATA.
@@ -203,6 +237,7 @@ config AHCI_OCTEON
config AHCI_SUNXI
tristate "Allwinner sunxi AHCI SATA support"
depends on ARCH_SUNXI
+ select SATA_HOST
help
This option enables support for the Allwinner sunxi SoC's
onboard AHCI SATA.
@@ -212,6 +247,7 @@ config AHCI_SUNXI
config AHCI_TEGRA
tristate "NVIDIA Tegra AHCI SATA support"
depends on ARCH_TEGRA
+ select SATA_HOST
help
This option enables support for the NVIDIA Tegra SoC's
onboard AHCI SATA.
@@ -221,12 +257,14 @@ config AHCI_TEGRA
config AHCI_XGENE
tristate "APM X-Gene 6.0Gbps AHCI SATA host controller support"
depends on PHY_XGENE
+ select SATA_HOST
help
This option enables support for APM X-Gene SoC SATA host controller.
config AHCI_QORIQ
tristate "Freescale QorIQ AHCI SATA support"
depends on OF
+ select SATA_HOST
help
This option enables support for the Freescale QorIQ AHCI SoC's
onboard AHCI SATA.
@@ -236,6 +274,7 @@ config AHCI_QORIQ
config SATA_FSL
tristate "Freescale 3.0Gbps SATA support"
depends on FSL_SOC
+ select SATA_HOST
help
This option enables support for Freescale 3.0Gbps SATA controller.
It can be found on MPC837x and MPC8315.
@@ -245,6 +284,7 @@ config SATA_FSL
config SATA_GEMINI
tristate "Gemini SATA bridge support"
depends on ARCH_GEMINI || COMPILE_TEST
+ select SATA_HOST
default ARCH_GEMINI
help
This enabled support for the FTIDE010 to SATA bridge
@@ -255,6 +295,7 @@ config SATA_GEMINI
config SATA_AHCI_SEATTLE
tristate "AMD Seattle 6.0Gbps AHCI SATA host controller support"
depends on ARCH_SEATTLE
+ select SATA_HOST
help
This option enables support for AMD Seattle SATA host controller.
@@ -263,12 +304,14 @@ config SATA_AHCI_SEATTLE
config SATA_INIC162X
tristate "Initio 162x SATA support (Very Experimental)"
depends on PCI
+ select SATA_HOST
help
This option enables support for Initio 162x Serial ATA.
config SATA_ACARD_AHCI
tristate "ACard AHCI variant (ATP 8620)"
depends on PCI
+ select SATA_HOST
help
This option enables support for Acard.
@@ -277,6 +320,7 @@ config SATA_ACARD_AHCI
config SATA_SIL24
tristate "Silicon Image 3124/3132 SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Silicon Image 3124/3132 Serial ATA.
@@ -317,6 +361,7 @@ config PDC_ADMA
config PATA_OCTEON_CF
tristate "OCTEON Boot Bus Compact Flash support"
depends on CAVIUM_OCTEON_SOC
+ select PATA_TIMINGS
help
This option enables a polled compact flash driver for use with
compact flash cards attached to the OCTEON boot bus.
@@ -326,6 +371,7 @@ config PATA_OCTEON_CF
config SATA_QSTOR
tristate "Pacific Digital SATA QStor support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Pacific Digital Serial ATA QStor.
@@ -334,6 +380,7 @@ config SATA_QSTOR
config SATA_SX4
tristate "Promise SATA SX4 support (Experimental)"
depends on PCI
+ select SATA_HOST
help
This option enables support for Promise Serial ATA SX4.
@@ -357,6 +404,7 @@ comment "SATA SFF controllers with BMDMA"
config ATA_PIIX
tristate "Intel ESB, ICH, PIIX3, PIIX4 PATA/SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for ICH5/6/7/8 Serial ATA
and support for PATA on the Intel ESB/ICH/PIIX3/PIIX4 series
@@ -368,6 +416,7 @@ config SATA_DWC
tristate "DesignWare Cores SATA support"
depends on DMADEVICES
select GENERIC_PHY
+ select SATA_HOST
help
This option enables support for the on-chip SATA controller of the
AppliedMicro processor 460EX.
@@ -398,6 +447,7 @@ config SATA_DWC_VDEBUG
config SATA_HIGHBANK
tristate "Calxeda Highbank SATA support"
depends on ARCH_HIGHBANK || COMPILE_TEST
+ select SATA_HOST
help
This option enables support for the Calxeda Highbank SoC's
onboard SATA.
@@ -409,6 +459,7 @@ config SATA_MV
depends on PCI || ARCH_DOVE || ARCH_MV78XX0 || \
ARCH_MVEBU || ARCH_ORION5X || COMPILE_TEST
select GENERIC_PHY
+ select SATA_HOST
help
This option enables support for the Marvell Serial ATA family.
Currently supports 88SX[56]0[48][01] PCI(-X) chips,
@@ -419,6 +470,7 @@ config SATA_MV
config SATA_NV
tristate "NVIDIA SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for NVIDIA Serial ATA.
@@ -427,6 +479,7 @@ config SATA_NV
config SATA_PROMISE
tristate "Promise SATA TX2/TX4 support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Promise Serial ATA TX2/TX4.
@@ -435,6 +488,7 @@ config SATA_PROMISE
config SATA_RCAR
tristate "Renesas R-Car SATA support"
depends on ARCH_RENESAS || COMPILE_TEST
+ select SATA_HOST
help
This option enables support for Renesas R-Car Serial ATA.
@@ -443,6 +497,7 @@ config SATA_RCAR
config SATA_SIL
tristate "Silicon Image SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Silicon Image Serial ATA.
@@ -452,6 +507,7 @@ config SATA_SIS
tristate "SiS 964/965/966/180 SATA support"
depends on PCI
select PATA_SIS
+ select SATA_HOST
help
This option enables support for SiS Serial ATA on
SiS 964/965/966/180 and Parallel ATA on SiS 180.
@@ -462,6 +518,7 @@ config SATA_SIS
config SATA_SVW
tristate "ServerWorks Frodo / Apple K2 SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Broadcom/Serverworks/Apple K2
SATA support.
@@ -471,6 +528,7 @@ config SATA_SVW
config SATA_ULI
tristate "ULi Electronics SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for ULi Electronics SATA.
@@ -479,6 +537,7 @@ config SATA_ULI
config SATA_VIA
tristate "VIA SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for VIA Serial ATA.
@@ -487,6 +546,7 @@ config SATA_VIA
config SATA_VITESSE
tristate "VITESSE VSC-7174 / INTEL 31244 SATA support"
depends on PCI
+ select SATA_HOST
help
This option enables support for Vitesse VSC7174 and Intel 31244 Serial ATA.
@@ -497,6 +557,7 @@ comment "PATA SFF controllers with BMDMA"
config PATA_ALI
tristate "ALi PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the ALi ATA interfaces
found on the many ALi chipsets.
@@ -506,6 +567,7 @@ config PATA_ALI
config PATA_AMD
tristate "AMD/NVidia PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the AMD and NVidia PATA
interfaces found on the chipsets for Athlon/Athlon64.
@@ -540,6 +602,7 @@ config PATA_ATIIXP
config PATA_ATP867X
tristate "ARTOP/Acard ATP867X PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for ARTOP/Acard ATP867X PATA
controllers.
@@ -549,6 +612,7 @@ config PATA_ATP867X
config PATA_BK3710
tristate "Palmchip BK3710 PATA support"
depends on ARCH_DAVINCI
+ select PATA_TIMINGS
help
This option enables support for the integrated IDE controller on
the TI DaVinci SoC.
@@ -558,6 +622,7 @@ config PATA_BK3710
config PATA_CMD64X
tristate "CMD64x PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the CMD64x series chips
except for the CMD640.
@@ -603,6 +668,7 @@ config PATA_CS5536
config PATA_CYPRESS
tristate "Cypress CY82C693 PATA support (Very Experimental)"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the Cypress/Contaq CY82C693
chipset found in some Alpha systems
@@ -621,6 +687,7 @@ config PATA_EFAR
config PATA_EP93XX
tristate "Cirrus Logic EP93xx PATA support"
depends on ARCH_EP93XX
+ select PATA_TIMINGS
help
This option enables support for the PATA controller in
the Cirrus Logic EP9312 and EP9315 ARM CPU.
@@ -685,6 +752,7 @@ config PATA_HPT3X3_DMA
config PATA_ICSIDE
tristate "Acorn ICS PATA support"
depends on ARM && ARCH_ACORN
+ select PATA_TIMINGS
help
On Acorn systems, say Y here if you wish to use the ICS PATA
interface card. This is not required for ICS partition support.
@@ -693,6 +761,7 @@ config PATA_ICSIDE
config PATA_IMX
tristate "PATA support for Freescale iMX"
depends on ARCH_MXC
+ select PATA_TIMINGS
help
This option enables support for the PATA host available on Freescale
iMX SoCs.
@@ -778,6 +847,7 @@ config PATA_NINJA32
config PATA_NS87415
tristate "Nat Semi NS87415 PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the National Semiconductor
NS87415 PCI-IDE controller.
@@ -902,6 +972,7 @@ config PATA_TRIFLEX
config PATA_VIA
tristate "VIA PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the VIA PATA interfaces
found on the many VIA chipsets.
@@ -935,6 +1006,7 @@ comment "PIO-only SFF controllers"
config PATA_CMD640_PCI
tristate "CMD640 PCI PATA support (Experimental)"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the CMD640 PCI IDE
interface chip. Only the primary channel is currently
@@ -1005,6 +1077,7 @@ config PATA_MPIIX
config PATA_NS87410
tristate "Nat Semi NS87410 PATA support"
depends on PCI
+ select PATA_TIMINGS
help
This option enables support for the National Semiconductor
NS87410 PCI-IDE controller.
@@ -1085,6 +1158,7 @@ config PATA_RZ1000
config PATA_SAMSUNG_CF
tristate "Samsung SoC PATA support"
depends on SAMSUNG_DEV_IDE
+ select PATA_TIMINGS
help
This option enables basic support for Samsung's S3C/S5P board
PATA controllers via the new ATA layer
@@ -1104,6 +1178,7 @@ comment "Generic fallback / legacy drivers"
config PATA_ACPI
tristate "ACPI firmware driver for PATA"
depends on ATA_ACPI && ATA_BMDMA && PCI
+ select PATA_TIMINGS
help
This option enables an ACPI method driver which drives
motherboard PATA controller interfaces through the ACPI
@@ -1113,6 +1188,7 @@ config PATA_ACPI
config ATA_GENERIC
tristate "Generic ATA support"
depends on PCI && ATA_BMDMA
+ select SATA_HOST
help
This option enables support for generic BIOS configured
ATA controllers via the new ATA layer
@@ -1122,6 +1198,7 @@ config ATA_GENERIC
config PATA_LEGACY
tristate "Legacy ISA PATA support (Experimental)"
depends on (ISA || PCI)
+ select PATA_TIMINGS
help
This option enables support for ISA/VLB/PCI bus legacy PATA
ports and allows them to be accessed via the new ATA layer.
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index d8cc2e04a6c7..b8aebfb14e82 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -123,7 +123,9 @@ obj-$(CONFIG_PATA_LEGACY) += pata_legacy.o
libata-y := libata-core.o libata-scsi.o libata-eh.o \
libata-transport.o libata-trace.o
+libata-$(CONFIG_SATA_HOST) += libata-sata.o
libata-$(CONFIG_ATA_SFF) += libata-sff.o
libata-$(CONFIG_SATA_PMP) += libata-pmp.o
libata-$(CONFIG_ATA_ACPI) += libata-acpi.o
libata-$(CONFIG_SATA_ZPODD) += libata-zpodd.o
+libata-$(CONFIG_PATA_TIMINGS) += libata-pata-timings.o
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 11ea1aff40db..ad0185c8dcee 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -40,6 +40,7 @@
enum {
AHCI_PCI_BAR_STA2X11 = 0,
AHCI_PCI_BAR_CAVIUM = 0,
+ AHCI_PCI_BAR_LOONGSON = 0,
AHCI_PCI_BAR_ENMOTUS = 2,
AHCI_PCI_BAR_CAVIUM_GEN5 = 4,
AHCI_PCI_BAR_STANDARD = 5,
@@ -245,6 +246,7 @@ static const struct ata_port_info ahci_port_info[] = {
static const struct pci_device_id ahci_pci_tbl[] = {
/* Intel */
+ { PCI_VDEVICE(INTEL, 0x06d6), board_ahci }, /* Comet Lake PCH-H RAID */
{ PCI_VDEVICE(INTEL, 0x2652), board_ahci }, /* ICH6 */
{ PCI_VDEVICE(INTEL, 0x2653), board_ahci }, /* ICH6M */
{ PCI_VDEVICE(INTEL, 0x27c1), board_ahci }, /* ICH7 */
@@ -401,6 +403,8 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa356), board_ahci }, /* Cannon Lake PCH-H RAID */
+ { PCI_VDEVICE(INTEL, 0x06d7), board_ahci }, /* Comet Lake-H RAID */
+ { PCI_VDEVICE(INTEL, 0xa386), board_ahci }, /* Comet Lake PCH-V RAID */
{ PCI_VDEVICE(INTEL, 0x0f22), board_ahci_mobile }, /* Bay Trail AHCI */
{ PCI_VDEVICE(INTEL, 0x0f23), board_ahci_mobile }, /* Bay Trail AHCI */
{ PCI_VDEVICE(INTEL, 0x22a3), board_ahci_mobile }, /* Cherry Tr. AHCI */
@@ -589,6 +593,9 @@ static const struct pci_device_id ahci_pci_tbl[] = {
/* Enmotus */
{ PCI_DEVICE(0x1c44, 0x8000), board_ahci },
+ /* Loongson */
+ { PCI_VDEVICE(LOONGSON, 0x7a08), board_ahci },
+
/* Generic, PCI class code for AHCI */
{ PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff, board_ahci },
@@ -1680,6 +1687,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
ahci_pci_bar = AHCI_PCI_BAR_CAVIUM;
if (pdev->device == 0xa084)
ahci_pci_bar = AHCI_PCI_BAR_CAVIUM_GEN5;
+ } else if (pdev->vendor == PCI_VENDOR_ID_LOONGSON) {
+ if (pdev->device == 0x7a08)
+ ahci_pci_bar = AHCI_PCI_BAR_LOONGSON;
}
/* acquire resources */
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 42c8728f6117..beca5f91bb4c 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2,10 +2,6 @@
/*
* libata-core.c - helper library for ATA
*
- * Maintained by: Tejun Heo <tj@kernel.org>
- * Please ALWAYS copy linux-ide@vger.kernel.org
- * on emails.
- *
* Copyright 2003-2004 Red Hat, Inc. All rights reserved.
* Copyright 2003-2004 Jeff Garzik
*
@@ -22,6 +18,11 @@
* http://www.compactflash.org (CF)
* http://www.qic.org (QIC157 - Tape and DSC)
* http://www.ce-ata.org (CE-ATA: not supported)
+ *
+ * libata is essentially a library of internal helper functions for
+ * low-level ATA host controller drivers. As such, the API/ABI is
+ * likely to change as new drivers are added and updated.
+ * Do not depend on ABI/API stability.
*/
#include <linux/kernel.h>
@@ -56,6 +57,7 @@
#include <linux/leds.h>
#include <linux/pm_runtime.h>
#include <linux/platform_device.h>
+#include <asm/setup.h>
#define CREATE_TRACE_POINTS
#include <trace/events/libata.h>
@@ -63,11 +65,6 @@
#include "libata.h"
#include "libata-transport.h"
-/* debounce timing parameters in msecs { interval, duration, timeout } */
-const unsigned long sata_deb_timing_normal[] = { 5, 100, 2000 };
-const unsigned long sata_deb_timing_hotplug[] = { 25, 500, 2000 };
-const unsigned long sata_deb_timing_long[] = { 100, 2000, 5000 };
-
const struct ata_port_operations ata_base_port_ops = {
.prereset = ata_std_prereset,
.postreset = ata_std_postreset,
@@ -82,6 +79,7 @@ const struct ata_port_operations sata_port_ops = {
.qc_defer = ata_std_qc_defer,
.hardreset = sata_std_hardreset,
};
+EXPORT_SYMBOL_GPL(sata_port_ops);
static unsigned int ata_dev_init_params(struct ata_device *dev,
u16 heads, u16 sectors);
@@ -91,14 +89,15 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev);
atomic_t ata_print_id = ATOMIC_INIT(0);
+#ifdef CONFIG_ATA_FORCE
struct ata_force_param {
const char *name;
- unsigned int cbl;
- int spd_limit;
+ u8 cbl;
+ u8 spd_limit;
unsigned long xfer_mask;
unsigned int horkage_on;
unsigned int horkage_off;
- unsigned int lflags;
+ u16 lflags;
};
struct ata_force_ent {
@@ -110,10 +109,11 @@ struct ata_force_ent {
static struct ata_force_ent *ata_force_tbl;
static int ata_force_tbl_size;
-static char ata_force_param_buf[PAGE_SIZE] __initdata;
+static char ata_force_param_buf[COMMAND_LINE_SIZE] __initdata;
/* param_buf is thrown away after initialization, disallow read */
module_param_string(force, ata_force_param_buf, sizeof(ata_force_param_buf), 0);
MODULE_PARM_DESC(force, "Force ATA configurations including cable type, link speed and transfer mode (see Documentation/admin-guide/kernel-parameters.rst for details)");
+#endif
static int atapi_enabled = 1;
module_param(atapi_enabled, int, 0444);
@@ -224,6 +224,7 @@ struct ata_link *ata_link_next(struct ata_link *link, struct ata_port *ap,
return NULL;
}
+EXPORT_SYMBOL_GPL(ata_link_next);
/**
* ata_dev_next - device iteration helper
@@ -277,6 +278,7 @@ struct ata_device *ata_dev_next(struct ata_device *dev, struct ata_link *link,
goto next;
return dev;
}
+EXPORT_SYMBOL_GPL(ata_dev_next);
/**
* ata_dev_phys_link - find physical link for a device
@@ -303,6 +305,7 @@ struct ata_link *ata_dev_phys_link(struct ata_device *dev)
return ap->slave_link;
}
+#ifdef CONFIG_ATA_FORCE
/**
* ata_force_cbl - force cable type according to libata.force
* @ap: ATA port of interest
@@ -483,6 +486,11 @@ static void ata_force_horkage(struct ata_device *dev)
fe->param.name);
}
}
+#else
+static inline void ata_force_link_limits(struct ata_link *link) { }
+static inline void ata_force_xfermask(struct ata_device *dev) { }
+static inline void ata_force_horkage(struct ata_device *dev) { }
+#endif
/**
* atapi_cmd_type - Determine ATAPI command type from SCSI opcode
@@ -521,79 +529,7 @@ int atapi_cmd_type(u8 opcode)
return ATAPI_MISC;
}
}
-
-/**
- * ata_tf_to_fis - Convert ATA taskfile to SATA FIS structure
- * @tf: Taskfile to convert
- * @pmp: Port multiplier port
- * @is_cmd: This FIS is for command
- * @fis: Buffer into which data will output
- *
- * Converts a standard ATA taskfile to a Serial ATA
- * FIS structure (Register - Host to Device).
- *
- * LOCKING:
- * Inherited from caller.
- */
-void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis)
-{
- fis[0] = 0x27; /* Register - Host to Device FIS */
- fis[1] = pmp & 0xf; /* Port multiplier number*/
- if (is_cmd)
- fis[1] |= (1 << 7); /* bit 7 indicates Command FIS */
-
- fis[2] = tf->command;
- fis[3] = tf->feature;
-
- fis[4] = tf->lbal;
- fis[5] = tf->lbam;
- fis[6] = tf->lbah;
- fis[7] = tf->device;
-
- fis[8] = tf->hob_lbal;
- fis[9] = tf->hob_lbam;
- fis[10] = tf->hob_lbah;
- fis[11] = tf->hob_feature;
-
- fis[12] = tf->nsect;
- fis[13] = tf->hob_nsect;
- fis[14] = 0;
- fis[15] = tf->ctl;
-
- fis[16] = tf->auxiliary & 0xff;
- fis[17] = (tf->auxiliary >> 8) & 0xff;
- fis[18] = (tf->auxiliary >> 16) & 0xff;
- fis[19] = (tf->auxiliary >> 24) & 0xff;
-}
-
-/**
- * ata_tf_from_fis - Convert SATA FIS to ATA taskfile
- * @fis: Buffer from which data will be input
- * @tf: Taskfile to output
- *
- * Converts a serial ATA FIS structure to a standard ATA taskfile.
- *
- * LOCKING:
- * Inherited from caller.
- */
-
-void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf)
-{
- tf->command = fis[2]; /* status */
- tf->feature = fis[3]; /* error */
-
- tf->lbal = fis[4];
- tf->lbam = fis[5];
- tf->lbah = fis[6];
- tf->device = fis[7];
-
- tf->hob_lbal = fis[8];
- tf->hob_lbam = fis[9];
- tf->hob_lbah = fis[10];
-
- tf->nsect = fis[12];
- tf->hob_nsect = fis[13];
-}
+EXPORT_SYMBOL_GPL(atapi_cmd_type);
static const u8 ata_rw_cmds[] = {
/* pio multi */
@@ -868,6 +804,7 @@ unsigned long ata_pack_xfermask(unsigned long pio_mask,
((mwdma_mask << ATA_SHIFT_MWDMA) & ATA_MASK_MWDMA) |
((udma_mask << ATA_SHIFT_UDMA) & ATA_MASK_UDMA);
}
+EXPORT_SYMBOL_GPL(ata_pack_xfermask);
/**
* ata_unpack_xfermask - Unpack xfer_mask into pio, mwdma and udma masks
@@ -923,6 +860,7 @@ u8 ata_xfer_mask2mode(unsigned long xfer_mask)
return ent->base + highbit - ent->shift;
return 0xff;
}
+EXPORT_SYMBOL_GPL(ata_xfer_mask2mode);
/**
* ata_xfer_mode2mask - Find matching xfer_mask for XFER_*
@@ -946,6 +884,7 @@ unsigned long ata_xfer_mode2mask(u8 xfer_mode)
& ~((1 << ent->shift) - 1);
return 0;
}
+EXPORT_SYMBOL_GPL(ata_xfer_mode2mask);
/**
* ata_xfer_mode2shift - Find matching xfer_shift for XFER_*
@@ -968,6 +907,7 @@ int ata_xfer_mode2shift(unsigned long xfer_mode)
return ent->shift;
return -1;
}
+EXPORT_SYMBOL_GPL(ata_xfer_mode2shift);
/**
* ata_mode_string - convert xfer_mask to string
@@ -1014,6 +954,7 @@ const char *ata_mode_string(unsigned long xfer_mask)
return xfer_mode_str[highbit];
return "<n/a>";
}
+EXPORT_SYMBOL_GPL(ata_mode_string);
const char *sata_spd_string(unsigned int spd)
{
@@ -1094,6 +1035,7 @@ unsigned int ata_dev_classify(const struct ata_taskfile *tf)
DPRINTK("unknown device\n");
return ATA_DEV_UNKNOWN;
}
+EXPORT_SYMBOL_GPL(ata_dev_classify);
/**
* ata_id_string - Convert IDENTIFY DEVICE page into string
@@ -1130,6 +1072,7 @@ void ata_id_string(const u16 *id, unsigned char *s,
len -= 2;
}
}
+EXPORT_SYMBOL_GPL(ata_id_string);
/**
* ata_id_c_string - Convert IDENTIFY DEVICE page into C string
@@ -1157,6 +1100,7 @@ void ata_id_c_string(const u16 *id, unsigned char *s,
p--;
*p = '\0';
}
+EXPORT_SYMBOL_GPL(ata_id_c_string);
static u64 ata_id_n_sectors(const u16 *id)
{
@@ -1514,6 +1458,7 @@ unsigned long ata_id_xfermask(const u16 *id)
return ata_pack_xfermask(pio_mask, mwdma_mask, udma_mask);
}
+EXPORT_SYMBOL_GPL(ata_id_xfermask);
static void ata_qc_complete_internal(struct ata_queued_cmd *qc)
{
@@ -1771,6 +1716,7 @@ unsigned int ata_pio_need_iordy(const struct ata_device *adev)
return 1;
return 0;
}
+EXPORT_SYMBOL_GPL(ata_pio_need_iordy);
/**
* ata_pio_mask_no_iordy - Return the non IORDY mask
@@ -1811,6 +1757,7 @@ unsigned int ata_do_dev_read_id(struct ata_device *dev,
return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
id, sizeof(id[0]) * ATA_ID_WORDS, 0);
}
+EXPORT_SYMBOL_GPL(ata_do_dev_read_id);
/**
* ata_dev_read_id - Read ID data from the specified device
@@ -2265,6 +2212,8 @@ static int ata_dev_config_ncq(struct ata_device *dev,
desc[0] = '\0';
return 0;
}
+ if (!IS_ENABLED(CONFIG_SATA_HOST))
+ return 0;
if (dev->horkage & ATA_HORKAGE_NONCQ) {
snprintf(desc, desc_sz, "NCQ (not used)");
return 0;
@@ -2783,6 +2732,7 @@ int ata_cable_40wire(struct ata_port *ap)
{
return ATA_CBL_PATA40;
}
+EXPORT_SYMBOL_GPL(ata_cable_40wire);
/**
* ata_cable_80wire - return 80 wire cable type
@@ -2796,6 +2746,7 @@ int ata_cable_80wire(struct ata_port *ap)
{
return ATA_CBL_PATA80;
}
+EXPORT_SYMBOL_GPL(ata_cable_80wire);
/**
* ata_cable_unknown - return unknown PATA cable.
@@ -2808,6 +2759,7 @@ int ata_cable_unknown(struct ata_port *ap)
{
return ATA_CBL_PATA_UNK;
}
+EXPORT_SYMBOL_GPL(ata_cable_unknown);
/**
* ata_cable_ignore - return ignored PATA cable.
@@ -2820,6 +2772,7 @@ int ata_cable_ignore(struct ata_port *ap)
{
return ATA_CBL_PATA_IGN;
}
+EXPORT_SYMBOL_GPL(ata_cable_ignore);
/**
* ata_cable_sata - return SATA cable type
@@ -2832,6 +2785,7 @@ int ata_cable_sata(struct ata_port *ap)
{
return ATA_CBL_SATA;
}
+EXPORT_SYMBOL_GPL(ata_cable_sata);
/**
* ata_bus_probe - Reset and probe ATA bus
@@ -3014,6 +2968,7 @@ struct ata_device *ata_dev_pair(struct ata_device *adev)
return NULL;
return pair;
}
+EXPORT_SYMBOL_GPL(ata_dev_pair);
/**
* sata_down_spd_limit - adjust SATA spd limit downward
@@ -3095,252 +3050,7 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit)
return 0;
}
-static int __sata_set_spd_needed(struct ata_link *link, u32 *scontrol)
-{
- struct ata_link *host_link = &link->ap->link;
- u32 limit, target, spd;
-
- limit = link->sata_spd_limit;
-
- /* Don't configure downstream link faster than upstream link.
- * It doesn't speed up anything and some PMPs choke on such
- * configuration.
- */
- if (!ata_is_host_link(link) && host_link->sata_spd)
- limit &= (1 << host_link->sata_spd) - 1;
-
- if (limit == UINT_MAX)
- target = 0;
- else
- target = fls(limit);
-
- spd = (*scontrol >> 4) & 0xf;
- *scontrol = (*scontrol & ~0xf0) | ((target & 0xf) << 4);
-
- return spd != target;
-}
-
-/**
- * sata_set_spd_needed - is SATA spd configuration needed
- * @link: Link in question
- *
- * Test whether the spd limit in SControl matches
- * @link->sata_spd_limit. This function is used to determine
- * whether hardreset is necessary to apply SATA spd
- * configuration.
- *
- * LOCKING:
- * Inherited from caller.
- *
- * RETURNS:
- * 1 if SATA spd configuration is needed, 0 otherwise.
- */
-static int sata_set_spd_needed(struct ata_link *link)
-{
- u32 scontrol;
-
- if (sata_scr_read(link, SCR_CONTROL, &scontrol))
- return 1;
-
- return __sata_set_spd_needed(link, &scontrol);
-}
-
-/**
- * sata_set_spd - set SATA spd according to spd limit
- * @link: Link to set SATA spd for
- *
- * Set SATA spd of @link according to sata_spd_limit.
- *
- * LOCKING:
- * Inherited from caller.
- *
- * RETURNS:
- * 0 if spd doesn't need to be changed, 1 if spd has been
- * changed. Negative errno if SCR registers are inaccessible.
- */
-int sata_set_spd(struct ata_link *link)
-{
- u32 scontrol;
- int rc;
-
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- return rc;
-
- if (!__sata_set_spd_needed(link, &scontrol))
- return 0;
-
- if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
- return rc;
-
- return 1;
-}
-
-/*
- * This mode timing computation functionality is ported over from
- * drivers/ide/ide-timing.h and was originally written by Vojtech Pavlik
- */
-/*
- * PIO 0-4, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds).
- * These were taken from ATA/ATAPI-6 standard, rev 0a, except
- * for UDMA6, which is currently supported only by Maxtor drives.
- *
- * For PIO 5/6 MWDMA 3/4 see the CFA specification 3.0.
- */
-
-static const struct ata_timing ata_timing[] = {
-/* { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 0, 960, 0 }, */
- { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 0, 600, 0 },
- { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 0, 383, 0 },
- { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 0, 240, 0 },
- { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 0, 180, 0 },
- { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 0, 120, 0 },
- { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 0, 100, 0 },
- { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 0, 80, 0 },
-
- { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 50, 960, 0 },
- { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 30, 480, 0 },
- { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 20, 240, 0 },
-
- { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 20, 480, 0 },
- { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 5, 150, 0 },
- { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 5, 120, 0 },
- { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 5, 100, 0 },
- { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 5, 80, 0 },
-
-/* { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 0, 150 }, */
- { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 0, 120 },
- { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 0, 80 },
- { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 0, 60 },
- { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 0, 45 },
- { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 0, 30 },
- { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 0, 20 },
- { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 0, 15 },
-
- { 0xFF }
-};
-
-#define ENOUGH(v, unit) (((v)-1)/(unit)+1)
-#define EZ(v, unit) ((v)?ENOUGH(((v) * 1000), unit):0)
-
-static void ata_timing_quantize(const struct ata_timing *t, struct ata_timing *q, int T, int UT)
-{
- q->setup = EZ(t->setup, T);
- q->act8b = EZ(t->act8b, T);
- q->rec8b = EZ(t->rec8b, T);
- q->cyc8b = EZ(t->cyc8b, T);
- q->active = EZ(t->active, T);
- q->recover = EZ(t->recover, T);
- q->dmack_hold = EZ(t->dmack_hold, T);
- q->cycle = EZ(t->cycle, T);
- q->udma = EZ(t->udma, UT);
-}
-
-void ata_timing_merge(const struct ata_timing *a, const struct ata_timing *b,
- struct ata_timing *m, unsigned int what)
-{
- if (what & ATA_TIMING_SETUP ) m->setup = max(a->setup, b->setup);
- if (what & ATA_TIMING_ACT8B ) m->act8b = max(a->act8b, b->act8b);
- if (what & ATA_TIMING_REC8B ) m->rec8b = max(a->rec8b, b->rec8b);
- if (what & ATA_TIMING_CYC8B ) m->cyc8b = max(a->cyc8b, b->cyc8b);
- if (what & ATA_TIMING_ACTIVE ) m->active = max(a->active, b->active);
- if (what & ATA_TIMING_RECOVER) m->recover = max(a->recover, b->recover);
- if (what & ATA_TIMING_DMACK_HOLD) m->dmack_hold = max(a->dmack_hold, b->dmack_hold);
- if (what & ATA_TIMING_CYCLE ) m->cycle = max(a->cycle, b->cycle);
- if (what & ATA_TIMING_UDMA ) m->udma = max(a->udma, b->udma);
-}
-
-const struct ata_timing *ata_timing_find_mode(u8 xfer_mode)
-{
- const struct ata_timing *t = ata_timing;
-
- while (xfer_mode > t->mode)
- t++;
-
- if (xfer_mode == t->mode)
- return t;
-
- WARN_ONCE(true, "%s: unable to find timing for xfer_mode 0x%x\n",
- __func__, xfer_mode);
-
- return NULL;
-}
-
-int ata_timing_compute(struct ata_device *adev, unsigned short speed,
- struct ata_timing *t, int T, int UT)
-{
- const u16 *id = adev->id;
- const struct ata_timing *s;
- struct ata_timing p;
-
- /*
- * Find the mode.
- */
-
- if (!(s = ata_timing_find_mode(speed)))
- return -EINVAL;
-
- memcpy(t, s, sizeof(*s));
-
- /*
- * If the drive is an EIDE drive, it can tell us it needs extended
- * PIO/MW_DMA cycle timing.
- */
-
- if (id[ATA_ID_FIELD_VALID] & 2) { /* EIDE drive */
- memset(&p, 0, sizeof(p));
-
- if (speed >= XFER_PIO_0 && speed < XFER_SW_DMA_0) {
- if (speed <= XFER_PIO_2)
- p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO];
- else if ((speed <= XFER_PIO_4) ||
- (speed == XFER_PIO_5 && !ata_id_is_cfa(id)))
- p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY];
- } else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
- p.cycle = id[ATA_ID_EIDE_DMA_MIN];
-
- ata_timing_merge(&p, t, t, ATA_TIMING_CYCLE | ATA_TIMING_CYC8B);
- }
-
- /*
- * Convert the timing to bus clock counts.
- */
-
- ata_timing_quantize(t, t, T, UT);
-
- /*
- * Even in DMA/UDMA modes we still use PIO access for IDENTIFY,
- * S.M.A.R.T * and some other commands. We have to ensure that the
- * DMA cycle timing is slower/equal than the fastest PIO timing.
- */
-
- if (speed > XFER_PIO_6) {
- ata_timing_compute(adev, adev->pio_mode, &p, T, UT);
- ata_timing_merge(&p, t, t, ATA_TIMING_ALL);
- }
-
- /*
- * Lengthen active & recovery time so that cycle time is correct.
- */
-
- if (t->act8b + t->rec8b < t->cyc8b) {
- t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2;
- t->rec8b = t->cyc8b - t->act8b;
- }
-
- if (t->active + t->recover < t->cycle) {
- t->active += (t->cycle - (t->active + t->recover)) / 2;
- t->recover = t->cycle - t->active;
- }
-
- /* In a few cases quantisation may produce enough errors to
- leave t->cycle too low for the sum of active and recovery
- if so we must correct this */
- if (t->active + t->recover > t->cycle)
- t->cycle = t->active + t->recover;
-
- return 0;
-}
-
+#ifdef CONFIG_ATA_ACPI
/**
* ata_timing_cycle2mode - find xfer mode for the specified cycle duration
* @xfer_shift: ATA_SHIFT_* value for transfer type to examine.
@@ -3391,6 +3101,7 @@ u8 ata_timing_cycle2mode(unsigned int xfer_shift, int cycle)
return last_mode;
}
+#endif
/**
* ata_down_xfermask_limit - adjust dev xfer masks downward
@@ -3662,6 +3373,7 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
*r_failed_dev = dev;
return rc;
}
+EXPORT_SYMBOL_GPL(ata_do_set_mode);
/**
* ata_wait_ready - wait for link to become ready
@@ -3771,216 +3483,7 @@ int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
return ata_wait_ready(link, deadline, check_ready);
}
-
-/**
- * sata_link_debounce - debounce SATA phy status
- * @link: ATA link to debounce SATA phy status for
- * @params: timing parameters { interval, duration, timeout } in msec
- * @deadline: deadline jiffies for the operation
- *
- * Make sure SStatus of @link reaches stable state, determined by
- * holding the same value where DET is not 1 for @duration polled
- * every @interval, before @timeout. Timeout constraints the
- * beginning of the stable state. Because DET gets stuck at 1 on
- * some controllers after hot unplugging, this functions waits
- * until timeout then returns 0 if DET is stable at 1.
- *
- * @timeout is further limited by @deadline. The sooner of the
- * two is used.
- *
- * LOCKING:
- * Kernel thread context (may sleep)
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int sata_link_debounce(struct ata_link *link, const unsigned long *params,
- unsigned long deadline)
-{
- unsigned long interval = params[0];
- unsigned long duration = params[1];
- unsigned long last_jiffies, t;
- u32 last, cur;
- int rc;
-
- t = ata_deadline(jiffies, params[2]);
- if (time_before(t, deadline))
- deadline = t;
-
- if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
- return rc;
- cur &= 0xf;
-
- last = cur;
- last_jiffies = jiffies;
-
- while (1) {
- ata_msleep(link->ap, interval);
- if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
- return rc;
- cur &= 0xf;
-
- /* DET stable? */
- if (cur == last) {
- if (cur == 1 && time_before(jiffies, deadline))
- continue;
- if (time_after(jiffies,
- ata_deadline(last_jiffies, duration)))
- return 0;
- continue;
- }
-
- /* unstable, start over */
- last = cur;
- last_jiffies = jiffies;
-
- /* Check deadline. If debouncing failed, return
- * -EPIPE to tell upper layer to lower link speed.
- */
- if (time_after(jiffies, deadline))
- return -EPIPE;
- }
-}
-
-/**
- * sata_link_resume - resume SATA link
- * @link: ATA link to resume SATA
- * @params: timing parameters { interval, duration, timeout } in msec
- * @deadline: deadline jiffies for the operation
- *
- * Resume SATA phy @link and debounce it.
- *
- * LOCKING:
- * Kernel thread context (may sleep)
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int sata_link_resume(struct ata_link *link, const unsigned long *params,
- unsigned long deadline)
-{
- int tries = ATA_LINK_RESUME_TRIES;
- u32 scontrol, serror;
- int rc;
-
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- return rc;
-
- /*
- * Writes to SControl sometimes get ignored under certain
- * controllers (ata_piix SIDPR). Make sure DET actually is
- * cleared.
- */
- do {
- scontrol = (scontrol & 0x0f0) | 0x300;
- if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
- return rc;
- /*
- * Some PHYs react badly if SStatus is pounded
- * immediately after resuming. Delay 200ms before
- * debouncing.
- */
- if (!(link->flags & ATA_LFLAG_NO_DB_DELAY))
- ata_msleep(link->ap, 200);
-
- /* is SControl restored correctly? */
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- return rc;
- } while ((scontrol & 0xf0f) != 0x300 && --tries);
-
- if ((scontrol & 0xf0f) != 0x300) {
- ata_link_warn(link, "failed to resume link (SControl %X)\n",
- scontrol);
- return 0;
- }
-
- if (tries < ATA_LINK_RESUME_TRIES)
- ata_link_warn(link, "link resume succeeded after %d retries\n",
- ATA_LINK_RESUME_TRIES - tries);
-
- if ((rc = sata_link_debounce(link, params, deadline)))
- return rc;
-
- /* clear SError, some PHYs require this even for SRST to work */
- if (!(rc = sata_scr_read(link, SCR_ERROR, &serror)))
- rc = sata_scr_write(link, SCR_ERROR, serror);
-
- return rc != -EINVAL ? rc : 0;
-}
-
-/**
- * sata_link_scr_lpm - manipulate SControl IPM and SPM fields
- * @link: ATA link to manipulate SControl for
- * @policy: LPM policy to configure
- * @spm_wakeup: initiate LPM transition to active state
- *
- * Manipulate the IPM field of the SControl register of @link
- * according to @policy. If @policy is ATA_LPM_MAX_POWER and
- * @spm_wakeup is %true, the SPM field is manipulated to wake up
- * the link. This function also clears PHYRDY_CHG before
- * returning.
- *
- * LOCKING:
- * EH context.
- *
- * RETURNS:
- * 0 on success, -errno otherwise.
- */
-int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
- bool spm_wakeup)
-{
- struct ata_eh_context *ehc = &link->eh_context;
- bool woken_up = false;
- u32 scontrol;
- int rc;
-
- rc = sata_scr_read(link, SCR_CONTROL, &scontrol);
- if (rc)
- return rc;
-
- switch (policy) {
- case ATA_LPM_MAX_POWER:
- /* disable all LPM transitions */
- scontrol |= (0x7 << 8);
- /* initiate transition to active state */
- if (spm_wakeup) {
- scontrol |= (0x4 << 12);
- woken_up = true;
- }
- break;
- case ATA_LPM_MED_POWER:
- /* allow LPM to PARTIAL */
- scontrol &= ~(0x1 << 8);
- scontrol |= (0x6 << 8);
- break;
- case ATA_LPM_MED_POWER_WITH_DIPM:
- case ATA_LPM_MIN_POWER_WITH_PARTIAL:
- case ATA_LPM_MIN_POWER:
- if (ata_link_nr_enabled(link) > 0)
- /* no restrictions on LPM transitions */
- scontrol &= ~(0x7 << 8);
- else {
- /* empty port, power off */
- scontrol &= ~0xf;
- scontrol |= (0x1 << 2);
- }
- break;
- default:
- WARN_ON(1);
- }
-
- rc = sata_scr_write(link, SCR_CONTROL, scontrol);
- if (rc)
- return rc;
-
- /* give the link time to transit out of LPM state */
- if (woken_up)
- msleep(10);
-
- /* clear PHYRDY_CHG from SError */
- ehc->i.serror &= ~SERR_PHYRDY_CHG;
- return sata_scr_write(link, SCR_ERROR, SERR_PHYRDY_CHG);
-}
+EXPORT_SYMBOL_GPL(ata_wait_after_reset);
/**
* ata_std_prereset - prepare for reset
@@ -4026,118 +3529,7 @@ int ata_std_prereset(struct ata_link *link, unsigned long deadline)
return 0;
}
-
-/**
- * sata_link_hardreset - reset link via SATA phy reset
- * @link: link to reset
- * @timing: timing parameters { interval, duration, timeout } in msec
- * @deadline: deadline jiffies for the operation
- * @online: optional out parameter indicating link onlineness
- * @check_ready: optional callback to check link readiness
- *
- * SATA phy-reset @link using DET bits of SControl register.
- * After hardreset, link readiness is waited upon using
- * ata_wait_ready() if @check_ready is specified. LLDs are
- * allowed to not specify @check_ready and wait itself after this
- * function returns. Device classification is LLD's
- * responsibility.
- *
- * *@online is set to one iff reset succeeded and @link is online
- * after reset.
- *
- * LOCKING:
- * Kernel thread context (may sleep)
- *
- * RETURNS:
- * 0 on success, -errno otherwise.
- */
-int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
- unsigned long deadline,
- bool *online, int (*check_ready)(struct ata_link *))
-{
- u32 scontrol;
- int rc;
-
- DPRINTK("ENTER\n");
-
- if (online)
- *online = false;
-
- if (sata_set_spd_needed(link)) {
- /* SATA spec says nothing about how to reconfigure
- * spd. To be on the safe side, turn off phy during
- * reconfiguration. This works for at least ICH7 AHCI
- * and Sil3124.
- */
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- goto out;
-
- scontrol = (scontrol & 0x0f0) | 0x304;
-
- if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
- goto out;
-
- sata_set_spd(link);
- }
-
- /* issue phy wake/reset */
- if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
- goto out;
-
- scontrol = (scontrol & 0x0f0) | 0x301;
-
- if ((rc = sata_scr_write_flush(link, SCR_CONTROL, scontrol)))
- goto out;
-
- /* Couldn't find anything in SATA I/II specs, but AHCI-1.1
- * 10.4.2 says at least 1 ms.
- */
- ata_msleep(link->ap, 1);
-
- /* bring link back */
- rc = sata_link_resume(link, timing, deadline);
- if (rc)
- goto out;
- /* if link is offline nothing more to do */
- if (ata_phys_link_offline(link))
- goto out;
-
- /* Link is online. From this point, -ENODEV too is an error. */
- if (online)
- *online = true;
-
- if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) {
- /* If PMP is supported, we have to do follow-up SRST.
- * Some PMPs don't send D2H Reg FIS after hardreset if
- * the first port is empty. Wait only for
- * ATA_TMOUT_PMP_SRST_WAIT.
- */
- if (check_ready) {
- unsigned long pmp_deadline;
-
- pmp_deadline = ata_deadline(jiffies,
- ATA_TMOUT_PMP_SRST_WAIT);
- if (time_after(pmp_deadline, deadline))
- pmp_deadline = deadline;
- ata_wait_ready(link, pmp_deadline, check_ready);
- }
- rc = -EAGAIN;
- goto out;
- }
-
- rc = 0;
- if (check_ready)
- rc = ata_wait_ready(link, deadline, check_ready);
- out:
- if (rc && rc != -EAGAIN) {
- /* online is set iff link is online && reset succeeded */
- if (online)
- *online = false;
- ata_link_err(link, "COMRESET failed (errno=%d)\n", rc);
- }
- DPRINTK("EXIT, rc=%d\n", rc);
- return rc;
-}
+EXPORT_SYMBOL_GPL(ata_std_prereset);
/**
* sata_std_hardreset - COMRESET w/o waiting or classification
@@ -4164,6 +3556,7 @@ int sata_std_hardreset(struct ata_link *link, unsigned int *class,
rc = sata_link_hardreset(link, timing, deadline, &online, NULL);
return online ? -EAGAIN : rc;
}
+EXPORT_SYMBOL_GPL(sata_std_hardreset);
/**
* ata_std_postreset - standard postreset callback
@@ -4192,6 +3585,7 @@ void ata_std_postreset(struct ata_link *link, unsigned int *classes)
DPRINTK("EXIT\n");
}
+EXPORT_SYMBOL_GPL(ata_std_postreset);
/**
* ata_dev_same_device - Determine whether new ID matches configured device
@@ -4979,11 +4373,13 @@ int ata_std_qc_defer(struct ata_queued_cmd *qc)
return ATA_DEFER_LINK;
}
+EXPORT_SYMBOL_GPL(ata_std_qc_defer);
enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc)
{
return AC_ERR_OK;
}
+EXPORT_SYMBOL_GPL(ata_noop_qc_prep);
/**
* ata_sg_init - Associate command with scatter-gather table.
@@ -5327,6 +4723,7 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
__ata_qc_complete(qc);
}
}
+EXPORT_SYMBOL_GPL(ata_qc_complete);
/**
* ata_qc_get_active - get bitmask of active qcs
@@ -5353,64 +4750,6 @@ u64 ata_qc_get_active(struct ata_port *ap)
EXPORT_SYMBOL_GPL(ata_qc_get_active);
/**
- * ata_qc_complete_multiple - Complete multiple qcs successfully
- * @ap: port in question
- * @qc_active: new qc_active mask
- *
- * Complete in-flight commands. This functions is meant to be
- * called from low-level driver's interrupt routine to complete
- * requests normally. ap->qc_active and @qc_active is compared
- * and commands are completed accordingly.
- *
- * Always use this function when completing multiple NCQ commands
- * from IRQ handlers instead of calling ata_qc_complete()
- * multiple times to keep IRQ expect status properly in sync.
- *
- * LOCKING:
- * spin_lock_irqsave(host lock)
- *
- * RETURNS:
- * Number of completed commands on success, -errno otherwise.
- */
-int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active)
-{
- u64 done_mask, ap_qc_active = ap->qc_active;
- int nr_done = 0;
-
- /*
- * If the internal tag is set on ap->qc_active, then we care about
- * bit0 on the passed in qc_active mask. Move that bit up to match
- * the internal tag.
- */
- if (ap_qc_active & (1ULL << ATA_TAG_INTERNAL)) {
- qc_active |= (qc_active & 0x01) << ATA_TAG_INTERNAL;
- qc_active ^= qc_active & 0x01;
- }
-
- done_mask = ap_qc_active ^ qc_active;
-
- if (unlikely(done_mask & qc_active)) {
- ata_port_err(ap, "illegal qc_active transition (%08llx->%08llx)\n",
- ap->qc_active, qc_active);
- return -EINVAL;
- }
-
- while (done_mask) {
- struct ata_queued_cmd *qc;
- unsigned int tag = __ffs64(done_mask);
-
- qc = ata_qc_from_tag(ap, tag);
- if (qc) {
- ata_qc_complete(qc);
- nr_done++;
- }
- done_mask &= ~(1ULL << tag);
- }
-
- return nr_done;
-}
-
-/**
* ata_qc_issue - issue taskfile to device
* @qc: command to issue to device
*
@@ -5486,111 +4825,6 @@ err:
}
/**
- * sata_scr_valid - test whether SCRs are accessible
- * @link: ATA link to test SCR accessibility for
- *
- * Test whether SCRs are accessible for @link.
- *
- * LOCKING:
- * None.
- *
- * RETURNS:
- * 1 if SCRs are accessible, 0 otherwise.
- */
-int sata_scr_valid(struct ata_link *link)
-{
- struct ata_port *ap = link->ap;
-
- return (ap->flags & ATA_FLAG_SATA) && ap->ops->scr_read;
-}
-
-/**
- * sata_scr_read - read SCR register of the specified port
- * @link: ATA link to read SCR for
- * @reg: SCR to read
- * @val: Place to store read value
- *
- * Read SCR register @reg of @link into *@val. This function is
- * guaranteed to succeed if @link is ap->link, the cable type of
- * the port is SATA and the port implements ->scr_read.
- *
- * LOCKING:
- * None if @link is ap->link. Kernel thread context otherwise.
- *
- * RETURNS:
- * 0 on success, negative errno on failure.
- */
-int sata_scr_read(struct ata_link *link, int reg, u32 *val)
-{
- if (ata_is_host_link(link)) {
- if (sata_scr_valid(link))
- return link->ap->ops->scr_read(link, reg, val);
- return -EOPNOTSUPP;
- }
-
- return sata_pmp_scr_read(link, reg, val);
-}
-
-/**
- * sata_scr_write - write SCR register of the specified port
- * @link: ATA link to write SCR for
- * @reg: SCR to write
- * @val: value to write
- *
- * Write @val to SCR register @reg of @link. This function is
- * guaranteed to succeed if @link is ap->link, the cable type of
- * the port is SATA and the port implements ->scr_read.
- *
- * LOCKING:
- * None if @link is ap->link. Kernel thread context otherwise.
- *
- * RETURNS:
- * 0 on success, negative errno on failure.
- */
-int sata_scr_write(struct ata_link *link, int reg, u32 val)
-{
- if (ata_is_host_link(link)) {
- if (sata_scr_valid(link))
- return link->ap->ops->scr_write(link, reg, val);
- return -EOPNOTSUPP;
- }
-
- return sata_pmp_scr_write(link, reg, val);
-}
-
-/**
- * sata_scr_write_flush - write SCR register of the specified port and flush
- * @link: ATA link to write SCR for
- * @reg: SCR to write
- * @val: value to write
- *
- * This function is identical to sata_scr_write() except that this
- * function performs flush after writing to the register.
- *
- * LOCKING:
- * None if @link is ap->link. Kernel thread context otherwise.
- *
- * RETURNS:
- * 0 on success, negative errno on failure.
- */
-int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
-{
- if (ata_is_host_link(link)) {
- int rc;
-
- if (sata_scr_valid(link)) {
- rc = link->ap->ops->scr_write(link, reg, val);
- if (rc == 0)
- rc = link->ap->ops->scr_read(link, reg, &val);
- return rc;
- }
- return -EOPNOTSUPP;
- }
-
- return sata_pmp_scr_write(link, reg, val);
-}
-
-/**
* ata_phys_link_online - test whether the given link is online
* @link: ATA link to test
*
@@ -5663,6 +4897,7 @@ bool ata_link_online(struct ata_link *link)
return ata_phys_link_online(link) ||
(slave && ata_phys_link_online(slave));
}
+EXPORT_SYMBOL_GPL(ata_link_online);
/**
* ata_link_offline - test whether the given link is offline
@@ -5689,6 +4924,7 @@ bool ata_link_offline(struct ata_link *link)
return ata_phys_link_offline(link) &&
(!slave || ata_phys_link_offline(slave));
}
+EXPORT_SYMBOL_GPL(ata_link_offline);
#ifdef CONFIG_PM
static void ata_port_request_pm(struct ata_port *ap, pm_message_t mesg,
@@ -5875,6 +5111,7 @@ int ata_host_suspend(struct ata_host *host, pm_message_t mesg)
host->dev->power.power_state = mesg;
return 0;
}
+EXPORT_SYMBOL_GPL(ata_host_suspend);
/**
* ata_host_resume - resume host
@@ -5886,6 +5123,7 @@ void ata_host_resume(struct ata_host *host)
{
host->dev->power.power_state = PMSG_ON;
}
+EXPORT_SYMBOL_GPL(ata_host_resume);
#endif
const struct device_type ata_port_type = {
@@ -6105,6 +5343,7 @@ void ata_host_put(struct ata_host *host)
{
kref_put(&host->kref, ata_host_release);
}
+EXPORT_SYMBOL_GPL(ata_host_put);
/**
* ata_host_alloc - allocate and init basic ATA host resources
@@ -6178,6 +5417,7 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports)
kfree(host);
return NULL;
}
+EXPORT_SYMBOL_GPL(ata_host_alloc);
/**
* ata_host_alloc_pinfo - alloc host and init with port_info array
@@ -6226,68 +5466,7 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev,
return host;
}
-
-/**
- * ata_slave_link_init - initialize slave link
- * @ap: port to initialize slave link for
- *
- * Create and initialize slave link for @ap. This enables slave
- * link handling on the port.
- *
- * In libata, a port contains links and a link contains devices.
- * There is single host link but if a PMP is attached to it,
- * there can be multiple fan-out links. On SATA, there's usually
- * a single device connected to a link but PATA and SATA
- * controllers emulating TF based interface can have two - master
- * and slave.
- *
- * However, there are a few controllers which don't fit into this
- * abstraction too well - SATA controllers which emulate TF
- * interface with both master and slave devices but also have
- * separate SCR register sets for each device. These controllers
- * need separate links for physical link handling
- * (e.g. onlineness, link speed) but should be treated like a
- * traditional M/S controller for everything else (e.g. command
- * issue, softreset).
- *
- * slave_link is libata's way of handling this class of
- * controllers without impacting core layer too much. For
- * anything other than physical link handling, the default host
- * link is used for both master and slave. For physical link
- * handling, separate @ap->slave_link is used. All dirty details
- * are implemented inside libata core layer. From LLD's POV, the
- * only difference is that prereset, hardreset and postreset are
- * called once more for the slave link, so the reset sequence
- * looks like the following.
- *
- * prereset(M) -> prereset(S) -> hardreset(M) -> hardreset(S) ->
- * softreset(M) -> postreset(M) -> postreset(S)
- *
- * Note that softreset is called only for the master. Softreset
- * resets both M/S by definition, so SRST on master should handle
- * both (the standard method will work just fine).
- *
- * LOCKING:
- * Should be called before host is registered.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int ata_slave_link_init(struct ata_port *ap)
-{
- struct ata_link *link;
-
- WARN_ON(ap->slave_link);
- WARN_ON(ap->flags & ATA_FLAG_PMP);
-
- link = kzalloc(sizeof(*link), GFP_KERNEL);
- if (!link)
- return -ENOMEM;
-
- ata_link_init(ap, link, 1);
- ap->slave_link = link;
- return 0;
-}
+EXPORT_SYMBOL_GPL(ata_host_alloc_pinfo);
static void ata_host_stop(struct device *gendev, void *res)
{
@@ -6436,6 +5615,7 @@ int ata_host_start(struct ata_host *host)
devres_free(start_dr);
return rc;
}
+EXPORT_SYMBOL_GPL(ata_host_start);
/**
* ata_sas_host_init - Initialize a host struct for sas (ipr, libsas)
@@ -6454,6 +5634,7 @@ void ata_host_init(struct ata_host *host, struct device *dev,
host->ops = ops;
kref_init(&host->kref);
}
+EXPORT_SYMBOL_GPL(ata_host_init);
void __ata_port_probe(struct ata_port *ap)
{
@@ -6609,6 +5790,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
return rc;
}
+EXPORT_SYMBOL_GPL(ata_host_register);
/**
* ata_host_activate - start host, request IRQ and register it
@@ -6671,6 +5853,7 @@ int ata_host_activate(struct ata_host *host, int irq,
return rc;
}
+EXPORT_SYMBOL_GPL(ata_host_activate);
/**
* ata_port_detach - Detach ATA port in preparation of device removal
@@ -6746,6 +5929,7 @@ void ata_host_detach(struct ata_host *host)
/* the host is dead now, dissociate ACPI */
ata_acpi_dissociate(host);
}
+EXPORT_SYMBOL_GPL(ata_host_detach);
#ifdef CONFIG_PCI
@@ -6766,6 +5950,7 @@ void ata_pci_remove_one(struct pci_dev *pdev)
ata_host_detach(host);
}
+EXPORT_SYMBOL_GPL(ata_pci_remove_one);
void ata_pci_shutdown_one(struct pci_dev *pdev)
{
@@ -6786,6 +5971,7 @@ void ata_pci_shutdown_one(struct pci_dev *pdev)
ap->ops->port_stop(ap);
}
}
+EXPORT_SYMBOL_GPL(ata_pci_shutdown_one);
/* move to PCI subsystem */
int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
@@ -6820,6 +6006,7 @@ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
return (tmp == bits->val) ? 1 : 0;
}
+EXPORT_SYMBOL_GPL(pci_test_config_bits);
#ifdef CONFIG_PM
void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t mesg)
@@ -6830,6 +6017,7 @@ void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t mesg)
if (mesg.event & PM_EVENT_SLEEP)
pci_set_power_state(pdev, PCI_D3hot);
}
+EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend);
int ata_pci_device_do_resume(struct pci_dev *pdev)
{
@@ -6848,6 +6036,7 @@ int ata_pci_device_do_resume(struct pci_dev *pdev)
pci_set_master(pdev);
return 0;
}
+EXPORT_SYMBOL_GPL(ata_pci_device_do_resume);
int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
{
@@ -6862,6 +6051,7 @@ int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
return 0;
}
+EXPORT_SYMBOL_GPL(ata_pci_device_suspend);
int ata_pci_device_resume(struct pci_dev *pdev)
{
@@ -6873,8 +6063,8 @@ int ata_pci_device_resume(struct pci_dev *pdev)
ata_host_resume(host);
return rc;
}
+EXPORT_SYMBOL_GPL(ata_pci_device_resume);
#endif /* CONFIG_PM */
-
#endif /* CONFIG_PCI */
/**
@@ -6896,7 +6086,9 @@ int ata_platform_remove_one(struct platform_device *pdev)
return 0;
}
+EXPORT_SYMBOL_GPL(ata_platform_remove_one);
+#ifdef CONFIG_ATA_FORCE
static int __init ata_parse_force_one(char **cur,
struct ata_force_ent *force_ent,
const char **reason)
@@ -7076,6 +6268,15 @@ static void __init ata_parse_force_param(void)
ata_force_tbl_size = idx;
}
+static void ata_free_force_param(void)
+{
+ kfree(ata_force_tbl);
+}
+#else
+static inline void ata_parse_force_param(void) { }
+static inline void ata_free_force_param(void) { }
+#endif
+
static int __init ata_init(void)
{
int rc;
@@ -7084,7 +6285,7 @@ static int __init ata_init(void)
rc = ata_sff_init();
if (rc) {
- kfree(ata_force_tbl);
+ ata_free_force_param();
return rc;
}
@@ -7108,7 +6309,7 @@ static void __exit ata_exit(void)
ata_release_transport(ata_scsi_transport_template);
libata_transport_exit();
ata_sff_exit();
- kfree(ata_force_tbl);
+ ata_free_force_param();
}
subsys_initcall(ata_init);
@@ -7120,6 +6321,7 @@ int ata_ratelimit(void)
{
return __ratelimit(&ratelimit);
}
+EXPORT_SYMBOL_GPL(ata_ratelimit);
/**
* ata_msleep - ATA EH owner aware msleep
@@ -7152,6 +6354,7 @@ void ata_msleep(struct ata_port *ap, unsigned int msecs)
if (owns_eh)
ata_eh_acquire(ap);
}
+EXPORT_SYMBOL_GPL(ata_msleep);
/**
* ata_wait_register - wait until register value changes
@@ -7198,38 +6401,7 @@ u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val,
return tmp;
}
-
-/**
- * sata_lpm_ignore_phy_events - test if PHY event should be ignored
- * @link: Link receiving the event
- *
- * Test whether the received PHY event has to be ignored or not.
- *
- * LOCKING:
- * None:
- *
- * RETURNS:
- * True if the event has to be ignored.
- */
-bool sata_lpm_ignore_phy_events(struct ata_link *link)
-{
- unsigned long lpm_timeout = link->last_lpm_change +
- msecs_to_jiffies(ATA_TMOUT_SPURIOUS_PHY);
-
- /* if LPM is enabled, PHYRDY doesn't mean anything */
- if (link->lpm_policy > ATA_LPM_MAX_POWER)
- return true;
-
- /* ignore the first PHY event after the LPM policy changed
- * as it is might be spurious
- */
- if ((link->flags & ATA_LFLAG_CHANGED) &&
- time_before(jiffies, lpm_timeout))
- return true;
-
- return false;
-}
-EXPORT_SYMBOL_GPL(sata_lpm_ignore_phy_events);
+EXPORT_SYMBOL_GPL(ata_wait_register);
/*
* Dummy port_ops
@@ -7251,10 +6423,12 @@ struct ata_port_operations ata_dummy_port_ops = {
.sched_eh = ata_std_sched_eh,
.end_eh = ata_std_end_eh,
};
+EXPORT_SYMBOL_GPL(ata_dummy_port_ops);
const struct ata_port_info ata_dummy_port_info = {
.port_ops = &ata_dummy_port_ops,
};
+EXPORT_SYMBOL_GPL(ata_dummy_port_info);
/*
* Utility print functions
@@ -7322,127 +6496,3 @@ void ata_print_version(const struct device *dev, const char *version)
dev_printk(KERN_DEBUG, dev, "version %s\n", version);
}
EXPORT_SYMBOL(ata_print_version);
-
-/*
- * libata is essentially a library of internal helper functions for
- * low-level ATA host controller drivers. As such, the API/ABI is
- * likely to change as new drivers are added and updated.
- * Do not depend on ABI/API stability.
- */
-EXPORT_SYMBOL_GPL(sata_deb_timing_normal);
-EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug);
-EXPORT_SYMBOL_GPL(sata_deb_timing_long);
-EXPORT_SYMBOL_GPL(ata_base_port_ops);
-EXPORT_SYMBOL_GPL(sata_port_ops);
-EXPORT_SYMBOL_GPL(ata_dummy_port_ops);
-EXPORT_SYMBOL_GPL(ata_dummy_port_info);
-EXPORT_SYMBOL_GPL(ata_link_next);
-EXPORT_SYMBOL_GPL(ata_dev_next);
-EXPORT_SYMBOL_GPL(ata_std_bios_param);
-EXPORT_SYMBOL_GPL(ata_scsi_unlock_native_capacity);
-EXPORT_SYMBOL_GPL(ata_host_init);
-EXPORT_SYMBOL_GPL(ata_host_alloc);
-EXPORT_SYMBOL_GPL(ata_host_alloc_pinfo);
-EXPORT_SYMBOL_GPL(ata_slave_link_init);
-EXPORT_SYMBOL_GPL(ata_host_start);
-EXPORT_SYMBOL_GPL(ata_host_register);
-EXPORT_SYMBOL_GPL(ata_host_activate);
-EXPORT_SYMBOL_GPL(ata_host_detach);
-EXPORT_SYMBOL_GPL(ata_sg_init);
-EXPORT_SYMBOL_GPL(ata_qc_complete);
-EXPORT_SYMBOL_GPL(ata_qc_complete_multiple);
-EXPORT_SYMBOL_GPL(atapi_cmd_type);
-EXPORT_SYMBOL_GPL(ata_tf_to_fis);
-EXPORT_SYMBOL_GPL(ata_tf_from_fis);
-EXPORT_SYMBOL_GPL(ata_pack_xfermask);
-EXPORT_SYMBOL_GPL(ata_unpack_xfermask);
-EXPORT_SYMBOL_GPL(ata_xfer_mask2mode);
-EXPORT_SYMBOL_GPL(ata_xfer_mode2mask);
-EXPORT_SYMBOL_GPL(ata_xfer_mode2shift);
-EXPORT_SYMBOL_GPL(ata_mode_string);
-EXPORT_SYMBOL_GPL(ata_id_xfermask);
-EXPORT_SYMBOL_GPL(ata_do_set_mode);
-EXPORT_SYMBOL_GPL(ata_std_qc_defer);
-EXPORT_SYMBOL_GPL(ata_noop_qc_prep);
-EXPORT_SYMBOL_GPL(ata_dev_disable);
-EXPORT_SYMBOL_GPL(sata_set_spd);
-EXPORT_SYMBOL_GPL(ata_wait_after_reset);
-EXPORT_SYMBOL_GPL(sata_link_debounce);
-EXPORT_SYMBOL_GPL(sata_link_resume);
-EXPORT_SYMBOL_GPL(sata_link_scr_lpm);
-EXPORT_SYMBOL_GPL(ata_std_prereset);
-EXPORT_SYMBOL_GPL(sata_link_hardreset);
-EXPORT_SYMBOL_GPL(sata_std_hardreset);
-EXPORT_SYMBOL_GPL(ata_std_postreset);
-EXPORT_SYMBOL_GPL(ata_dev_classify);
-EXPORT_SYMBOL_GPL(ata_dev_pair);
-EXPORT_SYMBOL_GPL(ata_ratelimit);
-EXPORT_SYMBOL_GPL(ata_msleep);
-EXPORT_SYMBOL_GPL(ata_wait_register);
-EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);
-EXPORT_SYMBOL_GPL(ata_scsi_slave_config);
-EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy);
-EXPORT_SYMBOL_GPL(ata_scsi_change_queue_depth);
-EXPORT_SYMBOL_GPL(__ata_change_queue_depth);
-EXPORT_SYMBOL_GPL(sata_scr_valid);
-EXPORT_SYMBOL_GPL(sata_scr_read);
-EXPORT_SYMBOL_GPL(sata_scr_write);
-EXPORT_SYMBOL_GPL(sata_scr_write_flush);
-EXPORT_SYMBOL_GPL(ata_link_online);
-EXPORT_SYMBOL_GPL(ata_link_offline);
-#ifdef CONFIG_PM
-EXPORT_SYMBOL_GPL(ata_host_suspend);
-EXPORT_SYMBOL_GPL(ata_host_resume);
-#endif /* CONFIG_PM */
-EXPORT_SYMBOL_GPL(ata_id_string);
-EXPORT_SYMBOL_GPL(ata_id_c_string);
-EXPORT_SYMBOL_GPL(ata_do_dev_read_id);
-EXPORT_SYMBOL_GPL(ata_scsi_simulate);
-
-EXPORT_SYMBOL_GPL(ata_pio_need_iordy);
-EXPORT_SYMBOL_GPL(ata_timing_find_mode);
-EXPORT_SYMBOL_GPL(ata_timing_compute);
-EXPORT_SYMBOL_GPL(ata_timing_merge);
-EXPORT_SYMBOL_GPL(ata_timing_cycle2mode);
-
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL_GPL(pci_test_config_bits);
-EXPORT_SYMBOL_GPL(ata_pci_shutdown_one);
-EXPORT_SYMBOL_GPL(ata_pci_remove_one);
-#ifdef CONFIG_PM
-EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend);
-EXPORT_SYMBOL_GPL(ata_pci_device_do_resume);
-EXPORT_SYMBOL_GPL(ata_pci_device_suspend);
-EXPORT_SYMBOL_GPL(ata_pci_device_resume);
-#endif /* CONFIG_PM */
-#endif /* CONFIG_PCI */
-
-EXPORT_SYMBOL_GPL(ata_platform_remove_one);
-
-EXPORT_SYMBOL_GPL(__ata_ehi_push_desc);
-EXPORT_SYMBOL_GPL(ata_ehi_push_desc);
-EXPORT_SYMBOL_GPL(ata_ehi_clear_desc);
-EXPORT_SYMBOL_GPL(ata_port_desc);
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL_GPL(ata_port_pbar_desc);
-#endif /* CONFIG_PCI */
-EXPORT_SYMBOL_GPL(ata_port_schedule_eh);
-EXPORT_SYMBOL_GPL(ata_link_abort);
-EXPORT_SYMBOL_GPL(ata_port_abort);
-EXPORT_SYMBOL_GPL(ata_port_freeze);
-EXPORT_SYMBOL_GPL(sata_async_notification);
-EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
-EXPORT_SYMBOL_GPL(ata_eh_thaw_port);
-EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
-EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
-EXPORT_SYMBOL_GPL(ata_eh_analyze_ncq_error);
-EXPORT_SYMBOL_GPL(ata_do_eh);
-EXPORT_SYMBOL_GPL(ata_std_error_handler);
-
-EXPORT_SYMBOL_GPL(ata_cable_40wire);
-EXPORT_SYMBOL_GPL(ata_cable_80wire);
-EXPORT_SYMBOL_GPL(ata_cable_unknown);
-EXPORT_SYMBOL_GPL(ata_cable_ignore);
-EXPORT_SYMBOL_GPL(ata_cable_sata);
-EXPORT_SYMBOL_GPL(ata_host_get);
-EXPORT_SYMBOL_GPL(ata_host_put);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 3bfd9da58473..474c6c34fe02 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2,10 +2,6 @@
/*
* libata-eh.c - libata error handling
*
- * Maintained by: Tejun Heo <tj@kernel.org>
- * Please ALWAYS copy linux-ide@vger.kernel.org
- * on emails.
- *
* Copyright 2006 Tejun Heo <htejun@gmail.com>
*
* libata documentation is available via 'make {ps|pdf}docs',
@@ -184,6 +180,7 @@ void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
__ata_ehi_pushv_desc(ehi, fmt, args);
va_end(args);
}
+EXPORT_SYMBOL_GPL(__ata_ehi_push_desc);
/**
* ata_ehi_push_desc - push error description with separator
@@ -207,6 +204,7 @@ void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
__ata_ehi_pushv_desc(ehi, fmt, args);
va_end(args);
}
+EXPORT_SYMBOL_GPL(ata_ehi_push_desc);
/**
* ata_ehi_clear_desc - clean error description
@@ -222,6 +220,7 @@ void ata_ehi_clear_desc(struct ata_eh_info *ehi)
ehi->desc[0] = '\0';
ehi->desc_len = 0;
}
+EXPORT_SYMBOL_GPL(ata_ehi_clear_desc);
/**
* ata_port_desc - append port description
@@ -249,9 +248,9 @@ void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
va_end(args);
}
+EXPORT_SYMBOL_GPL(ata_port_desc);
#ifdef CONFIG_PCI
-
/**
* ata_port_pbar_desc - append PCI BAR description
* @ap: target ATA port
@@ -288,7 +287,7 @@ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
ata_port_desc(ap, "%s 0x%llx", name,
start + (unsigned long long)offset);
}
-
+EXPORT_SYMBOL_GPL(ata_port_pbar_desc);
#endif /* CONFIG_PCI */
static int ata_lookup_timeout_table(u8 cmd)
@@ -973,6 +972,7 @@ void ata_port_schedule_eh(struct ata_port *ap)
/* see: ata_std_sched_eh, unless you know better */
ap->ops->sched_eh(ap);
}
+EXPORT_SYMBOL_GPL(ata_port_schedule_eh);
static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
@@ -1015,6 +1015,7 @@ int ata_link_abort(struct ata_link *link)
{
return ata_do_link_abort(link->ap, link);
}
+EXPORT_SYMBOL_GPL(ata_link_abort);
/**
* ata_port_abort - abort all qc's on the port
@@ -1032,6 +1033,7 @@ int ata_port_abort(struct ata_port *ap)
{
return ata_do_link_abort(ap, NULL);
}
+EXPORT_SYMBOL_GPL(ata_port_abort);
/**
* __ata_port_freeze - freeze port
@@ -1088,79 +1090,7 @@ int ata_port_freeze(struct ata_port *ap)
return nr_aborted;
}
-
-/**
- * sata_async_notification - SATA async notification handler
- * @ap: ATA port where async notification is received
- *
- * Handler to be called when async notification via SDB FIS is
- * received. This function schedules EH if necessary.
- *
- * LOCKING:
- * spin_lock_irqsave(host lock)
- *
- * RETURNS:
- * 1 if EH is scheduled, 0 otherwise.
- */
-int sata_async_notification(struct ata_port *ap)
-{
- u32 sntf;
- int rc;
-
- if (!(ap->flags & ATA_FLAG_AN))
- return 0;
-
- rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
- if (rc == 0)
- sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);
-
- if (!sata_pmp_attached(ap) || rc) {
- /* PMP is not attached or SNTF is not available */
- if (!sata_pmp_attached(ap)) {
- /* PMP is not attached. Check whether ATAPI
- * AN is configured. If so, notify media
- * change.
- */
- struct ata_device *dev = ap->link.device;
-
- if ((dev->class == ATA_DEV_ATAPI) &&
- (dev->flags & ATA_DFLAG_AN))
- ata_scsi_media_change_notify(dev);
- return 0;
- } else {
- /* PMP is attached but SNTF is not available.
- * ATAPI async media change notification is
- * not used. The PMP must be reporting PHY
- * status change, schedule EH.
- */
- ata_port_schedule_eh(ap);
- return 1;
- }
- } else {
- /* PMP is attached and SNTF is available */
- struct ata_link *link;
-
- /* check and notify ATAPI AN */
- ata_for_each_link(link, ap, EDGE) {
- if (!(sntf & (1 << link->pmp)))
- continue;
-
- if ((link->device->class == ATA_DEV_ATAPI) &&
- (link->device->flags & ATA_DFLAG_AN))
- ata_scsi_media_change_notify(link->device);
- }
-
- /* If PMP is reporting that PHY status of some
- * downstream ports has changed, schedule EH.
- */
- if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
- ata_port_schedule_eh(ap);
- return 1;
- }
-
- return 0;
- }
-}
+EXPORT_SYMBOL_GPL(ata_port_freeze);
/**
* ata_eh_freeze_port - EH helper to freeze port
@@ -1182,6 +1112,7 @@ void ata_eh_freeze_port(struct ata_port *ap)
__ata_port_freeze(ap);
spin_unlock_irqrestore(ap->lock, flags);
}
+EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
/**
* ata_port_thaw_port - EH helper to thaw port
@@ -1289,6 +1220,7 @@ void ata_dev_disable(struct ata_device *dev)
*/
ata_ering_clear(&dev->ering);
}
+EXPORT_SYMBOL_GPL(ata_dev_disable);
/**
* ata_eh_detach_dev - detach ATA device
@@ -1420,62 +1352,6 @@ static const char *ata_err_string(unsigned int err_mask)
}
/**
- * ata_eh_read_log_10h - Read log page 10h for NCQ error details
- * @dev: Device to read log page 10h from
- * @tag: Resulting tag of the failed command
- * @tf: Resulting taskfile registers of the failed command
- *
- * Read log page 10h to obtain NCQ error details and clear error
- * condition.
- *
- * LOCKING:
- * Kernel thread context (may sleep).
- *
- * RETURNS:
- * 0 on success, -errno otherwise.
- */
-static int ata_eh_read_log_10h(struct ata_device *dev,
- int *tag, struct ata_taskfile *tf)
-{
- u8 *buf = dev->link->ap->sector_buf;
- unsigned int err_mask;
- u8 csum;
- int i;
-
- err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
- if (err_mask)
- return -EIO;
-
- csum = 0;
- for (i = 0; i < ATA_SECT_SIZE; i++)
- csum += buf[i];
- if (csum)
- ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
- csum);
-
- if (buf[0] & 0x80)
- return -ENOENT;
-
- *tag = buf[0] & 0x1f;
-
- tf->command = buf[2];
- tf->feature = buf[3];
- tf->lbal = buf[4];
- tf->lbam = buf[5];
- tf->lbah = buf[6];
- tf->device = buf[7];
- tf->hob_lbal = buf[8];
- tf->hob_lbam = buf[9];
- tf->hob_lbah = buf[10];
- tf->nsect = buf[12];
- tf->hob_nsect = buf[13];
- if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id))
- tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];
-
- return 0;
-}
-
-/**
* atapi_eh_tur - perform ATAPI TEST_UNIT_READY
* @dev: target ATAPI device
* @r_sense_key: out parameter for sense_key
@@ -1659,80 +1535,6 @@ static void ata_eh_analyze_serror(struct ata_link *link)
}
/**
- * ata_eh_analyze_ncq_error - analyze NCQ error
- * @link: ATA link to analyze NCQ error for
- *
- * Read log page 10h, determine the offending qc and acquire
- * error status TF. For NCQ device errors, all LLDDs have to do
- * is setting AC_ERR_DEV in ehi->err_mask. This function takes
- * care of the rest.
- *
- * LOCKING:
- * Kernel thread context (may sleep).
- */
-void ata_eh_analyze_ncq_error(struct ata_link *link)
-{
- struct ata_port *ap = link->ap;
- struct ata_eh_context *ehc = &link->eh_context;
- struct ata_device *dev = link->device;
- struct ata_queued_cmd *qc;
- struct ata_taskfile tf;
- int tag, rc;
-
- /* if frozen, we can't do much */
- if (ap->pflags & ATA_PFLAG_FROZEN)
- return;
-
- /* is it NCQ device error? */
- if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
- return;
-
- /* has LLDD analyzed already? */
- ata_qc_for_each_raw(ap, qc, tag) {
- if (!(qc->flags & ATA_QCFLAG_FAILED))
- continue;
-
- if (qc->err_mask)
- return;
- }
-
- /* okay, this error is ours */
- memset(&tf, 0, sizeof(tf));
- rc = ata_eh_read_log_10h(dev, &tag, &tf);
- if (rc) {
- ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
- rc);
- return;
- }
-
- if (!(link->sactive & (1 << tag))) {
- ata_link_err(link, "log page 10h reported inactive tag %d\n",
- tag);
- return;
- }
-
- /* we've got the perpetrator, condemn it */
- qc = __ata_qc_from_tag(ap, tag);
- memcpy(&qc->result_tf, &tf, sizeof(tf));
- qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
- qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
- if (dev->class == ATA_DEV_ZAC &&
- ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary)) {
- char sense_key, asc, ascq;
-
- sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
- asc = (qc->result_tf.auxiliary >> 8) & 0xff;
- ascq = qc->result_tf.auxiliary & 0xff;
- ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
- ata_scsi_set_sense_information(dev, qc->scsicmd,
- &qc->result_tf);
- qc->flags |= ATA_QCFLAG_SENSE_VALID;
- }
-
- ehc->i.err_mask &= ~AC_ERR_DEV;
-}
-
-/**
* ata_eh_analyze_tf - analyze taskfile of a failed qc
* @qc: qc to analyze
* @tf: Taskfile registers to analyze
@@ -3436,7 +3238,8 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
int rc;
/* if the link or host doesn't do LPM, noop */
- if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
+ if (!IS_ENABLED(CONFIG_SATA_HOST) ||
+ (link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
return 0;
/*
@@ -4052,6 +3855,7 @@ void ata_std_error_handler(struct ata_port *ap)
ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
}
+EXPORT_SYMBOL_GPL(ata_std_error_handler);
#ifdef CONFIG_PM
/**
diff --git a/drivers/ata/libata-pata-timings.c b/drivers/ata/libata-pata-timings.c
new file mode 100644
index 000000000000..af341226cc64
--- /dev/null
+++ b/drivers/ata/libata-pata-timings.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Helper library for PATA timings
+ *
+ * Copyright 2003-2004 Red Hat, Inc. All rights reserved.
+ * Copyright 2003-2004 Jeff Garzik
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/libata.h>
+
+/*
+ * This mode timing computation functionality is ported over from
+ * drivers/ide/ide-timing.h and was originally written by Vojtech Pavlik
+ */
+/*
+ * PIO 0-4, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds).
+ * These were taken from ATA/ATAPI-6 standard, rev 0a, except
+ * for UDMA6, which is currently supported only by Maxtor drives.
+ *
+ * For PIO 5/6 MWDMA 3/4 see the CFA specification 3.0.
+ *
+ * ata_timing_find_mode() walks this table linearly and stops at the
+ * first entry whose mode is not below the requested one, so the rows
+ * have to stay ordered by ascending mode value and the list has to
+ * keep its final 0xFF entry as the terminator.
+ *
+ * NOTE(review): the columns appear to be { mode, setup, act8b, rec8b,
+ * cyc8b, active, recover, dmack_hold, cycle, udma } - confirm against
+ * the declaration of struct ata_timing.
+ */
+
+static const struct ata_timing ata_timing[] = {
+/* { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 0, 960, 0 }, */
+ { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 0, 600, 0 },
+ { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 0, 383, 0 },
+ { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 0, 240, 0 },
+ { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 0, 180, 0 },
+ { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 0, 120, 0 },
+ { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 0, 100, 0 },
+ { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 0, 80, 0 },
+
+ { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 50, 960, 0 },
+ { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 30, 480, 0 },
+ { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 20, 240, 0 },
+
+ { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 20, 480, 0 },
+ { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 5, 150, 0 },
+ { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 5, 120, 0 },
+ { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 5, 100, 0 },
+ { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 5, 80, 0 },
+
+/* { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 0, 150 }, */
+ { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 0, 120 },
+ { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 0, 80 },
+ { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 0, 60 },
+ { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 0, 45 },
+ { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 0, 30 },
+ { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 0, 20 },
+ { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 0, 15 },
+
+ { 0xFF }
+};
+
+#define ENOUGH(v, unit) (((v)-1)/(unit)+1) /* integer v / unit rounded up, for v > 0 */
+#define EZ(v, unit) ((v)?ENOUGH(((v) * 1000), unit):0) /* zero stays zero; otherwise (v * 1000) / unit rounded up */
+
+static void ata_timing_quantize(const struct ata_timing *t,
+ struct ata_timing *q, int T, int UT) /* q = t with each listed field run through EZ(); q may be t itself */
+{
+ q->setup = EZ(t->setup, T);
+ q->act8b = EZ(t->act8b, T);
+ q->rec8b = EZ(t->rec8b, T);
+ q->cyc8b = EZ(t->cyc8b, T);
+ q->active = EZ(t->active, T);
+ q->recover = EZ(t->recover, T);
+ q->dmack_hold = EZ(t->dmack_hold, T);
+ q->cycle = EZ(t->cycle, T);
+ q->udma = EZ(t->udma, UT); /* the only field scaled by UT instead of T */
+}
+
+/*
+ * Combine two timing sets, keeping the slower (larger) value per field.
+ *
+ * For each ATA_TIMING_* bit present in @what, the corresponding field
+ * of @m becomes max() of that field in @a and @b.  Fields whose bit is
+ * absent from @what are not written.  Every field is read from @a and
+ * @b before it is stored, so @m may be the same object as @a or @b.
+ */
+void ata_timing_merge(const struct ata_timing *a, const struct ata_timing *b,
+		      struct ata_timing *m, unsigned int what)
+{
+#define MERGE_FIELD(bit, field)						\
+	do {								\
+		if (what & (bit))					\
+			m->field = max(a->field, b->field);		\
+	} while (0)
+
+	MERGE_FIELD(ATA_TIMING_SETUP, setup);
+	MERGE_FIELD(ATA_TIMING_ACT8B, act8b);
+	MERGE_FIELD(ATA_TIMING_REC8B, rec8b);
+	MERGE_FIELD(ATA_TIMING_CYC8B, cyc8b);
+	MERGE_FIELD(ATA_TIMING_ACTIVE, active);
+	MERGE_FIELD(ATA_TIMING_RECOVER, recover);
+	MERGE_FIELD(ATA_TIMING_DMACK_HOLD, dmack_hold);
+	MERGE_FIELD(ATA_TIMING_CYCLE, cycle);
+	MERGE_FIELD(ATA_TIMING_UDMA, udma);
+
+#undef MERGE_FIELD
+}
+EXPORT_SYMBOL_GPL(ata_timing_merge);
+
+/*
+ * Look up the timing table row for @xfer_mode.
+ *
+ * The table is scanned from the start until a row whose mode is not
+ * smaller than @xfer_mode is reached.  Returns that row when its mode
+ * matches exactly; otherwise warns once and returns NULL.
+ */
+const struct ata_timing *ata_timing_find_mode(u8 xfer_mode)
+{
+	const struct ata_timing *entry;
+
+	/* skip every row that describes a lower mode than the one wanted */
+	for (entry = ata_timing; xfer_mode > entry->mode; entry++)
+		;
+
+	if (xfer_mode != entry->mode) {
+		WARN_ONCE(true, "%s: unable to find timing for xfer_mode 0x%x\n",
+			  __func__, xfer_mode);
+		return NULL;
+	}
+
+	return entry;
+}
+EXPORT_SYMBOL_GPL(ata_timing_find_mode);
+
+/*
+ * ata_timing_compute - work out quantized timings for a transfer mode
+ * @adev: device whose IDENTIFY words (adev->id) and pio_mode are used
+ * @speed: XFER_* mode to compute timings for
+ * @t: output timing set
+ * @T: unit handed to ata_timing_quantize() for every field except udma
+ * @UT: unit handed to ata_timing_quantize() for the udma field
+ *
+ * Copies the table row for @speed into @t, widens the cycle times to
+ * what the drive advertises in its IDENTIFY data, quantizes the result
+ * and, for modes above XFER_PIO_6, merges in the timings computed for
+ * adev->pio_mode.  Finally active/recover and cycle are adjusted so
+ * they are consistent with each other.
+ *
+ * RETURNS:
+ * 0 on success.  -EINVAL if @speed has no table entry, or if the
+ * nested lookup for adev->pio_mode fails; in the latter case @t has
+ * been partially computed and must not be used.
+ */
+int ata_timing_compute(struct ata_device *adev, unsigned short speed,
+		       struct ata_timing *t, int T, int UT)
+{
+	const u16 *id = adev->id;
+	const struct ata_timing *s;
+	struct ata_timing p;
+	int rc;
+
+	/*
+	 * Find the mode.
+	 */
+	s = ata_timing_find_mode(speed);
+	if (!s)
+		return -EINVAL;
+
+	memcpy(t, s, sizeof(*s));
+
+	/*
+	 * If the drive is an EIDE drive, it can tell us it needs extended
+	 * PIO/MW_DMA cycle timing.
+	 */
+
+	if (id[ATA_ID_FIELD_VALID] & 2) {	/* EIDE drive */
+		memset(&p, 0, sizeof(p));
+
+		if (speed >= XFER_PIO_0 && speed < XFER_SW_DMA_0) {
+			if (speed <= XFER_PIO_2)
+				p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO];
+			else if ((speed <= XFER_PIO_4) ||
+				 (speed == XFER_PIO_5 && !ata_id_is_cfa(id)))
+				p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY];
+		} else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
+			p.cycle = id[ATA_ID_EIDE_DMA_MIN];
+
+		ata_timing_merge(&p, t, t, ATA_TIMING_CYCLE | ATA_TIMING_CYC8B);
+	}
+
+	/*
+	 * Convert the timing to bus clock counts.
+	 */
+
+	ata_timing_quantize(t, t, T, UT);
+
+	/*
+	 * Even in DMA/UDMA modes we still use PIO access for IDENTIFY,
+	 * S.M.A.R.T. and some other commands. We have to ensure that the
+	 * DMA cycle timing is slower/equal than the fastest PIO timing.
+	 */
+
+	if (speed > XFER_PIO_6) {
+		/*
+		 * The nested call leaves @p untouched when the PIO mode is
+		 * unknown; merging it anyway would fold uninitialised (or
+		 * stale, unquantized) values into @t, so bail out instead.
+		 */
+		rc = ata_timing_compute(adev, adev->pio_mode, &p, T, UT);
+		if (rc)
+			return rc;
+		ata_timing_merge(&p, t, t, ATA_TIMING_ALL);
+	}
+
+	/*
+	 * Lengthen active & recovery time so that cycle time is correct.
+	 */
+
+	if (t->act8b + t->rec8b < t->cyc8b) {
+		t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2;
+		t->rec8b = t->cyc8b - t->act8b;
+	}
+
+	if (t->active + t->recover < t->cycle) {
+		t->active += (t->cycle - (t->active + t->recover)) / 2;
+		t->recover = t->cycle - t->active;
+	}
+
+	/*
+	 * In a few cases quantisation may produce enough errors to
+	 * leave t->cycle too low for the sum of active and recovery
+	 * if so we must correct this.
+	 */
+	if (t->active + t->recover > t->cycle)
+		t->cycle = t->active + t->recover;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ata_timing_compute);
diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c
new file mode 100644
index 000000000000..c16423e44525
--- /dev/null
+++ b/drivers/ata/libata-sata.c
@@ -0,0 +1,1483 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SATA specific part of ATA helper library
+ *
+ * Copyright 2003-2004 Red Hat, Inc. All rights reserved.
+ * Copyright 2003-2004 Jeff Garzik
+ * Copyright 2006 Tejun Heo <htejun@gmail.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <linux/libata.h>
+
+#include "libata.h"
+#include "libata-transport.h"
+
+/*
+ * Debounce timing parameters in msecs, laid out as
+ * { interval, duration, timeout }.  sata_link_debounce() consumes
+ * such an array as params[0], params[1] and params[2] respectively.
+ */
+const unsigned long sata_deb_timing_normal[] = { 5, 100, 2000 };
+EXPORT_SYMBOL_GPL(sata_deb_timing_normal);
+const unsigned long sata_deb_timing_hotplug[] = { 25, 500, 2000 };
+EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug);
+const unsigned long sata_deb_timing_long[] = { 100, 2000, 5000 };
+EXPORT_SYMBOL_GPL(sata_deb_timing_long);
+
+/**
+ * sata_scr_valid - check whether the SCRs of a link can be accessed
+ * @link: ATA link whose SCR accessibility is queried
+ *
+ * SCRs are considered accessible when the owning port carries
+ * ATA_FLAG_SATA and its operations provide ->scr_read.
+ *
+ * LOCKING:
+ * None.
+ *
+ * RETURNS:
+ * 1 if SCRs are accessible, 0 otherwise.
+ */
+int sata_scr_valid(struct ata_link *link)
+{
+	struct ata_port *port = link->ap;
+
+	if (!(port->flags & ATA_FLAG_SATA))
+		return 0;
+
+	return port->ops->scr_read ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(sata_scr_valid);
+
+/**
+ * sata_scr_read - read an SCR register of a link
+ * @link: ATA link to read the SCR from
+ * @reg: SCR to read
+ * @val: where the value read is stored
+ *
+ * Reads SCR @reg of @link into *@val.  A host link is served by the
+ * port's ->scr_read, provided sata_scr_valid() accepts the link; any
+ * other link is forwarded to sata_pmp_scr_read().
+ *
+ * LOCKING:
+ * None if @link is ap->link. Kernel thread context otherwise.
+ *
+ * RETURNS:
+ * 0 on success, negative errno on failure (-EOPNOTSUPP when the host
+ * link's SCRs are not accessible).
+ */
+int sata_scr_read(struct ata_link *link, int reg, u32 *val)
+{
+	if (!ata_is_host_link(link))
+		return sata_pmp_scr_read(link, reg, val);
+
+	if (!sata_scr_valid(link))
+		return -EOPNOTSUPP;
+
+	return link->ap->ops->scr_read(link, reg, val);
+}
+EXPORT_SYMBOL_GPL(sata_scr_read);
+
+/**
+ * sata_scr_write - write an SCR register of a link
+ * @link: ATA link to write the SCR of
+ * @reg: SCR to write
+ * @val: value to write
+ *
+ * Writes @val to SCR @reg of @link.  A host link is served by the
+ * port's ->scr_write, provided sata_scr_valid() accepts the link; any
+ * other link is forwarded to sata_pmp_scr_write().
+ *
+ * LOCKING:
+ * None if @link is ap->link. Kernel thread context otherwise.
+ *
+ * RETURNS:
+ * 0 on success, negative errno on failure (-EOPNOTSUPP when the host
+ * link's SCRs are not accessible).
+ */
+int sata_scr_write(struct ata_link *link, int reg, u32 val)
+{
+	if (!ata_is_host_link(link))
+		return sata_pmp_scr_write(link, reg, val);
+
+	if (!sata_scr_valid(link))
+		return -EOPNOTSUPP;
+
+	return link->ap->ops->scr_write(link, reg, val);
+}
+EXPORT_SYMBOL_GPL(sata_scr_write);
+
+/**
+ * sata_scr_write_flush - write an SCR register and flush the write
+ * @link: ATA link to write the SCR of
+ * @reg: SCR to write
+ * @val: value to write
+ *
+ * Same as sata_scr_write(), except that on a host link a successful
+ * write is followed by a read of the same register.  Links other than
+ * the host link are forwarded to sata_pmp_scr_write() unchanged.
+ *
+ * LOCKING:
+ * None if @link is ap->link. Kernel thread context otherwise.
+ *
+ * RETURNS:
+ * 0 on success, negative errno on failure.
+ */
+int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
+{
+	int err;
+
+	if (!ata_is_host_link(link))
+		return sata_pmp_scr_write(link, reg, val);
+
+	if (!sata_scr_valid(link))
+		return -EOPNOTSUPP;
+
+	err = link->ap->ops->scr_write(link, reg, val);
+	if (err)
+		return err;
+
+	/* the read-back is the flush; its status is what gets reported */
+	return link->ap->ops->scr_read(link, reg, &val);
+}
+EXPORT_SYMBOL_GPL(sata_scr_write_flush);
+
+/**
+ * ata_tf_to_fis - build a Register Host-to-Device FIS from a taskfile
+ * @tf: taskfile to convert
+ * @pmp: port multiplier port
+ * @is_cmd: non-zero if this FIS carries a command
+ * @fis: output buffer; bytes 0 through 19 are written
+ *
+ * Serialises a standard ATA taskfile into a Serial ATA
+ * FIS structure (Register - Host to Device).
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis)
+{
+	int i;
+
+	/* header: FIS type, then PM port number plus the command bit */
+	fis[0] = 0x27;			/* Register - Host to Device FIS */
+	fis[1] = is_cmd ? ((pmp & 0xf) | (1 << 7)) : (pmp & 0xf);
+	fis[2] = tf->command;
+	fis[3] = tf->feature;
+
+	/* low-order address bytes and device register */
+	fis[4] = tf->lbal;
+	fis[5] = tf->lbam;
+	fis[6] = tf->lbah;
+	fis[7] = tf->device;
+
+	/* high-order ("hob") bytes */
+	fis[8] = tf->hob_lbal;
+	fis[9] = tf->hob_lbam;
+	fis[10] = tf->hob_lbah;
+	fis[11] = tf->hob_feature;
+
+	/* sector count and device control */
+	fis[12] = tf->nsect;
+	fis[13] = tf->hob_nsect;
+	fis[14] = 0;
+	fis[15] = tf->ctl;
+
+	/* auxiliary field, least significant byte first */
+	for (i = 0; i < 4; i++)
+		fis[16 + i] = (tf->auxiliary >> (8 * i)) & 0xff;
+}
+EXPORT_SYMBOL_GPL(ata_tf_to_fis);
+
+/**
+ * ata_tf_from_fis - Convert SATA FIS to ATA taskfile
+ * @fis: Buffer from which data will be input
+ * @tf: Taskfile to output
+ *
+ * Converts a serial ATA FIS structure to a standard ATA taskfile.
+ *
+ * Only the taskfile fields assigned below are written; every other
+ * member of @tf (for example hob_feature, ctl and auxiliary) keeps
+ * whatever value it had.  Bytes 0, 1, 11 and 14 onward of @fis are
+ * not read.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+
+void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf)
+{
+ tf->command = fis[2]; /* status */
+ tf->feature = fis[3]; /* error */
+
+ tf->lbal = fis[4];
+ tf->lbam = fis[5];
+ tf->lbah = fis[6];
+ tf->device = fis[7];
+
+ tf->hob_lbal = fis[8];
+ tf->hob_lbam = fis[9];
+ tf->hob_lbah = fis[10];
+
+ tf->nsect = fis[12];
+ tf->hob_nsect = fis[13];
+}
+EXPORT_SYMBOL_GPL(ata_tf_from_fis);
+
+/**
+ * sata_link_debounce - debounce SATA phy status
+ * @link: ATA link to debounce SATA phy status for
+ * @params: timing parameters { interval, duration, timeout } in msec
+ * @deadline: deadline jiffies for the operation
+ *
+ * Make sure SStatus of @link reaches a stable state, determined by
+ * holding the same value where DET is not 1 for @duration polled
+ * every @interval, before @timeout. Timeout constrains the
+ * beginning of the stable state. Because DET gets stuck at 1 on
+ * some controllers after hot unplugging, this function waits
+ * until timeout then returns 0 if DET is stable at 1.
+ *
+ * @timeout is further limited by @deadline. The sooner of the
+ * two is used.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ *
+ * RETURNS:
+ * 0 on success, -EPIPE if the value is still changing once the
+ * deadline has passed, or the error returned by sata_scr_read().
+ */
+int sata_link_debounce(struct ata_link *link, const unsigned long *params,
+ unsigned long deadline)
+{
+ unsigned long interval = params[0];
+ unsigned long duration = params[1];
+ unsigned long last_jiffies, t;
+ u32 last, cur;
+ int rc;
+
+ t = ata_deadline(jiffies, params[2]); /* params[2] is the timeout entry */
+ if (time_before(t, deadline))
+ deadline = t; /* use whichever of timeout and caller deadline comes first */
+
+ if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
+ return rc;
+ cur &= 0xf; /* only the low four bits are compared */
+
+ last = cur;
+ last_jiffies = jiffies;
+
+ while (1) {
+ ata_msleep(link->ap, interval);
+ if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
+ return rc;
+ cur &= 0xf;
+
+ /* DET stable? */
+ if (cur == last) {
+ if (cur == 1 && time_before(jiffies, deadline))
+ continue; /* a value of 1 is only accepted once the deadline is reached */
+ if (time_after(jiffies,
+ ata_deadline(last_jiffies, duration)))
+ return 0; /* unchanged for longer than @duration */
+ continue;
+ }
+
+ /* unstable, start over */
+ last = cur;
+ last_jiffies = jiffies;
+
+ /* Check deadline. If debouncing failed, return
+ * -EPIPE to tell upper layer to lower link speed.
+ */
+ if (time_after(jiffies, deadline))
+ return -EPIPE;
+ }
+}
+EXPORT_SYMBOL_GPL(sata_link_debounce);
+
+/**
+ * sata_link_resume - resume SATA link
+ * @link: ATA link to resume SATA
+ * @params: timing parameters { interval, duration, timeout } in msec
+ * @deadline: deadline jiffies for the operation
+ *
+ * Resume SATA phy @link and debounce it.
+ *
+ * The SControl write is retried up to ATA_LINK_RESUME_TRIES times.
+ * If SControl still does not read back as expected, a warning is
+ * logged and 0 is returned without debouncing.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure. -EINVAL from the final SError
+ * read/clear is reported as 0.
+ */
+int sata_link_resume(struct ata_link *link, const unsigned long *params,
+ unsigned long deadline)
+{
+ int tries = ATA_LINK_RESUME_TRIES;
+ u32 scontrol, serror;
+ int rc;
+
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ return rc;
+
+ /*
+ * Writes to SControl sometimes get ignored under certain
+ * controllers (ata_piix SIDPR). Make sure DET actually is
+ * cleared.
+ */
+ do {
+ scontrol = (scontrol & 0x0f0) | 0x300; /* keep bits 4-7, set bits 8-9, clear the low nibble */
+ if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+ return rc;
+ /*
+ * Some PHYs react badly if SStatus is pounded
+ * immediately after resuming. Delay 200ms before
+ * debouncing.
+ */
+ if (!(link->flags & ATA_LFLAG_NO_DB_DELAY))
+ ata_msleep(link->ap, 200);
+
+ /* is SControl restored correctly? */
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ return rc;
+ } while ((scontrol & 0xf0f) != 0x300 && --tries);
+
+ if ((scontrol & 0xf0f) != 0x300) {
+ ata_link_warn(link, "failed to resume link (SControl %X)\n",
+ scontrol);
+ return 0; /* retries exhausted: warn only, not reported as an error */
+ }
+
+ if (tries < ATA_LINK_RESUME_TRIES)
+ ata_link_warn(link, "link resume succeeded after %d retries\n",
+ ATA_LINK_RESUME_TRIES - tries);
+
+ if ((rc = sata_link_debounce(link, params, deadline)))
+ return rc;
+
+ /* clear SError, some PHYs require this even for SRST to work */
+ if (!(rc = sata_scr_read(link, SCR_ERROR, &serror)))
+ rc = sata_scr_write(link, SCR_ERROR, serror); /* write back the value just read */
+
+ return rc != -EINVAL ? rc : 0;
+}
+EXPORT_SYMBOL_GPL(sata_link_resume);
+
+/**
+ * sata_link_scr_lpm - manipulate SControl IPM and SPM fields
+ * @link: ATA link to manipulate SControl for
+ * @policy: LPM policy to configure
+ * @spm_wakeup: initiate LPM transition to active state
+ *
+ * Manipulate the IPM field of the SControl register of @link
+ * according to @policy. If @policy is ATA_LPM_MAX_POWER and
+ * @spm_wakeup is %true, the SPM field is manipulated to wake up
+ * the link. This function also clears PHYRDY_CHG before
+ * returning.
+ *
+ * A @policy value not handled by the switch below triggers
+ * WARN_ON(1); SControl is then written back as it was read.
+ *
+ * LOCKING:
+ * EH context.
+ *
+ * RETURNS:
+ * 0 on success, -errno otherwise.
+ */
+int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
+ bool spm_wakeup)
+{
+ struct ata_eh_context *ehc = &link->eh_context;
+ bool woken_up = false;
+ u32 scontrol;
+ int rc;
+
+ rc = sata_scr_read(link, SCR_CONTROL, &scontrol);
+ if (rc)
+ return rc;
+
+ switch (policy) {
+ case ATA_LPM_MAX_POWER:
+ /* disable all LPM transitions */
+ scontrol |= (0x7 << 8);
+ /* initiate transition to active state */
+ if (spm_wakeup) {
+ scontrol |= (0x4 << 12);
+ woken_up = true; /* remembered so the settle delay below is applied */
+ }
+ break;
+ case ATA_LPM_MED_POWER:
+ /* allow LPM to PARTIAL */
+ scontrol &= ~(0x1 << 8);
+ scontrol |= (0x6 << 8);
+ break;
+ case ATA_LPM_MED_POWER_WITH_DIPM:
+ case ATA_LPM_MIN_POWER_WITH_PARTIAL:
+ case ATA_LPM_MIN_POWER:
+ if (ata_link_nr_enabled(link) > 0)
+ /* no restrictions on LPM transitions */
+ scontrol &= ~(0x7 << 8);
+ else {
+ /* empty port, power off */
+ scontrol &= ~0xf;
+ scontrol |= (0x1 << 2);
+ }
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ rc = sata_scr_write(link, SCR_CONTROL, scontrol);
+ if (rc)
+ return rc;
+
+ /* give the link time to transit out of LPM state */
+ if (woken_up)
+ msleep(10);
+
+ /* clear PHYRDY_CHG from SError */
+ ehc->i.serror &= ~SERR_PHYRDY_CHG; /* also drop it from the SError copy kept in the EH context */
+ return sata_scr_write(link, SCR_ERROR, SERR_PHYRDY_CHG);
+}
+EXPORT_SYMBOL_GPL(sata_link_scr_lpm);
+
+/*
+ * Work out the speed setting wanted for @link and patch it into
+ * *@scontrol (bits 4-7).  Returns non-zero when the speed setting found
+ * in *@scontrol on entry differs from the wanted one.
+ */
+static int __sata_set_spd_needed(struct ata_link *link, u32 *scontrol)
+{
+	struct ata_link *host_link = &link->ap->link;
+	u32 limit = link->sata_spd_limit;
+	u32 wanted, current_spd;
+
+	/* Don't configure downstream link faster than upstream link.
+	 * It doesn't speed up anything and some PMPs choke on such
+	 * configuration.
+	 */
+	if (!ata_is_host_link(link) && host_link->sata_spd)
+		limit &= (1 << host_link->sata_spd) - 1;
+
+	/* an all-ones limit means no restriction, encoded as 0 */
+	wanted = (limit == UINT_MAX) ? 0 : fls(limit);
+
+	current_spd = (*scontrol >> 4) & 0xf;
+	*scontrol = (*scontrol & ~0xf0) | ((wanted & 0xf) << 4);
+
+	return current_spd != wanted;
+}
+
+/**
+ * sata_set_spd_needed - does the SATA speed setting need reconfiguring
+ * @link: Link in question
+ *
+ * Compares the speed setting currently in SControl with the one
+ * derived from @link->sata_spd_limit.  Used to decide whether a
+ * hardreset is required to apply the SATA speed configuration.
+ * An unreadable SControl counts as "configuration needed".
+ *
+ * LOCKING:
+ * Inherited from caller.
+ *
+ * RETURNS:
+ * 1 if SATA spd configuration is needed, 0 otherwise.
+ */
+static int sata_set_spd_needed(struct ata_link *link)
+{
+	u32 scontrol;
+	int err = sata_scr_read(link, SCR_CONTROL, &scontrol);
+
+	return err ? 1 : __sata_set_spd_needed(link, &scontrol);
+}
+
+/**
+ * sata_set_spd - program the SATA speed setting from the spd limit
+ * @link: Link to set SATA spd for
+ *
+ * Reads SControl, and if its speed setting does not match the one
+ * derived from sata_spd_limit, writes the updated value back.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ *
+ * RETURNS:
+ * 0 if spd doesn't need to be changed, 1 if spd has been
+ * changed. Negative errno if SCR registers are inaccessible.
+ */
+int sata_set_spd(struct ata_link *link)
+{
+	u32 scontrol;
+	int err;
+
+	err = sata_scr_read(link, SCR_CONTROL, &scontrol);
+	if (err)
+		return err;
+
+	/* nothing to write when the setting is already the wanted one */
+	if (!__sata_set_spd_needed(link, &scontrol))
+		return 0;
+
+	err = sata_scr_write(link, SCR_CONTROL, scontrol);
+	if (err)
+		return err;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(sata_set_spd);
+
+/**
+ * sata_link_hardreset - reset link via SATA phy reset
+ * @link: link to reset
+ * @timing: timing parameters { interval, duration, timeout } in msec
+ * @deadline: deadline jiffies for the operation
+ * @online: optional out parameter indicating link onlineness
+ * @check_ready: optional callback to check link readiness
+ *
+ * SATA phy-reset @link using DET bits of SControl register.
+ * After hardreset, link readiness is waited upon using
+ * ata_wait_ready() if @check_ready is specified. LLDs are
+ * allowed to not specify @check_ready and wait themselves after
+ * this function returns. Device classification is LLD's
+ * responsibility.
+ *
+ * *@online is set to one iff reset succeeded and @link is online
+ * after reset.
+ *
+ * If the port supports PMP and @link is the host link, -EAGAIN is
+ * returned once the link is online so that a follow-up SRST can be
+ * done; this case is not logged as a failure and leaves *@online
+ * set.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ *
+ * RETURNS:
+ * 0 on success, -errno otherwise.
+ */
+int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
+ unsigned long deadline,
+ bool *online, int (*check_ready)(struct ata_link *))
+{
+ u32 scontrol;
+ int rc;
+
+ DPRINTK("ENTER\n");
+
+ if (online)
+ *online = false;
+
+ if (sata_set_spd_needed(link)) {
+ /* SATA spec says nothing about how to reconfigure
+ * spd. To be on the safe side, turn off phy during
+ * reconfiguration. This works for at least ICH7 AHCI
+ * and Sil3124.
+ */
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ goto out;
+
+ scontrol = (scontrol & 0x0f0) | 0x304;
+
+ if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+ goto out;
+
+ sata_set_spd(link); /* return value is not checked here */
+ }
+
+ /* issue phy wake/reset */
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ goto out;
+
+ scontrol = (scontrol & 0x0f0) | 0x301;
+
+ if ((rc = sata_scr_write_flush(link, SCR_CONTROL, scontrol)))
+ goto out;
+
+ /* Couldn't find anything in SATA I/II specs, but AHCI-1.1
+ * 10.4.2 says at least 1 ms.
+ */
+ ata_msleep(link->ap, 1);
+
+ /* bring link back */
+ rc = sata_link_resume(link, timing, deadline);
+ if (rc)
+ goto out;
+ /* if link is offline nothing more to do */
+ if (ata_phys_link_offline(link))
+ goto out; /* rc is 0 here, so this is a successful return with *online false */
+
+ /* Link is online. From this point, -ENODEV too is an error. */
+ if (online)
+ *online = true;
+
+ if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) {
+ /* If PMP is supported, we have to do follow-up SRST.
+ * Some PMPs don't send D2H Reg FIS after hardreset if
+ * the first port is empty. Wait only for
+ * ATA_TMOUT_PMP_SRST_WAIT.
+ */
+ if (check_ready) {
+ unsigned long pmp_deadline;
+
+ pmp_deadline = ata_deadline(jiffies,
+ ATA_TMOUT_PMP_SRST_WAIT);
+ if (time_after(pmp_deadline, deadline))
+ pmp_deadline = deadline; /* never wait past the caller's deadline */
+ ata_wait_ready(link, pmp_deadline, check_ready); /* result ignored; -EAGAIN is returned regardless */
+ }
+ rc = -EAGAIN;
+ goto out;
+ }
+
+ rc = 0;
+ if (check_ready)
+ rc = ata_wait_ready(link, deadline, check_ready);
+ out:
+ if (rc && rc != -EAGAIN) {
+ /* online is set iff link is online && reset succeeded */
+ if (online)
+ *online = false;
+ ata_link_err(link, "COMRESET failed (errno=%d)\n", rc);
+ }
+ DPRINTK("EXIT, rc=%d\n", rc);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(sata_link_hardreset);
+
+/**
+ * ata_qc_complete_multiple - Complete multiple qcs successfully
+ * @ap: port in question
+ * @qc_active: new qc_active mask
+ *
+ * Complete in-flight commands. This function is meant to be
+ * called from a low-level driver's interrupt routine to complete
+ * requests normally. ap->qc_active and @qc_active are compared
+ * and commands are completed accordingly: every tag whose bit is
+ * set in ap->qc_active but clear in @qc_active is completed.
+ *
+ * Always use this function when completing multiple NCQ commands
+ * from IRQ handlers instead of calling ata_qc_complete()
+ * multiple times to keep IRQ expect status properly in sync.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ *
+ * RETURNS:
+ * Number of completed commands on success, -EINVAL if @qc_active
+ * has a bit set that is not set in ap->qc_active.
+ */
+int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active)
+{
+ u64 done_mask, ap_qc_active = ap->qc_active;
+ int nr_done = 0;
+
+ /*
+ * If the internal tag is set on ap->qc_active, then we care about
+ * bit0 on the passed in qc_active mask. Move that bit up to match
+ * the internal tag.
+ */
+ if (ap_qc_active & (1ULL << ATA_TAG_INTERNAL)) {
+ qc_active |= (qc_active & 0x01) << ATA_TAG_INTERNAL;
+ qc_active ^= qc_active & 0x01; /* then clear bit 0 */
+ }
+
+ done_mask = ap_qc_active ^ qc_active; /* bits that differ between old and new mask */
+
+ if (unlikely(done_mask & qc_active)) { /* a differing bit set in the new mask was not active before */
+ ata_port_err(ap, "illegal qc_active transition (%08llx->%08llx)\n",
+ ap->qc_active, qc_active);
+ return -EINVAL;
+ }
+
+ while (done_mask) {
+ struct ata_queued_cmd *qc;
+ unsigned int tag = __ffs64(done_mask); /* lowest set bit = next tag to complete */
+
+ qc = ata_qc_from_tag(ap, tag);
+ if (qc) {
+ ata_qc_complete(qc);
+ nr_done++;
+ }
+ done_mask &= ~(1ULL << tag);
+ }
+
+ return nr_done;
+}
+EXPORT_SYMBOL_GPL(ata_qc_complete_multiple);
+
+/**
+ * ata_slave_link_init - set up the slave link of a port
+ * @ap: port to initialize slave link for
+ *
+ * Allocates and initializes a slave link for @ap, which turns on
+ * slave link handling for the port.
+ *
+ * In libata a port owns links and a link owns devices.  There is a
+ * single host link, but with a PMP attached there can be several
+ * fan-out links.  A SATA link usually has one device, while PATA
+ * and SATA controllers emulating a TF based interface can have
+ * two - master and slave.
+ *
+ * A few controllers do not fit this model well: SATA controllers
+ * which emulate the TF interface with both master and slave
+ * devices but also provide a separate SCR register set per device.
+ * They need separate links for physical link handling (e.g.
+ * onlineness, link speed) yet must behave like a traditional M/S
+ * controller for everything else (e.g. command issue, softreset).
+ *
+ * slave_link is how libata supports this class of controllers
+ * without disturbing the core layer much.  Everything except
+ * physical link handling uses the default host link for both
+ * master and slave; physical link handling uses the separate
+ * @ap->slave_link.  The details live inside the libata core.  From
+ * the LLD's point of view the only difference is that prereset,
+ * hardreset and postreset are invoked once more for the slave
+ * link, giving the following reset sequence.
+ *
+ * prereset(M) -> prereset(S) -> hardreset(M) -> hardreset(S) ->
+ * softreset(M) -> postreset(M) -> postreset(S)
+ *
+ * Softreset is only invoked for the master.  It resets both M/S
+ * by definition, so SRST on the master should handle both (the
+ * standard method will work just fine).
+ *
+ * LOCKING:
+ * Should be called before host is registered.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int ata_slave_link_init(struct ata_port *ap)
+{
+	struct ata_link *slave;
+
+	/* a port gets one slave link only, and never together with PMP */
+	WARN_ON(ap->slave_link);
+	WARN_ON(ap->flags & ATA_FLAG_PMP);
+
+	slave = kzalloc(sizeof(*slave), GFP_KERNEL);
+	if (slave == NULL)
+		return -ENOMEM;
+
+	ata_link_init(ap, slave, 1);
+	ap->slave_link = slave;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ata_slave_link_init);
+
+/**
+ * sata_lpm_ignore_phy_events - decide whether a PHY event is to be ignored
+ * @link: Link receiving the event
+ *
+ * Tells the caller whether the PHY event just received on @link
+ * should be ignored.
+ *
+ * LOCKING:
+ * None.
+ *
+ * RETURNS:
+ * True if the event has to be ignored.
+ */
+bool sata_lpm_ignore_phy_events(struct ata_link *link)
+{
+	unsigned long spurious_until;
+
+	/* with LPM enabled, PHYRDY carries no meaning */
+	if (link->lpm_policy > ATA_LPM_MAX_POWER)
+		return true;
+
+	if (!(link->flags & ATA_LFLAG_CHANGED))
+		return false;
+
+	/* the first PHY event shortly after an LPM policy change
+	 * might be spurious, so ignore it
+	 */
+	spurious_until = link->last_lpm_change +
+			 msecs_to_jiffies(ATA_TMOUT_SPURIOUS_PHY);
+	if (time_before(jiffies, spurious_until))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(sata_lpm_ignore_phy_events);
+
+static const char *ata_lpm_policy_names[] = { /* sysfs strings, indexed by enum ata_lpm_policy */
+ [ATA_LPM_UNKNOWN] = "max_performance", /* shares its string with ATA_LPM_MAX_POWER */
+ [ATA_LPM_MAX_POWER] = "max_performance",
+ [ATA_LPM_MED_POWER] = "medium_power",
+ [ATA_LPM_MED_POWER_WITH_DIPM] = "med_power_with_dipm",
+ [ATA_LPM_MIN_POWER_WITH_PARTIAL] = "min_power_with_partial", /* listed before "min_power", which is a prefix of it and would otherwise win the prefix match in ata_scsi_lpm_store() */
+ [ATA_LPM_MIN_POWER] = "min_power",
+};
+
+/*
+ * ata_scsi_lpm_store - sysfs store for link_power_management_policy
+ * @device: device embedded in the Scsi_Host of the port
+ * @attr: attribute being written (unused)
+ * @buf: user input; must start with one of ata_lpm_policy_names[]
+ * @count: number of bytes in @buf
+ *
+ * Matches @buf against the policy names (prefix match, so a trailing
+ * newline is accepted), records the policy as the port's target LPM
+ * policy and schedules EH to apply it.  The request is refused if any
+ * enabled device on any edge link of the port has ATA_HORKAGE_NOLPM.
+ *
+ * RETURNS:
+ * @count on success, -EINVAL for an unknown policy name, -EOPNOTSUPP
+ * if a device on the port must not use LPM.
+ */
+static ssize_t ata_scsi_lpm_store(struct device *device,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(device);
+	struct ata_port *ap = ata_shost_to_port(shost);
+	struct ata_link *link;
+	struct ata_device *dev;
+	enum ata_lpm_policy policy;
+	unsigned long flags;
+	ssize_t ret = count;
+
+	/* UNKNOWN is internal state, iterate from MAX_POWER */
+	for (policy = ATA_LPM_MAX_POWER;
+	     policy < ARRAY_SIZE(ata_lpm_policy_names); policy++) {
+		const char *name = ata_lpm_policy_names[policy];
+
+		if (strncmp(name, buf, strlen(name)) == 0)
+			break;
+	}
+	if (policy == ARRAY_SIZE(ata_lpm_policy_names))
+		return -EINVAL;
+
+	spin_lock_irqsave(ap->lock, flags);
+
+	/*
+	 * Check the devices of the link currently being visited.  Looking
+	 * at &ap->link on every pass would re-check the host link's
+	 * devices and never see devices behind PMP fan-out links.
+	 */
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ENABLED) {
+			if (dev->horkage & ATA_HORKAGE_NOLPM) {
+				ret = -EOPNOTSUPP;
+				goto out_unlock;
+			}
+		}
+	}
+
+	ap->target_lpm_policy = policy;
+	ata_port_schedule_eh(ap);
+out_unlock:
+	spin_unlock_irqrestore(ap->lock, flags);
+	return ret;
+}
+
+/*
+ * sysfs show for link_power_management_policy: prints the name of the
+ * port's target LPM policy followed by a newline, or fails with
+ * -EINVAL when the stored policy has no entry in the name table.
+ */
+static ssize_t ata_scsi_lpm_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct ata_port *ap = ata_shost_to_port(class_to_shost(dev));
+	const char *policy_name;
+
+	if (ap->target_lpm_policy >= ARRAY_SIZE(ata_lpm_policy_names))
+		return -EINVAL;
+
+	policy_name = ata_lpm_policy_names[ap->target_lpm_policy];
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", policy_name);
+}
+DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR,
+	    ata_scsi_lpm_show, ata_scsi_lpm_store);
+EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy);
+
+/*
+ * sysfs show for ncq_prio_enable: prints 1 or 0 depending on whether
+ * ATA_DFLAG_NCQ_PRIO_ENABLE is set on the ATA device behind the SCSI
+ * device, or fails with -ENODEV when no such ATA device is found.
+ */
+static ssize_t ata_ncq_prio_enable_show(struct device *device,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct scsi_device *sdev = to_scsi_device(device);
+	struct ata_port *ap = ata_shost_to_port(sdev->host);
+	struct ata_device *dev;
+	bool enabled = false;
+
+	/* look the device up and sample its flag under the port lock */
+	spin_lock_irq(ap->lock);
+	dev = ata_scsi_find_dev(ap, sdev);
+	if (dev)
+		enabled = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE;
+	spin_unlock_irq(ap->lock);
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(buf, 20, "%u\n", enabled);
+}
+
+/*
+ * ata_ncq_prio_enable_store - sysfs store for ncq_prio_enable
+ * @device: device embedded in the SCSI device
+ * @attr: attribute being written (unused)
+ * @buf: user input, decimal "0" or "1"
+ * @len: number of bytes in @buf
+ *
+ * Sets or clears ATA_DFLAG_NCQ_PRIO_ENABLE on the ATA device, asks EH
+ * to revalidate it and waits for EH to finish.  When enabling, the flag
+ * is cleared again and -EIO is returned if the device does not end up
+ * with ATA_DFLAG_NCQ_PRIO set.
+ *
+ * The device lookup is done under ap->lock, matching
+ * ata_ncq_prio_enable_show().
+ *
+ * RETURNS:
+ * @len on success; the kstrtol() error or -EINVAL for bad input,
+ * -ENODEV if the ATA device cannot be found, -EIO as described above.
+ */
+static ssize_t ata_ncq_prio_enable_store(struct device *device,
+					 struct device_attribute *attr,
+					 const char *buf, size_t len)
+{
+	struct scsi_device *sdev = to_scsi_device(device);
+	struct ata_port *ap;
+	struct ata_device *dev;
+	long int input;
+	int rc;
+
+	rc = kstrtol(buf, 10, &input);
+	if (rc)
+		return rc;
+	if ((input < 0) || (input > 1))
+		return -EINVAL;
+
+	ap = ata_shost_to_port(sdev->host);
+
+	spin_lock_irq(ap->lock);
+	dev = ata_scsi_find_dev(ap, sdev);
+	if (unlikely(!dev)) {
+		spin_unlock_irq(ap->lock);
+		return -ENODEV;
+	}
+
+	if (input)
+		dev->flags |= ATA_DFLAG_NCQ_PRIO_ENABLE;
+	else
+		dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
+
+	dev->link->eh_info.action |= ATA_EH_REVALIDATE;
+	dev->link->eh_info.flags |= ATA_EHI_QUIET;
+	ata_port_schedule_eh(ap);
+	spin_unlock_irq(ap->lock);
+
+	/* must be called without the lock held: this waits for EH */
+	ata_port_wait_eh(ap);
+
+	if (input) {
+		spin_lock_irq(ap->lock);
+		if (!(dev->flags & ATA_DFLAG_NCQ_PRIO)) {
+			/* revalidation did not turn NCQ priority on; undo */
+			dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
+			rc = -EIO;
+		}
+		spin_unlock_irq(ap->lock);
+	}
+
+	return rc ? rc : len;
+}
+
+DEVICE_ATTR(ncq_prio_enable, S_IRUGO | S_IWUSR,
+	    ata_ncq_prio_enable_show, ata_ncq_prio_enable_store);
+EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable);
+
+struct device_attribute *ata_ncq_sdev_attrs[] = {
+ &dev_attr_unload_heads, /* not declared in this file */
+ &dev_attr_ncq_prio_enable, /* declared via DEVICE_ATTR(ncq_prio_enable, ...) in this file */
+ NULL /* NULL entry ends the list */
+};
+EXPORT_SYMBOL_GPL(ata_ncq_sdev_attrs);
+
+/*
+ * sysfs store for em_message: hands the buffer to the port's
+ * ->em_store when the hook exists and the port has ATA_FLAG_EM set;
+ * otherwise fails with -EINVAL.
+ */
+static ssize_t
+ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+	struct ata_port *ap = ata_shost_to_port(class_to_shost(dev));
+
+	if (!ap->ops->em_store || !(ap->flags & ATA_FLAG_EM))
+		return -EINVAL;
+
+	return ap->ops->em_store(ap, buf, count);
+}
+
+/*
+ * sysfs show for em_message: delegates to the port's ->em_show when
+ * the hook exists and the port has ATA_FLAG_EM set; otherwise fails
+ * with -EINVAL.
+ */
+static ssize_t
+ata_scsi_em_message_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct ata_port *ap = ata_shost_to_port(class_to_shost(dev));
+
+	if (!ap->ops->em_show || !(ap->flags & ATA_FLAG_EM))
+		return -EINVAL;
+
+	return ap->ops->em_show(ap, buf);
+}
+DEVICE_ATTR(em_message, S_IRUGO | S_IWUSR,
+	    ata_scsi_em_message_show, ata_scsi_em_message_store);
+EXPORT_SYMBOL_GPL(dev_attr_em_message);
+
+/*
+ * sysfs show for em_message_type: prints the port's em_message_type
+ * as a decimal number followed by a newline.
+ */
+static ssize_t
+ata_scsi_em_message_type_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct ata_port *ap = ata_shost_to_port(class_to_shost(dev));
+
+	return snprintf(buf, 23, "%d\n", ap->em_message_type);
+}
+DEVICE_ATTR(em_message_type, S_IRUGO,
+	    ata_scsi_em_message_type_show, NULL);
+EXPORT_SYMBOL_GPL(dev_attr_em_message_type);
+
+/*
+ * sysfs show for sw_activity: delegates to the port's
+ * ->sw_activity_show when the ATA device is found, the hook exists and
+ * the port has ATA_FLAG_SW_ACTIVITY set; otherwise fails with -EINVAL.
+ */
+static ssize_t
+ata_scsi_activity_show(struct device *dev, struct device_attribute *attr,
+		       char *buf)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ata_port *ap = ata_shost_to_port(sdev->host);
+	struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
+
+	if (!atadev)
+		return -EINVAL;
+	if (!ap->ops->sw_activity_show)
+		return -EINVAL;
+	if (!(ap->flags & ATA_FLAG_SW_ACTIVITY))
+		return -EINVAL;
+
+	return ap->ops->sw_activity_show(atadev, buf);
+}
+
+static ssize_t
+ata_scsi_activity_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct scsi_device *sdev = to_scsi_device(dev);
+ struct ata_port *ap = ata_shost_to_port(sdev->host);
+ struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
+ enum sw_activity val;
+ int rc;
+
+ if (atadev && ap->ops->sw_activity_store &&
+ (ap->flags & ATA_FLAG_SW_ACTIVITY)) {
+ val = simple_strtoul(buf, NULL, 0);
+ switch (val) {
+ case OFF: case BLINK_ON: case BLINK_OFF:
+ rc = ap->ops->sw_activity_store(atadev, val);
+ if (!rc)
+ return count;
+ else
+ return rc;
+ }
+ }
+ return -EINVAL;
+}
+DEVICE_ATTR(sw_activity, S_IWUSR | S_IRUGO, ata_scsi_activity_show,
+ ata_scsi_activity_store);
+EXPORT_SYMBOL_GPL(dev_attr_sw_activity);
+
+/**
+ * __ata_change_queue_depth - helper for ata_scsi_change_queue_depth
+ * @ap: ATA port to which the device whose queue depth is changed belongs
+ * @sdev: SCSI device to configure queue depth for
+ * @queue_depth: new queue depth
+ *
+ * libsas and libata have different approaches for associating a sdev to
+ * its ata_port.
+ *
+ */
+int __ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev,
+ int queue_depth)
+{
+ struct ata_device *dev;
+ unsigned long flags;
+
+ if (queue_depth < 1 || queue_depth == sdev->queue_depth)
+ return sdev->queue_depth;
+
+ dev = ata_scsi_find_dev(ap, sdev);
+ if (!dev || !ata_dev_enabled(dev))
+ return sdev->queue_depth;
+
+ /* NCQ enabled? */
+ spin_lock_irqsave(ap->lock, flags);
+ dev->flags &= ~ATA_DFLAG_NCQ_OFF;
+ if (queue_depth == 1 || !ata_ncq_enabled(dev)) {
+ dev->flags |= ATA_DFLAG_NCQ_OFF;
+ queue_depth = 1;
+ }
+ spin_unlock_irqrestore(ap->lock, flags);
+
+ /* limit and apply queue depth */
+ queue_depth = min(queue_depth, sdev->host->can_queue);
+ queue_depth = min(queue_depth, ata_id_queue_depth(dev->id));
+ queue_depth = min(queue_depth, ATA_MAX_QUEUE);
+
+ if (sdev->queue_depth == queue_depth)
+ return -EINVAL;
+
+ return scsi_change_queue_depth(sdev, queue_depth);
+}
+EXPORT_SYMBOL_GPL(__ata_change_queue_depth);
+
+/**
+ * ata_scsi_change_queue_depth - SCSI callback for queue depth config
+ * @sdev: SCSI device to configure queue depth for
+ * @queue_depth: new queue depth
+ *
+ * This is the libata standard hostt->change_queue_depth callback.
+ * SCSI will call into this callback when user tries to set queue
+ * depth via sysfs.
+ *
+ * LOCKING:
+ * SCSI layer (we don't care)
+ *
+ * RETURNS:
+ * Newly configured queue depth, or -EINVAL if the clamped depth equals the current one.
+ */
+int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth)
+{
+ struct ata_port *ap = ata_shost_to_port(sdev->host);
+
+ return __ata_change_queue_depth(ap, sdev, queue_depth);
+}
+EXPORT_SYMBOL_GPL(ata_scsi_change_queue_depth);
+
+/**
+ * ata_sas_port_alloc - Allocate port for a SAS attached SATA device
+ * @host: ATA host container for all SAS ports
+ * @port_info: Information from low-level host driver
+ * @shost: SCSI host that the scsi device is attached to
+ *
+ * LOCKING:
+ * PCI/etc. bus probe sem.
+ *
+ * RETURNS:
+ * ata_port pointer on success / NULL on failure.
+ */
+
+struct ata_port *ata_sas_port_alloc(struct ata_host *host,
+ struct ata_port_info *port_info,
+ struct Scsi_Host *shost)
+{
+ struct ata_port *ap;
+
+ ap = ata_port_alloc(host);
+ if (!ap)
+ return NULL;
+
+ ap->port_no = 0;
+ ap->lock = &host->lock;
+ ap->pio_mask = port_info->pio_mask;
+ ap->mwdma_mask = port_info->mwdma_mask;
+ ap->udma_mask = port_info->udma_mask;
+ ap->flags |= port_info->flags;
+ ap->ops = port_info->port_ops;
+ ap->cbl = ATA_CBL_SATA;
+
+ return ap;
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_alloc);
+
+/**
+ * ata_sas_port_start - Set port up for dma.
+ * @ap: Port to initialize
+ *
+ * Called just after data structures for each port are
+ * initialized.
+ *
+ * May be used as the port_start() entry in ata_port_operations.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+int ata_sas_port_start(struct ata_port *ap)
+{
+ /*
+ * the port is marked as frozen at allocation time, but if we don't
+ * have new eh, we won't thaw it
+ */
+ if (!ap->ops->error_handler)
+ ap->pflags &= ~ATA_PFLAG_FROZEN;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_start);
+
+/**
+ * ata_sas_port_stop - Undo ata_sas_port_start()
+ * @ap: Port to shut down
+ *
+ * May be used as the port_stop() entry in ata_port_operations.
+ *
+ * LOCKING:
+ * Inherited from caller.
+ */
+
+void ata_sas_port_stop(struct ata_port *ap)
+{
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_stop);
+
+/**
+ * ata_sas_async_probe - simply schedule probing and return
+ * @ap: Port to probe
+ *
+ * For batch scheduling of probe for sas attached ata devices, assumes
+ * the port has already been through ata_sas_port_init()
+ */
+void ata_sas_async_probe(struct ata_port *ap)
+{
+ __ata_port_probe(ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_async_probe);
+
+int ata_sas_sync_probe(struct ata_port *ap)
+{
+ return ata_port_probe(ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_sync_probe);
+
+
+/**
+ * ata_sas_port_init - Initialize a SATA port
+ * @ap: SATA port to initialize
+ *
+ * LOCKING:
+ * PCI/etc. bus probe sem.
+ *
+ * RETURNS:
+ * Zero on success, non-zero on error.
+ */
+
+int ata_sas_port_init(struct ata_port *ap)
+{
+ int rc = ap->ops->port_start(ap);
+
+ if (rc)
+ return rc;
+ ap->print_id = atomic_inc_return(&ata_print_id);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_init);
+
+int ata_sas_tport_add(struct device *parent, struct ata_port *ap)
+{
+ return ata_tport_add(parent, ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_tport_add);
+
+void ata_sas_tport_delete(struct ata_port *ap)
+{
+ ata_tport_delete(ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_tport_delete);
+
+/**
+ * ata_sas_port_destroy - Destroy a SATA port allocated by ata_sas_port_alloc
+ * @ap: SATA port to destroy
+ *
+ */
+
+void ata_sas_port_destroy(struct ata_port *ap)
+{
+ if (ap->ops->port_stop)
+ ap->ops->port_stop(ap);
+ kfree(ap);
+}
+EXPORT_SYMBOL_GPL(ata_sas_port_destroy);
+
+/**
+ * ata_sas_slave_configure - Default slave_config routine for libata devices
+ * @sdev: SCSI device to configure
+ * @ap: ATA port to which SCSI device is attached
+ *
+ * RETURNS:
+ * Zero.
+ */
+
+int ata_sas_slave_configure(struct scsi_device *sdev, struct ata_port *ap)
+{
+ ata_scsi_sdev_config(sdev);
+ ata_scsi_dev_config(sdev, ap->link.device);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ata_sas_slave_configure);
+
+/**
+ * ata_sas_queuecmd - Issue SCSI cdb to libata-managed device
+ * @cmd: SCSI command to be sent
+ * @ap: ATA port to which the command is being sent
+ *
+ * RETURNS:
+ * Return value from __ata_scsi_queuecmd() if @cmd can be queued,
+ * 0 otherwise.
+ */
+
+int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap)
+{
+ int rc = 0;
+
+ ata_scsi_dump_cdb(ap, cmd);
+
+ if (likely(ata_dev_enabled(ap->link.device)))
+ rc = __ata_scsi_queuecmd(cmd, ap->link.device);
+ else {
+ cmd->result = (DID_BAD_TARGET << 16);
+ cmd->scsi_done(cmd);
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(ata_sas_queuecmd);
+
+int ata_sas_allocate_tag(struct ata_port *ap)
+{
+ unsigned int max_queue = ap->host->n_tags;
+ unsigned int i, tag;
+
+ for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) {
+ tag = tag < max_queue ? tag : 0;
+
+ /* the last tag is reserved for internal command. */
+ if (ata_tag_internal(tag))
+ continue;
+
+ if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) {
+ ap->sas_last_tag = tag;
+ return tag;
+ }
+ }
+ return -1;
+}
+
+void ata_sas_free_tag(unsigned int tag, struct ata_port *ap)
+{
+ clear_bit(tag, &ap->sas_tag_allocated);
+}
+
+/**
+ * sata_async_notification - SATA async notification handler
+ * @ap: ATA port where async notification is received
+ *
+ * Handler to be called when async notification via SDB FIS is
+ * received. This function schedules EH if necessary.
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ *
+ * RETURNS:
+ * 1 if EH is scheduled, 0 otherwise.
+ */
+int sata_async_notification(struct ata_port *ap)
+{
+ u32 sntf;
+ int rc;
+
+ if (!(ap->flags & ATA_FLAG_AN))
+ return 0;
+
+ rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
+ if (rc == 0)
+ sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);
+
+ if (!sata_pmp_attached(ap) || rc) {
+ /* PMP is not attached or SNTF is not available */
+ if (!sata_pmp_attached(ap)) {
+ /* PMP is not attached. Check whether ATAPI
+ * AN is configured. If so, notify media
+ * change.
+ */
+ struct ata_device *dev = ap->link.device;
+
+ if ((dev->class == ATA_DEV_ATAPI) &&
+ (dev->flags & ATA_DFLAG_AN))
+ ata_scsi_media_change_notify(dev);
+ return 0;
+ } else {
+ /* PMP is attached but SNTF is not available.
+ * ATAPI async media change notification is
+ * not used. The PMP must be reporting PHY
+ * status change, schedule EH.
+ */
+ ata_port_schedule_eh(ap);
+ return 1;
+ }
+ } else {
+ /* PMP is attached and SNTF is available */
+ struct ata_link *link;
+
+ /* check and notify ATAPI AN */
+ ata_for_each_link(link, ap, EDGE) {
+ if (!(sntf & (1 << link->pmp)))
+ continue;
+
+ if ((link->device->class == ATA_DEV_ATAPI) &&
+ (link->device->flags & ATA_DFLAG_AN))
+ ata_scsi_media_change_notify(link->device);
+ }
+
+ /* If PMP is reporting that PHY status of some
+ * downstream ports has changed, schedule EH.
+ */
+ if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
+ ata_port_schedule_eh(ap);
+ return 1;
+ }
+
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(sata_async_notification);
+
+/**
+ * ata_eh_read_log_10h - Read log page 10h for NCQ error details
+ * @dev: Device to read log page 10h from
+ * @tag: Resulting tag of the failed command
+ * @tf: Resulting taskfile registers of the failed command
+ *
+ * Read log page 10h to obtain NCQ error details and clear error
+ * condition.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep).
+ *
+ * RETURNS:
+ * 0 on success, -errno otherwise.
+ */
+static int ata_eh_read_log_10h(struct ata_device *dev,
+ int *tag, struct ata_taskfile *tf)
+{
+ u8 *buf = dev->link->ap->sector_buf;
+ unsigned int err_mask;
+ u8 csum;
+ int i;
+
+ err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
+ if (err_mask)
+ return -EIO;
+
+ csum = 0;
+ for (i = 0; i < ATA_SECT_SIZE; i++)
+ csum += buf[i];
+ if (csum)
+ ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
+ csum);
+
+ if (buf[0] & 0x80)
+ return -ENOENT;
+
+ *tag = buf[0] & 0x1f;
+
+ tf->command = buf[2];
+ tf->feature = buf[3];
+ tf->lbal = buf[4];
+ tf->lbam = buf[5];
+ tf->lbah = buf[6];
+ tf->device = buf[7];
+ tf->hob_lbal = buf[8];
+ tf->hob_lbam = buf[9];
+ tf->hob_lbah = buf[10];
+ tf->nsect = buf[12];
+ tf->hob_nsect = buf[13];
+ if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id))
+ tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];
+
+ return 0;
+}
+
+/**
+ * ata_eh_analyze_ncq_error - analyze NCQ error
+ * @link: ATA link to analyze NCQ error for
+ *
+ * Read log page 10h, determine the offending qc and acquire
+ * error status TF. For NCQ device errors, all LLDDs have to do
+ * is to set AC_ERR_DEV in ehi->err_mask. This function takes
+ * care of the rest.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep).
+ */
+void ata_eh_analyze_ncq_error(struct ata_link *link)
+{
+ struct ata_port *ap = link->ap;
+ struct ata_eh_context *ehc = &link->eh_context;
+ struct ata_device *dev = link->device;
+ struct ata_queued_cmd *qc;
+ struct ata_taskfile tf;
+ int tag, rc;
+
+ /* if frozen, we can't do much */
+ if (ap->pflags & ATA_PFLAG_FROZEN)
+ return;
+
+ /* is it NCQ device error? */
+ if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
+ return;
+
+ /* has LLDD analyzed already? */
+ ata_qc_for_each_raw(ap, qc, tag) {
+ if (!(qc->flags & ATA_QCFLAG_FAILED))
+ continue;
+
+ if (qc->err_mask)
+ return;
+ }
+
+ /* okay, this error is ours */
+ memset(&tf, 0, sizeof(tf));
+ rc = ata_eh_read_log_10h(dev, &tag, &tf);
+ if (rc) {
+ ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
+ rc);
+ return;
+ }
+
+ if (!(link->sactive & (1 << tag))) {
+ ata_link_err(link, "log page 10h reported inactive tag %d\n",
+ tag);
+ return;
+ }
+
+ /* we've got the perpetrator, condemn it */
+ qc = __ata_qc_from_tag(ap, tag);
+ memcpy(&qc->result_tf, &tf, sizeof(tf));
+ qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
+ qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
+ if (dev->class == ATA_DEV_ZAC &&
+ ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary)) {
+ char sense_key, asc, ascq;
+
+ sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
+ asc = (qc->result_tf.auxiliary >> 8) & 0xff;
+ ascq = qc->result_tf.auxiliary & 0xff;
+ ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
+ ata_scsi_set_sense_information(dev, qc->scsicmd,
+ &qc->result_tf);
+ qc->flags |= ATA_QCFLAG_SENSE_VALID;
+ }
+
+ ehc->i.err_mask &= ~AC_ERR_DEV;
+}
+EXPORT_SYMBOL_GPL(ata_eh_analyze_ncq_error);
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index eb2eb599e602..36e588d88b95 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2,10 +2,6 @@
/*
* libata-scsi.c - helper library for ATA
*
- * Maintained by: Tejun Heo <tj@kernel.org>
- * Please ALWAYS copy linux-ide@vger.kernel.org
- * on emails.
- *
* Copyright 2003-2004 Red Hat, Inc. All rights reserved.
* Copyright 2003-2004 Jeff Garzik
*
@@ -36,11 +32,12 @@
#include <linux/suspend.h>
#include <asm/unaligned.h>
#include <linux/ioprio.h>
+#include <linux/of.h>
#include "libata.h"
#include "libata-transport.h"
-#define ATA_SCSI_RBUF_SIZE 4096
+#define ATA_SCSI_RBUF_SIZE 576
static DEFINE_SPINLOCK(ata_scsi_rbuf_lock);
static u8 ata_scsi_rbuf[ATA_SCSI_RBUF_SIZE];
@@ -49,8 +46,6 @@ typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc);
static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap,
const struct scsi_device *scsidev);
-static struct ata_device *ata_scsi_find_dev(struct ata_port *ap,
- const struct scsi_device *scsidev);
#define RW_RECOVERY_MPAGE 0x1
#define RW_RECOVERY_MPAGE_LEN 12
@@ -90,71 +85,6 @@ static const u8 def_control_mpage[CONTROL_MPAGE_LEN] = {
0, 30 /* extended self test time, see 05-359r1 */
};
-static const char *ata_lpm_policy_names[] = {
- [ATA_LPM_UNKNOWN] = "max_performance",
- [ATA_LPM_MAX_POWER] = "max_performance",
- [ATA_LPM_MED_POWER] = "medium_power",
- [ATA_LPM_MED_POWER_WITH_DIPM] = "med_power_with_dipm",
- [ATA_LPM_MIN_POWER_WITH_PARTIAL] = "min_power_with_partial",
- [ATA_LPM_MIN_POWER] = "min_power",
-};
-
-static ssize_t ata_scsi_lpm_store(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct Scsi_Host *shost = class_to_shost(device);
- struct ata_port *ap = ata_shost_to_port(shost);
- struct ata_link *link;
- struct ata_device *dev;
- enum ata_lpm_policy policy;
- unsigned long flags;
-
- /* UNKNOWN is internal state, iterate from MAX_POWER */
- for (policy = ATA_LPM_MAX_POWER;
- policy < ARRAY_SIZE(ata_lpm_policy_names); policy++) {
- const char *name = ata_lpm_policy_names[policy];
-
- if (strncmp(name, buf, strlen(name)) == 0)
- break;
- }
- if (policy == ARRAY_SIZE(ata_lpm_policy_names))
- return -EINVAL;
-
- spin_lock_irqsave(ap->lock, flags);
-
- ata_for_each_link(link, ap, EDGE) {
- ata_for_each_dev(dev, &ap->link, ENABLED) {
- if (dev->horkage & ATA_HORKAGE_NOLPM) {
- count = -EOPNOTSUPP;
- goto out_unlock;
- }
- }
- }
-
- ap->target_lpm_policy = policy;
- ata_port_schedule_eh(ap);
-out_unlock:
- spin_unlock_irqrestore(ap->lock, flags);
- return count;
-}
-
-static ssize_t ata_scsi_lpm_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct Scsi_Host *shost = class_to_shost(dev);
- struct ata_port *ap = ata_shost_to_port(shost);
-
- if (ap->target_lpm_policy >= ARRAY_SIZE(ata_lpm_policy_names))
- return -EINVAL;
-
- return snprintf(buf, PAGE_SIZE, "%s\n",
- ata_lpm_policy_names[ap->target_lpm_policy]);
-}
-DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR,
- ata_scsi_lpm_show, ata_scsi_lpm_store);
-EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy);
-
static ssize_t ata_scsi_park_show(struct device *device,
struct device_attribute *attr, char *buf)
{
@@ -258,83 +188,6 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR,
ata_scsi_park_show, ata_scsi_park_store);
EXPORT_SYMBOL_GPL(dev_attr_unload_heads);
-static ssize_t ata_ncq_prio_enable_show(struct device *device,
- struct device_attribute *attr,
- char *buf)
-{
- struct scsi_device *sdev = to_scsi_device(device);
- struct ata_port *ap;
- struct ata_device *dev;
- bool ncq_prio_enable;
- int rc = 0;
-
- ap = ata_shost_to_port(sdev->host);
-
- spin_lock_irq(ap->lock);
- dev = ata_scsi_find_dev(ap, sdev);
- if (!dev) {
- rc = -ENODEV;
- goto unlock;
- }
-
- ncq_prio_enable = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE;
-
-unlock:
- spin_unlock_irq(ap->lock);
-
- return rc ? rc : snprintf(buf, 20, "%u\n", ncq_prio_enable);
-}
-
-static ssize_t ata_ncq_prio_enable_store(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t len)
-{
- struct scsi_device *sdev = to_scsi_device(device);
- struct ata_port *ap;
- struct ata_device *dev;
- long int input;
- int rc;
-
- rc = kstrtol(buf, 10, &input);
- if (rc)
- return rc;
- if ((input < 0) || (input > 1))
- return -EINVAL;
-
- ap = ata_shost_to_port(sdev->host);
- dev = ata_scsi_find_dev(ap, sdev);
- if (unlikely(!dev))
- return -ENODEV;
-
- spin_lock_irq(ap->lock);
- if (input)
- dev->flags |= ATA_DFLAG_NCQ_PRIO_ENABLE;
- else
- dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
-
- dev->link->eh_info.action |= ATA_EH_REVALIDATE;
- dev->link->eh_info.flags |= ATA_EHI_QUIET;
- ata_port_schedule_eh(ap);
- spin_unlock_irq(ap->lock);
-
- ata_port_wait_eh(ap);
-
- if (input) {
- spin_lock_irq(ap->lock);
- if (!(dev->flags & ATA_DFLAG_NCQ_PRIO)) {
- dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE;
- rc = -EIO;
- }
- spin_unlock_irq(ap->lock);
- }
-
- return rc ? rc : len;
-}
-
-DEVICE_ATTR(ncq_prio_enable, S_IRUGO | S_IWUSR,
- ata_ncq_prio_enable_show, ata_ncq_prio_enable_store);
-EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable);
-
void ata_scsi_set_sense(struct ata_device *dev, struct scsi_cmnd *cmd,
u8 sk, u8 asc, u8 ascq)
{
@@ -383,90 +236,8 @@ static void ata_scsi_set_invalid_parameter(struct ata_device *dev,
field, 0xff, 0);
}
-static ssize_t
-ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct Scsi_Host *shost = class_to_shost(dev);
- struct ata_port *ap = ata_shost_to_port(shost);
- if (ap->ops->em_store && (ap->flags & ATA_FLAG_EM))
- return ap->ops->em_store(ap, buf, count);
- return -EINVAL;
-}
-
-static ssize_t
-ata_scsi_em_message_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct Scsi_Host *shost = class_to_shost(dev);
- struct ata_port *ap = ata_shost_to_port(shost);
-
- if (ap->ops->em_show && (ap->flags & ATA_FLAG_EM))
- return ap->ops->em_show(ap, buf);
- return -EINVAL;
-}
-DEVICE_ATTR(em_message, S_IRUGO | S_IWUSR,
- ata_scsi_em_message_show, ata_scsi_em_message_store);
-EXPORT_SYMBOL_GPL(dev_attr_em_message);
-
-static ssize_t
-ata_scsi_em_message_type_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct Scsi_Host *shost = class_to_shost(dev);
- struct ata_port *ap = ata_shost_to_port(shost);
-
- return snprintf(buf, 23, "%d\n", ap->em_message_type);
-}
-DEVICE_ATTR(em_message_type, S_IRUGO,
- ata_scsi_em_message_type_show, NULL);
-EXPORT_SYMBOL_GPL(dev_attr_em_message_type);
-
-static ssize_t
-ata_scsi_activity_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct scsi_device *sdev = to_scsi_device(dev);
- struct ata_port *ap = ata_shost_to_port(sdev->host);
- struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
-
- if (atadev && ap->ops->sw_activity_show &&
- (ap->flags & ATA_FLAG_SW_ACTIVITY))
- return ap->ops->sw_activity_show(atadev, buf);
- return -EINVAL;
-}
-
-static ssize_t
-ata_scsi_activity_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct scsi_device *sdev = to_scsi_device(dev);
- struct ata_port *ap = ata_shost_to_port(sdev->host);
- struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
- enum sw_activity val;
- int rc;
-
- if (atadev && ap->ops->sw_activity_store &&
- (ap->flags & ATA_FLAG_SW_ACTIVITY)) {
- val = simple_strtoul(buf, NULL, 0);
- switch (val) {
- case OFF: case BLINK_ON: case BLINK_OFF:
- rc = ap->ops->sw_activity_store(atadev, val);
- if (!rc)
- return count;
- else
- return rc;
- }
- }
- return -EINVAL;
-}
-DEVICE_ATTR(sw_activity, S_IWUSR | S_IRUGO, ata_scsi_activity_show,
- ata_scsi_activity_store);
-EXPORT_SYMBOL_GPL(dev_attr_sw_activity);
-
struct device_attribute *ata_common_sdev_attrs[] = {
&dev_attr_unload_heads,
- &dev_attr_ncq_prio_enable,
NULL
};
EXPORT_SYMBOL_GPL(ata_common_sdev_attrs);
@@ -499,6 +270,7 @@ int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev,
return 0;
}
+EXPORT_SYMBOL_GPL(ata_std_bios_param);
/**
* ata_scsi_unlock_native_capacity - unlock native capacity
@@ -528,6 +300,7 @@ void ata_scsi_unlock_native_capacity(struct scsi_device *sdev)
spin_unlock_irqrestore(ap->lock, flags);
ata_port_wait_eh(ap);
}
+EXPORT_SYMBOL_GPL(ata_scsi_unlock_native_capacity);
/**
* ata_get_identity - Handler for HDIO_GET_IDENTITY ioctl
@@ -1215,7 +988,7 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc)
scsi_set_sense_information(sb, SCSI_SENSE_BUFFERSIZE, block);
}
-static void ata_scsi_sdev_config(struct scsi_device *sdev)
+void ata_scsi_sdev_config(struct scsi_device *sdev)
{
sdev->use_10_for_rw = 1;
sdev->use_10_for_ms = 1;
@@ -1255,8 +1028,7 @@ static int atapi_drain_needed(struct request *rq)
return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC;
}
-static int ata_scsi_dev_config(struct scsi_device *sdev,
- struct ata_device *dev)
+int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
{
struct request_queue *q = sdev->request_queue;
@@ -1344,6 +1116,7 @@ int ata_scsi_slave_config(struct scsi_device *sdev)
return rc;
}
+EXPORT_SYMBOL_GPL(ata_scsi_slave_config);
/**
* ata_scsi_slave_destroy - SCSI device is about to be destroyed
@@ -1383,71 +1156,7 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev)
q->dma_drain_buffer = NULL;
q->dma_drain_size = 0;
}
-
-/**
- * __ata_change_queue_depth - helper for ata_scsi_change_queue_depth
- * @ap: ATA port to which the device change the queue depth
- * @sdev: SCSI device to configure queue depth for
- * @queue_depth: new queue depth
- *
- * libsas and libata have different approaches for associating a sdev to
- * its ata_port.
- *
- */
-int __ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev,
- int queue_depth)
-{
- struct ata_device *dev;
- unsigned long flags;
-
- if (queue_depth < 1 || queue_depth == sdev->queue_depth)
- return sdev->queue_depth;
-
- dev = ata_scsi_find_dev(ap, sdev);
- if (!dev || !ata_dev_enabled(dev))
- return sdev->queue_depth;
-
- /* NCQ enabled? */
- spin_lock_irqsave(ap->lock, flags);
- dev->flags &= ~ATA_DFLAG_NCQ_OFF;
- if (queue_depth == 1 || !ata_ncq_enabled(dev)) {
- dev->flags |= ATA_DFLAG_NCQ_OFF;
- queue_depth = 1;
- }
- spin_unlock_irqrestore(ap->lock, flags);
-
- /* limit and apply queue depth */
- queue_depth = min(queue_depth, sdev->host->can_queue);
- queue_depth = min(queue_depth, ata_id_queue_depth(dev->id));
- queue_depth = min(queue_depth, ATA_MAX_QUEUE);
-
- if (sdev->queue_depth == queue_depth)
- return -EINVAL;
-
- return scsi_change_queue_depth(sdev, queue_depth);
-}
-
-/**
- * ata_scsi_change_queue_depth - SCSI callback for queue depth config
- * @sdev: SCSI device to configure queue depth for
- * @queue_depth: new queue depth
- *
- * This is libata standard hostt->change_queue_depth callback.
- * SCSI will call into this callback when user tries to set queue
- * depth via sysfs.
- *
- * LOCKING:
- * SCSI layer (we don't care)
- *
- * RETURNS:
- * Newly configured queue depth.
- */
-int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth)
-{
- struct ata_port *ap = ata_shost_to_port(sdev->host);
-
- return __ata_change_queue_depth(ap, sdev, queue_depth);
-}
+EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy);
/**
* ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command
@@ -2354,10 +2063,6 @@ static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)
*/
static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
{
- struct ata_taskfile tf;
-
- memset(&tf, 0, sizeof(tf));
-
rbuf[1] = 0x89; /* our page code */
rbuf[2] = (0x238 >> 8); /* page size fixed at 238h */
rbuf[3] = (0x238 & 0xff);
@@ -2366,14 +2071,14 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
memcpy(&rbuf[16], "libata ", 16);
memcpy(&rbuf[32], DRV_VERSION, 4);
- /* we don't store the ATA device signature, so we fake it */
-
- tf.command = ATA_DRDY; /* really, this is Status reg */
- tf.lbal = 0x1;
- tf.nsect = 0x1;
-
- ata_tf_to_fis(&tf, 0, 1, &rbuf[36]); /* TODO: PMP? */
rbuf[36] = 0x34; /* force D2H Reg FIS (34h) */
+ rbuf[37] = (1 << 7); /* bit 7 indicates Command FIS */
+ /* TODO: PMP? */
+
+ /* we don't store the ATA device signature, so we fake it */
+ rbuf[38] = ATA_DRDY; /* really, this is Status reg */
+ rbuf[40] = 0x1;
+ rbuf[48] = 0x1;
rbuf[56] = ATA_CMD_ID_ATA;
@@ -3089,7 +2794,7 @@ static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap,
* RETURNS:
* Associated ATA device, or %NULL if not found.
*/
-static struct ata_device *
+struct ata_device *
ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev)
{
struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev);
@@ -4299,8 +4004,7 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
* Prints the contents of a SCSI command via printk().
*/
-static inline void ata_scsi_dump_cdb(struct ata_port *ap,
- struct scsi_cmnd *cmd)
+void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd)
{
#ifdef ATA_VERBOSE_DEBUG
struct scsi_device *scsidev = cmd->device;
@@ -4312,8 +4016,7 @@ static inline void ata_scsi_dump_cdb(struct ata_port *ap,
#endif
}
-static inline int __ata_scsi_queuecmd(struct scsi_cmnd *scmd,
- struct ata_device *dev)
+int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev)
{
u8 scsi_op = scmd->cmnd[0];
ata_xlat_func_t xlat_func;
@@ -4407,6 +4110,7 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
return rc;
}
+EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);
/**
* ata_scsi_simulate - simulate SCSI command on ATA device
@@ -4562,26 +4266,51 @@ int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht)
*/
shost->max_host_blocked = 1;
- rc = scsi_add_host_with_dma(ap->scsi_host,
- &ap->tdev, ap->host->dev);
+ rc = scsi_add_host_with_dma(shost, &ap->tdev, ap->host->dev);
if (rc)
- goto err_add;
+ goto err_alloc;
}
return 0;
- err_add:
- scsi_host_put(host->ports[i]->scsi_host);
err_alloc:
while (--i >= 0) {
struct Scsi_Host *shost = host->ports[i]->scsi_host;
+ /* scsi_host_put() is in ata_devres_release() */
scsi_remove_host(shost);
- scsi_host_put(shost);
}
return rc;
}
+#ifdef CONFIG_OF
+static void ata_scsi_assign_ofnode(struct ata_device *dev, struct ata_port *ap)
+{
+ struct scsi_device *sdev = dev->sdev;
+ struct device *d = ap->host->dev;
+ struct device_node *np = d->of_node;
+ struct device_node *child;
+
+ for_each_available_child_of_node(np, child) {
+ int ret;
+ u32 val;
+
+ ret = of_property_read_u32(child, "reg", &val);
+ if (ret)
+ continue;
+ if (val == dev->devno) {
+ dev_dbg(d, "found matching device node\n");
+ sdev->sdev_gendev.of_node = child;
+ return;
+ }
+ }
+}
+#else
+static void ata_scsi_assign_ofnode(struct ata_device *dev, struct ata_port *ap)
+{
+}
+#endif
+
void ata_scsi_scan_host(struct ata_port *ap, int sync)
{
int tries = 5;
@@ -4607,6 +4336,7 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
NULL);
if (!IS_ERR(sdev)) {
dev->sdev = sdev;
+ ata_scsi_assign_ofnode(dev, ap);
scsi_device_put(sdev);
} else {
dev->sdev = NULL;
@@ -4929,214 +4659,3 @@ void ata_scsi_dev_rescan(struct work_struct *work)
spin_unlock_irqrestore(ap->lock, flags);
mutex_unlock(&ap->scsi_scan_mutex);
}
-
-/**
- * ata_sas_port_alloc - Allocate port for a SAS attached SATA device
- * @host: ATA host container for all SAS ports
- * @port_info: Information from low-level host driver
- * @shost: SCSI host that the scsi device is attached to
- *
- * LOCKING:
- * PCI/etc. bus probe sem.
- *
- * RETURNS:
- * ata_port pointer on success / NULL on failure.
- */
-
-struct ata_port *ata_sas_port_alloc(struct ata_host *host,
- struct ata_port_info *port_info,
- struct Scsi_Host *shost)
-{
- struct ata_port *ap;
-
- ap = ata_port_alloc(host);
- if (!ap)
- return NULL;
-
- ap->port_no = 0;
- ap->lock = &host->lock;
- ap->pio_mask = port_info->pio_mask;
- ap->mwdma_mask = port_info->mwdma_mask;
- ap->udma_mask = port_info->udma_mask;
- ap->flags |= port_info->flags;
- ap->ops = port_info->port_ops;
- ap->cbl = ATA_CBL_SATA;
-
- return ap;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_alloc);
-
-/**
- * ata_sas_port_start - Set port up for dma.
- * @ap: Port to initialize
- *
- * Called just after data structures for each port are
- * initialized.
- *
- * May be used as the port_start() entry in ata_port_operations.
- *
- * LOCKING:
- * Inherited from caller.
- */
-int ata_sas_port_start(struct ata_port *ap)
-{
- /*
- * the port is marked as frozen at allocation time, but if we don't
- * have new eh, we won't thaw it
- */
- if (!ap->ops->error_handler)
- ap->pflags &= ~ATA_PFLAG_FROZEN;
- return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_start);
-
-/**
- * ata_port_stop - Undo ata_sas_port_start()
- * @ap: Port to shut down
- *
- * May be used as the port_stop() entry in ata_port_operations.
- *
- * LOCKING:
- * Inherited from caller.
- */
-
-void ata_sas_port_stop(struct ata_port *ap)
-{
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_stop);
-
-/**
- * ata_sas_async_probe - simply schedule probing and return
- * @ap: Port to probe
- *
- * For batch scheduling of probe for sas attached ata devices, assumes
- * the port has already been through ata_sas_port_init()
- */
-void ata_sas_async_probe(struct ata_port *ap)
-{
- __ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_async_probe);
-
-int ata_sas_sync_probe(struct ata_port *ap)
-{
- return ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_sync_probe);
-
-
-/**
- * ata_sas_port_init - Initialize a SATA device
- * @ap: SATA port to initialize
- *
- * LOCKING:
- * PCI/etc. bus probe sem.
- *
- * RETURNS:
- * Zero on success, non-zero on error.
- */
-
-int ata_sas_port_init(struct ata_port *ap)
-{
- int rc = ap->ops->port_start(ap);
-
- if (rc)
- return rc;
- ap->print_id = atomic_inc_return(&ata_print_id);
- return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_init);
-
-int ata_sas_tport_add(struct device *parent, struct ata_port *ap)
-{
- return ata_tport_add(parent, ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_tport_add);
-
-void ata_sas_tport_delete(struct ata_port *ap)
-{
- ata_tport_delete(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_tport_delete);
-
-/**
- * ata_sas_port_destroy - Destroy a SATA port allocated by ata_sas_port_alloc
- * @ap: SATA port to destroy
- *
- */
-
-void ata_sas_port_destroy(struct ata_port *ap)
-{
- if (ap->ops->port_stop)
- ap->ops->port_stop(ap);
- kfree(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_destroy);
-
-/**
- * ata_sas_slave_configure - Default slave_config routine for libata devices
- * @sdev: SCSI device to configure
- * @ap: ATA port to which SCSI device is attached
- *
- * RETURNS:
- * Zero.
- */
-
-int ata_sas_slave_configure(struct scsi_device *sdev, struct ata_port *ap)
-{
- ata_scsi_sdev_config(sdev);
- ata_scsi_dev_config(sdev, ap->link.device);
- return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_slave_configure);
-
-/**
- * ata_sas_queuecmd - Issue SCSI cdb to libata-managed device
- * @cmd: SCSI command to be sent
- * @ap: ATA port to which the command is being sent
- *
- * RETURNS:
- * Return value from __ata_scsi_queuecmd() if @cmd can be queued,
- * 0 otherwise.
- */
-
-int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap)
-{
- int rc = 0;
-
- ata_scsi_dump_cdb(ap, cmd);
-
- if (likely(ata_dev_enabled(ap->link.device)))
- rc = __ata_scsi_queuecmd(cmd, ap->link.device);
- else {
- cmd->result = (DID_BAD_TARGET << 16);
- cmd->scsi_done(cmd);
- }
- return rc;
-}
-EXPORT_SYMBOL_GPL(ata_sas_queuecmd);
-
-int ata_sas_allocate_tag(struct ata_port *ap)
-{
- unsigned int max_queue = ap->host->n_tags;
- unsigned int i, tag;
-
- for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) {
- tag = tag < max_queue ? tag : 0;
-
- /* the last tag is reserved for internal command. */
- if (ata_tag_internal(tag))
- continue;
-
- if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) {
- ap->sas_last_tag = tag;
- return tag;
- }
- }
- return -1;
-}
-
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap)
-{
- clear_bit(tag, &ap->sas_tag_allocated);
-}
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 038db94216a9..ae7189d1a568 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2,10 +2,6 @@
/*
* libata-sff.c - helper library for PCI IDE BMDMA
*
- * Maintained by: Tejun Heo <tj@kernel.org>
- * Please ALWAYS copy linux-ide@vger.kernel.org
- * on emails.
- *
* Copyright 2003-2006 Red Hat, Inc. All rights reserved.
* Copyright 2003-2006 Jeff Garzik
*
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
index 12a505bb9c5b..6a40e3c6cf49 100644
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -208,7 +208,7 @@ show_ata_port_##name(struct device *dev, \
{ \
struct ata_port *ap = transport_class_to_port(dev); \
\
- return snprintf(buf, 20, format_string, cast ap->field); \
+ return scnprintf(buf, 20, format_string, cast ap->field); \
}
#define ata_port_simple_attr(field, name, format_string, type) \
@@ -479,7 +479,7 @@ show_ata_dev_##field(struct device *dev, \
{ \
struct ata_device *ata_dev = transport_class_to_dev(dev); \
\
- return snprintf(buf, 20, format_string, cast ata_dev->field); \
+ return scnprintf(buf, 20, format_string, cast ata_dev->field); \
}
#define ata_dev_simple_attr(field, format_string, type) \
@@ -533,7 +533,7 @@ show_ata_dev_id(struct device *dev,
if (ata_dev->class == ATA_DEV_PMP)
return 0;
for(i=0;i<ATA_ID_WORDS;i++) {
- written += snprintf(buf+written, 20, "%04x%c",
+ written += scnprintf(buf+written, 20, "%04x%c",
ata_dev->id[i],
((i+1) & 7) ? ' ' : '\n');
}
@@ -552,7 +552,7 @@ show_ata_dev_gscr(struct device *dev,
if (ata_dev->class != ATA_DEV_PMP)
return 0;
for(i=0;i<SATA_PMP_GSCR_DWORDS;i++) {
- written += snprintf(buf+written, 20, "%08x%c",
+ written += scnprintf(buf+written, 20, "%08x%c",
ata_dev->gscr[i],
((i+1) & 3) ? ' ' : '\n');
}
@@ -581,7 +581,7 @@ show_ata_dev_trim(struct device *dev,
else
mode = "unqueued";
- return snprintf(buf, 20, "%s\n", mode);
+ return scnprintf(buf, 20, "%s\n", mode);
}
static DEVICE_ATTR(trim, S_IRUGO, show_ata_dev_trim, NULL);
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index cd8090ad43e5..68cdd81d747c 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -37,7 +37,11 @@ extern int libata_noacpi;
extern int libata_allow_tpm;
extern const struct device_type ata_port_type;
extern struct ata_link *ata_dev_phys_link(struct ata_device *dev);
+#ifdef CONFIG_ATA_FORCE
extern void ata_force_cbl(struct ata_port *ap);
+#else
+static inline void ata_force_cbl(struct ata_port *ap) { }
+#endif
extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
@@ -87,6 +91,18 @@ extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
#define to_ata_port(d) container_of(d, struct ata_port, tdev)
+/* libata-sata.c */
+#ifdef CONFIG_SATA_HOST
+int ata_sas_allocate_tag(struct ata_port *ap);
+void ata_sas_free_tag(unsigned int tag, struct ata_port *ap);
+#else
+static inline int ata_sas_allocate_tag(struct ata_port *ap)
+{
+ return -EOPNOTSUPP;
+}
+static inline void ata_sas_free_tag(unsigned int tag, struct ata_port *ap) { }
+#endif
+
/* libata-acpi.c */
#ifdef CONFIG_ATA_ACPI
extern unsigned int ata_acpi_gtf_filter;
@@ -112,6 +128,8 @@ static inline void ata_acpi_bind_dev(struct ata_device *dev) {}
#endif
/* libata-scsi.c */
+extern struct ata_device *ata_scsi_find_dev(struct ata_port *ap,
+ const struct scsi_device *scsidev);
extern int ata_scsi_add_hosts(struct ata_host *host,
struct scsi_host_template *sht);
extern void ata_scsi_scan_host(struct ata_port *ap, int sync);
@@ -128,9 +146,10 @@ extern void ata_scsi_dev_rescan(struct work_struct *work);
extern int ata_bus_probe(struct ata_port *ap);
extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
unsigned int id, u64 lun);
-int ata_sas_allocate_tag(struct ata_port *ap);
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap);
-
+void ata_scsi_sdev_config(struct scsi_device *sdev);
+int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev);
+void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd);
+int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev);
/* libata-eh.c */
extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index c451d7d1c817..8729f78cef5f 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -157,7 +157,6 @@ static int pdc_sata_hardreset(struct ata_link *link, unsigned int *class,
static void pdc_error_handler(struct ata_port *ap);
static void pdc_post_internal_cmd(struct ata_queued_cmd *qc);
static int pdc_pata_cable_detect(struct ata_port *ap);
-static int pdc_sata_cable_detect(struct ata_port *ap);
static struct scsi_host_template pdc_ata_sht = {
ATA_BASE_SHT(DRV_NAME),
@@ -183,7 +182,7 @@ static const struct ata_port_operations pdc_common_ops = {
static struct ata_port_operations pdc_sata_ops = {
.inherits = &pdc_common_ops,
- .cable_detect = pdc_sata_cable_detect,
+ .cable_detect = ata_cable_sata,
.freeze = pdc_sata_freeze,
.thaw = pdc_sata_thaw,
.scr_read = pdc_sata_scr_read,
@@ -459,11 +458,6 @@ static int pdc_pata_cable_detect(struct ata_port *ap)
return ATA_CBL_PATA80;
}
-static int pdc_sata_cable_detect(struct ata_port *ap)
-{
- return ATA_CBL_SATA;
-}
-
static int pdc_sata_scr_read(struct ata_link *link,
unsigned int sc_reg, u32 *val)
{
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 8db8c0fb5e2d..7af74fb450a0 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -91,7 +91,7 @@
#ifdef GENERAL_DEBUG
#define PRINTK(args...) printk(args)
#else
-#define PRINTK(args...)
+#define PRINTK(args...) do {} while (0)
#endif /* GENERAL_DEBUG */
#ifdef EXTRA_DEBUG
diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index b8313a04422d..48efa7a047f3 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -111,7 +111,7 @@ config CFAG12864B
If unsure, say N.
config CFAG12864B_RATE
- int "Refresh rate (hertz)"
+ int "Refresh rate (hertz)"
depends on CFAG12864B
default "20"
---help---
@@ -329,7 +329,7 @@ config PANEL_LCD_PROTO
config PANEL_LCD_PIN_E
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO="0"
- int "Parallel port pin number & polarity connected to the LCD E signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD E signal (-17...17) "
range -17 17
default 14
---help---
@@ -344,7 +344,7 @@ config PANEL_LCD_PIN_E
config PANEL_LCD_PIN_RS
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO="0"
- int "Parallel port pin number & polarity connected to the LCD RS signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD RS signal (-17...17) "
range -17 17
default 17
---help---
@@ -359,7 +359,7 @@ config PANEL_LCD_PIN_RS
config PANEL_LCD_PIN_RW
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO="0"
- int "Parallel port pin number & polarity connected to the LCD RW signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD RW signal (-17...17) "
range -17 17
default 16
---help---
@@ -374,7 +374,7 @@ config PANEL_LCD_PIN_RW
config PANEL_LCD_PIN_SCL
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO!="0"
- int "Parallel port pin number & polarity connected to the LCD SCL signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD SCL signal (-17...17) "
range -17 17
default 1
---help---
@@ -389,7 +389,7 @@ config PANEL_LCD_PIN_SCL
config PANEL_LCD_PIN_SDA
depends on PANEL_PROFILE="0" && PANEL_LCD="1" && PANEL_LCD_PROTO!="0"
- int "Parallel port pin number & polarity connected to the LCD SDA signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD SDA signal (-17...17) "
range -17 17
default 2
---help---
@@ -404,12 +404,12 @@ config PANEL_LCD_PIN_SDA
config PANEL_LCD_PIN_BL
depends on PANEL_PROFILE="0" && PANEL_LCD="1"
- int "Parallel port pin number & polarity connected to the LCD backlight signal (-17...17) "
+ int "Parallel port pin number & polarity connected to the LCD backlight signal (-17...17) "
range -17 17
default 0
---help---
This describes the number of the parallel port pin to which the LCD 'BL' signal
- has been connected. It can be :
+ has been connected. It can be :
0 : no connection (eg: connected to ground)
1..17 : directly connected to any of these pins on the DB25 plug
diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index 874c259a8829..c0da3820454b 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -88,7 +88,7 @@ struct charlcd_priv {
int len;
} esc_seq;
- unsigned long long drvdata[0];
+ unsigned long long drvdata[];
};
#define charlcd_to_priv(p) container_of(p, struct charlcd_priv, lcd)
diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
index efb928e25aef..1cce409ce5ca 100644
--- a/drivers/auxdisplay/img-ascii-lcd.c
+++ b/drivers/auxdisplay/img-ascii-lcd.c
@@ -356,7 +356,6 @@ static int img_ascii_lcd_probe(struct platform_device *pdev)
const struct of_device_id *match;
const struct img_ascii_lcd_config *cfg;
struct img_ascii_lcd_ctx *ctx;
- struct resource *res;
int err;
match = of_match_device(img_ascii_lcd_matches, &pdev->dev);
@@ -378,8 +377,7 @@ static int img_ascii_lcd_probe(struct platform_device *pdev)
&ctx->offset))
return -EINVAL;
} else {
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- ctx->base = devm_ioremap_resource(&pdev->dev, res);
+ ctx->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ctx->base))
return PTR_ERR(ctx->base);
}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index b9f474c11393..4086718f6876 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -97,30 +97,13 @@ static ssize_t phys_index_show(struct device *dev,
}
/*
- * Show whether the memory block is likely to be offlineable (or is already
- * offline). Once offline, the memory block could be removed. The return
- * value does, however, not indicate that there is a way to remove the
- * memory block.
+ * Legacy interface that we cannot remove. Always indicate "removable"
+ * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
*/
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct memory_block *mem = to_memory_block(dev);
- unsigned long pfn;
- int ret = 1, i;
-
- if (mem->state != MEM_ONLINE)
- goto out;
-
- for (i = 0; i < sections_per_block; i++) {
- if (!present_section_nr(mem->start_section_nr + i))
- continue;
- pfn = section_nr_to_pfn(mem->start_section_nr + i);
- ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
- }
-
-out:
- return sprintf(buf, "%d\n", ret);
+ return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
}
/*
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 7fa654f1288b..b5ce7b085795 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -363,10 +363,10 @@ static void setup_pdev_dma_masks(struct platform_device *pdev)
{
if (!pdev->dev.coherent_dma_mask)
pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
- if (!pdev->dma_mask)
- pdev->dma_mask = DMA_BIT_MASK(32);
- if (!pdev->dev.dma_mask)
- pdev->dev.dma_mask = &pdev->dma_mask;
+ if (!pdev->dev.dma_mask) {
+ pdev->platform_dma_mask = DMA_BIT_MASK(32);
+ pdev->dev.dma_mask = &pdev->platform_dma_mask;
+ }
};
/**
@@ -662,20 +662,8 @@ struct platform_device *platform_device_register_full(
pdev->dev.of_node_reused = pdevinfo->of_node_reused;
if (pdevinfo->dma_mask) {
- /*
- * This memory isn't freed when the device is put,
- * I don't have a nice idea for that though. Conceptually
- * dma_mask in struct device should not be a pointer.
- * See http://thread.gmane.org/gmane.linux.kernel.pci/9081
- */
- pdev->dev.dma_mask =
- kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL);
- if (!pdev->dev.dma_mask)
- goto err;
-
- kmemleak_ignore(pdev->dev.dma_mask);
-
- *pdev->dev.dma_mask = pdevinfo->dma_mask;
+ pdev->platform_dma_mask = pdevinfo->dma_mask;
+ pdev->dev.dma_mask = &pdev->platform_dma_mask;
pdev->dev.coherent_dma_mask = pdevinfo->dma_mask;
}
@@ -700,7 +688,6 @@ struct platform_device *platform_device_register_full(
if (ret) {
err:
ACPI_COMPANION_SET(&pdev->dev, NULL);
- kfree(pdev->dev.dma_mask);
platform_device_put(pdev);
return ERR_PTR(ret);
}
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index a53cc1e3a2d3..795facd8cf19 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -6,6 +6,9 @@
# Rewritten to use lists instead of if-statements.
#
+# needed for trace events
+ccflags-y += -I$(src)
+
obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
@@ -39,6 +42,9 @@ obj-$(CONFIG_ZRAM) += zram/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
null_blk-objs := null_blk_main.o
+ifeq ($(CONFIG_BLK_DEV_ZONED), y)
+null_blk-$(CONFIG_TRACING) += null_blk_trace.o
+endif
null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o
skd-y := skd_main.o
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 7b32fb673375..a27804d71e12 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -87,9 +87,9 @@ static ssize_t aoedisk_show_netif(struct device *dev,
if (*nd == NULL)
return snprintf(page, PAGE_SIZE, "none\n");
for (p = page; nd < ne; nd++)
- p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
+ p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s",
p == page ? "" : ",", (*nd)->name);
- p += snprintf(p, PAGE_SIZE - (p-page), "\n");
+ p += scnprintf(p, PAGE_SIZE - (p-page), "\n");
return p-page;
}
/* firmware version */
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 220c5e18aba0..2fb25c348d53 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -381,12 +381,10 @@ static struct brd_device *brd_alloc(int i)
spin_lock_init(&brd->brd_lock);
INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
- brd->brd_queue = blk_alloc_queue(GFP_KERNEL);
+ brd->brd_queue = blk_alloc_queue(brd_make_request, NUMA_NO_NODE);
if (!brd->brd_queue)
goto out_free_dev;
- blk_queue_make_request(brd->brd_queue, brd_make_request);
-
/* This is so fdisk will align partitions on 4k, because of
* direct_access API needing 4k alignment, returning a PFN
* (This is only a problem on very small devices <= 4M,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a18155cdce41..c094c3c2c5d4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2801,7 +2801,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_init_set_defaults(device);
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
+ q = blk_alloc_queue(drbd_make_request, NUMA_NO_NODE);
if (!q)
goto out_no_q;
device->rq_queue = q;
@@ -2828,7 +2828,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
q->backing_dev_info->congested_fn = drbd_congested;
q->backing_dev_info->congested_data = device;
- blk_queue_make_request(q, drbd_make_request);
blk_queue_write_cache(q, true, true);
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
@@ -3414,22 +3413,11 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
* the meta-data super block. This function sets MD_DIRTY, and starts a
* timer that ensures that within five seconds you have to call drbd_md_sync().
*/
-#ifdef DEBUG
-void drbd_md_mark_dirty_(struct drbd_device *device, unsigned int line, const char *func)
-{
- if (!test_and_set_bit(MD_DIRTY, &device->flags)) {
- mod_timer(&device->md_sync_timer, jiffies + HZ);
- device->last_md_mark_dirty.line = line;
- device->last_md_mark_dirty.func = func;
- }
-}
-#else
void drbd_md_mark_dirty(struct drbd_device *device)
{
if (!test_and_set_bit(MD_DIRTY, &device->flags))
mod_timer(&device->md_sync_timer, jiffies + 5*HZ);
}
-#endif
void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local)
{
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 79e216446030..c15e7083b13a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -33,6 +33,7 @@
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
+#include <linux/part_stat.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b7f605c6e231..0dc019da1f8d 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -22,6 +22,7 @@
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
+#include <linux/part_stat.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8ef65c085640..c3daa64cb52c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -171,7 +171,6 @@ static int print_unex = 1;
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
-#define FDPATCHES
#include <linux/fdreg.h>
#include <linux/fd.h>
#include <linux/hdreg.h>
@@ -306,36 +305,26 @@ static bool initialized;
/* reverse mapping from unit and fdc to drive */
#define REVDRIVE(fdc, unit) ((unit) + ((fdc) << 2))
-#define DP (&drive_params[current_drive])
-#define DRS (&drive_state[current_drive])
-#define DRWE (&write_errors[current_drive])
-#define FDCS (&fdc_state[fdc])
-
-#define UDP (&drive_params[drive])
-#define UDRS (&drive_state[drive])
-#define UDRWE (&write_errors[drive])
-#define UFDCS (&fdc_state[FDC(drive)])
-
#define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2)
#define STRETCH(floppy) ((floppy)->stretch & FD_STRETCH)
-/* read/write */
-#define COMMAND (raw_cmd->cmd[0])
-#define DR_SELECT (raw_cmd->cmd[1])
-#define TRACK (raw_cmd->cmd[2])
-#define HEAD (raw_cmd->cmd[3])
-#define SECTOR (raw_cmd->cmd[4])
-#define SIZECODE (raw_cmd->cmd[5])
-#define SECT_PER_TRACK (raw_cmd->cmd[6])
-#define GAP (raw_cmd->cmd[7])
-#define SIZECODE2 (raw_cmd->cmd[8])
+/* read/write commands */
+#define COMMAND 0
+#define DR_SELECT 1
+#define TRACK 2
+#define HEAD 3
+#define SECTOR 4
+#define SIZECODE 5
+#define SECT_PER_TRACK 6
+#define GAP 7
+#define SIZECODE2 8
#define NR_RW 9
-/* format */
-#define F_SIZECODE (raw_cmd->cmd[2])
-#define F_SECT_PER_TRACK (raw_cmd->cmd[3])
-#define F_GAP (raw_cmd->cmd[4])
-#define F_FILL (raw_cmd->cmd[5])
+/* format commands */
+#define F_SIZECODE 2
+#define F_SECT_PER_TRACK 3
+#define F_GAP 4
+#define F_FILL 5
#define NR_F 6
/*
@@ -351,14 +340,14 @@ static bool initialized;
#define MAX_REPLIES 16
static unsigned char reply_buffer[MAX_REPLIES];
static int inr; /* size of reply buffer, when called from interrupt */
-#define ST0 (reply_buffer[0])
-#define ST1 (reply_buffer[1])
-#define ST2 (reply_buffer[2])
-#define ST3 (reply_buffer[0]) /* result of GETSTATUS */
-#define R_TRACK (reply_buffer[3])
-#define R_HEAD (reply_buffer[4])
-#define R_SECTOR (reply_buffer[5])
-#define R_SIZECODE (reply_buffer[6])
+#define ST0 0
+#define ST1 1
+#define ST2 2
+#define ST3 0 /* result of GETSTATUS */
+#define R_TRACK 3
+#define R_HEAD 4
+#define R_SECTOR 5
+#define R_SIZECODE 6
#define SEL_DLY (2 * HZ / 100)
@@ -593,7 +582,7 @@ static int buffer_max = -1;
/* fdc related variables, should end up in a struct */
static struct floppy_fdc_state fdc_state[N_FDC];
-static int fdc; /* current fdc */
+static int current_fdc; /* current fdc */
static struct workqueue_struct *floppy_wq;
@@ -604,9 +593,19 @@ static unsigned char fsector_t; /* sector in track */
static unsigned char in_sector_offset; /* offset within physical sector,
* expressed in units of 512 bytes */
+static inline unsigned char fdc_inb(int fdc, int reg)
+{
+ return fd_inb(fdc_state[fdc].address + reg);
+}
+
+static inline void fdc_outb(unsigned char value, int fdc, int reg)
+{
+ fd_outb(value, fdc_state[fdc].address + reg);
+}
+
static inline bool drive_no_geom(int drive)
{
- return !current_type[drive] && !ITYPE(UDRS->fd_device);
+ return !current_type[drive] && !ITYPE(drive_state[drive].fd_device);
}
#ifndef fd_eject
@@ -630,7 +629,7 @@ static inline void set_debugt(void)
static inline void debugt(const char *func, const char *msg)
{
- if (DP->flags & DEBUGT)
+ if (drive_params[current_drive].flags & DEBUGT)
pr_info("%s:%s dtime=%lu\n", func, msg, jiffies - debugtimer);
}
#else
@@ -683,10 +682,10 @@ static void __reschedule_timeout(int drive, const char *message)
delay = 20UL * HZ;
drive = 0;
} else
- delay = UDP->timeout;
+ delay = drive_params[drive].timeout;
mod_delayed_work(floppy_wq, &fd_timeout, delay);
- if (UDP->flags & FD_DEBUG)
+ if (drive_params[drive].flags & FD_DEBUG)
DPRINT("reschedule timeout %s\n", message);
timeout_message = message;
}
@@ -740,33 +739,37 @@ static int disk_change(int drive)
{
int fdc = FDC(drive);
- if (time_before(jiffies, UDRS->select_date + UDP->select_delay))
+ if (time_before(jiffies, drive_state[drive].select_date + drive_params[drive].select_delay))
DPRINT("WARNING disk change called early\n");
- if (!(FDCS->dor & (0x10 << UNIT(drive))) ||
- (FDCS->dor & 3) != UNIT(drive) || fdc != FDC(drive)) {
+ if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive))) ||
+ (fdc_state[fdc].dor & 3) != UNIT(drive) || fdc != FDC(drive)) {
DPRINT("probing disk change on unselected drive\n");
DPRINT("drive=%d fdc=%d dor=%x\n", drive, FDC(drive),
- (unsigned int)FDCS->dor);
+ (unsigned int)fdc_state[fdc].dor);
}
- debug_dcl(UDP->flags,
+ debug_dcl(drive_params[drive].flags,
"checking disk change line for drive %d\n", drive);
- debug_dcl(UDP->flags, "jiffies=%lu\n", jiffies);
- debug_dcl(UDP->flags, "disk change line=%x\n", fd_inb(FD_DIR) & 0x80);
- debug_dcl(UDP->flags, "flags=%lx\n", UDRS->flags);
-
- if (UDP->flags & FD_BROKEN_DCL)
- return test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
- if ((fd_inb(FD_DIR) ^ UDP->flags) & 0x80) {
- set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ debug_dcl(drive_params[drive].flags, "jiffies=%lu\n", jiffies);
+ debug_dcl(drive_params[drive].flags, "disk change line=%x\n",
+ fdc_inb(fdc, FD_DIR) & 0x80);
+ debug_dcl(drive_params[drive].flags, "flags=%lx\n",
+ drive_state[drive].flags);
+
+ if (drive_params[drive].flags & FD_BROKEN_DCL)
+ return test_bit(FD_DISK_CHANGED_BIT,
+ &drive_state[drive].flags);
+ if ((fdc_inb(fdc, FD_DIR) ^ drive_params[drive].flags) & 0x80) {
+ set_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
/* verify write protection */
- if (UDRS->maxblock) /* mark it changed */
- set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
+ if (drive_state[drive].maxblock) /* mark it changed */
+ set_bit(FD_DISK_CHANGED_BIT,
+ &drive_state[drive].flags);
/* invalidate its geometry */
- if (UDRS->keep_data >= 0) {
- if ((UDP->flags & FTD_MSG) &&
+ if (drive_state[drive].keep_data >= 0) {
+ if ((drive_params[drive].flags & FTD_MSG) &&
current_type[drive] != NULL)
DPRINT("Disk type is undefined after disk change\n");
current_type[drive] = NULL;
@@ -775,8 +778,8 @@ static int disk_change(int drive)
return 1;
} else {
- UDRS->last_checked = jiffies;
- clear_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags);
+ drive_state[drive].last_checked = jiffies;
+ clear_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[drive].flags);
}
return 0;
}
@@ -799,26 +802,26 @@ static int set_dor(int fdc, char mask, char data)
unsigned char newdor;
unsigned char olddor;
- if (FDCS->address == -1)
+ if (fdc_state[fdc].address == -1)
return -1;
- olddor = FDCS->dor;
+ olddor = fdc_state[fdc].dor;
newdor = (olddor & mask) | data;
if (newdor != olddor) {
unit = olddor & 0x3;
if (is_selected(olddor, unit) && !is_selected(newdor, unit)) {
drive = REVDRIVE(fdc, unit);
- debug_dcl(UDP->flags,
+ debug_dcl(drive_params[drive].flags,
"calling disk change from set_dor\n");
disk_change(drive);
}
- FDCS->dor = newdor;
- fd_outb(newdor, FD_DOR);
+ fdc_state[fdc].dor = newdor;
+ fdc_outb(newdor, fdc, FD_DOR);
unit = newdor & 0x3;
if (!is_selected(olddor, unit) && is_selected(newdor, unit)) {
drive = REVDRIVE(fdc, unit);
- UDRS->select_date = jiffies;
+ drive_state[drive].select_date = jiffies;
}
}
return olddor;
@@ -826,11 +829,12 @@ static int set_dor(int fdc, char mask, char data)
static void twaddle(void)
{
- if (DP->select_delay)
+ if (drive_params[current_drive].select_delay)
return;
- fd_outb(FDCS->dor & ~(0x10 << UNIT(current_drive)), FD_DOR);
- fd_outb(FDCS->dor, FD_DOR);
- DRS->select_date = jiffies;
+ fdc_outb(fdc_state[current_fdc].dor & ~(0x10 << UNIT(current_drive)),
+ current_fdc, FD_DOR);
+ fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
+ drive_state[current_drive].select_date = jiffies;
}
/*
@@ -841,19 +845,20 @@ static void reset_fdc_info(int mode)
{
int drive;
- FDCS->spec1 = FDCS->spec2 = -1;
- FDCS->need_configure = 1;
- FDCS->perp_mode = 1;
- FDCS->rawcmd = 0;
+ fdc_state[current_fdc].spec1 = fdc_state[current_fdc].spec2 = -1;
+ fdc_state[current_fdc].need_configure = 1;
+ fdc_state[current_fdc].perp_mode = 1;
+ fdc_state[current_fdc].rawcmd = 0;
for (drive = 0; drive < N_DRIVE; drive++)
- if (FDC(drive) == fdc && (mode || UDRS->track != NEED_1_RECAL))
- UDRS->track = NEED_2_RECAL;
+ if (FDC(drive) == current_fdc &&
+ (mode || drive_state[drive].track != NEED_1_RECAL))
+ drive_state[drive].track = NEED_2_RECAL;
}
/* selects the fdc and drive, and enables the fdc's input/dma. */
static void set_fdc(int drive)
{
- unsigned int new_fdc = fdc;
+ unsigned int new_fdc = current_fdc;
if (drive >= 0 && drive < N_DRIVE) {
new_fdc = FDC(drive);
@@ -863,15 +868,15 @@ static void set_fdc(int drive)
pr_info("bad fdc value\n");
return;
}
- fdc = new_fdc;
- set_dor(fdc, ~0, 8);
+ current_fdc = new_fdc;
+ set_dor(current_fdc, ~0, 8);
#if N_FDC > 1
- set_dor(1 - fdc, ~8, 0);
+ set_dor(1 - current_fdc, ~8, 0);
#endif
- if (FDCS->rawcmd == 2)
+ if (fdc_state[current_fdc].rawcmd == 2)
reset_fdc_info(1);
- if (fd_inb(FD_STATUS) != STATUS_READY)
- FDCS->reset = 1;
+ if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY)
+ fdc_state[current_fdc].reset = 1;
}
/* locks the driver */
@@ -924,19 +929,19 @@ static void floppy_off(unsigned int drive)
unsigned long volatile delta;
int fdc = FDC(drive);
- if (!(FDCS->dor & (0x10 << UNIT(drive))))
+ if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive))))
return;
del_timer(motor_off_timer + drive);
/* make spindle stop in a position which minimizes spinup time
* next time */
- if (UDP->rps) {
- delta = jiffies - UDRS->first_read_date + HZ -
- UDP->spindown_offset;
- delta = ((delta * UDP->rps) % HZ) / UDP->rps;
+ if (drive_params[drive].rps) {
+ delta = jiffies - drive_state[drive].first_read_date + HZ -
+ drive_params[drive].spindown_offset;
+ delta = ((delta * drive_params[drive].rps) % HZ) / drive_params[drive].rps;
motor_off_timer[drive].expires =
- jiffies + UDP->spindown - delta;
+ jiffies + drive_params[drive].spindown - delta;
}
add_timer(motor_off_timer + drive);
}
@@ -952,20 +957,20 @@ static void scandrives(void)
int drive;
int saved_drive;
- if (DP->select_delay)
+ if (drive_params[current_drive].select_delay)
return;
saved_drive = current_drive;
for (i = 0; i < N_DRIVE; i++) {
drive = (saved_drive + i + 1) % N_DRIVE;
- if (UDRS->fd_ref == 0 || UDP->select_delay != 0)
+ if (drive_state[drive].fd_ref == 0 || drive_params[drive].select_delay != 0)
continue; /* skip closed drives */
set_fdc(drive);
- if (!(set_dor(fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) &
+ if (!(set_dor(current_fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) &
(0x10 << UNIT(drive))))
/* switch the motor off again, if it was off to
* begin with */
- set_dor(fdc, ~(0x10 << UNIT(drive)), 0);
+ set_dor(current_fdc, ~(0x10 << UNIT(drive)), 0);
}
set_fdc(saved_drive);
}
@@ -1011,7 +1016,8 @@ static void cancel_activity(void)
* transfer */
static void fd_watchdog(void)
{
- debug_dcl(DP->flags, "calling disk change from watchdog\n");
+ debug_dcl(drive_params[current_drive].flags,
+ "calling disk change from watchdog\n");
if (disk_change(current_drive)) {
DPRINT("disk removed during i/o\n");
@@ -1035,7 +1041,7 @@ static void main_command_interrupt(void)
static int fd_wait_for_completion(unsigned long expires,
void (*function)(void))
{
- if (FDCS->reset) {
+ if (fdc_state[current_fdc].reset) {
reset_fdc(); /* do the reset during sleep to win time
* if we don't need to sleep, it's a good
* occasion anyways */
@@ -1063,13 +1069,13 @@ static void setup_DMA(void)
pr_cont("%x,", raw_cmd->cmd[i]);
pr_cont("\n");
cont->done(0);
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return;
}
if (((unsigned long)raw_cmd->kernel_data) % 512) {
pr_info("non aligned address: %p\n", raw_cmd->kernel_data);
cont->done(0);
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return;
}
f = claim_dma_lock();
@@ -1077,10 +1083,11 @@ static void setup_DMA(void)
#ifdef fd_dma_setup
if (fd_dma_setup(raw_cmd->kernel_data, raw_cmd->length,
(raw_cmd->flags & FD_RAW_READ) ?
- DMA_MODE_READ : DMA_MODE_WRITE, FDCS->address) < 0) {
+ DMA_MODE_READ : DMA_MODE_WRITE,
+ fdc_state[current_fdc].address) < 0) {
release_dma_lock(f);
cont->done(0);
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return;
}
release_dma_lock(f);
@@ -1091,7 +1098,7 @@ static void setup_DMA(void)
DMA_MODE_READ : DMA_MODE_WRITE);
fd_set_dma_addr(raw_cmd->kernel_data);
fd_set_dma_count(raw_cmd->length);
- virtual_dma_port = FDCS->address;
+ virtual_dma_port = fdc_state[current_fdc].address;
fd_enable_dma();
release_dma_lock(f);
#endif
@@ -1105,18 +1112,18 @@ static int wait_til_ready(void)
int status;
int counter;
- if (FDCS->reset)
+ if (fdc_state[current_fdc].reset)
return -1;
for (counter = 0; counter < 10000; counter++) {
- status = fd_inb(FD_STATUS);
+ status = fdc_inb(current_fdc, FD_STATUS);
if (status & STATUS_READY)
return status;
}
if (initialized) {
- DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc);
+ DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc);
show_floppy();
}
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return -1;
}
@@ -1129,17 +1136,17 @@ static int output_byte(char byte)
return -1;
if (is_ready_state(status)) {
- fd_outb(byte, FD_DATA);
+ fdc_outb(byte, current_fdc, FD_DATA);
output_log[output_log_pos].data = byte;
output_log[output_log_pos].status = status;
output_log[output_log_pos].jiffies = jiffies;
output_log_pos = (output_log_pos + 1) % OLOGSIZE;
return 0;
}
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
if (initialized) {
DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n",
- byte, fdc, status);
+ byte, current_fdc, status);
show_floppy();
}
return -1;
@@ -1162,16 +1169,16 @@ static int result(void)
return i;
}
if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY))
- reply_buffer[i] = fd_inb(FD_DATA);
+ reply_buffer[i] = fdc_inb(current_fdc, FD_DATA);
else
break;
}
if (initialized) {
DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n",
- fdc, status, i);
+ current_fdc, status, i);
show_floppy();
}
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return -1;
}
@@ -1208,7 +1215,7 @@ static void perpendicular_mode(void)
default:
DPRINT("Invalid data rate for perpendicular mode!\n");
cont->done(0);
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
/*
* convenient way to return to
* redo without too much hassle
@@ -1219,12 +1226,12 @@ static void perpendicular_mode(void)
} else
perp_mode = 0;
- if (FDCS->perp_mode == perp_mode)
+ if (fdc_state[current_fdc].perp_mode == perp_mode)
return;
- if (FDCS->version >= FDC_82077_ORIG) {
+ if (fdc_state[current_fdc].version >= FDC_82077_ORIG) {
output_byte(FD_PERPENDICULAR);
output_byte(perp_mode);
- FDCS->perp_mode = perp_mode;
+ fdc_state[current_fdc].perp_mode = perp_mode;
} else if (perp_mode) {
DPRINT("perpendicular mode not supported by this FDC.\n");
}
@@ -1279,9 +1286,10 @@ static void fdc_specify(void)
int hlt_max_code = 0x7f;
int hut_max_code = 0xf;
- if (FDCS->need_configure && FDCS->version >= FDC_82072A) {
+ if (fdc_state[current_fdc].need_configure &&
+ fdc_state[current_fdc].version >= FDC_82072A) {
fdc_configure();
- FDCS->need_configure = 0;
+ fdc_state[current_fdc].need_configure = 0;
}
switch (raw_cmd->rate & 0x03) {
@@ -1290,7 +1298,7 @@ static void fdc_specify(void)
break;
case 1:
dtr = 300;
- if (FDCS->version >= FDC_82078) {
+ if (fdc_state[current_fdc].version >= FDC_82078) {
/* chose the default rate table, not the one
* where 1 = 2 Mbps */
output_byte(FD_DRIVESPEC);
@@ -1305,27 +1313,30 @@ static void fdc_specify(void)
break;
}
- if (FDCS->version >= FDC_82072) {
+ if (fdc_state[current_fdc].version >= FDC_82072) {
scale_dtr = dtr;
hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */
hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) */
}
/* Convert step rate from microseconds to milliseconds and 4 bits */
- srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR);
+ srt = 16 - DIV_ROUND_UP(drive_params[current_drive].srt * scale_dtr / 1000,
+ NOMINAL_DTR);
if (slow_floppy)
srt = srt / 4;
SUPBOUND(srt, 0xf);
INFBOUND(srt, 0);
- hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR);
+ hlt = DIV_ROUND_UP(drive_params[current_drive].hlt * scale_dtr / 2,
+ NOMINAL_DTR);
if (hlt < 0x01)
hlt = 0x01;
else if (hlt > 0x7f)
hlt = hlt_max_code;
- hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR);
+ hut = DIV_ROUND_UP(drive_params[current_drive].hut * scale_dtr / 16,
+ NOMINAL_DTR);
if (hut < 0x1)
hut = 0x1;
else if (hut > 0xf)
@@ -1335,11 +1346,12 @@ static void fdc_specify(void)
spec2 = (hlt << 1) | (use_virtual_dma & 1);
/* If these parameters did not change, just return with success */
- if (FDCS->spec1 != spec1 || FDCS->spec2 != spec2) {
+ if (fdc_state[current_fdc].spec1 != spec1 ||
+ fdc_state[current_fdc].spec2 != spec2) {
/* Go ahead and set spec1 and spec2 */
output_byte(FD_SPECIFY);
- output_byte(FDCS->spec1 = spec1);
- output_byte(FDCS->spec2 = spec2);
+ output_byte(fdc_state[current_fdc].spec1 = spec1);
+ output_byte(fdc_state[current_fdc].spec2 = spec2);
}
} /* fdc_specify */
@@ -1350,52 +1362,55 @@ static void fdc_specify(void)
static int fdc_dtr(void)
{
/* If data rate not already set to desired value, set it. */
- if ((raw_cmd->rate & 3) == FDCS->dtr)
+ if ((raw_cmd->rate & 3) == fdc_state[current_fdc].dtr)
return 0;
/* Set dtr */
- fd_outb(raw_cmd->rate & 3, FD_DCR);
+ fdc_outb(raw_cmd->rate & 3, current_fdc, FD_DCR);
/* TODO: some FDC/drive combinations (C&T 82C711 with TEAC 1.2MB)
* need a stabilization period of several milliseconds to be
* enforced after data rate changes before R/W operations.
* Pause 5 msec to avoid trouble. (Needs to be 2 jiffies)
*/
- FDCS->dtr = raw_cmd->rate & 3;
+ fdc_state[current_fdc].dtr = raw_cmd->rate & 3;
return fd_wait_for_completion(jiffies + 2UL * HZ / 100, floppy_ready);
} /* fdc_dtr */
static void tell_sector(void)
{
pr_cont(": track %d, head %d, sector %d, size %d",
- R_TRACK, R_HEAD, R_SECTOR, R_SIZECODE);
+ reply_buffer[R_TRACK], reply_buffer[R_HEAD],
+ reply_buffer[R_SECTOR],
+ reply_buffer[R_SIZECODE]);
} /* tell_sector */
static void print_errors(void)
{
DPRINT("");
- if (ST0 & ST0_ECE) {
+ if (reply_buffer[ST0] & ST0_ECE) {
pr_cont("Recalibrate failed!");
- } else if (ST2 & ST2_CRC) {
+ } else if (reply_buffer[ST2] & ST2_CRC) {
pr_cont("data CRC error");
tell_sector();
- } else if (ST1 & ST1_CRC) {
+ } else if (reply_buffer[ST1] & ST1_CRC) {
pr_cont("CRC error");
tell_sector();
- } else if ((ST1 & (ST1_MAM | ST1_ND)) ||
- (ST2 & ST2_MAM)) {
+ } else if ((reply_buffer[ST1] & (ST1_MAM | ST1_ND)) ||
+ (reply_buffer[ST2] & ST2_MAM)) {
if (!probing) {
pr_cont("sector not found");
tell_sector();
} else
pr_cont("probe failed...");
- } else if (ST2 & ST2_WC) { /* seek error */
+ } else if (reply_buffer[ST2] & ST2_WC) { /* seek error */
pr_cont("wrong cylinder");
- } else if (ST2 & ST2_BC) { /* cylinder marked as bad */
+ } else if (reply_buffer[ST2] & ST2_BC) { /* cylinder marked as bad */
pr_cont("bad cylinder");
} else {
pr_cont("unknown error. ST[0..2] are: 0x%x 0x%x 0x%x",
- ST0, ST1, ST2);
+ reply_buffer[ST0], reply_buffer[ST1],
+ reply_buffer[ST2]);
tell_sector();
}
pr_cont("\n");
@@ -1414,33 +1429,35 @@ static int interpret_errors(void)
if (inr != 7) {
DPRINT("-- FDC reply error\n");
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return 1;
}
/* check IC to find cause of interrupt */
- switch (ST0 & ST0_INTR) {
+ switch (reply_buffer[ST0] & ST0_INTR) {
case 0x40: /* error occurred during command execution */
- if (ST1 & ST1_EOC)
+ if (reply_buffer[ST1] & ST1_EOC)
return 0; /* occurs with pseudo-DMA */
bad = 1;
- if (ST1 & ST1_WP) {
+ if (reply_buffer[ST1] & ST1_WP) {
DPRINT("Drive is write protected\n");
- clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags);
+ clear_bit(FD_DISK_WRITABLE_BIT,
+ &drive_state[current_drive].flags);
cont->done(0);
bad = 2;
- } else if (ST1 & ST1_ND) {
- set_bit(FD_NEED_TWADDLE_BIT, &DRS->flags);
- } else if (ST1 & ST1_OR) {
- if (DP->flags & FTD_MSG)
+ } else if (reply_buffer[ST1] & ST1_ND) {
+ set_bit(FD_NEED_TWADDLE_BIT,
+ &drive_state[current_drive].flags);
+ } else if (reply_buffer[ST1] & ST1_OR) {
+ if (drive_params[current_drive].flags & FTD_MSG)
DPRINT("Over/Underrun - retrying\n");
bad = 0;
- } else if (*errors >= DP->max_errors.reporting) {
+ } else if (*errors >= drive_params[current_drive].max_errors.reporting) {
print_errors();
}
- if (ST2 & ST2_WC || ST2 & ST2_BC)
+ if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC)
/* wrong cylinder => recal */
- DRS->track = NEED_2_RECAL;
+ drive_state[current_drive].track = NEED_2_RECAL;
return bad;
case 0x80: /* invalid command given */
DPRINT("Invalid FDC command given!\n");
@@ -1473,13 +1490,13 @@ static void setup_rw_floppy(void)
flags |= FD_RAW_INTR;
if ((flags & FD_RAW_SPIN) && !(flags & FD_RAW_NO_MOTOR)) {
- ready_date = DRS->spinup_date + DP->spinup;
+ ready_date = drive_state[current_drive].spinup_date + drive_params[current_drive].spinup;
/* If spinup will take a long time, rerun scandrives
* again just before spinup completion. Beware that
* after scandrives, we must again wait for selection.
*/
- if (time_after(ready_date, jiffies + DP->select_delay)) {
- ready_date -= DP->select_delay;
+ if (time_after(ready_date, jiffies + drive_params[current_drive].select_delay)) {
+ ready_date -= drive_params[current_drive].select_delay;
function = floppy_start;
} else
function = setup_rw_floppy;
@@ -1522,44 +1539,52 @@ static int blind_seek;
static void seek_interrupt(void)
{
debugt(__func__, "");
- if (inr != 2 || (ST0 & 0xF8) != 0x20) {
+ if (inr != 2 || (reply_buffer[ST0] & 0xF8) != 0x20) {
DPRINT("seek failed\n");
- DRS->track = NEED_2_RECAL;
+ drive_state[current_drive].track = NEED_2_RECAL;
cont->error();
cont->redo();
return;
}
- if (DRS->track >= 0 && DRS->track != ST1 && !blind_seek) {
- debug_dcl(DP->flags,
+ if (drive_state[current_drive].track >= 0 &&
+ drive_state[current_drive].track != reply_buffer[ST1] &&
+ !blind_seek) {
+ debug_dcl(drive_params[current_drive].flags,
"clearing NEWCHANGE flag because of effective seek\n");
- debug_dcl(DP->flags, "jiffies=%lu\n", jiffies);
- clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+ debug_dcl(drive_params[current_drive].flags, "jiffies=%lu\n",
+ jiffies);
+ clear_bit(FD_DISK_NEWCHANGE_BIT,
+ &drive_state[current_drive].flags);
/* effective seek */
- DRS->select_date = jiffies;
+ drive_state[current_drive].select_date = jiffies;
}
- DRS->track = ST1;
+ drive_state[current_drive].track = reply_buffer[ST1];
floppy_ready();
}
static void check_wp(void)
{
- if (test_bit(FD_VERIFY_BIT, &DRS->flags)) {
+ if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) {
/* check write protection */
output_byte(FD_GETSTATUS);
output_byte(UNIT(current_drive));
if (result() != 1) {
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return;
}
- clear_bit(FD_VERIFY_BIT, &DRS->flags);
- clear_bit(FD_NEED_TWADDLE_BIT, &DRS->flags);
- debug_dcl(DP->flags,
+ clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags);
+ clear_bit(FD_NEED_TWADDLE_BIT,
+ &drive_state[current_drive].flags);
+ debug_dcl(drive_params[current_drive].flags,
"checking whether disk is write protected\n");
- debug_dcl(DP->flags, "wp=%x\n", ST3 & 0x40);
- if (!(ST3 & 0x40))
- set_bit(FD_DISK_WRITABLE_BIT, &DRS->flags);
+ debug_dcl(drive_params[current_drive].flags, "wp=%x\n",
+ reply_buffer[ST3] & 0x40);
+ if (!(reply_buffer[ST3] & 0x40))
+ set_bit(FD_DISK_WRITABLE_BIT,
+ &drive_state[current_drive].flags);
else
- clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags);
+ clear_bit(FD_DISK_WRITABLE_BIT,
+ &drive_state[current_drive].flags);
}
}
@@ -1569,32 +1594,34 @@ static void seek_floppy(void)
blind_seek = 0;
- debug_dcl(DP->flags, "calling disk change from %s\n", __func__);
+ debug_dcl(drive_params[current_drive].flags,
+ "calling disk change from %s\n", __func__);
- if (!test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) &&
+ if (!test_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags) &&
disk_change(current_drive) && (raw_cmd->flags & FD_RAW_NEED_DISK)) {
/* the media changed flag should be cleared after the seek.
* If it isn't, this means that there is really no disk in
* the drive.
*/
- set_bit(FD_DISK_CHANGED_BIT, &DRS->flags);
+ set_bit(FD_DISK_CHANGED_BIT,
+ &drive_state[current_drive].flags);
cont->done(0);
cont->redo();
return;
}
- if (DRS->track <= NEED_1_RECAL) {
+ if (drive_state[current_drive].track <= NEED_1_RECAL) {
recalibrate_floppy();
return;
- } else if (test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) &&
+ } else if (test_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags) &&
(raw_cmd->flags & FD_RAW_NEED_DISK) &&
- (DRS->track <= NO_TRACK || DRS->track == raw_cmd->track)) {
+ (drive_state[current_drive].track <= NO_TRACK || drive_state[current_drive].track == raw_cmd->track)) {
/* we seek to clear the media-changed condition. Does anybody
* know a more elegant way, which works on all drives? */
if (raw_cmd->track)
track = raw_cmd->track - 1;
else {
- if (DP->flags & FD_SILENT_DCL_CLEAR) {
- set_dor(fdc, ~(0x10 << UNIT(current_drive)), 0);
+ if (drive_params[current_drive].flags & FD_SILENT_DCL_CLEAR) {
+ set_dor(current_fdc, ~(0x10 << UNIT(current_drive)), 0);
blind_seek = 1;
raw_cmd->flags |= FD_RAW_NEED_SEEK;
}
@@ -1602,7 +1629,7 @@ static void seek_floppy(void)
}
} else {
check_wp();
- if (raw_cmd->track != DRS->track &&
+ if (raw_cmd->track != drive_state[current_drive].track &&
(raw_cmd->flags & FD_RAW_NEED_SEEK))
track = raw_cmd->track;
else {
@@ -1625,9 +1652,9 @@ static void recal_interrupt(void)
{
debugt(__func__, "");
if (inr != 2)
- FDCS->reset = 1;
- else if (ST0 & ST0_ECE) {
- switch (DRS->track) {
+ fdc_state[current_fdc].reset = 1;
+ else if (reply_buffer[ST0] & ST0_ECE) {
+ switch (drive_state[current_drive].track) {
case NEED_1_RECAL:
debugt(__func__, "need 1 recal");
/* after a second recalibrate, we still haven't
@@ -1645,11 +1672,12 @@ static void recal_interrupt(void)
* not to move at recalibration is to
* be already at track 0.) Clear the
* new change flag */
- debug_dcl(DP->flags,
+ debug_dcl(drive_params[current_drive].flags,
"clearing NEWCHANGE flag because of second recalibrate\n");
- clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
- DRS->select_date = jiffies;
+ clear_bit(FD_DISK_NEWCHANGE_BIT,
+ &drive_state[current_drive].flags);
+ drive_state[current_drive].select_date = jiffies;
/* fall through */
default:
debugt(__func__, "default");
@@ -1659,11 +1687,11 @@ static void recal_interrupt(void)
* track 0, this might mean that we
* started beyond track 80. Try
* again. */
- DRS->track = NEED_1_RECAL;
+ drive_state[current_drive].track = NEED_1_RECAL;
break;
}
} else
- DRS->track = ST1;
+ drive_state[current_drive].track = reply_buffer[ST1];
floppy_ready();
}
@@ -1693,20 +1721,20 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
release_dma_lock(f);
do_floppy = NULL;
- if (fdc >= N_FDC || FDCS->address == -1) {
+ if (current_fdc >= N_FDC || fdc_state[current_fdc].address == -1) {
/* we don't even know which FDC is the culprit */
pr_info("DOR0=%x\n", fdc_state[0].dor);
- pr_info("floppy interrupt on bizarre fdc %d\n", fdc);
+ pr_info("floppy interrupt on bizarre fdc %d\n", current_fdc);
pr_info("handler=%ps\n", handler);
is_alive(__func__, "bizarre fdc");
return IRQ_NONE;
}
- FDCS->reset = 0;
+ fdc_state[current_fdc].reset = 0;
/* We have to clear the reset flag here, because apparently on boxes
* with level triggered interrupts (PS/2, Sparc, ...), it is needed to
- * emit SENSEI's to clear the interrupt line. And FDCS->reset blocks the
- * emission of the SENSEI's.
+ * emit SENSEI's to clear the interrupt line. And fdc_state[fdc].reset
+ * blocks the emission of the SENSEI's.
* It is OK to emit floppy commands because we are in an interrupt
* handler here, and thus we have to fear no interference of other
* activity.
@@ -1725,11 +1753,11 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
if (do_print)
print_result("sensei", inr);
max_sensei--;
- } while ((ST0 & 0x83) != UNIT(current_drive) &&
+ } while ((reply_buffer[ST0] & 0x83) != UNIT(current_drive) &&
inr == 2 && max_sensei);
}
if (!handler) {
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
return IRQ_NONE;
}
schedule_bh(handler);
@@ -1755,7 +1783,7 @@ static void reset_interrupt(void)
{
debugt(__func__, "");
result(); /* get the status ready for set_fdc */
- if (FDCS->reset) {
+ if (fdc_state[current_fdc].reset) {
pr_info("reset set in interrupt, calling %ps\n", cont->error);
cont->error(); /* a reset just after a reset. BAD! */
}
@@ -1771,7 +1799,7 @@ static void reset_fdc(void)
unsigned long flags;
do_floppy = reset_interrupt;
- FDCS->reset = 0;
+ fdc_state[current_fdc].reset = 0;
reset_fdc_info(0);
/* Pseudo-DMA may intercept 'reset finished' interrupt. */
@@ -1781,12 +1809,13 @@ static void reset_fdc(void)
fd_disable_dma();
release_dma_lock(flags);
- if (FDCS->version >= FDC_82072A)
- fd_outb(0x80 | (FDCS->dtr & 3), FD_STATUS);
+ if (fdc_state[current_fdc].version >= FDC_82072A)
+ fdc_outb(0x80 | (fdc_state[current_fdc].dtr & 3),
+ current_fdc, FD_STATUS);
else {
- fd_outb(FDCS->dor & ~0x04, FD_DOR);
+ fdc_outb(fdc_state[current_fdc].dor & ~0x04, current_fdc, FD_DOR);
udelay(FD_RESET_DELAY);
- fd_outb(FDCS->dor, FD_DOR);
+ fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
}
}
@@ -1813,7 +1842,7 @@ static void show_floppy(void)
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1,
reply_buffer, resultsize, true);
- pr_info("status=%x\n", fd_inb(FD_STATUS));
+ pr_info("status=%x\n", fdc_inb(current_fdc, FD_STATUS));
pr_info("fdc_busy=%lu\n", fdc_busy);
if (do_floppy)
pr_info("do_floppy=%ps\n", do_floppy);
@@ -1850,7 +1879,7 @@ static void floppy_shutdown(struct work_struct *arg)
if (initialized)
DPRINT("floppy timeout called\n");
- FDCS->reset = 1;
+ fdc_state[current_fdc].reset = 1;
if (cont) {
cont->done(0);
cont->redo(); /* this will recall reset when needed */
@@ -1870,29 +1899,29 @@ static int start_motor(void (*function)(void))
mask = 0xfc;
data = UNIT(current_drive);
if (!(raw_cmd->flags & FD_RAW_NO_MOTOR)) {
- if (!(FDCS->dor & (0x10 << UNIT(current_drive)))) {
+ if (!(fdc_state[current_fdc].dor & (0x10 << UNIT(current_drive)))) {
set_debugt();
/* no read since this drive is running */
- DRS->first_read_date = 0;
+ drive_state[current_drive].first_read_date = 0;
/* note motor start time if motor is not yet running */
- DRS->spinup_date = jiffies;
+ drive_state[current_drive].spinup_date = jiffies;
data |= (0x10 << UNIT(current_drive));
}
- } else if (FDCS->dor & (0x10 << UNIT(current_drive)))
+ } else if (fdc_state[current_fdc].dor & (0x10 << UNIT(current_drive)))
mask &= ~(0x10 << UNIT(current_drive));
/* starts motor and selects floppy */
del_timer(motor_off_timer + current_drive);
- set_dor(fdc, mask, data);
+ set_dor(current_fdc, mask, data);
/* wait_for_completion also schedules reset if needed. */
- return fd_wait_for_completion(DRS->select_date + DP->select_delay,
+ return fd_wait_for_completion(drive_state[current_drive].select_date + drive_params[current_drive].select_delay,
function);
}
static void floppy_ready(void)
{
- if (FDCS->reset) {
+ if (fdc_state[current_fdc].reset) {
reset_fdc();
return;
}
@@ -1901,9 +1930,10 @@ static void floppy_ready(void)
if (fdc_dtr())
return;
- debug_dcl(DP->flags, "calling disk change from floppy_ready\n");
+ debug_dcl(drive_params[current_drive].flags,
+ "calling disk change from floppy_ready\n");
if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) &&
- disk_change(current_drive) && !DP->select_delay)
+ disk_change(current_drive) && !drive_params[current_drive].select_delay)
twaddle(); /* this clears the dcl on certain
* drive/controller combinations */
@@ -1932,8 +1962,9 @@ static void floppy_start(void)
reschedule_timeout(current_reqD, "floppy start");
scandrives();
- debug_dcl(DP->flags, "setting NEWCHANGE in floppy_start\n");
- set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+ debug_dcl(drive_params[current_drive].flags,
+ "setting NEWCHANGE in floppy_start\n");
+ set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags);
floppy_ready();
}
@@ -1991,7 +2022,7 @@ static int wait_til_done(void (*handler)(void), bool interruptible)
return -EINTR;
}
- if (FDCS->reset)
+ if (fdc_state[current_fdc].reset)
command_status = FD_COMMAND_ERROR;
if (command_status == FD_COMMAND_OKAY)
ret = 0;
@@ -2032,14 +2063,14 @@ static int next_valid_format(void)
{
int probed_format;
- probed_format = DRS->probed_format;
+ probed_format = drive_state[current_drive].probed_format;
while (1) {
- if (probed_format >= 8 || !DP->autodetect[probed_format]) {
- DRS->probed_format = 0;
+ if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) {
+ drive_state[current_drive].probed_format = 0;
return 1;
}
- if (floppy_type[DP->autodetect[probed_format]].sect) {
- DRS->probed_format = probed_format;
+ if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) {
+ drive_state[current_drive].probed_format = probed_format;
return 0;
}
probed_format++;
@@ -2051,23 +2082,23 @@ static void bad_flp_intr(void)
int err_count;
if (probing) {
- DRS->probed_format++;
+ drive_state[current_drive].probed_format++;
if (!next_valid_format())
return;
}
err_count = ++(*errors);
- INFBOUND(DRWE->badness, err_count);
- if (err_count > DP->max_errors.abort)
+ INFBOUND(write_errors[current_drive].badness, err_count);
+ if (err_count > drive_params[current_drive].max_errors.abort)
cont->done(0);
- if (err_count > DP->max_errors.reset)
- FDCS->reset = 1;
- else if (err_count > DP->max_errors.recal)
- DRS->track = NEED_2_RECAL;
+ if (err_count > drive_params[current_drive].max_errors.reset)
+ fdc_state[current_fdc].reset = 1;
+ else if (err_count > drive_params[current_drive].max_errors.recal)
+ drive_state[current_drive].track = NEED_2_RECAL;
}
static void set_floppy(int drive)
{
- int type = ITYPE(UDRS->fd_device);
+ int type = ITYPE(drive_state[drive].fd_device);
if (type)
_floppy = floppy_type + type;
@@ -2113,28 +2144,28 @@ static void setup_format_params(int track)
FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK);
raw_cmd->rate = _floppy->rate & 0x43;
raw_cmd->cmd_count = NR_F;
- COMMAND = FM_MODE(_floppy, FD_FORMAT);
- DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, format_req.head);
- F_SIZECODE = FD_SIZECODE(_floppy);
- F_SECT_PER_TRACK = _floppy->sect << 2 >> F_SIZECODE;
- F_GAP = _floppy->fmt_gap;
- F_FILL = FD_FILL_BYTE;
+ raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_FORMAT);
+ raw_cmd->cmd[DR_SELECT] = UNIT(current_drive) + PH_HEAD(_floppy, format_req.head);
+ raw_cmd->cmd[F_SIZECODE] = FD_SIZECODE(_floppy);
+ raw_cmd->cmd[F_SECT_PER_TRACK] = _floppy->sect << 2 >> raw_cmd->cmd[F_SIZECODE];
+ raw_cmd->cmd[F_GAP] = _floppy->fmt_gap;
+ raw_cmd->cmd[F_FILL] = FD_FILL_BYTE;
raw_cmd->kernel_data = floppy_track_buffer;
- raw_cmd->length = 4 * F_SECT_PER_TRACK;
+ raw_cmd->length = 4 * raw_cmd->cmd[F_SECT_PER_TRACK];
- if (!F_SECT_PER_TRACK)
+ if (!raw_cmd->cmd[F_SECT_PER_TRACK])
return;
/* allow for about 30ms for data transport per track */
- head_shift = (F_SECT_PER_TRACK + 5) / 6;
+ head_shift = (raw_cmd->cmd[F_SECT_PER_TRACK] + 5) / 6;
/* a ``cylinder'' is two tracks plus a little stepping time */
track_shift = 2 * head_shift + 3;
/* position of logical sector 1 on this track */
n = (track_shift * format_req.track + head_shift * format_req.head)
- % F_SECT_PER_TRACK;
+ % raw_cmd->cmd[F_SECT_PER_TRACK];
/* determine interleave */
il = 1;
@@ -2142,27 +2173,27 @@ static void setup_format_params(int track)
il++;
/* initialize field */
- for (count = 0; count < F_SECT_PER_TRACK; ++count) {
+ for (count = 0; count < raw_cmd->cmd[F_SECT_PER_TRACK]; ++count) {
here[count].track = format_req.track;
here[count].head = format_req.head;
here[count].sect = 0;
- here[count].size = F_SIZECODE;
+ here[count].size = raw_cmd->cmd[F_SIZECODE];
}
/* place logical sectors */
- for (count = 1; count <= F_SECT_PER_TRACK; ++count) {
+ for (count = 1; count <= raw_cmd->cmd[F_SECT_PER_TRACK]; ++count) {
here[n].sect = count;
- n = (n + il) % F_SECT_PER_TRACK;
+ n = (n + il) % raw_cmd->cmd[F_SECT_PER_TRACK];
if (here[n].sect) { /* sector busy, find next free sector */
++n;
- if (n >= F_SECT_PER_TRACK) {
- n -= F_SECT_PER_TRACK;
+ if (n >= raw_cmd->cmd[F_SECT_PER_TRACK]) {
+ n -= raw_cmd->cmd[F_SECT_PER_TRACK];
while (here[n].sect)
++n;
}
}
}
if (_floppy->stretch & FD_SECTBASEMASK) {
- for (count = 0; count < F_SECT_PER_TRACK; count++)
+ for (count = 0; count < raw_cmd->cmd[F_SECT_PER_TRACK]; count++)
here[count].sect += FD_SECTBASE(_floppy) - 1;
}
}
@@ -2191,7 +2222,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
set_floppy(drive);
if (!_floppy ||
- _floppy->track > DP->tracks ||
+ _floppy->track > drive_params[current_drive].tracks ||
tmp_format_req->track >= _floppy->track ||
tmp_format_req->head >= _floppy->head ||
(_floppy->sect << 2) % (1 << FD_SIZECODE(_floppy)) ||
@@ -2253,21 +2284,21 @@ static void request_done(int uptodate)
/* maintain values for invalidation on geometry
* change */
block = current_count_sectors + blk_rq_pos(req);
- INFBOUND(DRS->maxblock, block);
+ INFBOUND(drive_state[current_drive].maxblock, block);
if (block > _floppy->sect)
- DRS->maxtrack = 1;
+ drive_state[current_drive].maxtrack = 1;
floppy_end_request(req, 0);
} else {
if (rq_data_dir(req) == WRITE) {
/* record write error information */
- DRWE->write_errors++;
- if (DRWE->write_errors == 1) {
- DRWE->first_error_sector = blk_rq_pos(req);
- DRWE->first_error_generation = DRS->generation;
+ write_errors[current_drive].write_errors++;
+ if (write_errors[current_drive].write_errors == 1) {
+ write_errors[current_drive].first_error_sector = blk_rq_pos(req);
+ write_errors[current_drive].first_error_generation = drive_state[current_drive].generation;
}
- DRWE->last_error_sector = blk_rq_pos(req);
- DRWE->last_error_generation = DRS->generation;
+ write_errors[current_drive].last_error_sector = blk_rq_pos(req);
+ write_errors[current_drive].last_error_generation = drive_state[current_drive].generation;
}
floppy_end_request(req, BLK_STS_IOERR);
}
@@ -2281,43 +2312,46 @@ static void rw_interrupt(void)
int heads;
int nr_sectors;
- if (R_HEAD >= 2) {
+ if (reply_buffer[R_HEAD] >= 2) {
/* some Toshiba floppy controllers occasionnally seem to
* return bogus interrupts after read/write operations, which
* can be recognized by a bad head number (>= 2) */
return;
}
- if (!DRS->first_read_date)
- DRS->first_read_date = jiffies;
+ if (!drive_state[current_drive].first_read_date)
+ drive_state[current_drive].first_read_date = jiffies;
nr_sectors = 0;
- ssize = DIV_ROUND_UP(1 << SIZECODE, 4);
+ ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4);
- if (ST1 & ST1_EOC)
+ if (reply_buffer[ST1] & ST1_EOC)
eoc = 1;
else
eoc = 0;
- if (COMMAND & 0x80)
+ if (raw_cmd->cmd[COMMAND] & 0x80)
heads = 2;
else
heads = 1;
- nr_sectors = (((R_TRACK - TRACK) * heads +
- R_HEAD - HEAD) * SECT_PER_TRACK +
- R_SECTOR - SECTOR + eoc) << SIZECODE >> 2;
+ nr_sectors = (((reply_buffer[R_TRACK] - raw_cmd->cmd[TRACK]) * heads +
+ reply_buffer[R_HEAD] - raw_cmd->cmd[HEAD]) * raw_cmd->cmd[SECT_PER_TRACK] +
+ reply_buffer[R_SECTOR] - raw_cmd->cmd[SECTOR] + eoc) << raw_cmd->cmd[SIZECODE] >> 2;
if (nr_sectors / ssize >
DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) {
DPRINT("long rw: %x instead of %lx\n",
nr_sectors, current_count_sectors);
- pr_info("rs=%d s=%d\n", R_SECTOR, SECTOR);
- pr_info("rh=%d h=%d\n", R_HEAD, HEAD);
- pr_info("rt=%d t=%d\n", R_TRACK, TRACK);
+ pr_info("rs=%d s=%d\n", reply_buffer[R_SECTOR],
+ raw_cmd->cmd[SECTOR]);
+ pr_info("rh=%d h=%d\n", reply_buffer[R_HEAD],
+ raw_cmd->cmd[HEAD]);
+ pr_info("rt=%d t=%d\n", reply_buffer[R_TRACK],
+ raw_cmd->cmd[TRACK]);
pr_info("heads=%d eoc=%d\n", heads, eoc);
pr_info("spt=%d st=%d ss=%d\n",
- SECT_PER_TRACK, fsector_t, ssize);
+ raw_cmd->cmd[SECT_PER_TRACK], fsector_t, ssize);
pr_info("in_sector_offset=%d\n", in_sector_offset);
}
@@ -2347,7 +2381,7 @@ static void rw_interrupt(void)
}
if (probing) {
- if (DP->flags & FTD_MSG)
+ if (drive_params[current_drive].flags & FTD_MSG)
DPRINT("Auto-detected floppy type %s in fd%d\n",
_floppy->name, current_drive);
current_type[current_drive] = _floppy;
@@ -2355,11 +2389,11 @@ static void rw_interrupt(void)
probing = 0;
}
- if (CT(COMMAND) != FD_READ ||
+ if (CT(raw_cmd->cmd[COMMAND]) != FD_READ ||
raw_cmd->kernel_data == bio_data(current_req->bio)) {
/* transfer directly from buffer */
cont->done(1);
- } else if (CT(COMMAND) == FD_READ) {
+ } else if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) {
buffer_track = raw_cmd->track;
buffer_drive = current_drive;
INFBOUND(buffer_max, nr_sectors + fsector_t);
@@ -2418,13 +2452,13 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
min(max_sector, max_sector_2),
blk_rq_sectors(current_req));
- if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE &&
+ if (current_count_sectors <= 0 && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE &&
buffer_max > fsector_t + blk_rq_sectors(current_req))
current_count_sectors = min_t(int, buffer_max - fsector_t,
blk_rq_sectors(current_req));
remaining = current_count_sectors << 9;
- if (remaining > blk_rq_bytes(current_req) && CT(COMMAND) == FD_WRITE) {
+ if (remaining > blk_rq_bytes(current_req) && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
DPRINT("in copy buffer\n");
pr_info("current_count_sectors=%ld\n", current_count_sectors);
pr_info("remaining=%d\n", remaining >> 9);
@@ -2459,16 +2493,16 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
fsector_t, buffer_min);
pr_info("current_count_sectors=%ld\n",
current_count_sectors);
- if (CT(COMMAND) == FD_READ)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
pr_info("read\n");
- if (CT(COMMAND) == FD_WRITE)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE)
pr_info("write\n");
break;
}
if (((unsigned long)buffer) % 512)
DPRINT("%p buffer not aligned\n", buffer);
- if (CT(COMMAND) == FD_READ)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
memcpy(buffer, dma_buffer, size);
else
memcpy(dma_buffer, buffer, size);
@@ -2486,7 +2520,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
/* work around a bug in pseudo DMA
* (on some FDCs) pseudo DMA does not stop when the CPU stops
* sending data. Hence we need a different way to signal the
- * transfer length: We use SECT_PER_TRACK. Unfortunately, this
+ * transfer length: We use raw_cmd->cmd[SECT_PER_TRACK]. Unfortunately, this
* does not work with MT, hence we can only transfer one head at
* a time
*/
@@ -2495,18 +2529,18 @@ static void virtualdmabug_workaround(void)
int hard_sectors;
int end_sector;
- if (CT(COMMAND) == FD_WRITE) {
- COMMAND &= ~0x80; /* switch off multiple track mode */
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
+ raw_cmd->cmd[COMMAND] &= ~0x80; /* switch off multiple track mode */
- hard_sectors = raw_cmd->length >> (7 + SIZECODE);
- end_sector = SECTOR + hard_sectors - 1;
- if (end_sector > SECT_PER_TRACK) {
+ hard_sectors = raw_cmd->length >> (7 + raw_cmd->cmd[SIZECODE]);
+ end_sector = raw_cmd->cmd[SECTOR] + hard_sectors - 1;
+ if (end_sector > raw_cmd->cmd[SECT_PER_TRACK]) {
pr_info("too many sectors %d > %d\n",
- end_sector, SECT_PER_TRACK);
+ end_sector, raw_cmd->cmd[SECT_PER_TRACK]);
return;
}
- SECT_PER_TRACK = end_sector;
- /* make sure SECT_PER_TRACK
+ raw_cmd->cmd[SECT_PER_TRACK] = end_sector;
+ /* make sure raw_cmd->cmd[SECT_PER_TRACK]
* points to end of transfer */
}
}
@@ -2539,10 +2573,10 @@ static int make_raw_rw_request(void)
raw_cmd->cmd_count = NR_RW;
if (rq_data_dir(current_req) == READ) {
raw_cmd->flags |= FD_RAW_READ;
- COMMAND = FM_MODE(_floppy, FD_READ);
+ raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_READ);
} else if (rq_data_dir(current_req) == WRITE) {
raw_cmd->flags |= FD_RAW_WRITE;
- COMMAND = FM_MODE(_floppy, FD_WRITE);
+ raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_WRITE);
} else {
DPRINT("%s: unknown command\n", __func__);
return 0;
@@ -2550,24 +2584,24 @@ static int make_raw_rw_request(void)
max_sector = _floppy->sect * _floppy->head;
- TRACK = (int)blk_rq_pos(current_req) / max_sector;
+ raw_cmd->cmd[TRACK] = (int)blk_rq_pos(current_req) / max_sector;
fsector_t = (int)blk_rq_pos(current_req) % max_sector;
- if (_floppy->track && TRACK >= _floppy->track) {
+ if (_floppy->track && raw_cmd->cmd[TRACK] >= _floppy->track) {
if (blk_rq_cur_sectors(current_req) & 1) {
current_count_sectors = 1;
return 1;
} else
return 0;
}
- HEAD = fsector_t / _floppy->sect;
+ raw_cmd->cmd[HEAD] = fsector_t / _floppy->sect;
if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) ||
- test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags)) &&
+ test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) &&
fsector_t < _floppy->sect)
max_sector = _floppy->sect;
/* 2M disks have phantom sectors on the first track */
- if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)) {
+ if ((_floppy->rate & FD_2M) && (!raw_cmd->cmd[TRACK]) && (!raw_cmd->cmd[HEAD])) {
max_sector = 2 * _floppy->sect / 3;
if (fsector_t >= max_sector) {
current_count_sectors =
@@ -2575,23 +2609,24 @@ static int make_raw_rw_request(void)
blk_rq_sectors(current_req));
return 1;
}
- SIZECODE = 2;
+ raw_cmd->cmd[SIZECODE] = 2;
} else
- SIZECODE = FD_SIZECODE(_floppy);
+ raw_cmd->cmd[SIZECODE] = FD_SIZECODE(_floppy);
raw_cmd->rate = _floppy->rate & 0x43;
- if ((_floppy->rate & FD_2M) && (TRACK || HEAD) && raw_cmd->rate == 2)
+ if ((_floppy->rate & FD_2M) &&
+ (raw_cmd->cmd[TRACK] || raw_cmd->cmd[HEAD]) && raw_cmd->rate == 2)
raw_cmd->rate = 1;
- if (SIZECODE)
- SIZECODE2 = 0xff;
+ if (raw_cmd->cmd[SIZECODE])
+ raw_cmd->cmd[SIZECODE2] = 0xff;
else
- SIZECODE2 = 0x80;
- raw_cmd->track = TRACK << STRETCH(_floppy);
- DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, HEAD);
- GAP = _floppy->gap;
- ssize = DIV_ROUND_UP(1 << SIZECODE, 4);
- SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE;
- SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) +
+ raw_cmd->cmd[SIZECODE2] = 0x80;
+ raw_cmd->track = raw_cmd->cmd[TRACK] << STRETCH(_floppy);
+ raw_cmd->cmd[DR_SELECT] = UNIT(current_drive) + PH_HEAD(_floppy, raw_cmd->cmd[HEAD]);
+ raw_cmd->cmd[GAP] = _floppy->gap;
+ ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4);
+ raw_cmd->cmd[SECT_PER_TRACK] = _floppy->sect << 2 >> raw_cmd->cmd[SIZECODE];
+ raw_cmd->cmd[SECTOR] = ((fsector_t % _floppy->sect) << 2 >> raw_cmd->cmd[SIZECODE]) +
FD_SECTBASE(_floppy);
/* tracksize describes the size which can be filled up with sectors
@@ -2599,24 +2634,24 @@ static int make_raw_rw_request(void)
*/
tracksize = _floppy->sect - _floppy->sect % ssize;
if (tracksize < _floppy->sect) {
- SECT_PER_TRACK++;
+ raw_cmd->cmd[SECT_PER_TRACK]++;
if (tracksize <= fsector_t % _floppy->sect)
- SECTOR--;
+ raw_cmd->cmd[SECTOR]--;
/* if we are beyond tracksize, fill up using smaller sectors */
while (tracksize <= fsector_t % _floppy->sect) {
while (tracksize + ssize > _floppy->sect) {
- SIZECODE--;
+ raw_cmd->cmd[SIZECODE]--;
ssize >>= 1;
}
- SECTOR++;
- SECT_PER_TRACK++;
+ raw_cmd->cmd[SECTOR]++;
+ raw_cmd->cmd[SECT_PER_TRACK]++;
tracksize += ssize;
}
- max_sector = HEAD * _floppy->sect + tracksize;
- } else if (!TRACK && !HEAD && !(_floppy->rate & FD_2M) && probing) {
+ max_sector = raw_cmd->cmd[HEAD] * _floppy->sect + tracksize;
+ } else if (!raw_cmd->cmd[TRACK] && !raw_cmd->cmd[HEAD] && !(_floppy->rate & FD_2M) && probing) {
max_sector = _floppy->sect;
- } else if (!HEAD && CT(COMMAND) == FD_WRITE) {
+ } else if (!raw_cmd->cmd[HEAD] && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
/* for virtual DMA bug workaround */
max_sector = _floppy->sect;
}
@@ -2628,12 +2663,12 @@ static int make_raw_rw_request(void)
(current_drive == buffer_drive) &&
(fsector_t >= buffer_min) && (fsector_t < buffer_max)) {
/* data already in track buffer */
- if (CT(COMMAND) == FD_READ) {
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) {
copy_buffer(1, max_sector, buffer_max);
return 1;
}
} else if (in_sector_offset || blk_rq_sectors(current_req) < ssize) {
- if (CT(COMMAND) == FD_WRITE) {
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
unsigned int sectors;
sectors = fsector_t + blk_rq_sectors(current_req);
@@ -2644,7 +2679,7 @@ static int make_raw_rw_request(void)
}
raw_cmd->flags &= ~FD_RAW_WRITE;
raw_cmd->flags |= FD_RAW_READ;
- COMMAND = FM_MODE(_floppy, FD_READ);
+ raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_READ);
} else if ((unsigned long)bio_data(current_req->bio) < MAX_DMA_ADDRESS) {
unsigned long dma_limit;
int direct, indirect;
@@ -2677,9 +2712,9 @@ static int make_raw_rw_request(void)
*/
if (!direct ||
(indirect * 2 > direct * 3 &&
- *errors < DP->max_errors.read_track &&
+ *errors < drive_params[current_drive].max_errors.read_track &&
((!probing ||
- (DP->read_track & (1 << DRS->probed_format)))))) {
+ (drive_params[current_drive].read_track & (1 << drive_state[current_drive].probed_format)))))) {
max_size = blk_rq_sectors(current_req);
} else {
raw_cmd->kernel_data = bio_data(current_req->bio);
@@ -2695,7 +2730,7 @@ static int make_raw_rw_request(void)
}
}
- if (CT(COMMAND) == FD_READ)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
max_size = max_sector; /* unbounded */
/* claim buffer track if needed */
@@ -2703,7 +2738,7 @@ static int make_raw_rw_request(void)
buffer_drive != current_drive || /* bad drive */
fsector_t > buffer_max ||
fsector_t < buffer_min ||
- ((CT(COMMAND) == FD_READ ||
+ ((CT(raw_cmd->cmd[COMMAND]) == FD_READ ||
(!in_sector_offset && blk_rq_sectors(current_req) >= ssize)) &&
max_sector > 2 * max_buffer_sectors + buffer_min &&
max_size + fsector_t > 2 * max_buffer_sectors + buffer_min)) {
@@ -2715,7 +2750,7 @@ static int make_raw_rw_request(void)
raw_cmd->kernel_data = floppy_track_buffer +
((aligned_sector_t - buffer_min) << 9);
- if (CT(COMMAND) == FD_WRITE) {
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) {
/* copy write buffer to track buffer.
* if we get here, we know that the write
* is either aligned or the data already in the buffer
@@ -2737,10 +2772,10 @@ static int make_raw_rw_request(void)
raw_cmd->length <<= 9;
if ((raw_cmd->length < current_count_sectors << 9) ||
(raw_cmd->kernel_data != bio_data(current_req->bio) &&
- CT(COMMAND) == FD_WRITE &&
+ CT(raw_cmd->cmd[COMMAND]) == FD_WRITE &&
(aligned_sector_t + (raw_cmd->length >> 9) > buffer_max ||
aligned_sector_t < buffer_min)) ||
- raw_cmd->length % (128 << SIZECODE) ||
+ raw_cmd->length % (128 << raw_cmd->cmd[SIZECODE]) ||
raw_cmd->length <= 0 || current_count_sectors <= 0) {
DPRINT("fractionary current count b=%lx s=%lx\n",
raw_cmd->length, current_count_sectors);
@@ -2751,9 +2786,10 @@ static int make_raw_rw_request(void)
current_count_sectors);
pr_info("st=%d ast=%d mse=%d msi=%d\n",
fsector_t, aligned_sector_t, max_sector, max_size);
- pr_info("ssize=%x SIZECODE=%d\n", ssize, SIZECODE);
+ pr_info("ssize=%x SIZECODE=%d\n", ssize, raw_cmd->cmd[SIZECODE]);
pr_info("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n",
- COMMAND, SECTOR, HEAD, TRACK);
+ raw_cmd->cmd[COMMAND], raw_cmd->cmd[SECTOR],
+ raw_cmd->cmd[HEAD], raw_cmd->cmd[TRACK]);
pr_info("buffer drive=%d\n", buffer_drive);
pr_info("buffer track=%d\n", buffer_track);
pr_info("buffer_min=%d\n", buffer_min);
@@ -2772,9 +2808,9 @@ static int make_raw_rw_request(void)
fsector_t, buffer_min, raw_cmd->length >> 9);
pr_info("current_count_sectors=%ld\n",
current_count_sectors);
- if (CT(COMMAND) == FD_READ)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
pr_info("read\n");
- if (CT(COMMAND) == FD_WRITE)
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE)
pr_info("write\n");
return 0;
}
@@ -2841,14 +2877,14 @@ do_request:
disk_change(current_drive);
if (test_bit(current_drive, &fake_change) ||
- test_bit(FD_DISK_CHANGED_BIT, &DRS->flags)) {
+ test_bit(FD_DISK_CHANGED_BIT, &drive_state[current_drive].flags)) {
DPRINT("disk absent or changed during operation\n");
request_done(0);
goto do_request;
}
if (!_floppy) { /* Autodetection */
if (!probing) {
- DRS->probed_format = 0;
+ drive_state[current_drive].probed_format = 0;
if (next_valid_format()) {
DPRINT("no autodetectable formats\n");
_floppy = NULL;
@@ -2857,7 +2893,7 @@ do_request:
}
}
probing = 1;
- _floppy = floppy_type + DP->autodetect[DRS->probed_format];
+ _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format];
} else
probing = 0;
errors = &(current_req->error_count);
@@ -2867,7 +2903,7 @@ do_request:
goto do_request;
}
- if (test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags))
+ if (test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags))
twaddle();
schedule_bh(floppy_start);
debugt(__func__, "queue fd request");
@@ -2936,8 +2972,9 @@ static int poll_drive(bool interruptible, int flag)
raw_cmd->track = 0;
raw_cmd->cmd_count = 0;
cont = &poll_cont;
- debug_dcl(DP->flags, "setting NEWCHANGE in poll_drive\n");
- set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+ debug_dcl(drive_params[current_drive].flags,
+ "setting NEWCHANGE in poll_drive\n");
+ set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags);
return wait_til_done(floppy_ready, interruptible);
}
@@ -2967,8 +3004,8 @@ static int user_reset_fdc(int drive, int arg, bool interruptible)
return -EINTR;
if (arg == FD_RESET_ALWAYS)
- FDCS->reset = 1;
- if (FDCS->reset) {
+ fdc_state[current_fdc].reset = 1;
+ if (fdc_state[current_fdc].reset) {
cont = &reset_cont;
ret = wait_til_done(reset_fdc, interruptible);
if (ret == -EINTR)
@@ -3001,8 +3038,8 @@ static const char *drive_name(int type, int drive)
if (type)
floppy = floppy_type + type;
else {
- if (UDP->native_format)
- floppy = floppy_type + UDP->native_format;
+ if (drive_params[drive].native_format)
+ floppy = floppy_type + drive_params[drive].native_format;
else
return "(null)";
}
@@ -3179,23 +3216,23 @@ static int raw_cmd_ioctl(int cmd, void __user *param)
int ret2;
int ret;
- if (FDCS->rawcmd <= 1)
- FDCS->rawcmd = 1;
+ if (fdc_state[current_fdc].rawcmd <= 1)
+ fdc_state[current_fdc].rawcmd = 1;
for (drive = 0; drive < N_DRIVE; drive++) {
- if (FDC(drive) != fdc)
+ if (FDC(drive) != current_fdc)
continue;
if (drive == current_drive) {
- if (UDRS->fd_ref > 1) {
- FDCS->rawcmd = 2;
+ if (drive_state[drive].fd_ref > 1) {
+ fdc_state[current_fdc].rawcmd = 2;
break;
}
- } else if (UDRS->fd_ref) {
- FDCS->rawcmd = 2;
+ } else if (drive_state[drive].fd_ref) {
+ fdc_state[current_fdc].rawcmd = 2;
break;
}
}
- if (FDCS->reset)
+ if (fdc_state[current_fdc].reset)
return -EIO;
ret = raw_cmd_copyin(cmd, param, &my_raw_cmd);
@@ -3207,12 +3244,13 @@ static int raw_cmd_ioctl(int cmd, void __user *param)
raw_cmd = my_raw_cmd;
cont = &raw_cmd_cont;
ret = wait_til_done(floppy_start, true);
- debug_dcl(DP->flags, "calling disk change from raw_cmd ioctl\n");
+ debug_dcl(drive_params[current_drive].flags,
+ "calling disk change from raw_cmd ioctl\n");
- if (ret != -EINTR && FDCS->reset)
+ if (ret != -EINTR && fdc_state[current_fdc].reset)
ret = -EIO;
- DRS->track = NO_TRACK;
+ drive_state[current_drive].track = NO_TRACK;
ret2 = raw_cmd_copyout(cmd, param, my_raw_cmd);
if (!ret)
@@ -3240,9 +3278,9 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
(int)g->head <= 0 ||
/* check for overflow in max_sector */
(int)(g->sect * g->head) <= 0 ||
- /* check for zero in F_SECT_PER_TRACK */
+ /* check for zero in raw_cmd->cmd[F_SECT_PER_TRACK] */
(unsigned char)((g->sect << 2) >> FD_SIZECODE(g)) == 0 ||
- g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) ||
+ g->track <= 0 || g->track > drive_params[drive].tracks >> STRETCH(g) ||
/* check if reserved bits are set */
(g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0)
return -EINVAL;
@@ -3285,16 +3323,16 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
current_type[drive] = &user_params[drive];
floppy_sizes[drive] = user_params[drive].size;
if (cmd == FDDEFPRM)
- DRS->keep_data = -1;
+ drive_state[current_drive].keep_data = -1;
else
- DRS->keep_data = 1;
+ drive_state[current_drive].keep_data = 1;
/* invalidation. Invalidate only when needed, i.e.
* when there are already sectors in the buffer cache
* whose number will change. This is useful, because
* mtools often changes the geometry of the disk after
* looking at the boot block */
- if (DRS->maxblock > user_params[drive].sect ||
- DRS->maxtrack ||
+ if (drive_state[current_drive].maxblock > user_params[drive].sect ||
+ drive_state[current_drive].maxtrack ||
((user_params[drive].sect ^ oldStretch) &
(FD_SWAPSIDES | FD_SECTBASEMASK)))
invalidate_drive(bdev);
@@ -3407,7 +3445,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
unsigned long param)
{
int drive = (long)bdev->bd_disk->private_data;
- int type = ITYPE(UDRS->fd_device);
+ int type = ITYPE(drive_state[drive].fd_device);
int i;
int ret;
int size;
@@ -3455,7 +3493,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
switch (cmd) {
case FDEJECT:
- if (UDRS->fd_ref != 1)
+ if (drive_state[drive].fd_ref != 1)
/* somebody else has this drive open */
return -EBUSY;
if (lock_fdc(drive))
@@ -3465,8 +3503,8 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
* non-Sparc architectures */
ret = fd_eject(UNIT(drive));
- set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
- set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags);
+ set_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
process_fd_request();
return ret;
case FDCLRPRM:
@@ -3474,7 +3512,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
return -EINTR;
current_type[drive] = NULL;
floppy_sizes[drive] = MAX_DISK_SIZE << 1;
- UDRS->keep_data = 0;
+ drive_state[drive].keep_data = 0;
return invalidate_drive(bdev);
case FDSETPRM:
case FDDEFPRM:
@@ -3489,17 +3527,17 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
outparam = &inparam.g;
break;
case FDMSGON:
- UDP->flags |= FTD_MSG;
+ drive_params[drive].flags |= FTD_MSG;
return 0;
case FDMSGOFF:
- UDP->flags &= ~FTD_MSG;
+ drive_params[drive].flags &= ~FTD_MSG;
return 0;
case FDFMTBEG:
if (lock_fdc(drive))
return -EINTR;
if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
return -EINTR;
- ret = UDRS->flags;
+ ret = drive_state[drive].flags;
process_fd_request();
if (ret & FD_VERIFY)
return -ENODEV;
@@ -3507,7 +3545,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
return -EROFS;
return 0;
case FDFMTTRK:
- if (UDRS->fd_ref != 1)
+ if (drive_state[drive].fd_ref != 1)
return -EBUSY;
return do_format(drive, &inparam.f);
case FDFMTEND:
@@ -3516,13 +3554,13 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
return -EINTR;
return invalidate_drive(bdev);
case FDSETEMSGTRESH:
- UDP->max_errors.reporting = (unsigned short)(param & 0x0f);
+ drive_params[drive].max_errors.reporting = (unsigned short)(param & 0x0f);
return 0;
case FDGETMAXERRS:
- outparam = &UDP->max_errors;
+ outparam = &drive_params[drive].max_errors;
break;
case FDSETMAXERRS:
- UDP->max_errors = inparam.max_errors;
+ drive_params[drive].max_errors = inparam.max_errors;
break;
case FDGETDRVTYP:
outparam = drive_name(type, drive);
@@ -3532,10 +3570,10 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
if (!valid_floppy_drive_params(inparam.dp.autodetect,
inparam.dp.native_format))
return -EINVAL;
- *UDP = inparam.dp;
+ drive_params[drive] = inparam.dp;
break;
case FDGETDRVPRM:
- outparam = UDP;
+ outparam = &drive_params[drive];
break;
case FDPOLLDRVSTAT:
if (lock_fdc(drive))
@@ -3545,18 +3583,18 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
process_fd_request();
/* fall through */
case FDGETDRVSTAT:
- outparam = UDRS;
+ outparam = &drive_state[drive];
break;
case FDRESET:
return user_reset_fdc(drive, (int)param, true);
case FDGETFDCSTAT:
- outparam = UFDCS;
+ outparam = &fdc_state[FDC(drive)];
break;
case FDWERRORCLR:
- memset(UDRWE, 0, sizeof(*UDRWE));
+ memset(&write_errors[drive], 0, sizeof(write_errors[drive]));
return 0;
case FDWERRORGET:
- outparam = UDRWE;
+ outparam = &write_errors[drive];
break;
case FDRAWCMD:
if (type)
@@ -3692,7 +3730,7 @@ static int compat_set_geometry(struct block_device *bdev, fmode_t mode, unsigned
mutex_lock(&floppy_mutex);
drive = (long)bdev->bd_disk->private_data;
- type = ITYPE(UDRS->fd_device);
+ type = ITYPE(drive_state[drive].fd_device);
err = set_geometry(cmd == FDSETPRM32 ? FDSETPRM : FDDEFPRM,
&v, drive, type, bdev);
mutex_unlock(&floppy_mutex);
@@ -3708,7 +3746,8 @@ static int compat_get_prm(int drive,
memset(&v, 0, sizeof(v));
mutex_lock(&floppy_mutex);
- err = get_floppy_geometry(drive, ITYPE(UDRS->fd_device), &p);
+ err = get_floppy_geometry(drive, ITYPE(drive_state[drive].fd_device),
+ &p);
if (err) {
mutex_unlock(&floppy_mutex);
return err;
@@ -3732,25 +3771,26 @@ static int compat_setdrvprm(int drive,
if (!valid_floppy_drive_params(v.autodetect, v.native_format))
return -EINVAL;
mutex_lock(&floppy_mutex);
- UDP->cmos = v.cmos;
- UDP->max_dtr = v.max_dtr;
- UDP->hlt = v.hlt;
- UDP->hut = v.hut;
- UDP->srt = v.srt;
- UDP->spinup = v.spinup;
- UDP->spindown = v.spindown;
- UDP->spindown_offset = v.spindown_offset;
- UDP->select_delay = v.select_delay;
- UDP->rps = v.rps;
- UDP->tracks = v.tracks;
- UDP->timeout = v.timeout;
- UDP->interleave_sect = v.interleave_sect;
- UDP->max_errors = v.max_errors;
- UDP->flags = v.flags;
- UDP->read_track = v.read_track;
- memcpy(UDP->autodetect, v.autodetect, sizeof(v.autodetect));
- UDP->checkfreq = v.checkfreq;
- UDP->native_format = v.native_format;
+ drive_params[drive].cmos = v.cmos;
+ drive_params[drive].max_dtr = v.max_dtr;
+ drive_params[drive].hlt = v.hlt;
+ drive_params[drive].hut = v.hut;
+ drive_params[drive].srt = v.srt;
+ drive_params[drive].spinup = v.spinup;
+ drive_params[drive].spindown = v.spindown;
+ drive_params[drive].spindown_offset = v.spindown_offset;
+ drive_params[drive].select_delay = v.select_delay;
+ drive_params[drive].rps = v.rps;
+ drive_params[drive].tracks = v.tracks;
+ drive_params[drive].timeout = v.timeout;
+ drive_params[drive].interleave_sect = v.interleave_sect;
+ drive_params[drive].max_errors = v.max_errors;
+ drive_params[drive].flags = v.flags;
+ drive_params[drive].read_track = v.read_track;
+ memcpy(drive_params[drive].autodetect, v.autodetect,
+ sizeof(v.autodetect));
+ drive_params[drive].checkfreq = v.checkfreq;
+ drive_params[drive].native_format = v.native_format;
mutex_unlock(&floppy_mutex);
return 0;
}
@@ -3762,25 +3802,26 @@ static int compat_getdrvprm(int drive,
memset(&v, 0, sizeof(struct compat_floppy_drive_params));
mutex_lock(&floppy_mutex);
- v.cmos = UDP->cmos;
- v.max_dtr = UDP->max_dtr;
- v.hlt = UDP->hlt;
- v.hut = UDP->hut;
- v.srt = UDP->srt;
- v.spinup = UDP->spinup;
- v.spindown = UDP->spindown;
- v.spindown_offset = UDP->spindown_offset;
- v.select_delay = UDP->select_delay;
- v.rps = UDP->rps;
- v.tracks = UDP->tracks;
- v.timeout = UDP->timeout;
- v.interleave_sect = UDP->interleave_sect;
- v.max_errors = UDP->max_errors;
- v.flags = UDP->flags;
- v.read_track = UDP->read_track;
- memcpy(v.autodetect, UDP->autodetect, sizeof(v.autodetect));
- v.checkfreq = UDP->checkfreq;
- v.native_format = UDP->native_format;
+ v.cmos = drive_params[drive].cmos;
+ v.max_dtr = drive_params[drive].max_dtr;
+ v.hlt = drive_params[drive].hlt;
+ v.hut = drive_params[drive].hut;
+ v.srt = drive_params[drive].srt;
+ v.spinup = drive_params[drive].spinup;
+ v.spindown = drive_params[drive].spindown;
+ v.spindown_offset = drive_params[drive].spindown_offset;
+ v.select_delay = drive_params[drive].select_delay;
+ v.rps = drive_params[drive].rps;
+ v.tracks = drive_params[drive].tracks;
+ v.timeout = drive_params[drive].timeout;
+ v.interleave_sect = drive_params[drive].interleave_sect;
+ v.max_errors = drive_params[drive].max_errors;
+ v.flags = drive_params[drive].flags;
+ v.read_track = drive_params[drive].read_track;
+ memcpy(v.autodetect, drive_params[drive].autodetect,
+ sizeof(v.autodetect));
+ v.checkfreq = drive_params[drive].checkfreq;
+ v.native_format = drive_params[drive].native_format;
mutex_unlock(&floppy_mutex);
if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_params)))
@@ -3803,20 +3844,20 @@ static int compat_getdrvstat(int drive, bool poll,
goto Eintr;
process_fd_request();
}
- v.spinup_date = UDRS->spinup_date;
- v.select_date = UDRS->select_date;
- v.first_read_date = UDRS->first_read_date;
- v.probed_format = UDRS->probed_format;
- v.track = UDRS->track;
- v.maxblock = UDRS->maxblock;
- v.maxtrack = UDRS->maxtrack;
- v.generation = UDRS->generation;
- v.keep_data = UDRS->keep_data;
- v.fd_ref = UDRS->fd_ref;
- v.fd_device = UDRS->fd_device;
- v.last_checked = UDRS->last_checked;
- v.dmabuf = (uintptr_t)UDRS->dmabuf;
- v.bufblocks = UDRS->bufblocks;
+ v.spinup_date = drive_state[drive].spinup_date;
+ v.select_date = drive_state[drive].select_date;
+ v.first_read_date = drive_state[drive].first_read_date;
+ v.probed_format = drive_state[drive].probed_format;
+ v.track = drive_state[drive].track;
+ v.maxblock = drive_state[drive].maxblock;
+ v.maxtrack = drive_state[drive].maxtrack;
+ v.generation = drive_state[drive].generation;
+ v.keep_data = drive_state[drive].keep_data;
+ v.fd_ref = drive_state[drive].fd_ref;
+ v.fd_device = drive_state[drive].fd_device;
+ v.last_checked = drive_state[drive].last_checked;
+ v.dmabuf = (uintptr_t) drive_state[drive].dmabuf;
+ v.bufblocks = drive_state[drive].bufblocks;
mutex_unlock(&floppy_mutex);
if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_struct)))
@@ -3834,7 +3875,7 @@ static int compat_getfdcstat(int drive,
struct floppy_fdc_state v;
mutex_lock(&floppy_mutex);
- v = *UFDCS;
+ v = fdc_state[FDC(drive)];
mutex_unlock(&floppy_mutex);
memset(&v32, 0, sizeof(struct compat_floppy_fdc_state));
@@ -3864,7 +3905,7 @@ static int compat_werrorget(int drive,
memset(&v32, 0, sizeof(struct compat_floppy_write_errors));
mutex_lock(&floppy_mutex);
- v = *UDRWE;
+ v = write_errors[drive];
mutex_unlock(&floppy_mutex);
v32.write_errors = v.write_errors;
v32.first_error_sector = v.first_error_sector;
@@ -3933,16 +3974,16 @@ static void __init config_types(void)
/* read drive info out of physical CMOS */
drive = 0;
- if (!UDP->cmos)
- UDP->cmos = FLOPPY0_TYPE;
+ if (!drive_params[drive].cmos)
+ drive_params[drive].cmos = FLOPPY0_TYPE;
drive = 1;
- if (!UDP->cmos)
- UDP->cmos = FLOPPY1_TYPE;
+ if (!drive_params[drive].cmos)
+ drive_params[drive].cmos = FLOPPY1_TYPE;
/* FIXME: additional physical CMOS drive detection should go here */
for (drive = 0; drive < N_DRIVE; drive++) {
- unsigned int type = UDP->cmos;
+ unsigned int type = drive_params[drive].cmos;
struct floppy_drive_params *params;
const char *name = NULL;
char temparea[32];
@@ -3972,7 +4013,7 @@ static void __init config_types(void)
pr_cont("%s fd%d is %s", prepend, drive, name);
}
- *UDP = *params;
+ drive_params[drive] = *params;
}
if (has_drive)
@@ -3985,11 +4026,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode)
mutex_lock(&floppy_mutex);
mutex_lock(&open_lock);
- if (!UDRS->fd_ref--) {
+ if (!drive_state[drive].fd_ref--) {
DPRINT("floppy_release with fd_ref == 0");
- UDRS->fd_ref = 0;
+ drive_state[drive].fd_ref = 0;
}
- if (!UDRS->fd_ref)
+ if (!drive_state[drive].fd_ref)
opened_bdev[drive] = NULL;
mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
@@ -4010,16 +4051,16 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
mutex_lock(&floppy_mutex);
mutex_lock(&open_lock);
- old_dev = UDRS->fd_device;
+ old_dev = drive_state[drive].fd_device;
if (opened_bdev[drive] && opened_bdev[drive] != bdev)
goto out2;
- if (!UDRS->fd_ref && (UDP->flags & FD_BROKEN_DCL)) {
- set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
- set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ if (!drive_state[drive].fd_ref && (drive_params[drive].flags & FD_BROKEN_DCL)) {
+ set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags);
+ set_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
}
- UDRS->fd_ref++;
+ drive_state[drive].fd_ref++;
opened_bdev[drive] = bdev;
@@ -4028,7 +4069,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (!floppy_track_buffer) {
/* if opening an ED drive, reserve a big buffer,
* else reserve a small one */
- if ((UDP->cmos == 6) || (UDP->cmos == 5))
+ if ((drive_params[drive].cmos == 6) || (drive_params[drive].cmos == 5))
try = 64; /* Only 48 actually useful */
else
try = 32; /* Only 24 actually useful */
@@ -4056,38 +4097,39 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
}
new_dev = MINOR(bdev->bd_dev);
- UDRS->fd_device = new_dev;
+ drive_state[drive].fd_device = new_dev;
set_capacity(disks[drive], floppy_sizes[new_dev]);
if (old_dev != -1 && old_dev != new_dev) {
if (buffer_drive == drive)
buffer_track = -1;
}
- if (UFDCS->rawcmd == 1)
- UFDCS->rawcmd = 2;
+ if (fdc_state[FDC(drive)].rawcmd == 1)
+ fdc_state[FDC(drive)].rawcmd = 2;
if (!(mode & FMODE_NDELAY)) {
if (mode & (FMODE_READ|FMODE_WRITE)) {
- UDRS->last_checked = 0;
- clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+ drive_state[drive].last_checked = 0;
+ clear_bit(FD_OPEN_SHOULD_FAIL_BIT,
+ &drive_state[drive].flags);
check_disk_change(bdev);
- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
goto out;
- if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+ if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
goto out;
}
res = -EROFS;
if ((mode & FMODE_WRITE) &&
- !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
+ !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
goto out;
}
mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
return 0;
out:
- UDRS->fd_ref--;
+ drive_state[drive].fd_ref--;
- if (!UDRS->fd_ref)
+ if (!drive_state[drive].fd_ref)
opened_bdev[drive] = NULL;
out2:
mutex_unlock(&open_lock);
@@ -4103,19 +4145,19 @@ static unsigned int floppy_check_events(struct gendisk *disk,
{
int drive = (long)disk->private_data;
- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
- test_bit(FD_VERIFY_BIT, &UDRS->flags))
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) ||
+ test_bit(FD_VERIFY_BIT, &drive_state[drive].flags))
return DISK_EVENT_MEDIA_CHANGE;
- if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
+ if (time_after(jiffies, drive_state[drive].last_checked + drive_params[drive].checkfreq)) {
if (lock_fdc(drive))
return 0;
poll_drive(false, 0);
process_fd_request();
}
- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
- test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) ||
+ test_bit(FD_VERIFY_BIT, &drive_state[drive].flags) ||
test_bit(drive, &fake_change) ||
drive_no_geom(drive))
return DISK_EVENT_MEDIA_CHANGE;
@@ -4141,7 +4183,7 @@ static void floppy_rb0_cb(struct bio *bio)
if (bio->bi_status) {
pr_info("floppy: error %d while reading block 0\n",
bio->bi_status);
- set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+ set_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags);
}
complete(&cbdata->complete);
}
@@ -4198,8 +4240,8 @@ static int floppy_revalidate(struct gendisk *disk)
int cf;
int res = 0;
- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
- test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) ||
+ test_bit(FD_VERIFY_BIT, &drive_state[drive].flags) ||
test_bit(drive, &fake_change) ||
drive_no_geom(drive)) {
if (WARN(atomic_read(&usage_count) == 0,
@@ -4209,20 +4251,20 @@ static int floppy_revalidate(struct gendisk *disk)
res = lock_fdc(drive);
if (res)
return res;
- cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
- test_bit(FD_VERIFY_BIT, &UDRS->flags));
+ cf = (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) ||
+ test_bit(FD_VERIFY_BIT, &drive_state[drive].flags));
if (!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) {
process_fd_request(); /*already done by another thread */
return 0;
}
- UDRS->maxblock = 0;
- UDRS->maxtrack = 0;
+ drive_state[drive].maxblock = 0;
+ drive_state[drive].maxtrack = 0;
if (buffer_drive == drive)
buffer_track = -1;
clear_bit(drive, &fake_change);
- clear_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
+ clear_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags);
if (cf)
- UDRS->generation++;
+ drive_state[drive].generation++;
if (drive_no_geom(drive)) {
/* auto-sensing */
res = __floppy_read_block_0(opened_bdev[drive], drive);
@@ -4232,7 +4274,7 @@ static int floppy_revalidate(struct gendisk *disk)
process_fd_request();
}
}
- set_capacity(disk, floppy_sizes[UDRS->fd_device]);
+ set_capacity(disk, floppy_sizes[drive_state[drive].fd_device]);
return res;
}
@@ -4261,23 +4303,23 @@ static char __init get_fdc_version(void)
int r;
output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */
- if (FDCS->reset)
+ if (fdc_state[current_fdc].reset)
return FDC_NONE;
r = result();
if (r <= 0x00)
return FDC_NONE; /* No FDC present ??? */
if ((r == 1) && (reply_buffer[0] == 0x80)) {
- pr_info("FDC %d is an 8272A\n", fdc);
+ pr_info("FDC %d is an 8272A\n", current_fdc);
return FDC_8272A; /* 8272a/765 don't know DUMPREGS */
}
if (r != 10) {
pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n",
- fdc, r);
+ current_fdc, r);
return FDC_UNKNOWN;
}
if (!fdc_configure()) {
- pr_info("FDC %d is an 82072\n", fdc);
+ pr_info("FDC %d is an 82072\n", current_fdc);
return FDC_82072; /* 82072 doesn't know CONFIGURE */
}
@@ -4285,50 +4327,50 @@ static char __init get_fdc_version(void)
if (need_more_output() == MORE_OUTPUT) {
output_byte(0);
} else {
- pr_info("FDC %d is an 82072A\n", fdc);
+ pr_info("FDC %d is an 82072A\n", current_fdc);
return FDC_82072A; /* 82072A as found on Sparcs. */
}
output_byte(FD_UNLOCK);
r = result();
if ((r == 1) && (reply_buffer[0] == 0x80)) {
- pr_info("FDC %d is a pre-1991 82077\n", fdc);
+ pr_info("FDC %d is a pre-1991 82077\n", current_fdc);
return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know
* LOCK/UNLOCK */
}
if ((r != 1) || (reply_buffer[0] != 0x00)) {
pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n",
- fdc, r);
+ current_fdc, r);
return FDC_UNKNOWN;
}
output_byte(FD_PARTID);
r = result();
if (r != 1) {
pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n",
- fdc, r);
+ current_fdc, r);
return FDC_UNKNOWN;
}
if (reply_buffer[0] == 0x80) {
- pr_info("FDC %d is a post-1991 82077\n", fdc);
+ pr_info("FDC %d is a post-1991 82077\n", current_fdc);
return FDC_82077; /* Revised 82077AA passes all the tests */
}
switch (reply_buffer[0] >> 5) {
case 0x0:
/* Either a 82078-1 or a 82078SL running at 5Volt */
- pr_info("FDC %d is an 82078.\n", fdc);
+ pr_info("FDC %d is an 82078.\n", current_fdc);
return FDC_82078;
case 0x1:
- pr_info("FDC %d is a 44pin 82078\n", fdc);
+ pr_info("FDC %d is a 44pin 82078\n", current_fdc);
return FDC_82078;
case 0x2:
- pr_info("FDC %d is a S82078B\n", fdc);
+ pr_info("FDC %d is a S82078B\n", current_fdc);
return FDC_S82078B;
case 0x3:
- pr_info("FDC %d is a National Semiconductor PC87306\n", fdc);
+ pr_info("FDC %d is a National Semiconductor PC87306\n", current_fdc);
return FDC_87306;
default:
pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n",
- fdc, reply_buffer[0] >> 5);
+ current_fdc, reply_buffer[0] >> 5);
return FDC_82078_UNKN;
}
} /* get_fdc_version */
@@ -4384,7 +4426,7 @@ static void __init set_cmos(int *ints, int dummy, int dummy2)
if (current_drive >= 4 && !FDC2)
FDC2 = 0x370;
#endif
- DP->cmos = ints[2];
+ drive_params[current_drive].cmos = ints[2];
DPRINT("setting CMOS code to %d\n", ints[2]);
}
@@ -4473,7 +4515,7 @@ static ssize_t floppy_cmos_show(struct device *dev,
int drive;
drive = p->id;
- return sprintf(buf, "%X\n", UDP->cmos);
+ return sprintf(buf, "%X\n", drive_params[drive].cmos);
}
static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL);
@@ -4494,7 +4536,7 @@ static int floppy_resume(struct device *dev)
int fdc;
for (fdc = 0; fdc < N_FDC; fdc++)
- if (FDCS->address != -1)
+ if (fdc_state[fdc].address != -1)
user_reset_fdc(-1, FD_RESET_ALWAYS, false);
return 0;
@@ -4604,16 +4646,16 @@ static int __init do_floppy_init(void)
config_types();
for (i = 0; i < N_FDC; i++) {
- fdc = i;
- memset(FDCS, 0, sizeof(*FDCS));
- FDCS->dtr = -1;
- FDCS->dor = 0x4;
+ current_fdc = i;
+ memset(&fdc_state[current_fdc], 0, sizeof(*fdc_state));
+ fdc_state[current_fdc].dtr = -1;
+ fdc_state[current_fdc].dor = 0x4;
#if defined(__sparc__) || defined(__mc68000__)
/*sparcs/sun3x don't have a DOR reset which we can fall back on to */
#ifdef __mc68000__
if (MACH_IS_SUN3X)
#endif
- FDCS->version = FDC_82072A;
+ fdc_state[current_fdc].version = FDC_82072A;
#endif
}
@@ -4628,7 +4670,7 @@ static int __init do_floppy_init(void)
fdc_state[1].address = FDC2;
#endif
- fdc = 0; /* reset fdc in case of unexpected interrupt */
+ current_fdc = 0; /* reset fdc in case of unexpected interrupt */
err = floppy_grab_irq_and_dma();
if (err) {
cancel_delayed_work(&fd_timeout);
@@ -4638,12 +4680,12 @@ static int __init do_floppy_init(void)
/* initialise drive state */
for (drive = 0; drive < N_DRIVE; drive++) {
- memset(UDRS, 0, sizeof(*UDRS));
- memset(UDRWE, 0, sizeof(*UDRWE));
- set_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags);
- set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
- set_bit(FD_VERIFY_BIT, &UDRS->flags);
- UDRS->fd_device = -1;
+ memset(&drive_state[drive], 0, sizeof(drive_state[drive]));
+ memset(&write_errors[drive], 0, sizeof(write_errors[drive]));
+ set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[drive].flags);
+ set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags);
+ set_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
+ drive_state[drive].fd_device = -1;
floppy_track_buffer = NULL;
max_buffer_sectors = 0;
}
@@ -4655,29 +4697,30 @@ static int __init do_floppy_init(void)
msleep(10);
for (i = 0; i < N_FDC; i++) {
- fdc = i;
- FDCS->driver_version = FD_DRIVER_VERSION;
+ current_fdc = i;
+ fdc_state[current_fdc].driver_version = FD_DRIVER_VERSION;
for (unit = 0; unit < 4; unit++)
- FDCS->track[unit] = 0;
- if (FDCS->address == -1)
+ fdc_state[current_fdc].track[unit] = 0;
+ if (fdc_state[current_fdc].address == -1)
continue;
- FDCS->rawcmd = 2;
+ fdc_state[current_fdc].rawcmd = 2;
if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- floppy_release_regions(fdc);
- FDCS->address = -1;
- FDCS->version = FDC_NONE;
+ floppy_release_regions(current_fdc);
+ fdc_state[current_fdc].address = -1;
+ fdc_state[current_fdc].version = FDC_NONE;
continue;
}
/* Try to determine the floppy controller type */
- FDCS->version = get_fdc_version();
- if (FDCS->version == FDC_NONE) {
+ fdc_state[current_fdc].version = get_fdc_version();
+ if (fdc_state[current_fdc].version == FDC_NONE) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- floppy_release_regions(fdc);
- FDCS->address = -1;
+ floppy_release_regions(current_fdc);
+ fdc_state[current_fdc].address = -1;
continue;
}
- if (can_use_virtual_dma == 2 && FDCS->version < FDC_82072A)
+ if (can_use_virtual_dma == 2 &&
+ fdc_state[current_fdc].version < FDC_82072A)
can_use_virtual_dma = 0;
have_no_fdc = 0;
@@ -4687,7 +4730,7 @@ static int __init do_floppy_init(void)
*/
user_reset_fdc(-1, FD_RESET_ALWAYS, false);
}
- fdc = 0;
+ current_fdc = 0;
cancel_delayed_work(&fd_timeout);
current_drive = 0;
initialized = true;
@@ -4783,7 +4826,7 @@ static void floppy_release_allocated_regions(int fdc, const struct io_region *p)
{
while (p != io_regions) {
p--;
- release_region(FDCS->address + p->offset, p->size);
+ release_region(fdc_state[fdc].address + p->offset, p->size);
}
}
@@ -4794,10 +4837,10 @@ static int floppy_request_regions(int fdc)
const struct io_region *p;
for (p = io_regions; p < ARRAY_END(io_regions); p++) {
- if (!request_region(FDCS->address + p->offset,
+ if (!request_region(fdc_state[fdc].address + p->offset,
p->size, "floppy")) {
DPRINT("Floppy io-port 0x%04lx in use\n",
- FDCS->address + p->offset);
+ fdc_state[fdc].address + p->offset);
floppy_release_allocated_regions(fdc, p);
return -EBUSY;
}
@@ -4839,36 +4882,36 @@ static int floppy_grab_irq_and_dma(void)
}
}
- for (fdc = 0; fdc < N_FDC; fdc++) {
- if (FDCS->address != -1) {
- if (floppy_request_regions(fdc))
+ for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) {
+ if (fdc_state[current_fdc].address != -1) {
+ if (floppy_request_regions(current_fdc))
goto cleanup;
}
}
- for (fdc = 0; fdc < N_FDC; fdc++) {
- if (FDCS->address != -1) {
+ for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) {
+ if (fdc_state[current_fdc].address != -1) {
reset_fdc_info(1);
- fd_outb(FDCS->dor, FD_DOR);
+ fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
}
}
- fdc = 0;
+ current_fdc = 0;
set_dor(0, ~0, 8); /* avoid immediate interrupt */
- for (fdc = 0; fdc < N_FDC; fdc++)
- if (FDCS->address != -1)
- fd_outb(FDCS->dor, FD_DOR);
+ for (current_fdc = 0; current_fdc < N_FDC; current_fdc++)
+ if (fdc_state[current_fdc].address != -1)
+ fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
/*
* The driver will try and free resources and relies on us
* to know if they were allocated or not.
*/
- fdc = 0;
+ current_fdc = 0;
irqdma_allocated = 1;
return 0;
cleanup:
fd_free_irq();
fd_free_dma();
- while (--fdc >= 0)
- floppy_release_regions(fdc);
+ while (--current_fdc >= 0)
+ floppy_release_regions(current_fdc);
atomic_dec(&usage_count);
return -1;
}
@@ -4916,11 +4959,11 @@ static void floppy_release_irq_and_dma(void)
pr_info("auxiliary floppy timer still active\n");
if (work_pending(&floppy_work))
pr_info("work still pending\n");
- old_fdc = fdc;
- for (fdc = 0; fdc < N_FDC; fdc++)
- if (FDCS->address != -1)
- floppy_release_regions(fdc);
- fdc = old_fdc;
+ old_fdc = current_fdc;
+ for (current_fdc = 0; current_fdc < N_FDC; current_fdc++)
+ if (fdc_state[current_fdc].address != -1)
+ floppy_release_regions(current_fdc);
+ current_fdc = old_fdc;
}
#ifdef MODULE
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 739b372a5112..a42c49e04954 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -214,7 +214,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
* LO_FLAGS_READ_ONLY, both are set from kernel, and losetup
* will get updated by ioctl(LOOP_GET_STATUS)
*/
- blk_mq_freeze_queue(lo->lo_queue);
+ if (lo->lo_state == Lo_bound)
+ blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
if (use_dio) {
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
@@ -223,7 +224,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
}
- blk_mq_unfreeze_queue(lo->lo_queue);
+ if (lo->lo_state == Lo_bound)
+ blk_mq_unfreeze_queue(lo->lo_queue);
}
static int
@@ -1539,16 +1541,16 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
return -EINVAL;
- if (lo->lo_queue->limits.logical_block_size != arg) {
- sync_blockdev(lo->lo_device);
- kill_bdev(lo->lo_device);
- }
+ if (lo->lo_queue->limits.logical_block_size == arg)
+ return 0;
+
+ sync_blockdev(lo->lo_device);
+ kill_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
/* kill_bdev should have truncated all the pages */
- if (lo->lo_queue->limits.logical_block_size != arg &&
- lo->lo_device->bd_inode->i_mapping->nrpages) {
+ if (lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 78181908f0df..43cff01a5a67 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -395,16 +395,19 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
}
config = nbd->config;
- if (config->num_connections > 1) {
+ if (config->num_connections > 1 ||
+ (config->num_connections == 1 && nbd->tag_set.timeout)) {
dev_err_ratelimited(nbd_to_dev(nbd),
"Connection timed out, retrying (%d/%d alive)\n",
atomic_read(&config->live_connections),
config->num_connections);
/*
* Hooray we have more connections, requeue this IO, the submit
- * path will put it on a real connection.
+ * path will put it on a real connection. Or if only one
+ * connection is configured, the submit path will wait until
+ * a new connection is reconfigured or until dead timeout.
*/
- if (config->socks && config->num_connections > 1) {
+ if (config->socks) {
if (cmd->index < config->num_connections) {
struct nbd_sock *nsock =
config->socks[cmd->index];
@@ -431,12 +434,22 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
* Userspace sets timeout=0 to disable socket disconnection,
* so just warn and reset the timer.
*/
+ struct nbd_sock *nsock = config->socks[cmd->index];
cmd->retries++;
dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
(unsigned long long)blk_rq_pos(req) << 9,
blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);
+ mutex_lock(&nsock->tx_lock);
+ if (cmd->cookie != nsock->cookie) {
+ nbd_requeue_cmd(cmd);
+ mutex_unlock(&nsock->tx_lock);
+ mutex_unlock(&cmd->lock);
+ nbd_config_put(nbd);
+ return BLK_EH_DONE;
+ }
+ mutex_unlock(&nsock->tx_lock);
mutex_unlock(&cmd->lock);
nbd_config_put(nbd);
return BLK_EH_RESET_TIMER;
@@ -741,14 +754,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
/*
- * If we've disconnected or we only have 1
- * connection then we need to make sure we
+ * If we've disconnected, we need to make sure we
* complete this request, otherwise error out
* and let the timeout stuff handle resubmitting
* this request onto another connection.
*/
- if (nbd_disconnected(config) ||
- config->num_connections <= 1) {
+ if (nbd_disconnected(config)) {
cmd->status = BLK_STS_IOERR;
goto out;
}
@@ -825,7 +836,7 @@ static int find_fallback(struct nbd_device *nbd, int index)
if (config->num_connections <= 1) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
- "Attempted send on invalid socket\n");
+ "Dead connection, failed to find a fallback\n");
return new_index;
}
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 133060431dbd..4e1c0712278e 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -23,6 +23,7 @@
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static DECLARE_FAULT_ATTR(null_timeout_attr);
static DECLARE_FAULT_ATTR(null_requeue_attr);
+static DECLARE_FAULT_ATTR(null_init_hctx_attr);
#endif
static inline u64 mb_per_tick(int mbps)
@@ -96,11 +97,21 @@ module_param_named(home_node, g_home_node, int, 0444);
MODULE_PARM_DESC(home_node, "Home node for the device");
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+/*
+ * For more details about fault injection, please refer to
+ * Documentation/fault-injection/fault-injection.rst.
+ */
static char g_timeout_str[80];
module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
+MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");
static char g_requeue_str[80];
module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
+MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");
+
+static char g_init_hctx_str[80];
+module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
+MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
#endif
static int g_queue_mode = NULL_Q_MQ;
@@ -276,7 +287,7 @@ nullb_device_##NAME##_store(struct config_item *item, const char *page, \
{ \
int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
struct nullb_device *dev = to_nullb_device(item); \
- TYPE uninitialized_var(new_value); \
+ TYPE new_value = 0; \
int ret; \
\
ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
@@ -302,6 +313,12 @@ static int nullb_apply_submit_queues(struct nullb_device *dev,
if (!nullb)
return 0;
+ /*
+ * Make sure that null_init_hctx() does not access nullb->queues[] past
+ * the end of that array.
+ */
+ if (submit_queues > nr_cpu_ids)
+ return -EINVAL;
set = nullb->tag_set;
blk_mq_update_nr_hw_queues(set, submit_queues);
return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM;
@@ -605,6 +622,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
if (tag != -1U) {
cmd = &nq->cmds[tag];
cmd->tag = tag;
+ cmd->error = BLK_STS_OK;
cmd->nq = nq;
if (nq->dev->irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
@@ -1385,6 +1403,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
cmd->timer.function = null_cmd_timer_expired;
}
cmd->rq = bd->rq;
+ cmd->error = BLK_STS_OK;
cmd->nq = nq;
blk_mq_start_request(bd->rq);
@@ -1408,12 +1427,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
}
-static const struct blk_mq_ops null_mq_ops = {
- .queue_rq = null_queue_rq,
- .complete = null_complete_rq,
- .timeout = null_timeout_rq,
-};
-
static void cleanup_queue(struct nullb_queue *nq)
{
kfree(nq->tag_map);
@@ -1430,9 +1443,56 @@ static void cleanup_queues(struct nullb *nullb)
kfree(nullb->queues);
}
+static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
+ struct nullb_queue *nq = hctx->driver_data;
+ struct nullb *nullb = nq->dev->nullb;
+
+ nullb->nr_queues--;
+}
+
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+ init_waitqueue_head(&nq->wait);
+ nq->queue_depth = nullb->queue_depth;
+ nq->dev = nullb->dev;
+}
+
+static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
+ unsigned int hctx_idx)
+{
+ struct nullb *nullb = hctx->queue->queuedata;
+ struct nullb_queue *nq;
+
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+ if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
+ return -EFAULT;
+#endif
+
+ nq = &nullb->queues[hctx_idx];
+ hctx->driver_data = nq;
+ null_init_queue(nullb, nq);
+ nullb->nr_queues++;
+
+ return 0;
+}
+
+static const struct blk_mq_ops null_mq_ops = {
+ .queue_rq = null_queue_rq,
+ .complete = null_complete_rq,
+ .timeout = null_timeout_rq,
+ .init_hctx = null_init_hctx,
+ .exit_hctx = null_exit_hctx,
+};
+
static void null_del_dev(struct nullb *nullb)
{
- struct nullb_device *dev = nullb->dev;
+ struct nullb_device *dev;
+
+ if (!nullb)
+ return;
+
+ dev = nullb->dev;
ida_simple_remove(&nullb_indexes, nullb->index);
@@ -1473,33 +1533,6 @@ static const struct block_device_operations null_ops = {
.report_zones = null_report_zones,
};
-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
-{
- BUG_ON(!nullb);
- BUG_ON(!nq);
-
- init_waitqueue_head(&nq->wait);
- nq->queue_depth = nullb->queue_depth;
- nq->dev = nullb->dev;
-}
-
-static void null_init_queues(struct nullb *nullb)
-{
- struct request_queue *q = nullb->q;
- struct blk_mq_hw_ctx *hctx;
- struct nullb_queue *nq;
- int i;
-
- queue_for_each_hw_ctx(q, hctx, i) {
- if (!hctx->nr_ctx || !hctx->tags)
- continue;
- nq = &nullb->queues[i];
- hctx->driver_data = nq;
- null_init_queue(nullb, nq);
- nullb->nr_queues++;
- }
-}
-
static int setup_commands(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
@@ -1526,8 +1559,7 @@ static int setup_commands(struct nullb_queue *nq)
static int setup_queues(struct nullb *nullb)
{
- nullb->queues = kcalloc(nullb->dev->submit_queues,
- sizeof(struct nullb_queue),
+ nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue),
GFP_KERNEL);
if (!nullb->queues)
return -ENOMEM;
@@ -1669,6 +1701,8 @@ static bool null_setup_fault(void)
return false;
if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
return false;
+ if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str))
+ return false;
#endif
return true;
}
@@ -1712,19 +1746,17 @@ static int null_add_dev(struct nullb_device *dev)
goto out_cleanup_queues;
nullb->tag_set->timeout = 5 * HZ;
- nullb->q = blk_mq_init_queue(nullb->tag_set);
+ nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb);
if (IS_ERR(nullb->q)) {
rv = -ENOMEM;
goto out_cleanup_tags;
}
- null_init_queues(nullb);
} else if (dev->queue_mode == NULL_Q_BIO) {
- nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
+ nullb->q = blk_alloc_queue(null_queue_bio, dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
}
- blk_queue_make_request(nullb->q, null_queue_bio);
rv = init_driver_queues(nullb);
if (rv)
goto out_cleanup_blk_queue;
@@ -1788,6 +1820,7 @@ out_cleanup_queues:
cleanup_queues(nullb);
out_free_nullb:
kfree(nullb);
+ dev->nullb = NULL;
out:
return rv;
}
diff --git a/drivers/block/null_blk_trace.c b/drivers/block/null_blk_trace.c
new file mode 100644
index 000000000000..f246e7bff698
--- /dev/null
+++ b/drivers/block/null_blk_trace.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * null_blk trace related helpers.
+ *
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#include "null_blk_trace.h"
+
+/*
+ * Helper to use for all null_blk traces to extract disk name.
+ */
+const char *nullb_trace_disk_name(struct trace_seq *p, char *name)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (name && *name)
+ trace_seq_printf(p, "disk=%s, ", name);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
diff --git a/drivers/block/null_blk_trace.h b/drivers/block/null_blk_trace.h
new file mode 100644
index 000000000000..4f83032eb544
--- /dev/null
+++ b/drivers/block/null_blk_trace.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * null_blk device driver tracepoints.
+ *
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nullb
+
+#if !defined(_TRACE_NULLB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NULLB_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "null_blk.h"
+
+const char *nullb_trace_disk_name(struct trace_seq *p, char *name);
+
+#define __print_disk_name(name) nullb_trace_disk_name(p, name)
+
+#ifndef TRACE_HEADER_MULTI_READ
+static inline void __assign_disk_name(char *name, struct gendisk *disk)
+{
+ if (disk)
+ memcpy(name, disk->disk_name, DISK_NAME_LEN);
+ else
+ memset(name, 0, DISK_NAME_LEN);
+}
+#endif
+
+TRACE_EVENT(nullb_zone_op,
+ TP_PROTO(struct nullb_cmd *cmd, unsigned int zone_no,
+ unsigned int zone_cond),
+ TP_ARGS(cmd, zone_no, zone_cond),
+ TP_STRUCT__entry(
+ __array(char, disk, DISK_NAME_LEN)
+ __field(enum req_opf, op)
+ __field(unsigned int, zone_no)
+ __field(unsigned int, zone_cond)
+ ),
+ TP_fast_assign(
+ __entry->op = req_op(cmd->rq);
+ __entry->zone_no = zone_no;
+ __entry->zone_cond = zone_cond;
+ __assign_disk_name(__entry->disk, cmd->rq->rq_disk);
+ ),
+ TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s",
+ __print_disk_name(__entry->disk),
+ blk_op_str(__entry->op),
+ __entry->zone_no,
+ blk_zone_cond_str(__entry->zone_cond))
+);
+
+TRACE_EVENT(nullb_report_zones,
+ TP_PROTO(struct nullb *nullb, unsigned int nr_zones),
+ TP_ARGS(nullb, nr_zones),
+ TP_STRUCT__entry(
+ __array(char, disk, DISK_NAME_LEN)
+ __field(unsigned int, nr_zones)
+ ),
+ TP_fast_assign(
+ __entry->nr_zones = nr_zones;
+ __assign_disk_name(__entry->disk, nullb->disk);
+ ),
+ TP_printk("%s nr_zones=%u",
+ __print_disk_name(__entry->disk), __entry->nr_zones)
+);
+
+#endif /* _TRACE_NULLB_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE null_blk_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index ed34785dd64b..673618d8222a 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -2,6 +2,9 @@
#include <linux/vmalloc.h>
#include "null_blk.h"
+#define CREATE_TRACE_POINTS
+#include "null_blk_trace.h"
+
/* zone_size in MBs to sectors. */
#define ZONE_SIZE_SHIFT 11
@@ -80,6 +83,8 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
return 0;
nr_zones = min(nr_zones, dev->nr_zones - first_zone);
+ trace_nullb_report_zones(nullb, nr_zones);
+
for (i = 0; i < nr_zones; i++) {
/*
* Stacked DM target drivers will remap the zone information by
@@ -148,6 +153,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
/* Invalid zone condition */
return BLK_STS_IOERR;
}
+
+ trace_nullb_zone_op(cmd, zno, zone->cond);
return BLK_STS_OK;
}
@@ -155,7 +162,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
sector_t sector)
{
struct nullb_device *dev = cmd->nq->dev;
- struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
+ unsigned int zone_no = null_zone_no(dev, sector);
+ struct blk_zone *zone = &dev->zones[zone_no];
size_t i;
switch (op) {
@@ -203,6 +211,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
default:
return BLK_STS_NOTSUPP;
}
+
+ trace_nullb_zone_op(cmd, zone_no, zone->cond);
return BLK_STS_OK;
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 5f970a7d32c0..0b944ac96d6b 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2493,7 +2493,6 @@ static void pkt_init_queue(struct pktcdvd_device *pd)
{
struct request_queue *q = pd->disk->queue;
- blk_queue_make_request(q, pkt_make_request);
blk_queue_logical_block_size(q, CD_FRAMESIZE);
blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
q->queuedata = pd;
@@ -2679,6 +2678,11 @@ static unsigned int pkt_check_events(struct gendisk *disk,
return attached_disk->fops->check_events(attached_disk, clearing);
}
+static char *pkt_devnode(struct gendisk *disk, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name);
+}
+
static const struct block_device_operations pktcdvd_ops = {
.owner = THIS_MODULE,
.open = pkt_open,
@@ -2686,13 +2690,9 @@ static const struct block_device_operations pktcdvd_ops = {
.ioctl = pkt_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
.check_events = pkt_check_events,
+ .devnode = pkt_devnode,
};
-static char *pktcdvd_devnode(struct gendisk *gd, umode_t *mode)
-{
- return kasprintf(GFP_KERNEL, "pktcdvd/%s", gd->disk_name);
-}
-
/*
* Set up mapping from pktcdvd device to CD-ROM device.
*/
@@ -2748,9 +2748,8 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
disk->fops = &pktcdvd_ops;
disk->flags = GENHD_FL_REMOVABLE;
strcpy(disk->disk_name, pd->name);
- disk->devnode = pktcdvd_devnode;
disk->private_data = pd;
- disk->queue = blk_alloc_queue(GFP_KERNEL);
+ disk->queue = blk_alloc_queue(pkt_make_request, NUMA_NO_NODE);
if (!disk->queue)
goto out_mem2;
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 4628e1a27a2b..821d4d8b1d76 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -737,7 +737,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
ps3vram_proc_init(dev);
- queue = blk_alloc_queue(GFP_KERNEL);
+ queue = blk_alloc_queue(ps3vram_make_request, NUMA_NO_NODE);
if (!queue) {
dev_err(&dev->core, "blk_alloc_queue failed\n");
error = -ENOMEM;
@@ -746,7 +746,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
priv->queue = queue;
queue->queuedata = dev;
- blk_queue_make_request(queue, ps3vram_make_request);
blk_queue_max_segments(queue, BLK_MAX_SEGMENTS);
blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index c47d28b2ce44..8ffa8260dcaf 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -248,7 +248,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
return -ENOMEM;
}
- card->queue = blk_alloc_queue(GFP_KERNEL);
+ card->queue = blk_alloc_queue(rsxx_make_request, NUMA_NO_NODE);
if (!card->queue) {
dev_err(CARD_TO_DEV(card), "Failed queue alloc\n");
unregister_blkdev(card->major, DRIVER_NAME);
@@ -269,7 +269,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
blk_queue_logical_block_size(card->queue, blk_size);
}
- blk_queue_make_request(card->queue, rsxx_make_request);
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 111eb659e66d..1914f5488b22 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -80,7 +80,7 @@ struct dma_tracker {
struct dma_tracker_list {
spinlock_t lock;
int head;
- struct dma_tracker list[0];
+ struct dma_tracker list[];
};
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 4eaf97d7a170..d84e8a878df2 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -885,11 +885,9 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
card->biotail = &card->bio;
spin_lock_init(&card->lock);
- card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
+ card->queue = blk_alloc_queue(mm_make_request, NUMA_NO_NODE);
if (!card->queue)
goto failed_alloc;
-
- blk_queue_make_request(card->queue, mm_make_request);
card->queue->queuedata = card;
tasklet_init(&card->tasklet, process_page, (unsigned long)card);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 54158766334b..f9b1e70f1b31 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -245,13 +245,20 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
if (err) {
virtqueue_kick(vblk->vqs[qid].vq);
- blk_mq_stop_hw_queue(hctx);
+ /* Don't stop the queue if -ENOMEM: we may have failed to
+ * bounce the buffer due to global resource outage.
+ */
+ if (err == -ENOSPC)
+ blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
- /* Out of mem doesn't actually happen, since we fall back
- * to direct descriptors */
- if (err == -ENOMEM || err == -ENOSPC)
+ switch (err) {
+ case -ENOSPC:
return BLK_STS_DEV_RESOURCE;
- return BLK_STS_IOERR;
+ case -ENOMEM:
+ return BLK_STS_RESOURCE;
+ default:
+ return BLK_STS_IOERR;
+ }
}
if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -381,18 +388,15 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
cap_str_10,
cap_str_2);
- set_capacity(vblk->disk, capacity);
+ set_capacity_revalidate_and_notify(vblk->disk, capacity, true);
}
static void virtblk_config_changed_work(struct work_struct *work)
{
struct virtio_blk *vblk =
container_of(work, struct virtio_blk, config_work);
- char *envp[] = { "RESIZE=1", NULL };
virtblk_update_capacity(vblk, true);
- revalidate_disk(vblk->disk);
- kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
}
static void virtblk_config_changed(struct virtio_device *vdev)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9df516a56bb2..915cf5b6388c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2338,7 +2338,6 @@ static void blkfront_connect(struct blkfront_info *info)
unsigned long sector_size;
unsigned int physical_sector_size;
unsigned int binfo;
- char *envp[] = { "RESIZE=1", NULL };
int err, i;
struct blkfront_ring_info *rinfo;
@@ -2354,10 +2353,7 @@ static void blkfront_connect(struct blkfront_info *info)
return;
printk(KERN_INFO "Setting capacity to %Lu\n",
sectors);
- set_capacity(info->gd, sectors);
- revalidate_disk(info->gd);
- kobject_uevent_env(&disk_to_dev(info->gd)->kobj,
- KOBJ_CHANGE, envp);
+ set_capacity_revalidate_and_notify(info->gd, sectors, true);
return;
case BLKIF_STATE_SUSPENDED:
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 1bdb5793842b..ebb234f36909 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -33,6 +33,7 @@
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
+#include <linux/part_stat.h>
#include "zram_drv.h"
@@ -1894,7 +1895,7 @@ static int zram_add(void)
#ifdef CONFIG_ZRAM_WRITEBACK
spin_lock_init(&zram->wb_limit_lock);
#endif
- queue = blk_alloc_queue(GFP_KERNEL);
+ queue = blk_alloc_queue(zram_make_request, NUMA_NO_NODE);
if (!queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
@@ -1902,8 +1903,6 @@ static int zram_add(void)
goto out_free_idr;
}
- blk_queue_make_request(queue, zram_make_request);
-
/* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index be79d6c6a4e4..1bb00a959c67 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -345,7 +345,7 @@ static int sunxi_rsb_read(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr,
if (ret)
goto unlock;
- *buf = readl(rsb->regs + RSB_DATA);
+ *buf = readl(rsb->regs + RSB_DATA) & GENMASK(len * 8 - 1, 0);
unlock:
mutex_unlock(&rsb->lock);
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 6113fc0a52ae..440019655fbb 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -1266,6 +1266,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
SYSC_QUIRK("gpu", 0x50000000, 0x14, -1, -1, 0x00010201, 0xffffffff, 0),
SYSC_QUIRK("gpu", 0x50000000, 0xfe00, 0xfe10, -1, 0x40000000 , 0xffffffff,
SYSC_MODULE_QUIRK_SGX),
+ SYSC_QUIRK("lcdc", 0, 0, 0x54, -1, 0x4f201000, 0xffffffff,
+ SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050,
0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
SYSC_QUIRK("usb_otg_hs", 0, 0, 0x10, -1, 0x4ea2080d, 0xffffffff,
@@ -1294,7 +1296,6 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
SYSC_QUIRK("gpu", 0, 0xfe00, 0xfe10, -1, 0x40000000 , 0xffffffff, 0),
SYSC_QUIRK("hsi", 0, 0, 0x10, 0x14, 0x50043101, 0xffffffff, 0),
SYSC_QUIRK("iss", 0, 0, 0x10, -1, 0x40000101, 0xffffffff, 0),
- SYSC_QUIRK("lcdc", 0, 0, 0x54, -1, 0x4f201000, 0xffffffff, 0),
SYSC_QUIRK("mcasp", 0, 0, 0x4, -1, 0x44306302, 0xffffffff, 0),
SYSC_QUIRK("mcasp", 0, 0, 0x4, -1, 0x44307b02, 0xffffffff, 0),
SYSC_QUIRK("mcbsp", 0, -1, 0x8c, -1, 0, 0, 0),
diff --git a/drivers/char/ipmi/ipmi_si_platform.c b/drivers/char/ipmi/ipmi_si_platform.c
index c78127ccbc0d..638c693e17ad 100644
--- a/drivers/char/ipmi/ipmi_si_platform.c
+++ b/drivers/char/ipmi/ipmi_si_platform.c
@@ -194,7 +194,7 @@ static int platform_ipmi_probe(struct platform_device *pdev)
else
io.slave_addr = slave_addr;
- io.irq = platform_get_irq(pdev, 0);
+ io.irq = platform_get_irq_optional(pdev, 0);
if (io.irq > 0)
io.irq_setup = ipmi_std_irq_setup;
else
@@ -378,7 +378,7 @@ static int acpi_ipmi_probe(struct platform_device *pdev)
io.irq = tmp;
io.irq_setup = acpi_gpe_irq_setup;
} else {
- int irq = platform_get_irq(pdev, 0);
+ int irq = platform_get_irq_optional(pdev, 0);
if (irq > 0) {
io.irq = irq;
diff --git a/drivers/char/tpm/eventlog/common.c b/drivers/char/tpm/eventlog/common.c
index 7a0fca659b6a..7460f230bae4 100644
--- a/drivers/char/tpm/eventlog/common.c
+++ b/drivers/char/tpm/eventlog/common.c
@@ -99,11 +99,8 @@ static int tpm_read_log(struct tpm_chip *chip)
*
* If an event log is found then the securityfs files are setup to
* export it to userspace, otherwise nothing is done.
- *
- * Returns -ENODEV if the firmware has no event log or securityfs is not
- * supported.
*/
-int tpm_bios_log_setup(struct tpm_chip *chip)
+void tpm_bios_log_setup(struct tpm_chip *chip)
{
const char *name = dev_name(&chip->dev);
unsigned int cnt;
@@ -112,7 +109,7 @@ int tpm_bios_log_setup(struct tpm_chip *chip)
rc = tpm_read_log(chip);
if (rc < 0)
- return rc;
+ return;
log_version = rc;
cnt = 0;
@@ -158,13 +155,12 @@ int tpm_bios_log_setup(struct tpm_chip *chip)
cnt++;
}
- return 0;
+ return;
err:
- rc = PTR_ERR(chip->bios_dir[cnt]);
chip->bios_dir[cnt] = NULL;
tpm_bios_log_teardown(chip);
- return rc;
+ return;
}
void tpm_bios_log_teardown(struct tpm_chip *chip)
diff --git a/drivers/char/tpm/eventlog/of.c b/drivers/char/tpm/eventlog/of.c
index af347c190819..a9ce66d09a75 100644
--- a/drivers/char/tpm/eventlog/of.c
+++ b/drivers/char/tpm/eventlog/of.c
@@ -51,7 +51,8 @@ int tpm_read_log_of(struct tpm_chip *chip)
* endian format. For this reason, vtpm doesn't need conversion
* but physical tpm needs the conversion.
*/
- if (of_property_match_string(np, "compatible", "IBM,vtpm") < 0) {
+ if (of_property_match_string(np, "compatible", "IBM,vtpm") < 0 &&
+ of_property_match_string(np, "compatible", "IBM,vtpm20") < 0) {
size = be32_to_cpup((__force __be32 *)sizep);
base = be64_to_cpup((__force __be64 *)basep);
} else {
diff --git a/drivers/char/tpm/eventlog/tpm1.c b/drivers/char/tpm/eventlog/tpm1.c
index 739b1d9d16b6..2c96977ad080 100644
--- a/drivers/char/tpm/eventlog/tpm1.c
+++ b/drivers/char/tpm/eventlog/tpm1.c
@@ -115,6 +115,7 @@ static void *tpm1_bios_measurements_next(struct seq_file *m, void *v,
u32 converted_event_size;
u32 converted_event_type;
+ (*pos)++;
converted_event_size = do_endian_conversion(event->event_size);
v += sizeof(struct tcpa_event) + converted_event_size;
@@ -132,7 +133,6 @@ static void *tpm1_bios_measurements_next(struct seq_file *m, void *v,
((v + sizeof(struct tcpa_event) + converted_event_size) > limit))
return NULL;
- (*pos)++;
return v;
}
diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c
index b9aeda1cbcd7..e741b1157525 100644
--- a/drivers/char/tpm/eventlog/tpm2.c
+++ b/drivers/char/tpm/eventlog/tpm2.c
@@ -94,6 +94,7 @@ static void *tpm2_bios_measurements_next(struct seq_file *m, void *v,
size_t event_size;
void *marker;
+ (*pos)++;
event_header = log->bios_event_log;
if (v == SEQ_START_TOKEN) {
@@ -118,7 +119,6 @@ static void *tpm2_bios_measurements_next(struct seq_file *m, void *v,
if (((v + event_size) >= limit) || (event_size == 0))
return NULL;
- (*pos)++;
return v;
}
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 3d6d394a8661..58073836b555 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -596,9 +596,7 @@ int tpm_chip_register(struct tpm_chip *chip)
tpm_sysfs_add_device(chip);
- rc = tpm_bios_log_setup(chip);
- if (rc != 0 && rc != -ENODEV)
- return rc;
+ tpm_bios_log_setup(chip);
tpm_add_ppi(chip);
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 5620747da0cf..0fbcede241ea 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -226,6 +226,7 @@ int tpm2_auto_startup(struct tpm_chip *chip);
void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type);
unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal);
int tpm2_probe(struct tpm_chip *chip);
+int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip);
int tpm2_find_cc(struct tpm_chip *chip, u32 cc);
int tpm2_init_space(struct tpm_space *space);
void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space);
@@ -235,7 +236,7 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd,
int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf,
size_t *bufsiz);
-int tpm_bios_log_setup(struct tpm_chip *chip);
+void tpm_bios_log_setup(struct tpm_chip *chip);
void tpm_bios_log_teardown(struct tpm_chip *chip);
int tpm_dev_common_init(void);
void tpm_dev_common_exit(void);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 760329598b99..76f67b155bd5 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -615,7 +615,7 @@ out:
return rc;
}
-static int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
+int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
{
struct tpm_buf buf;
u32 nr_commands;
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 78cc52690177..1a49db9e108e 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -29,6 +29,7 @@ static const char tpm_ibmvtpm_driver_name[] = "tpm_ibmvtpm";
static const struct vio_device_id tpm_ibmvtpm_device_table[] = {
{ "IBM,vtpm", "IBM,vtpm"},
+ { "IBM,vtpm", "IBM,vtpm20"},
{ "", "" }
};
MODULE_DEVICE_TABLE(vio, tpm_ibmvtpm_device_table);
@@ -571,6 +572,7 @@ static irqreturn_t ibmvtpm_interrupt(int irq, void *vtpm_instance)
*/
while ((crq = ibmvtpm_crq_get_next(ibmvtpm)) != NULL) {
ibmvtpm_crq_process(crq, ibmvtpm);
+ wake_up_interruptible(&ibmvtpm->crq_queue.wq);
crq->valid = 0;
smp_wmb();
}
@@ -618,6 +620,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
}
crq_q->num_entry = CRQ_RES_BUF_SIZE / sizeof(*crq_q->crq_addr);
+ init_waitqueue_head(&crq_q->wq);
ibmvtpm->crq_dma_handle = dma_map_single(dev, crq_q->crq_addr,
CRQ_RES_BUF_SIZE,
DMA_BIDIRECTIONAL);
@@ -670,6 +673,20 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
if (rc)
goto init_irq_cleanup;
+ if (!strcmp(id->compat, "IBM,vtpm20")) {
+ chip->flags |= TPM_CHIP_FLAG_TPM2;
+ rc = tpm2_get_cc_attrs_tbl(chip);
+ if (rc)
+ goto init_irq_cleanup;
+ }
+
+ if (!wait_event_timeout(ibmvtpm->crq_queue.wq,
+ ibmvtpm->rtce_buf != NULL,
+ HZ)) {
+ dev_err(dev, "CRQ response timed out\n");
+ goto init_irq_cleanup;
+ }
+
return tpm_chip_register(chip);
init_irq_cleanup:
do {
diff --git a/drivers/char/tpm/tpm_ibmvtpm.h b/drivers/char/tpm/tpm_ibmvtpm.h
index 7983f1a33267..b92aa7d3e93e 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.h
+++ b/drivers/char/tpm/tpm_ibmvtpm.h
@@ -26,6 +26,7 @@ struct ibmvtpm_crq_queue {
struct ibmvtpm_crq *crq_addr;
u32 index;
u32 num_entry;
+ wait_queue_head_t wq;
};
struct ibmvtpm_dev {
diff --git a/drivers/char/tpm/tpm_tis_spi_cr50.c b/drivers/char/tpm/tpm_tis_spi_cr50.c
index 37d72e818335..ea759af25634 100644
--- a/drivers/char/tpm/tpm_tis_spi_cr50.c
+++ b/drivers/char/tpm/tpm_tis_spi_cr50.c
@@ -132,7 +132,12 @@ static void cr50_wake_if_needed(struct cr50_spi_phy *cr50_phy)
if (cr50_needs_waking(cr50_phy)) {
/* Assert CS, wait 1 msec, deassert CS */
- struct spi_transfer spi_cs_wake = { .delay_usecs = 1000 };
+ struct spi_transfer spi_cs_wake = {
+ .delay = {
+ .value = 1000,
+ .unit = SPI_DELAY_UNIT_USECS
+ }
+ };
spi_sync_transfer(phy->spi_device, &spi_cs_wake, 1);
/* Wait for it to fully wake */
diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
index d1754fd6c573..d96755935529 100644
--- a/drivers/char/tpm/tpm_tis_spi_main.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -110,7 +110,8 @@ int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
spi_xfer.cs_change = 0;
spi_xfer.len = transfer_len;
- spi_xfer.delay_usecs = 5;
+ spi_xfer.delay.value = 5;
+ spi_xfer.delay.unit = SPI_DELAY_UNIT_USECS;
if (in) {
spi_xfer.tx_buf = NULL;
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index f0f2b599fd7e..95adf6c6db3d 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -4713,7 +4713,7 @@ EXPORT_SYMBOL(of_clk_get_by_name);
*
* Returns: The number of clocks that are possible parents of this node
*/
-unsigned int of_clk_get_parent_count(struct device_node *np)
+unsigned int of_clk_get_parent_count(const struct device_node *np)
{
int count;
@@ -4725,7 +4725,7 @@ unsigned int of_clk_get_parent_count(struct device_node *np)
}
EXPORT_SYMBOL_GPL(of_clk_get_parent_count);
-const char *of_clk_get_parent_name(struct device_node *np, int index)
+const char *of_clk_get_parent_name(const struct device_node *np, int index)
{
struct of_phandle_args clkspec;
struct property *prop;
diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c
index f6c120cca0d4..cf192907b7dc 100644
--- a/drivers/clk/imx/clk-imx8mp.c
+++ b/drivers/clk/imx/clk-imx8mp.c
@@ -560,7 +560,7 @@ static int imx8mp_clocks_probe(struct platform_device *pdev)
hws[IMX8MP_CLK_MEDIA_AXI] = imx8m_clk_hw_composite("media_axi", imx8mp_media_axi_sels, ccm_base + 0x8a00);
hws[IMX8MP_CLK_MEDIA_APB] = imx8m_clk_hw_composite("media_apb", imx8mp_media_apb_sels, ccm_base + 0x8a80);
hws[IMX8MP_CLK_HDMI_APB] = imx8m_clk_hw_composite("hdmi_apb", imx8mp_media_apb_sels, ccm_base + 0x8b00);
- hws[IMX8MP_CLK_HDMI_AXI] = imx8m_clk_hw_composite("hdmi_axi", imx8mp_media_apb_sels, ccm_base + 0x8b80);
+ hws[IMX8MP_CLK_HDMI_AXI] = imx8m_clk_hw_composite("hdmi_axi", imx8mp_media_axi_sels, ccm_base + 0x8b80);
hws[IMX8MP_CLK_GPU_AXI] = imx8m_clk_hw_composite("gpu_axi", imx8mp_gpu_axi_sels, ccm_base + 0x8c00);
hws[IMX8MP_CLK_GPU_AHB] = imx8m_clk_hw_composite("gpu_ahb", imx8mp_gpu_ahb_sels, ccm_base + 0x8c80);
hws[IMX8MP_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", imx8mp_noc_sels, ccm_base + 0x8d00);
@@ -686,7 +686,7 @@ static int imx8mp_clocks_probe(struct platform_device *pdev)
hws[IMX8MP_CLK_CAN1_ROOT] = imx_clk_hw_gate2("can1_root_clk", "can1", ccm_base + 0x4350, 0);
hws[IMX8MP_CLK_CAN2_ROOT] = imx_clk_hw_gate2("can2_root_clk", "can2", ccm_base + 0x4360, 0);
hws[IMX8MP_CLK_SDMA1_ROOT] = imx_clk_hw_gate4("sdma1_root_clk", "ipg_root", ccm_base + 0x43a0, 0);
- hws[IMX8MP_CLK_ENET_QOS_ROOT] = imx_clk_hw_gate4("enet_qos_root_clk", "enet_axi", ccm_base + 0x43b0, 0);
+ hws[IMX8MP_CLK_ENET_QOS_ROOT] = imx_clk_hw_gate4("enet_qos_root_clk", "sim_enet_root_clk", ccm_base + 0x43b0, 0);
hws[IMX8MP_CLK_SIM_ENET_ROOT] = imx_clk_hw_gate4("sim_enet_root_clk", "enet_axi", ccm_base + 0x4400, 0);
hws[IMX8MP_CLK_GPU2D_ROOT] = imx_clk_hw_gate4("gpu2d_root_clk", "gpu2d_div", ccm_base + 0x4450, 0);
hws[IMX8MP_CLK_GPU3D_ROOT] = imx_clk_hw_gate4("gpu3d_root_clk", "gpu3d_core_div", ccm_base + 0x4460, 0);
diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c
index fbef740704d0..b8b2072742a5 100644
--- a/drivers/clk/imx/clk-scu.c
+++ b/drivers/clk/imx/clk-scu.c
@@ -43,12 +43,12 @@ struct imx_sc_msg_req_set_clock_rate {
__le32 rate;
__le16 resource;
u8 clk;
-} __packed;
+} __packed __aligned(4);
struct req_get_clock_rate {
__le16 resource;
u8 clk;
-} __packed;
+} __packed __aligned(4);
struct resp_get_clock_rate {
__le32 rate;
@@ -84,7 +84,7 @@ struct imx_sc_msg_get_clock_parent {
struct req_get_clock_parent {
__le16 resource;
u8 clk;
- } __packed req;
+ } __packed __aligned(4) req;
struct resp_get_clock_parent {
u8 parent;
} resp;
@@ -121,7 +121,7 @@ struct imx_sc_msg_req_clock_enable {
u8 clk;
u8 enable;
u8 autog;
-} __packed;
+} __packed __aligned(4);
static inline struct clk_scu *to_clk_scu(struct clk_hw *hw)
{
diff --git a/drivers/clk/qcom/dispcc-sc7180.c b/drivers/clk/qcom/dispcc-sc7180.c
index dd7af41e47eb..0a5d395bce93 100644
--- a/drivers/clk/qcom/dispcc-sc7180.c
+++ b/drivers/clk/qcom/dispcc-sc7180.c
@@ -592,24 +592,6 @@ static struct clk_branch disp_cc_mdss_rot_clk = {
},
};
-static struct clk_branch disp_cc_mdss_rscc_ahb_clk = {
- .halt_reg = 0x400c,
- .halt_check = BRANCH_HALT,
- .clkr = {
- .enable_reg = 0x400c,
- .enable_mask = BIT(0),
- .hw.init = &(struct clk_init_data){
- .name = "disp_cc_mdss_rscc_ahb_clk",
- .parent_data = &(const struct clk_parent_data){
- .hw = &disp_cc_mdss_ahb_clk_src.clkr.hw,
- },
- .num_parents = 1,
- .flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT,
- .ops = &clk_branch2_ops,
- },
- },
-};
-
static struct clk_branch disp_cc_mdss_rscc_vsync_clk = {
.halt_reg = 0x4008,
.halt_check = BRANCH_HALT,
@@ -687,7 +669,6 @@ static struct clk_regmap *disp_cc_sc7180_clocks[] = {
[DISP_CC_MDSS_PCLK0_CLK_SRC] = &disp_cc_mdss_pclk0_clk_src.clkr,
[DISP_CC_MDSS_ROT_CLK] = &disp_cc_mdss_rot_clk.clkr,
[DISP_CC_MDSS_ROT_CLK_SRC] = &disp_cc_mdss_rot_clk_src.clkr,
- [DISP_CC_MDSS_RSCC_AHB_CLK] = &disp_cc_mdss_rscc_ahb_clk.clkr,
[DISP_CC_MDSS_RSCC_VSYNC_CLK] = &disp_cc_mdss_rscc_vsync_clk.clkr,
[DISP_CC_MDSS_VSYNC_CLK] = &disp_cc_mdss_vsync_clk.clkr,
[DISP_CC_MDSS_VSYNC_CLK_SRC] = &disp_cc_mdss_vsync_clk_src.clkr,
diff --git a/drivers/clk/qcom/videocc-sc7180.c b/drivers/clk/qcom/videocc-sc7180.c
index c363c3cc544e..276e5ecd4840 100644
--- a/drivers/clk/qcom/videocc-sc7180.c
+++ b/drivers/clk/qcom/videocc-sc7180.c
@@ -97,7 +97,7 @@ static struct clk_branch video_cc_vcodec0_axi_clk = {
static struct clk_branch video_cc_vcodec0_core_clk = {
.halt_reg = 0x890,
- .halt_check = BRANCH_HALT,
+ .halt_check = BRANCH_HALT_VOTED,
.clkr = {
.enable_reg = 0x890,
.enable_mask = BIT(0),
diff --git a/drivers/clk/ti/clk-43xx.c b/drivers/clk/ti/clk-43xx.c
index af3e7805769e..e5538d577ce5 100644
--- a/drivers/clk/ti/clk-43xx.c
+++ b/drivers/clk/ti/clk-43xx.c
@@ -78,7 +78,7 @@ static const struct omap_clkctrl_reg_data am4_gfx_l3_clkctrl_regs[] __initconst
};
static const struct omap_clkctrl_reg_data am4_l4_rtc_clkctrl_regs[] __initconst = {
- { AM4_L4_RTC_RTC_CLKCTRL, NULL, CLKF_SW_SUP, "clk_32768_ck" },
+ { AM4_L4_RTC_RTC_CLKCTRL, NULL, CLKF_SW_SUP, "clkdiv32k_ick" },
{ 0 },
};
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 9d808d595ca8..eb0ba7818eb0 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -343,7 +343,8 @@ static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg)
static u64 read_hv_sched_clock_tsc(void)
{
- return read_hv_clock_tsc() - hv_sched_clock_offset;
+ return (read_hv_clock_tsc() - hv_sched_clock_offset) *
+ (NSEC_PER_SEC / HV_CLOCK_HZ);
}
static void suspend_hv_clock_tsc(struct clocksource *arg)
@@ -398,7 +399,8 @@ static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg)
static u64 read_hv_sched_clock_msr(void)
{
- return read_hv_clock_msr() - hv_sched_clock_offset;
+ return (read_hv_clock_msr() - hv_sched_clock_offset) *
+ (NSEC_PER_SEC / HV_CLOCK_HZ);
}
static struct clocksource hyperv_cs_msr = {
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index c3b1283b6d31..17909fd1820f 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1151,7 +1151,7 @@ int dma_async_device_register(struct dma_device *device)
}
if (!device->device_release)
- dev_warn(device->dev,
+ dev_dbg(device->dev,
"WARN: Device release is not defined so it is not safe to unbind this driver while in use\n");
kref_init(&device->ref);
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index df47be612ebb..989b7a25ca61 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -81,9 +81,9 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
dev = &idxd->pdev->dev;
idxd_cdev = &wq->idxd_cdev;
- dev_dbg(dev, "%s called\n", __func__);
+ dev_dbg(dev, "%s called: %d\n", __func__, idxd_wq_refcount(wq));
- if (idxd_wq_refcount(wq) > 1 && wq_dedicated(wq))
+ if (idxd_wq_refcount(wq) > 0 && wq_dedicated(wq))
return -EBUSY;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c
index c1511298ece2..4d7561a1b3e3 100644
--- a/drivers/dma/ti/k3-udma-glue.c
+++ b/drivers/dma/ti/k3-udma-glue.c
@@ -564,12 +564,12 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
if (IS_ERR(flow->udma_rflow)) {
ret = PTR_ERR(flow->udma_rflow);
dev_err(dev, "UDMAX rflow get err %d\n", ret);
- goto err;
+ return ret;
}
if (flow->udma_rflow_id != xudma_rflow_get_id(flow->udma_rflow)) {
- xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_rflow_put;
}
/* request and cfg rings */
@@ -578,7 +578,7 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
if (!flow->ringrx) {
ret = -ENODEV;
dev_err(dev, "Failed to get RX ring\n");
- goto err;
+ goto err_rflow_put;
}
flow->ringrxfdq = k3_ringacc_request_ring(rx_chn->common.ringacc,
@@ -586,19 +586,19 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
if (!flow->ringrxfdq) {
ret = -ENODEV;
dev_err(dev, "Failed to get RXFDQ ring\n");
- goto err;
+ goto err_ringrx_free;
}
ret = k3_ringacc_ring_cfg(flow->ringrx, &flow_cfg->rx_cfg);
if (ret) {
dev_err(dev, "Failed to cfg ringrx %d\n", ret);
- goto err;
+ goto err_ringrxfdq_free;
}
ret = k3_ringacc_ring_cfg(flow->ringrxfdq, &flow_cfg->rxfdq_cfg);
if (ret) {
dev_err(dev, "Failed to cfg ringrxfdq %d\n", ret);
- goto err;
+ goto err_ringrxfdq_free;
}
if (rx_chn->remote) {
@@ -648,7 +648,7 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
if (ret) {
dev_err(dev, "flow%d config failed: %d\n", flow->udma_rflow_id,
ret);
- goto err;
+ goto err_ringrxfdq_free;
}
rx_chn->flows_ready++;
@@ -656,8 +656,17 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
flow->udma_rflow_id, rx_chn->flows_ready);
return 0;
-err:
- k3_udma_glue_release_rx_flow(rx_chn, flow_idx);
+
+err_ringrxfdq_free:
+ k3_ringacc_ring_free(flow->ringrxfdq);
+
+err_ringrx_free:
+ k3_ringacc_ring_free(flow->ringrx);
+
+err_rflow_put:
+ xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
+ flow->udma_rflow = NULL;
+
return ret;
}
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index b3c99bb5fe77..fe2eb892a1bd 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -523,4 +523,11 @@ config EDAC_BLUEFIELD
Support for error detection and correction on the
Mellanox BlueField SoCs.
+config EDAC_DMC520
+ tristate "ARM DMC-520 ECC"
+ depends on ARM64
+ help
+ Support for error detection and correction on the
+ SoCs with ARM DMC-520 DRAM controller.
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index d77200c9680b..269e15118cea 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -87,3 +87,4 @@ obj-$(CONFIG_EDAC_TI) += ti_edac.o
obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o
+obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o
diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c
index 7f227bdcbc84..a7502ebe9bdc 100644
--- a/drivers/edac/armada_xp_edac.c
+++ b/drivers/edac/armada_xp_edac.c
@@ -429,26 +429,26 @@ static void aurora_l2_check(struct edac_device_ctl_info *dci)
src = (attr_cap & AURORA_ERR_ATTR_SRC_MSK) >> AURORA_ERR_ATTR_SRC_OFF;
if (src <= 3)
- len += snprintf(msg+len, size-len, "src=CPU%d ", src);
+ len += scnprintf(msg+len, size-len, "src=CPU%d ", src);
else
- len += snprintf(msg+len, size-len, "src=IO ");
+ len += scnprintf(msg+len, size-len, "src=IO ");
txn = (attr_cap & AURORA_ERR_ATTR_TXN_MSK) >> AURORA_ERR_ATTR_TXN_OFF;
switch (txn) {
case 0:
- len += snprintf(msg+len, size-len, "txn=Data-Read ");
+ len += scnprintf(msg+len, size-len, "txn=Data-Read ");
break;
case 1:
- len += snprintf(msg+len, size-len, "txn=Isn-Read ");
+ len += scnprintf(msg+len, size-len, "txn=Isn-Read ");
break;
case 2:
- len += snprintf(msg+len, size-len, "txn=Clean-Flush ");
+ len += scnprintf(msg+len, size-len, "txn=Clean-Flush ");
break;
case 3:
- len += snprintf(msg+len, size-len, "txn=Eviction ");
+ len += scnprintf(msg+len, size-len, "txn=Eviction ");
break;
case 4:
- len += snprintf(msg+len, size-len,
+ len += scnprintf(msg+len, size-len,
"txn=Read-Modify-Write ");
break;
}
@@ -456,19 +456,19 @@ static void aurora_l2_check(struct edac_device_ctl_info *dci)
err = (attr_cap & AURORA_ERR_ATTR_ERR_MSK) >> AURORA_ERR_ATTR_ERR_OFF;
switch (err) {
case 0:
- len += snprintf(msg+len, size-len, "err=CorrECC ");
+ len += scnprintf(msg+len, size-len, "err=CorrECC ");
break;
case 1:
- len += snprintf(msg+len, size-len, "err=UnCorrECC ");
+ len += scnprintf(msg+len, size-len, "err=UnCorrECC ");
break;
case 2:
- len += snprintf(msg+len, size-len, "err=TagParity ");
+ len += scnprintf(msg+len, size-len, "err=TagParity ");
break;
}
- len += snprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
- len += snprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
- len += snprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
+ len += scnprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
+ len += scnprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
+ len += scnprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
/* clear error capture registers */
writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG);
diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c
new file mode 100644
index 000000000000..fc1153ab1ebb
--- /dev/null
+++ b/drivers/edac/dmc520_edac.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * EDAC driver for DMC-520 memory controller.
+ *
+ * The driver supports 10 interrupt lines,
+ * though only dram_ecc_errc and dram_ecc_errd are currently handled.
+ *
+ * Authors: Rui Zhao <ruizhao@microsoft.com>
+ * Lei Wang <lewan@microsoft.com>
+ * Shiping Ji <shji@microsoft.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include "edac_mc.h"
+
+/*
+ * DMC-520 registers.
+ *
+ * Byte offsets added to the mapped register base (pvt->reg_base) and accessed
+ * with readl()/writel(). The *_31_00 / *_63_32 pairs are read by the driver
+ * as the low and high 32-bit words of one value; the ERRC registers are used
+ * for corrected errors and the ERRD registers for uncorrected errors.
+ */
+#define REG_OFFSET_FEATURE_CONFIG 0x130
+#define REG_OFFSET_ECC_ERRC_COUNT_31_00 0x158
+#define REG_OFFSET_ECC_ERRC_COUNT_63_32 0x15C
+#define REG_OFFSET_ECC_ERRD_COUNT_31_00 0x160
+#define REG_OFFSET_ECC_ERRD_COUNT_63_32 0x164
+#define REG_OFFSET_INTERRUPT_CONTROL 0x500
+#define REG_OFFSET_INTERRUPT_CLR 0x508
+#define REG_OFFSET_INTERRUPT_STATUS 0x510
+#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00 0x528
+#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32 0x52C
+#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00 0x530
+#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32 0x534
+#define REG_OFFSET_ADDRESS_CONTROL_NOW 0x1010
+#define REG_OFFSET_MEMORY_TYPE_NOW 0x1128
+#define REG_OFFSET_SCRUB_CONTROL0_NOW 0x1170
+#define REG_OFFSET_FORMAT_CONTROL 0x18
+
+/* DMC-520 types, masks and bitfields */
+/*
+ * One bit per interrupt line. The driver uses these with the
+ * INTERRUPT_CONTROL, INTERRUPT_CLR and INTERRUPT_STATUS registers.
+ */
+#define RAM_ECC_INT_CE_BIT BIT(0)
+#define RAM_ECC_INT_UE_BIT BIT(1)
+#define DRAM_ECC_INT_CE_BIT BIT(2)
+#define DRAM_ECC_INT_UE_BIT BIT(3)
+#define FAILED_ACCESS_INT_BIT BIT(4)
+#define FAILED_PROG_INT_BIT BIT(5)
+#define LINK_ERR_INT_BIT BIT(6)
+#define TEMPERATURE_EVENT_INT_BIT BIT(7)
+#define ARCH_FSM_INT_BIT BIT(8)
+#define PHY_REQUEST_INT_BIT BIT(9)
+/* Field extracted from REG_OFFSET_FORMAT_CONTROL */
+#define MEMORY_WIDTH_MASK GENMASK(1, 0)
+/* Field extracted from REG_OFFSET_SCRUB_CONTROL0_NOW */
+#define SCRUB_TRIGGER0_NEXT_MASK GENMASK(1, 0)
+/* Field extracted from REG_OFFSET_FEATURE_CONFIG; non-zero means ECC is on */
+#define REG_FIELD_DRAM_ECC_ENABLED GENMASK(1, 0)
+/* Fields extracted from REG_OFFSET_MEMORY_TYPE_NOW */
+#define REG_FIELD_MEMORY_TYPE GENMASK(2, 0)
+#define REG_FIELD_DEVICE_WIDTH GENMASK(9, 8)
+/* Fields extracted from REG_OFFSET_ADDRESS_CONTROL_NOW */
+#define REG_FIELD_ADDRESS_CONTROL_COL GENMASK(2, 0)
+#define REG_FIELD_ADDRESS_CONTROL_ROW GENMASK(10, 8)
+#define REG_FIELD_ADDRESS_CONTROL_BANK GENMASK(18, 16)
+#define REG_FIELD_ADDRESS_CONTROL_RANK GENMASK(25, 24)
+/*
+ * Fields of the DRAM_ECC_ERR{C,D}_INT_INFO register pair: *_LOW_* apply to
+ * the _31_00 word, *_HIGH_* to the _63_32 word.
+ */
+#define REG_FIELD_ERR_INFO_LOW_VALID BIT(0)
+#define REG_FIELD_ERR_INFO_LOW_COL GENMASK(10, 1)
+#define REG_FIELD_ERR_INFO_LOW_ROW GENMASK(28, 11)
+#define REG_FIELD_ERR_INFO_LOW_RANK GENMASK(31, 29)
+#define REG_FIELD_ERR_INFO_HIGH_BANK GENMASK(3, 0)
+#define REG_FIELD_ERR_INFO_HIGH_VALID BIT(31)
+
+/* Added to the ADDRESS_CONTROL col/row fields to get the address bit counts */
+#define DRAM_ADDRESS_CONTROL_MIN_COL_BITS 8
+#define DRAM_ADDRESS_CONTROL_MIN_ROW_BITS 11
+
+/* Scrub trigger field values that the driver reports as SCRUB_HW_PROG */
+#define DMC520_SCRUB_TRIGGER_ERR_DETECT 2
+#define DMC520_SCRUB_TRIGGER_IDLE 3
+
+/* Driver settings */
+/*
+ * The max-length message would be: "rank:7 bank:15 row:262143 col:1023".
+ * Max length is 34. Using a 40-size buffer is enough.
+ */
+#define DMC520_MSG_BUF_SIZE 40
+#define EDAC_MOD_NAME "dmc520-edac"
+#define EDAC_CTL_NAME "dmc520"
+
+/*
+ * The data bus width for the attached memory chips, as encoded in the
+ * MEMORY_WIDTH_MASK field of the FORMAT_CONTROL register.
+ * dmc520_get_memory_width() maps X32 to 4 bytes and X64 to 8 bytes; any
+ * other encoding yields 0.
+ */
+enum dmc520_mem_width {
+ MEM_WIDTH_X32 = 2,
+ MEM_WIDTH_X64 = 3
+};
+
+/*
+ * Memory type, as encoded in the REG_FIELD_MEMORY_TYPE field of the
+ * MEMORY_TYPE_NOW register. dmc520_get_mtype() translates these to the EDAC
+ * MEM_DDR3 / MEM_DDR4 values and reports anything else as MEM_UNKNOWN.
+ */
+enum dmc520_mem_type {
+ MEM_TYPE_DDR3 = 1,
+ MEM_TYPE_DDR4 = 2
+};
+
+/*
+ * Memory device width, as encoded in the REG_FIELD_DEVICE_WIDTH field of the
+ * MEMORY_TYPE_NOW register. dmc520_get_dtype() translates these to the EDAC
+ * DEV_X4 / DEV_X8 / DEV_X16 values and reports anything else as DEV_UNKNOWN.
+ */
+enum dmc520_dev_width {
+ DEV_WIDTH_X4 = 0,
+ DEV_WIDTH_X8 = 1,
+ DEV_WIDTH_X16 = 2
+};
+
+/*
+ * Location of a DRAM ECC error as decoded from the ERR{C,D}_INT_INFO register
+ * pair by dmc520_get_dram_ecc_error_info(). All fields are zero when the
+ * hardware did not flag the captured information as valid.
+ */
+struct ecc_error_info {
+ u32 col;
+ u32 row;
+ u32 bank;
+ u32 rank;
+};
+
+/*
+ * The interrupt config: ties the name under which an interrupt line is looked
+ * up on the platform device to the bit that represents that line in the
+ * interrupt control/status/clear registers.
+ */
+struct dmc520_irq_config {
+	const char *name;	/* only ever points at string literals */
+	int mask;
+};
+
+/*
+ * The interrupt mappings: one entry per interrupt line the driver looks up by
+ * name, paired with that line's bit in the interrupt registers.
+ */
+static struct dmc520_irq_config dmc520_irq_configs[] = {
+	{ .name = "ram_ecc_errc",	.mask = RAM_ECC_INT_CE_BIT },
+	{ .name = "ram_ecc_errd",	.mask = RAM_ECC_INT_UE_BIT },
+	{ .name = "dram_ecc_errc",	.mask = DRAM_ECC_INT_CE_BIT },
+	{ .name = "dram_ecc_errd",	.mask = DRAM_ECC_INT_UE_BIT },
+	{ .name = "failed_access",	.mask = FAILED_ACCESS_INT_BIT },
+	{ .name = "failed_prog",	.mask = FAILED_PROG_INT_BIT },
+	{ .name = "link_err",		.mask = LINK_ERR_INT_BIT },
+	{ .name = "temperature_event",	.mask = TEMPERATURE_EVENT_INT_BIT },
+	{ .name = "arch_fsm",		.mask = ARCH_FSM_INT_BIT },
+	{ .name = "phy_request",	.mask = PHY_REQUEST_INT_BIT },
+};
+
+#define NUMBER_OF_IRQS ARRAY_SIZE(dmc520_irq_configs)
+
+/*
+ * The EDAC driver private data.
+ * error_lock is to protect concurrent writes to the mci->error_desc through
+ * edac_mc_handle_error().
+ *
+ * reg_base is the mapped register window used by dmc520_read_reg() and
+ * dmc520_write_reg(). mem_width_in_bytes caches the result of
+ * dmc520_get_memory_width(). irqs[] and masks[] are filled by probe and are
+ * indexed in parallel with dmc520_irq_configs[]: irqs[i] holds the value
+ * returned by the IRQ lookup for entry i (negative when the line is absent)
+ * and masks[i] holds that entry's interrupt bit.
+ */
+struct dmc520_edac {
+ void __iomem *reg_base;
+ spinlock_t error_lock;
+ u32 mem_width_in_bytes;
+ int irqs[NUMBER_OF_IRQS];
+ int masks[NUMBER_OF_IRQS];
+};
+
+static int dmc520_mc_idx;
+
+/* Read the 32-bit controller register located @offset bytes from the base. */
+static u32 dmc520_read_reg(struct dmc520_edac *pvt, u32 offset)
+{
+	void __iomem *addr = pvt->reg_base + offset;
+
+	return readl(addr);
+}
+
+/* Write @val to the 32-bit controller register @offset bytes from the base. */
+static void dmc520_write_reg(struct dmc520_edac *pvt, u32 val, u32 offset)
+{
+	void __iomem *addr = pvt->reg_base + offset;
+
+	writel(val, addr);
+}
+
+/*
+ * Sum the byte-wide per-rank error counters packed into @value and return
+ * the total.
+ */
+static u32 dmc520_calc_dram_ecc_error(u32 value)
+{
+	u32 sum;
+
+	/* Each rank's error counter takes one byte; stop when none are left. */
+	for (sum = 0; value; value >>= 8)
+		sum += value & 0xFF;
+
+	return sum;
+}
+
+/*
+ * Read the corrected (@is_ce true) or uncorrected (@is_ce false) DRAM ECC
+ * error counter register pair, write zero back to both registers, and return
+ * the sum of all per-rank counters that were read.
+ */
+static u32 dmc520_get_dram_ecc_error_count(struct dmc520_edac *pvt,
+					   bool is_ce)
+{
+	u32 off_lo, off_hi;
+	u32 lo, hi;
+
+	if (is_ce) {
+		off_lo = REG_OFFSET_ECC_ERRC_COUNT_31_00;
+		off_hi = REG_OFFSET_ECC_ERRC_COUNT_63_32;
+	} else {
+		off_lo = REG_OFFSET_ECC_ERRD_COUNT_31_00;
+		off_hi = REG_OFFSET_ECC_ERRD_COUNT_63_32;
+	}
+
+	lo = dmc520_read_reg(pvt, off_lo);
+	hi = dmc520_read_reg(pvt, off_hi);
+
+	/* Reset error counters */
+	dmc520_write_reg(pvt, 0, off_lo);
+	dmc520_write_reg(pvt, 0, off_hi);
+
+	return dmc520_calc_dram_ecc_error(lo) + dmc520_calc_dram_ecc_error(hi);
+}
+
+/*
+ * Decode the captured location of the last corrected (@is_ce true) or
+ * uncorrected (@is_ce false) DRAM ECC error into @info. @info is zeroed
+ * unless both words of the info register pair have their valid bit set.
+ */
+static void dmc520_get_dram_ecc_error_info(struct dmc520_edac *pvt,
+					    bool is_ce,
+					    struct ecc_error_info *info)
+{
+	u32 off_lo, off_hi;
+	u32 lo, hi;
+
+	if (is_ce) {
+		off_lo = REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00;
+		off_hi = REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32;
+	} else {
+		off_lo = REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00;
+		off_hi = REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32;
+	}
+
+	lo = dmc520_read_reg(pvt, off_lo);
+	hi = dmc520_read_reg(pvt, off_hi);
+
+	/* Nothing usable was captured: report an all-zero location. */
+	if (!FIELD_GET(REG_FIELD_ERR_INFO_LOW_VALID, lo) ||
+	    !FIELD_GET(REG_FIELD_ERR_INFO_HIGH_VALID, hi)) {
+		memset(info, 0, sizeof(*info));
+		return;
+	}
+
+	info->col = FIELD_GET(REG_FIELD_ERR_INFO_LOW_COL, lo);
+	info->row = FIELD_GET(REG_FIELD_ERR_INFO_LOW_ROW, lo);
+	info->rank = FIELD_GET(REG_FIELD_ERR_INFO_LOW_RANK, lo);
+	info->bank = FIELD_GET(REG_FIELD_ERR_INFO_HIGH_BANK, hi);
+}
+
+/*
+ * Return true when the DRAM ECC field of the FEATURE_CONFIG register is
+ * non-zero. Takes the raw register base because probe calls it before the
+ * driver private data exists.
+ */
+static bool dmc520_is_ecc_enabled(void __iomem *reg_base)
+{
+	u32 feature_cfg = readl(reg_base + REG_OFFSET_FEATURE_CONFIG);
+
+	return FIELD_GET(REG_FIELD_DRAM_ECC_ENABLED, feature_cfg) != 0;
+}
+
+/*
+ * Report the scrub mode: SCRUB_HW_PROG when the scrub trigger field of
+ * SCRUB_CONTROL0_NOW selects error-detect or idle triggering, SCRUB_NONE
+ * for any other setting.
+ */
+static enum scrub_type dmc520_get_scrub_type(struct dmc520_edac *pvt)
+{
+	u32 ctrl = dmc520_read_reg(pvt, REG_OFFSET_SCRUB_CONTROL0_NOW);
+	u32 trigger = FIELD_GET(SCRUB_TRIGGER0_NEXT_MASK, ctrl);
+
+	switch (trigger) {
+	case DMC520_SCRUB_TRIGGER_ERR_DETECT:
+	case DMC520_SCRUB_TRIGGER_IDLE:
+		return SCRUB_HW_PROG;
+	default:
+		return SCRUB_NONE;
+	}
+}
+
+/*
+ * Get the memory data bus width, in number of bytes.
+ * Returns 0 when the FORMAT_CONTROL width field holds an encoding other
+ * than X32 or X64.
+ */
+static u32 dmc520_get_memory_width(struct dmc520_edac *pvt)
+{
+	u32 fmt = dmc520_read_reg(pvt, REG_OFFSET_FORMAT_CONTROL);
+	enum dmc520_mem_width width = FIELD_GET(MEMORY_WIDTH_MASK, fmt);
+
+	switch (width) {
+	case MEM_WIDTH_X32:
+		return 4;
+	case MEM_WIDTH_X64:
+		return 8;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Translate the memory type field of MEMORY_TYPE_NOW into the EDAC memory
+ * type; encodings other than DDR3/DDR4 are reported as MEM_UNKNOWN.
+ */
+static enum mem_type dmc520_get_mtype(struct dmc520_edac *pvt)
+{
+	u32 raw = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW);
+	u32 type = FIELD_GET(REG_FIELD_MEMORY_TYPE, raw);
+
+	if (type == MEM_TYPE_DDR3)
+		return MEM_DDR3;
+
+	if (type == MEM_TYPE_DDR4)
+		return MEM_DDR4;
+
+	return MEM_UNKNOWN;
+}
+
+/*
+ * Translate the device width field of MEMORY_TYPE_NOW into the EDAC device
+ * type; encodings other than x4/x8/x16 are reported as DEV_UNKNOWN.
+ */
+static enum dev_type dmc520_get_dtype(struct dmc520_edac *pvt)
+{
+	u32 raw = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW);
+	u32 width = FIELD_GET(REG_FIELD_DEVICE_WIDTH, raw);
+
+	if (width == DEV_WIDTH_X4)
+		return DEV_X4;
+
+	if (width == DEV_WIDTH_X8)
+		return DEV_X8;
+
+	if (width == DEV_WIDTH_X16)
+		return DEV_X16;
+
+	return DEV_UNKNOWN;
+}
+
+/*
+ * Return the number of ranks, computed as 2 to the power of the rank field
+ * of ADDRESS_CONTROL_NOW. Takes the raw register base because probe calls it
+ * before the driver private data exists.
+ */
+static u32 dmc520_get_rank_count(void __iomem *reg_base)
+{
+	u32 addr_ctrl = readl(reg_base + REG_OFFSET_ADDRESS_CONTROL_NOW);
+
+	return BIT(FIELD_GET(REG_FIELD_ADDRESS_CONTROL_RANK, addr_ctrl));
+}
+
+/*
+ * Return the size of one rank in bytes: the data bus width in bytes shifted
+ * left by the total number of column, row and bank address bits read from
+ * ADDRESS_CONTROL_NOW.
+ */
+static u64 dmc520_get_rank_size(struct dmc520_edac *pvt)
+{
+	u32 addr_ctrl = dmc520_read_reg(pvt, REG_OFFSET_ADDRESS_CONTROL_NOW);
+	u32 addr_bits;
+
+	/* The col and row fields are stored relative to their minimum width. */
+	addr_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_COL, addr_ctrl) +
+		    DRAM_ADDRESS_CONTROL_MIN_COL_BITS;
+	addr_bits += FIELD_GET(REG_FIELD_ADDRESS_CONTROL_ROW, addr_ctrl) +
+		     DRAM_ADDRESS_CONTROL_MIN_ROW_BITS;
+	addr_bits += FIELD_GET(REG_FIELD_ADDRESS_CONTROL_BANK, addr_ctrl);
+
+	return (u64)pvt->mem_width_in_bytes << addr_bits;
+}
+
+/*
+ * Report pending DRAM ECC errors to the EDAC core.
+ *
+ * Reads the captured error location, then reads and resets the error
+ * counters. If the counters were zero nothing is reported. Otherwise one
+ * event carrying the total count is raised as corrected (@is_ce true) or
+ * uncorrected (@is_ce false), with the rank as the layer position and the
+ * full rank/bank/row/col location in the message text.
+ */
+static void dmc520_handle_dram_ecc_errors(struct mem_ctl_info *mci,
+					   bool is_ce)
+{
+	struct dmc520_edac *pvt = mci->pvt_info;
+	char message[DMC520_MSG_BUF_SIZE];
+	struct ecc_error_info info;
+	u32 cnt;
+
+	dmc520_get_dram_ecc_error_info(pvt, is_ce, &info);
+
+	cnt = dmc520_get_dram_ecc_error_count(pvt, is_ce);
+	if (!cnt)
+		return;
+
+	/* All four fields are u32, so format them as unsigned. */
+	snprintf(message, ARRAY_SIZE(message),
+		 "rank:%u bank:%u row:%u col:%u",
+		 info.rank, info.bank,
+		 info.row, info.col);
+
+	/* error_lock serializes writers of mci->error_desc. */
+	spin_lock(&pvt->error_lock);
+	edac_mc_handle_error((is_ce ? HW_EVENT_ERR_CORRECTED :
+			      HW_EVENT_ERR_UNCORRECTED),
+			     mci, cnt, 0, 0, 0, info.rank, -1, -1,
+			     message, "");
+	spin_unlock(&pvt->error_lock);
+}
+
+/*
+ * Service one DRAM ECC interrupt: report the errors, then write the
+ * corresponding CE or UE bit to the interrupt clear register.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t dmc520_edac_dram_ecc_isr(int irq, struct mem_ctl_info *mci,
+					     bool is_ce)
+{
+	struct dmc520_edac *pvt = mci->pvt_info;
+	u32 clr_bit;
+
+	if (is_ce)
+		clr_bit = DRAM_ECC_INT_CE_BIT;
+	else
+		clr_bit = DRAM_ECC_INT_UE_BIT;
+
+	dmc520_handle_dram_ecc_errors(mci, is_ce);
+
+	dmc520_write_reg(pvt, clr_bit, REG_OFFSET_INTERRUPT_CLR);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Dispatch the DRAM ECC sources that are both selected by @irq_mask and
+ * flagged in the interrupt status register. Returns IRQ_NONE when neither
+ * the CE nor the UE source qualified.
+ */
+static irqreturn_t dmc520_edac_dram_all_isr(int irq, struct mem_ctl_info *mci,
+					     u32 irq_mask)
+{
+	struct dmc520_edac *pvt = mci->pvt_info;
+	irqreturn_t handled = IRQ_NONE;
+	u32 pending;
+
+	/* Keep only the status bits that belong to this interrupt line. */
+	pending = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_STATUS) & irq_mask;
+
+	if (pending & DRAM_ECC_INT_CE_BIT)
+		handled = dmc520_edac_dram_ecc_isr(irq, mci, true);
+
+	if (pending & DRAM_ECC_INT_UE_BIT)
+		handled = dmc520_edac_dram_ecc_isr(irq, mci, false);
+
+	return handled;
+}
+
+/*
+ * Common handler registered for every interrupt line. Looks up the mask
+ * recorded for @irq (0 if the number is not in the table) and hands it to
+ * the dispatcher. @data is the mem_ctl_info passed at request time.
+ */
+static irqreturn_t dmc520_isr(int irq, void *data)
+{
+	struct mem_ctl_info *mci = data;
+	struct dmc520_edac *pvt = mci->pvt_info;
+	u32 line_mask = 0;
+	int i;
+
+	for (i = 0; i < NUMBER_OF_IRQS; i++) {
+		if (pvt->irqs[i] != irq)
+			continue;
+
+		/* First match wins. */
+		line_mask = pvt->masks[i];
+		break;
+	}
+
+	return dmc520_edac_dram_all_isr(irq, mci, line_mask);
+}
+
+/*
+ * Fill in the DIMM description of every channel of every csrow: grain,
+ * device type, memory type, ECC mode and page count. The page count is the
+ * per-rank page count divided by the csrow's channel count.
+ */
+static void dmc520_init_csrow(struct mem_ctl_info *mci)
+{
+	struct dmc520_edac *pvt = mci->pvt_info;
+	enum dev_type dev = dmc520_get_dtype(pvt);
+	enum mem_type mem = dmc520_get_mtype(pvt);
+	u32 rank_pages = dmc520_get_rank_size(pvt) >> PAGE_SHIFT;
+	int r, c;
+
+	for (r = 0; r < mci->nr_csrows; r++) {
+		struct csrow_info *csrow = mci->csrows[r];
+
+		for (c = 0; c < csrow->nr_channels; c++) {
+			struct dimm_info *dimm = csrow->channels[c]->dimm;
+
+			dimm->grain = pvt->mem_width_in_bytes;
+			dimm->dtype = dev;
+			dimm->mtype = mem;
+			dimm->edac_mode = EDAC_FLAG_SECDED;
+			dimm->nr_pages = rank_pages / csrow->nr_channels;
+		}
+	}
+}
+
+/*
+ * Bind to one DMC-520 instance.
+ *
+ * Looks up every interrupt line named in dmc520_irq_configs[] (each one is
+ * optional, but at least one must be present), maps the registers, checks
+ * that ECC is enabled, allocates and fills the EDAC memory controller,
+ * requests the discovered IRQs with the controller's interrupts masked, and
+ * finally registers with the EDAC core and enables the interrupts.
+ *
+ * Returns 0 on success, -EINVAL when no interrupt line was found, the
+ * ioremap error, -ENXIO when ECC is disabled, -ENOMEM when the controller
+ * could not be allocated, or the error returned by devm_request_irq() or
+ * edac_mc_add_mc().
+ */
+static int dmc520_edac_probe(struct platform_device *pdev)
+{
+	bool registered[NUMBER_OF_IRQS] = { false };
+	int irqs[NUMBER_OF_IRQS] = { -ENXIO };
+	int masks[NUMBER_OF_IRQS] = { 0 };
+	struct edac_mc_layer layers[1];
+	struct dmc520_edac *pvt = NULL;
+	struct mem_ctl_info *mci;
+	void __iomem *reg_base;
+	u32 irq_mask_all = 0;
+	struct resource *res;
+	struct device *dev;
+	int ret, idx, irq;
+	u32 reg_val;
+
+	/* Parse the device node */
+	dev = &pdev->dev;
+
+	for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+		/*
+		 * Every individual line is optional, so use the lookup that
+		 * does not log an error when the name is absent.
+		 */
+		irq = platform_get_irq_byname_optional(pdev,
+						       dmc520_irq_configs[idx].name);
+		irqs[idx] = irq;
+		masks[idx] = dmc520_irq_configs[idx].mask;
+		if (irq >= 0) {
+			irq_mask_all |= dmc520_irq_configs[idx].mask;
+			edac_dbg(0, "Discovered %s, irq: %d.\n", dmc520_irq_configs[idx].name, irq);
+		}
+	}
+
+	if (!irq_mask_all) {
+		edac_printk(KERN_ERR, EDAC_MOD_NAME,
+			    "At least one valid interrupt line is expected.\n");
+		return -EINVAL;
+	}
+
+	/* Initialize dmc520 edac */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	reg_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(reg_base))
+		return PTR_ERR(reg_base);
+
+	if (!dmc520_is_ecc_enabled(reg_base))
+		return -ENXIO;
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = dmc520_get_rank_count(reg_base);
+	layers[0].is_virt_csrow = true;
+
+	mci = edac_mc_alloc(dmc520_mc_idx++, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+	if (!mci) {
+		edac_printk(KERN_ERR, EDAC_MOD_NAME,
+			    "Failed to allocate memory for mc instance\n");
+		ret = -ENOMEM;
+		/* Safe: nothing is registered yet and mci is NULL. */
+		goto err;
+	}
+
+	pvt = mci->pvt_info;
+
+	pvt->reg_base = reg_base;
+	spin_lock_init(&pvt->error_lock);
+	memcpy(pvt->irqs, irqs, sizeof(irqs));
+	memcpy(pvt->masks, masks, sizeof(masks));
+
+	platform_set_drvdata(pdev, mci);
+
+	mci->pdev = dev;
+	mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
+	mci->scrub_mode = dmc520_get_scrub_type(pvt);
+	mci->ctl_name = EDAC_CTL_NAME;
+	mci->dev_name = dev_name(mci->pdev);
+	mci->mod_name = EDAC_MOD_NAME;
+
+	edac_op_state = EDAC_OPSTATE_INT;
+
+	/* Must be set before dmc520_init_csrow(), which derives sizes from it. */
+	pvt->mem_width_in_bytes = dmc520_get_memory_width(pvt);
+
+	dmc520_init_csrow(mci);
+
+	/* Clear interrupts, not affecting other unrelated interrupts */
+	reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL);
+	dmc520_write_reg(pvt, reg_val & (~irq_mask_all),
+			 REG_OFFSET_INTERRUPT_CONTROL);
+	dmc520_write_reg(pvt, irq_mask_all, REG_OFFSET_INTERRUPT_CLR);
+
+	for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+		irq = irqs[idx];
+		if (irq >= 0) {
+			ret = devm_request_irq(&pdev->dev, irq,
+					       dmc520_isr, IRQF_SHARED,
+					       dev_name(&pdev->dev), mci);
+			if (ret < 0) {
+				edac_printk(KERN_ERR, EDAC_MC,
+					    "Failed to request irq %d\n", irq);
+				goto err;
+			}
+			/* Remember it so the error path frees only what we own. */
+			registered[idx] = true;
+		}
+	}
+
+	/* Reset DRAM CE/UE counters */
+	if (irq_mask_all & DRAM_ECC_INT_CE_BIT)
+		dmc520_get_dram_ecc_error_count(pvt, true);
+
+	if (irq_mask_all & DRAM_ECC_INT_UE_BIT)
+		dmc520_get_dram_ecc_error_count(pvt, false);
+
+	ret = edac_mc_add_mc(mci);
+	if (ret) {
+		edac_printk(KERN_ERR, EDAC_MOD_NAME,
+			    "Failed to register with EDAC core\n");
+		goto err;
+	}
+
+	/* Enable interrupts, not affecting other unrelated interrupts */
+	dmc520_write_reg(pvt, reg_val | irq_mask_all,
+			 REG_OFFSET_INTERRUPT_CONTROL);
+
+	return 0;
+
+err:
+	for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+		if (registered[idx])
+			devm_free_irq(&pdev->dev, pvt->irqs[idx], mci);
+	}
+	if (mci)
+		edac_mc_free(mci);
+
+	return ret;
+}
+
+/*
+ * Unbind from a DMC-520 instance: mask the interrupts this driver enabled,
+ * release the IRQs, then unregister and free the EDAC memory controller.
+ * Always returns 0.
+ */
+static int dmc520_edac_remove(struct platform_device *pdev)
+{
+	u32 reg_val, idx, irq_mask_all = 0;
+	struct mem_ctl_info *mci;
+	struct dmc520_edac *pvt;
+
+	mci = platform_get_drvdata(pdev);
+	pvt = mci->pvt_info;
+
+	/*
+	 * Build the mask of the lines probe discovered before touching the
+	 * hardware; masking with an empty mask would leave them all enabled.
+	 */
+	for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+		if (pvt->irqs[idx] >= 0)
+			irq_mask_all |= pvt->masks[idx];
+	}
+
+	/* Disable interrupts */
+	reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL);
+	dmc520_write_reg(pvt, reg_val & (~irq_mask_all),
+			 REG_OFFSET_INTERRUPT_CONTROL);
+
+	/* free irq's */
+	for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+		if (pvt->irqs[idx] >= 0)
+			devm_free_irq(&pdev->dev, pvt->irqs[idx], mci);
+	}
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	return 0;
+}
+
+/*
+ * Device-tree match table: the driver binds to nodes whose compatible
+ * string is "arm,dmc-520". The empty entry terminates the table.
+ */
+static const struct of_device_id dmc520_edac_driver_id[] = {
+ { .compatible = "arm,dmc-520", },
+ { /* end of table */ }
+};
+
+MODULE_DEVICE_TABLE(of, dmc520_edac_driver_id);
+
+/*
+ * Platform driver description: named "dmc520", matched through
+ * dmc520_edac_driver_id, with dmc520_edac_probe() and dmc520_edac_remove()
+ * as the bind and unbind callbacks.
+ */
+static struct platform_driver dmc520_edac_driver = {
+ .driver = {
+ .name = "dmc520",
+ .of_match_table = dmc520_edac_driver_id,
+ },
+
+ .probe = dmc520_edac_probe,
+ .remove = dmc520_edac_remove
+};
+
+module_platform_driver(dmc520_edac_driver);
+
+MODULE_AUTHOR("Rui Zhao <ruizhao@microsoft.com>");
+MODULE_AUTHOR("Lei Wang <lewan@microsoft.com>");
+MODULE_AUTHOR("Shiping Ji <shji@microsoft.com>");
+MODULE_DESCRIPTION("DMC-520 ECC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 69e0d90460e6..75ede27bdf6a 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -55,6 +55,11 @@ static LIST_HEAD(mc_devices);
*/
static const char *edac_mc_owner;
+/*
+ * Map an error descriptor back to the memory controller that embeds it.
+ * @e must point at the error_desc member of a struct mem_ctl_info.
+ */
+static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e)
+{
+	struct mem_ctl_info *owner;
+
+	owner = container_of(e, struct mem_ctl_info, error_desc);
+
+	return owner;
+}
+
int edac_get_report_status(void)
{
return edac_report;
@@ -278,6 +283,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems)
static void _edac_mc_free(struct mem_ctl_info *mci)
{
+ put_device(&mci->dev);
+}
+
+static void mci_release(struct device *dev)
+{
+ struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
struct csrow_info *csr;
int i, chn, row;
@@ -305,103 +316,26 @@ static void _edac_mc_free(struct mem_ctl_info *mci)
kfree(mci);
}
-struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
- unsigned int n_layers,
- struct edac_mc_layer *layers,
- unsigned int sz_pvt)
+static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
{
- struct mem_ctl_info *mci;
- struct edac_mc_layer *layer;
- struct csrow_info *csr;
- struct rank_info *chan;
- struct dimm_info *dimm;
- u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
- unsigned int pos[EDAC_MAX_LAYERS];
- unsigned int idx, size, tot_dimms = 1, count = 1;
- unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
- void *pvt, *p, *ptr = NULL;
- int i, j, row, chn, n, len;
- bool per_rank = false;
-
- if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
- return NULL;
-
- /*
- * Calculate the total amount of dimms and csrows/cschannels while
- * in the old API emulation mode
- */
- for (idx = 0; idx < n_layers; idx++) {
- tot_dimms *= layers[idx].size;
-
- if (layers[idx].is_virt_csrow)
- tot_csrows *= layers[idx].size;
- else
- tot_channels *= layers[idx].size;
-
- if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
- per_rank = true;
- }
-
- /* Figure out the offsets of the various items from the start of an mc
- * structure. We want the alignment of each item to be at least as
- * stringent as what the compiler would provide if we could simply
- * hardcode everything into a single struct.
- */
- mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
- layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
- for (i = 0; i < n_layers; i++) {
- count *= layers[i].size;
- edac_dbg(4, "errcount layer %d size %d\n", i, count);
- ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
- ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
- tot_errcount += 2 * count;
- }
-
- edac_dbg(4, "allocating %d error counters\n", tot_errcount);
- pvt = edac_align_ptr(&ptr, sz_pvt, 1);
- size = ((unsigned long)pvt) + sz_pvt;
-
- edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
- size,
- tot_dimms,
- per_rank ? "ranks" : "dimms",
- tot_csrows * tot_channels);
-
- mci = kzalloc(size, GFP_KERNEL);
- if (mci == NULL)
- return NULL;
-
- /* Adjust pointers so they point within the memory we just allocated
- * rather than an imaginary chunk of memory located at address 0.
- */
- layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
- for (i = 0; i < n_layers; i++) {
- mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
- mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
- }
- pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
-
- /* setup index and various internal pointers */
- mci->mc_idx = mc_num;
- mci->tot_dimms = tot_dimms;
- mci->pvt_info = pvt;
- mci->n_layers = n_layers;
- mci->layers = layer;
- memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
- mci->nr_csrows = tot_csrows;
- mci->num_cschannel = tot_channels;
- mci->csbased = per_rank;
+ unsigned int tot_channels = mci->num_cschannel;
+ unsigned int tot_csrows = mci->nr_csrows;
+ unsigned int row, chn;
/*
* Alocate and fill the csrow/channels structs
*/
mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
if (!mci->csrows)
- goto error;
+ return -ENOMEM;
+
for (row = 0; row < tot_csrows; row++) {
+ struct csrow_info *csr;
+
csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
if (!csr)
- goto error;
+ return -ENOMEM;
+
mci->csrows[row] = csr;
csr->csrow_idx = row;
csr->mci = mci;
@@ -409,34 +343,51 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
GFP_KERNEL);
if (!csr->channels)
- goto error;
+ return -ENOMEM;
for (chn = 0; chn < tot_channels; chn++) {
+ struct rank_info *chan;
+
chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
if (!chan)
- goto error;
+ return -ENOMEM;
+
csr->channels[chn] = chan;
chan->chan_idx = chn;
chan->csrow = csr;
}
}
+ return 0;
+}
+
+static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
+{
+ unsigned int pos[EDAC_MAX_LAYERS];
+ unsigned int row, chn, idx;
+ int layer;
+ void *p;
+
/*
* Allocate and fill the dimm structs
*/
- mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
+ mci->dimms = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
if (!mci->dimms)
- goto error;
+ return -ENOMEM;
memset(&pos, 0, sizeof(pos));
row = 0;
chn = 0;
- for (idx = 0; idx < tot_dimms; idx++) {
+ for (idx = 0; idx < mci->tot_dimms; idx++) {
+ struct dimm_info *dimm;
+ struct rank_info *chan;
+ int n, len;
+
chan = mci->csrows[row]->channels[chn];
dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
if (!dimm)
- goto error;
+ return -ENOMEM;
mci->dimms[idx] = dimm;
dimm->mci = mci;
dimm->idx = idx;
@@ -446,16 +397,16 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
*/
len = sizeof(dimm->label);
p = dimm->label;
- n = snprintf(p, len, "mc#%u", mc_num);
+ n = snprintf(p, len, "mc#%u", mci->mc_idx);
p += n;
len -= n;
- for (j = 0; j < n_layers; j++) {
+ for (layer = 0; layer < mci->n_layers; layer++) {
n = snprintf(p, len, "%s#%u",
- edac_layer_name[layers[j].type],
- pos[j]);
+ edac_layer_name[mci->layers[layer].type],
+ pos[layer]);
p += n;
len -= n;
- dimm->location[j] = pos[j];
+ dimm->location[layer] = pos[layer];
if (len <= 0)
break;
@@ -467,29 +418,109 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
dimm->cschannel = chn;
/* Increment csrow location */
- if (layers[0].is_virt_csrow) {
+ if (mci->layers[0].is_virt_csrow) {
chn++;
- if (chn == tot_channels) {
+ if (chn == mci->num_cschannel) {
chn = 0;
row++;
}
} else {
row++;
- if (row == tot_csrows) {
+ if (row == mci->nr_csrows) {
row = 0;
chn++;
}
}
/* Increment dimm location */
- for (j = n_layers - 1; j >= 0; j--) {
- pos[j]++;
- if (pos[j] < layers[j].size)
+ for (layer = mci->n_layers - 1; layer >= 0; layer--) {
+ pos[layer]++;
+ if (pos[layer] < mci->layers[layer].size)
break;
- pos[j] = 0;
+ pos[layer] = 0;
}
}
+ return 0;
+}
+
+struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
+ unsigned int n_layers,
+ struct edac_mc_layer *layers,
+ unsigned int sz_pvt)
+{
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer *layer;
+ unsigned int idx, size, tot_dimms = 1;
+ unsigned int tot_csrows = 1, tot_channels = 1;
+ void *pvt, *ptr = NULL;
+ bool per_rank = false;
+
+ if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
+ return NULL;
+
+ /*
+ * Calculate the total amount of dimms and csrows/cschannels while
+ * in the old API emulation mode
+ */
+ for (idx = 0; idx < n_layers; idx++) {
+ tot_dimms *= layers[idx].size;
+
+ if (layers[idx].is_virt_csrow)
+ tot_csrows *= layers[idx].size;
+ else
+ tot_channels *= layers[idx].size;
+
+ if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
+ per_rank = true;
+ }
+
+ /* Figure out the offsets of the various items from the start of an mc
+ * structure. We want the alignment of each item to be at least as
+ * stringent as what the compiler would provide if we could simply
+ * hardcode everything into a single struct.
+ */
+ mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
+ layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
+ pvt = edac_align_ptr(&ptr, sz_pvt, 1);
+ size = ((unsigned long)pvt) + sz_pvt;
+
+ edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
+ size,
+ tot_dimms,
+ per_rank ? "ranks" : "dimms",
+ tot_csrows * tot_channels);
+
+ mci = kzalloc(size, GFP_KERNEL);
+ if (mci == NULL)
+ return NULL;
+
+ mci->dev.release = mci_release;
+ device_initialize(&mci->dev);
+
+ /* Adjust pointers so they point within the memory we just allocated
+ * rather than an imaginary chunk of memory located at address 0.
+ */
+ layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
+ pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
+
+ /* setup index and various internal pointers */
+ mci->mc_idx = mc_num;
+ mci->tot_dimms = tot_dimms;
+ mci->pvt_info = pvt;
+ mci->n_layers = n_layers;
+ mci->layers = layer;
+ memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
+ mci->nr_csrows = tot_csrows;
+ mci->num_cschannel = tot_channels;
+ mci->csbased = per_rank;
+
+ if (edac_mc_alloc_csrows(mci))
+ goto error;
+
+ if (edac_mc_alloc_dimms(mci))
+ goto error;
+
mci->op_state = OP_ALLOC;
return mci;
@@ -505,9 +536,6 @@ void edac_mc_free(struct mem_ctl_info *mci)
{
edac_dbg(1, "\n");
- if (device_is_registered(&mci->dev))
- edac_unregister_sysfs(mci);
-
_edac_mc_free(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
@@ -902,88 +930,51 @@ const char *edac_layer_name[] = {
};
EXPORT_SYMBOL_GPL(edac_layer_name);
-static void edac_inc_ce_error(struct mem_ctl_info *mci,
- bool enable_per_layer_report,
- const int pos[EDAC_MAX_LAYERS],
- const u16 count)
+static void edac_inc_ce_error(struct edac_raw_error_desc *e)
{
- int i, index = 0;
-
- mci->ce_mc += count;
-
- if (!enable_per_layer_report) {
- mci->ce_noinfo_count += count;
- return;
- }
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
- for (i = 0; i < mci->n_layers; i++) {
- if (pos[i] < 0)
- break;
- index += pos[i];
- mci->ce_per_layer[i][index] += count;
+ mci->ce_mc += e->error_count;
- if (i < mci->n_layers - 1)
- index *= mci->layers[i + 1].size;
- }
+ if (dimm)
+ dimm->ce_count += e->error_count;
+ else
+ mci->ce_noinfo_count += e->error_count;
}
-static void edac_inc_ue_error(struct mem_ctl_info *mci,
- bool enable_per_layer_report,
- const int pos[EDAC_MAX_LAYERS],
- const u16 count)
+static void edac_inc_ue_error(struct edac_raw_error_desc *e)
{
- int i, index = 0;
-
- mci->ue_mc += count;
-
- if (!enable_per_layer_report) {
- mci->ue_noinfo_count += count;
- return;
- }
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
- for (i = 0; i < mci->n_layers; i++) {
- if (pos[i] < 0)
- break;
- index += pos[i];
- mci->ue_per_layer[i][index] += count;
+ mci->ue_mc += e->error_count;
- if (i < mci->n_layers - 1)
- index *= mci->layers[i + 1].size;
- }
+ if (dimm)
+ dimm->ue_count += e->error_count;
+ else
+ mci->ue_noinfo_count += e->error_count;
}
-static void edac_ce_error(struct mem_ctl_info *mci,
- const u16 error_count,
- const int pos[EDAC_MAX_LAYERS],
- const char *msg,
- const char *location,
- const char *label,
- const char *detail,
- const char *other_detail,
- const bool enable_per_layer_report,
- const unsigned long page_frame_number,
- const unsigned long offset_in_page,
- long grain)
+static void edac_ce_error(struct edac_raw_error_desc *e)
{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
unsigned long remapped_page;
- char *msg_aux = "";
-
- if (*msg)
- msg_aux = " ";
if (edac_mc_get_log_ce()) {
- if (other_detail && *other_detail)
- edac_mc_printk(mci, KERN_WARNING,
- "%d CE %s%son %s (%s %s - %s)\n",
- error_count, msg, msg_aux, label,
- location, detail, other_detail);
- else
- edac_mc_printk(mci, KERN_WARNING,
- "%d CE %s%son %s (%s %s)\n",
- error_count, msg, msg_aux, label,
- location, detail);
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n",
+ e->error_count, e->msg,
+ *e->msg ? " " : "",
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
+ e->grain, e->syndrome,
+ *e->other_detail ? " - " : "",
+ e->other_detail);
}
- edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
+
+ edac_inc_ce_error(e);
if (mci->scrub_mode == SCRUB_SW_SRC) {
/*
@@ -998,60 +989,64 @@ static void edac_ce_error(struct mem_ctl_info *mci,
* be scrubbed.
*/
remapped_page = mci->ctl_page_to_phys ?
- mci->ctl_page_to_phys(mci, page_frame_number) :
- page_frame_number;
+ mci->ctl_page_to_phys(mci, e->page_frame_number) :
+ e->page_frame_number;
- edac_mc_scrub_block(remapped_page,
- offset_in_page, grain);
+ edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
}
}
-static void edac_ue_error(struct mem_ctl_info *mci,
- const u16 error_count,
- const int pos[EDAC_MAX_LAYERS],
- const char *msg,
- const char *location,
- const char *label,
- const char *detail,
- const char *other_detail,
- const bool enable_per_layer_report)
+static void edac_ue_error(struct edac_raw_error_desc *e)
{
- char *msg_aux = "";
-
- if (*msg)
- msg_aux = " ";
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
if (edac_mc_get_log_ue()) {
- if (other_detail && *other_detail)
- edac_mc_printk(mci, KERN_WARNING,
- "%d UE %s%son %s (%s %s - %s)\n",
- error_count, msg, msg_aux, label,
- location, detail, other_detail);
- else
- edac_mc_printk(mci, KERN_WARNING,
- "%d UE %s%son %s (%s %s)\n",
- error_count, msg, msg_aux, label,
- location, detail);
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
+ e->error_count, e->msg,
+ *e->msg ? " " : "",
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
+ e->grain,
+ *e->other_detail ? " - " : "",
+ e->other_detail);
}
if (edac_mc_get_panic_on_ue()) {
- if (other_detail && *other_detail)
- panic("UE %s%son %s (%s%s - %s)\n",
- msg, msg_aux, label, location, detail, other_detail);
- else
- panic("UE %s%son %s (%s%s)\n",
- msg, msg_aux, label, location, detail);
+ panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
+ e->msg,
+ *e->msg ? " " : "",
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
+ e->grain,
+ *e->other_detail ? " - " : "",
+ e->other_detail);
}
- edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
+ edac_inc_ue_error(e);
}
-void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
- struct mem_ctl_info *mci,
- struct edac_raw_error_desc *e)
+static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
{
- char detail[80];
- int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ enum hw_event_mc_err_type type = e->type;
+ u16 count = e->error_count;
+
+ if (row < 0)
+ return;
+
+ edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
+
+ if (type == HW_EVENT_ERR_CORRECTED) {
+ mci->csrows[row]->ce_count += count;
+ if (chan >= 0)
+ mci->csrows[row]->channels[chan]->ce_count += count;
+ } else {
+ mci->csrows[row]->ue_count += count;
+ }
+}
+
+void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
+{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
u8 grain_bits;
/* Sanity-check driver-supplied grain value. */
@@ -1062,31 +1057,16 @@ void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
/* Report the error via the trace interface */
if (IS_ENABLED(CONFIG_RAS))
- trace_mc_event(type, e->msg, e->label, e->error_count,
+ trace_mc_event(e->type, e->msg, e->label, e->error_count,
mci->mc_idx, e->top_layer, e->mid_layer,
e->low_layer,
(e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
grain_bits, e->syndrome, e->other_detail);
- /* Memory type dependent details about the error */
- if (type == HW_EVENT_ERR_CORRECTED) {
- snprintf(detail, sizeof(detail),
- "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
- e->page_frame_number, e->offset_in_page,
- e->grain, e->syndrome);
- edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
- detail, e->other_detail, e->enable_per_layer_report,
- e->page_frame_number, e->offset_in_page, e->grain);
- } else {
- snprintf(detail, sizeof(detail),
- "page:0x%lx offset:0x%lx grain:%ld",
- e->page_frame_number, e->offset_in_page, e->grain);
-
- edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
- detail, e->other_detail, e->enable_per_layer_report);
- }
-
-
+ if (e->type == HW_EVENT_ERR_CORRECTED)
+ edac_ce_error(e);
+ else
+ edac_ue_error(e);
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
@@ -1108,25 +1088,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
int i, n_labels = 0;
struct edac_raw_error_desc *e = &mci->error_desc;
+ bool any_memory = true;
edac_dbg(3, "MC%d\n", mci->mc_idx);
/* Fills the error report buffer */
memset(e, 0, sizeof (*e));
e->error_count = error_count;
+ e->type = type;
e->top_layer = top_layer;
e->mid_layer = mid_layer;
e->low_layer = low_layer;
e->page_frame_number = page_frame_number;
e->offset_in_page = offset_in_page;
e->syndrome = syndrome;
- e->msg = msg;
- e->other_detail = other_detail;
+ /* need valid strings here for both: */
+ e->msg = msg ?: "";
+ e->other_detail = other_detail ?: "";
/*
- * Check if the event report is consistent and if the memory
- * location is known. If it is known, enable_per_layer_report will be
- * true, the DIMM(s) label info will be filled and the per-layer
+ * Check if the event report is consistent and if the memory location is
+ * known. If it is, the DIMM(s) label info will be filled and the DIMM's
* error counters will be incremented.
*/
for (i = 0; i < mci->n_layers; i++) {
@@ -1145,7 +1127,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
pos[i] = -1;
}
if (pos[i] >= 0)
- e->enable_per_layer_report = true;
+ any_memory = false;
}
/*
@@ -1176,24 +1158,25 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
/*
* If the error is memory-controller wide, there's no need to
- * seek for the affected DIMMs because the whole
- * channel/memory controller/... may be affected.
- * Also, don't show errors for empty DIMM slots.
+ * seek for the affected DIMMs because the whole channel/memory
+ * controller/... may be affected. Also, don't show errors for
+ * empty DIMM slots.
*/
- if (!e->enable_per_layer_report || !dimm->nr_pages)
+ if (!dimm->nr_pages)
continue;
- if (n_labels >= EDAC_MAX_LABELS) {
- e->enable_per_layer_report = false;
- break;
- }
n_labels++;
- if (p != e->label) {
- strcpy(p, OTHER_LABEL);
- p += strlen(OTHER_LABEL);
+ if (n_labels > EDAC_MAX_LABELS) {
+ p = e->label;
+ *p = '\0';
+ } else {
+ if (p != e->label) {
+ strcpy(p, OTHER_LABEL);
+ p += strlen(OTHER_LABEL);
+ }
+ strcpy(p, dimm->label);
+ p += strlen(p);
}
- strcpy(p, dimm->label);
- p += strlen(p);
/*
* get csrow/channel of the DIMM, in order to allow
@@ -1213,22 +1196,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
chan = -2;
}
- if (!e->enable_per_layer_report) {
+ if (any_memory)
strcpy(e->label, "any memory");
- } else {
- edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
- if (p == e->label)
- strcpy(e->label, "unknown memory");
- if (type == HW_EVENT_ERR_CORRECTED) {
- if (row >= 0) {
- mci->csrows[row]->ce_count += error_count;
- if (chan >= 0)
- mci->csrows[row]->channels[chan]->ce_count += error_count;
- }
- } else
- if (row >= 0)
- mci->csrows[row]->ue_count += error_count;
- }
+ else if (!*e->label)
+ strcpy(e->label, "unknown memory");
+
+ edac_inc_csrow(e, row, chan);
/* Fill the RAM location data */
p = e->location;
@@ -1244,6 +1217,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
if (p > e->location)
*(p - 1) = '\0';
- edac_raw_mc_handle_error(type, mci, e);
+ edac_raw_mc_handle_error(e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h
index 02aac5c61d00..881b00eadf7a 100644
--- a/drivers/edac/edac_mc.h
+++ b/drivers/edac/edac_mc.h
@@ -212,17 +212,13 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
* edac_raw_mc_handle_error() - Reports a memory event to userspace without
* doing anything to discover the error location.
*
- * @type: severity of the error (CE/UE/Fatal)
- * @mci: a struct mem_ctl_info pointer
* @e: error description
*
* This raw function is used internally by edac_mc_handle_error(). It should
* only be called directly when the hardware error come directly from BIOS,
* like in the case of APEI GHES driver.
*/
-void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
- struct mem_ctl_info *mci,
- struct edac_raw_error_desc *e);
+void edac_raw_mc_handle_error(struct edac_raw_error_desc *e);
/**
* edac_mc_handle_error() - Reports a memory event to userspace.
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index c70ec0a306d8..4e6aca595133 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -274,14 +274,8 @@ static const struct attribute_group *csrow_attr_groups[] = {
NULL
};
-static void csrow_attr_release(struct device *dev)
-{
- /* release device with _edac_mc_free() */
-}
-
static const struct device_type csrow_attr_type = {
.groups = csrow_attr_groups,
- .release = csrow_attr_release,
};
/*
@@ -387,6 +381,14 @@ static const struct attribute_group *csrow_dev_groups[] = {
NULL
};
+static void csrow_release(struct device *dev)
+{
+ /*
+ * Intentionally empty: releasing a csrow device only takes its
+ * sysfs representation away. The csrow data itself is owned by
+ * the mci device, which also releases it.
+ */
+}
+
static inline int nr_pages_per_csrow(struct csrow_info *csrow)
{
int chan, nr_pages = 0;
@@ -405,6 +407,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
csrow->dev.type = &csrow_attr_type;
csrow->dev.groups = csrow_dev_groups;
+ csrow->dev.release = csrow_release;
device_initialize(&csrow->dev);
csrow->dev.parent = &mci->dev;
csrow->mci = mci;
@@ -441,10 +444,8 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
error:
for (--i; i >= 0; i--) {
- csrow = mci->csrows[i];
- if (!nr_pages_per_csrow(csrow))
- continue;
- device_unregister(&mci->csrows[i]->dev);
+ if (device_is_registered(&mci->csrows[i]->dev))
+ device_unregister(&mci->csrows[i]->dev);
}
return err;
@@ -453,15 +454,13 @@ error:
static void edac_delete_csrow_objects(struct mem_ctl_info *mci)
{
int i;
- struct csrow_info *csrow;
- for (i = mci->nr_csrows - 1; i >= 0; i--) {
- csrow = mci->csrows[i];
- if (!nr_pages_per_csrow(csrow))
- continue;
- device_unregister(&mci->csrows[i]->dev);
+ for (i = 0; i < mci->nr_csrows; i++) {
+ if (device_is_registered(&mci->csrows[i]->dev))
+ device_unregister(&mci->csrows[i]->dev);
}
}
+
#endif
/*
@@ -552,10 +551,8 @@ static ssize_t dimmdev_ce_count_show(struct device *dev,
char *data)
{
struct dimm_info *dimm = to_dimm(dev);
- u32 count;
- count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx];
- return sprintf(data, "%u\n", count);
+ return sprintf(data, "%u\n", dimm->ce_count);
}
static ssize_t dimmdev_ue_count_show(struct device *dev,
@@ -563,10 +560,8 @@ static ssize_t dimmdev_ue_count_show(struct device *dev,
char *data)
{
struct dimm_info *dimm = to_dimm(dev);
- u32 count;
- count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][dimm->idx];
- return sprintf(data, "%u\n", count);
+ return sprintf(data, "%u\n", dimm->ue_count);
}
/* dimm/rank attribute files */
@@ -602,16 +597,18 @@ static const struct attribute_group *dimm_attr_groups[] = {
NULL
};
-static void dimm_attr_release(struct device *dev)
-{
- /* release device with _edac_mc_free() */
-}
-
static const struct device_type dimm_attr_type = {
.groups = dimm_attr_groups,
- .release = dimm_attr_release,
};
+static void dimm_release(struct device *dev)
+{
+ /*
+ * Intentionally empty: releasing a dimm device only takes its
+ * sysfs representation away. The dimm data itself is owned by
+ * the mci device, which also releases it.
+ */
+}
+
/* Create a DIMM object under specifed memory controller device */
static int edac_create_dimm_object(struct mem_ctl_info *mci,
struct dimm_info *dimm)
@@ -620,6 +617,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
dimm->mci = mci;
dimm->dev.type = &dimm_attr_type;
+ dimm->dev.release = dimm_release;
device_initialize(&dimm->dev);
dimm->dev.parent = &mci->dev;
@@ -659,7 +657,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
- int cnt, row, chan, i;
+ struct dimm_info *dimm;
+ int row, chan;
+
mci->ue_mc = 0;
mci->ce_mc = 0;
mci->ue_noinfo_count = 0;
@@ -675,11 +675,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
ri->channels[chan]->ce_count = 0;
}
- cnt = 1;
- for (i = 0; i < mci->n_layers; i++) {
- cnt *= mci->layers[i].size;
- memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32));
- memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32));
+ mci_for_each_dimm(mci, dimm) {
+ dimm->ue_count = 0;
+ dimm->ce_count = 0;
}
mci->start_time = jiffies;
@@ -884,14 +882,8 @@ static const struct attribute_group *mci_attr_groups[] = {
NULL
};
-static void mci_attr_release(struct device *dev)
-{
- /* release device with _edac_mc_free() */
-}
-
static const struct device_type mci_attr_type = {
.groups = mci_attr_groups,
- .release = mci_attr_release,
};
/*
@@ -910,8 +902,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
/* get the /sys/devices/system/edac subsys reference */
mci->dev.type = &mci_attr_type;
- device_initialize(&mci->dev);
-
mci->dev.parent = mci_pdev;
mci->dev.groups = groups;
dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
@@ -921,7 +911,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
err = device_add(&mci->dev);
if (err < 0) {
edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
- put_device(&mci->dev);
+ /* no put_device() here, free mci with _edac_mc_free() */
return err;
}
@@ -937,24 +927,20 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
err = edac_create_dimm_object(mci, dimm);
if (err)
- goto fail_unregister_dimm;
+ goto fail;
}
#ifdef CONFIG_EDAC_LEGACY_SYSFS
err = edac_create_csrow_objects(mci);
if (err < 0)
- goto fail_unregister_dimm;
+ goto fail;
#endif
edac_create_debugfs_nodes(mci);
return 0;
-fail_unregister_dimm:
- mci_for_each_dimm(mci, dimm) {
- if (device_is_registered(&dimm->dev))
- device_unregister(&dimm->dev);
- }
- device_unregister(&mci->dev);
+fail:
+ edac_remove_sysfs_mci_device(mci);
return err;
}
@@ -966,6 +952,9 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
{
struct dimm_info *dimm;
+ if (!device_is_registered(&mci->dev))
+ return;
+
edac_dbg(0, "\n");
#ifdef CONFIG_EDAC_DEBUG
@@ -976,17 +965,14 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
#endif
mci_for_each_dimm(mci, dimm) {
- if (dimm->nr_pages == 0)
+ if (!device_is_registered(&dimm->dev))
continue;
edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev));
device_unregister(&dimm->dev);
}
-}
-void edac_unregister_sysfs(struct mem_ctl_info *mci)
-{
- edac_dbg(1, "unregistering device %s\n", dev_name(&mci->dev));
- device_unregister(&mci->dev);
+ /* only remove the device, but keep mci */
+ device_del(&mci->dev);
}
static void mc_attr_release(struct device *dev)
@@ -1000,9 +986,6 @@ static void mc_attr_release(struct device *dev)
kfree(dev);
}
-static const struct device_type mc_attr_type = {
- .release = mc_attr_release,
-};
/*
* Init/exit code for the module. Basically, creates/removes /sys/class/rc
*/
@@ -1015,11 +998,10 @@ int __init edac_mc_sysfs_init(void)
return -ENOMEM;
mci_pdev->bus = edac_get_sysfs_subsys();
- mci_pdev->type = &mc_attr_type;
- device_initialize(mci_pdev);
- dev_set_name(mci_pdev, "mc");
+ mci_pdev->release = mc_attr_release;
+ mci_pdev->init_name = "mc";
- err = device_add(mci_pdev);
+ err = device_register(mci_pdev);
if (err < 0) {
edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev));
put_device(mci_pdev);
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 388427d378b1..aa1f91688eb8 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -28,7 +28,6 @@ void edac_mc_sysfs_exit(void);
extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
const struct attribute_group **groups);
extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
-void edac_unregister_sysfs(struct mem_ctl_info *mci);
extern int edac_get_log_ue(void);
extern int edac_get_log_ce(void);
extern int edac_get_panic_on_ue(void);
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index b99080d8a10c..cb3dab56a875 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -201,7 +201,6 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
{
- enum hw_event_mc_err_type type;
struct edac_raw_error_desc *e;
struct mem_ctl_info *mci;
struct ghes_edac_pvt *pvt;
@@ -240,17 +239,17 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
switch (sev) {
case GHES_SEV_CORRECTED:
- type = HW_EVENT_ERR_CORRECTED;
+ e->type = HW_EVENT_ERR_CORRECTED;
break;
case GHES_SEV_RECOVERABLE:
- type = HW_EVENT_ERR_UNCORRECTED;
+ e->type = HW_EVENT_ERR_UNCORRECTED;
break;
case GHES_SEV_PANIC:
- type = HW_EVENT_ERR_FATAL;
+ e->type = HW_EVENT_ERR_FATAL;
break;
default:
case GHES_SEV_NO:
- type = HW_EVENT_ERR_INFO;
+ e->type = HW_EVENT_ERR_INFO;
}
edac_dbg(1, "error validation_bits: 0x%08llx\n",
@@ -356,11 +355,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
mem_err->mem_dev_handle);
index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle);
- if (index >= 0) {
+ if (index >= 0)
e->top_layer = index;
- e->enable_per_layer_report = true;
- }
-
}
if (p > e->location)
*(p - 1) = '\0';
@@ -442,7 +438,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
if (p > pvt->other_detail)
*(p - 1) = '\0';
- edac_raw_mc_handle_error(type, mci, e);
+ edac_raw_mc_handle_error(e);
unlock:
spin_unlock_irqrestore(&ghes_lock, flags);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea980c556f2e..8874b7722b2f 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1239,7 +1239,7 @@ static int __init mce_amd_init(void)
case 0x17:
case 0x18:
- pr_warn("Decoding supported only on Scalable MCA processors.\n");
+ pr_warn_once("Decoding supported only on Scalable MCA processors.\n");
return -EINVAL;
default:
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 880ffd833718..12211dc040e8 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -477,16 +477,16 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
if (p->ce_cnt) {
pinf = &p->ceinfo;
- if (!priv->p_data->quirks) {
+ if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "DDR ECC error type:%s Row %d Bank %d Col %d Bit Position: %d Data: 0x%08x",
- "CE", pinf->row, pinf->bank, pinf->col,
+ "DDR ECC error type:%s Row %d Bank %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x",
+ "CE", pinf->row, pinf->bank,
+ pinf->bankgrpnr, pinf->blknr,
pinf->bitpos, pinf->data);
} else {
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "DDR ECC error type:%s Row %d Bank %d Col %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x",
+ "DDR ECC error type:%s Row %d Bank %d Col %d Bit Position: %d Data: 0x%08x",
"CE", pinf->row, pinf->bank, pinf->col,
- pinf->bankgrpnr, pinf->blknr,
pinf->bitpos, pinf->data);
}
@@ -497,15 +497,15 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
if (p->ue_cnt) {
pinf = &p->ueinfo;
- if (!priv->p_data->quirks) {
+ if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "DDR ECC error type :%s Row %d Bank %d Col %d ",
- "UE", pinf->row, pinf->bank, pinf->col);
+ "DDR ECC error type :%s Row %d Bank %d BankGroup Number %d Block Number %d",
+ "UE", pinf->row, pinf->bank,
+ pinf->bankgrpnr, pinf->blknr);
} else {
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
- "DDR ECC error type :%s Row %d Bank %d Col %d BankGroup Number %d Block Number %d",
- "UE", pinf->row, pinf->bank, pinf->col,
- pinf->bankgrpnr, pinf->blknr);
+ "DDR ECC error type :%s Row %d Bank %d Col %d ",
+ "UE", pinf->row, pinf->bank, pinf->col);
}
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 7576450c8254..aff3dfb4d7ba 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -83,13 +83,16 @@ static ssize_t
efivar_attr_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
+ unsigned long size = sizeof(var->Data);
char *str = buf;
+ int ret;
if (!entry || !buf)
return -EINVAL;
- var->DataSize = 1024;
- if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data))
+ ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
+ var->DataSize = size;
+ if (ret)
return -EIO;
if (var->Attributes & EFI_VARIABLE_NON_VOLATILE)
@@ -116,13 +119,16 @@ static ssize_t
efivar_size_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
+ unsigned long size = sizeof(var->Data);
char *str = buf;
+ int ret;
if (!entry || !buf)
return -EINVAL;
- var->DataSize = 1024;
- if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data))
+ ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
+ var->DataSize = size;
+ if (ret)
return -EIO;
str += sprintf(str, "0x%lx\n", var->DataSize);
@@ -133,12 +139,15 @@ static ssize_t
efivar_data_read(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
+ unsigned long size = sizeof(var->Data);
+ int ret;
if (!entry || !buf)
return -EINVAL;
- var->DataSize = 1024;
- if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data))
+ ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data);
+ var->DataSize = size;
+ if (ret)
return -EIO;
memcpy(buf, var->Data, var->DataSize);
@@ -199,6 +208,9 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
u8 *data;
int err;
+ if (!entry || !buf)
+ return -EINVAL;
+
if (in_compat_syscall()) {
struct compat_efi_variable *compat;
@@ -250,14 +262,16 @@ efivar_show_raw(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
struct compat_efi_variable *compat;
+ unsigned long datasize = sizeof(var->Data);
size_t size;
+ int ret;
if (!entry || !buf)
return 0;
- var->DataSize = 1024;
- if (efivar_entry_get(entry, &entry->var.Attributes,
- &entry->var.DataSize, entry->var.Data))
+ ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data);
+ var->DataSize = datasize;
+ if (ret)
return -EIO;
if (in_compat_syscall()) {
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 31fee5e918b7..0017367e94ee 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -21,18 +21,21 @@
#include "gpiolib.h"
#include "gpiolib-acpi.h"
-#define QUIRK_NO_EDGE_EVENTS_ON_BOOT 0x01l
-#define QUIRK_NO_WAKEUP 0x02l
-
static int run_edge_events_on_boot = -1;
module_param(run_edge_events_on_boot, int, 0444);
MODULE_PARM_DESC(run_edge_events_on_boot,
"Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto");
-static int honor_wakeup = -1;
-module_param(honor_wakeup, int, 0444);
-MODULE_PARM_DESC(honor_wakeup,
- "Honor the ACPI wake-capable flag: 0=no, 1=yes, -1=auto");
+static char *ignore_wake;
+module_param(ignore_wake, charp, 0444);
+MODULE_PARM_DESC(ignore_wake,
+ "controller@pin combos on which to ignore the ACPI wake flag "
+ "ignore_wake=controller@pin[,controller@pin[,...]]");
+
+struct acpi_gpiolib_dmi_quirk {
+ bool no_edge_events_on_boot;
+ char *ignore_wake;
+};
/**
* struct acpi_gpio_event - ACPI GPIO event handler data
@@ -202,6 +205,57 @@ static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio)
acpi_gpiochip_request_irq(acpi_gpio, event);
}
+static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in)
+{
+ const char *controller, *pin_str;
+ int len, pin;
+ char *endp;
+
+ controller = ignore_wake;
+ while (controller) {
+ pin_str = strchr(controller, '@');
+ if (!pin_str)
+ goto err;
+
+ len = pin_str - controller;
+ if (len == strlen(controller_in) &&
+ strncmp(controller, controller_in, len) == 0) {
+ pin = simple_strtoul(pin_str + 1, &endp, 10);
+ if (*endp != 0 && *endp != ',')
+ goto err;
+
+ if (pin == pin_in)
+ return true;
+ }
+
+ controller = strchr(controller, ',');
+ if (controller)
+ controller++;
+ }
+
+ return false;
+err:
+ pr_err_once("Error invalid value for gpiolib_acpi.ignore_wake: %s\n",
+ ignore_wake);
+ return false;
+}
+
+static bool acpi_gpio_irq_is_wake(struct device *parent,
+ struct acpi_resource_gpio *agpio)
+{
+ int pin = agpio->pin_table[0];
+
+ if (agpio->wake_capable != ACPI_WAKE_CAPABLE)
+ return false;
+
+ if (acpi_gpio_in_ignore_list(dev_name(parent), pin)) {
+ dev_info(parent, "Ignoring wakeup on pin %d\n", pin);
+ return false;
+ }
+
+ return true;
+}
+
/* Always returns AE_OK so that we keep looping over the resources */
static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
void *context)
@@ -289,7 +343,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
event->handle = evt_handle;
event->handler = handler;
event->irq = irq;
- event->irq_is_wake = honor_wakeup && agpio->wake_capable == ACPI_WAKE_CAPABLE;
+ event->irq_is_wake = acpi_gpio_irq_is_wake(chip->parent, agpio);
event->pin = pin;
event->desc = desc;
@@ -1328,7 +1382,9 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = {
DMI_MATCH(DMI_SYS_VENDOR, "MINIX"),
DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"),
},
- .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT,
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .no_edge_events_on_boot = true,
+ },
},
{
/*
@@ -1341,16 +1397,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = {
DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"),
DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"),
},
- .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT,
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .no_edge_events_on_boot = true,
+ },
},
{
/*
- * Various HP X2 10 Cherry Trail models use an external
- * embedded-controller connected via I2C + an ACPI GPIO
- * event handler. The embedded controller generates various
- * spurious wakeup events when suspended. So disable wakeup
- * for its handler (it uses the only ACPI GPIO event handler).
- * This breaks wakeup when opening the lid, the user needs
+ * HP X2 10 models with Cherry Trail SoC + TI PMIC use an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FF:01 pin 0, causing spurious wakeups.
+ * When suspending by closing the LID, the power to the USB
+ * keyboard is turned off, causing INT0002 ACPI events to
+ * trigger once the XHCI controller notices the keyboard is
+ * gone. So INT0002 events cause spurious wakeups too. Ignoring
+ * EC wakes breaks wakeup when opening the lid, the user needs
* to press the power-button to wakeup the system. The
* alternative is suspend simply not working, which is worse.
*/
@@ -1358,33 +1418,61 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"),
},
- .driver_data = (void *)QUIRK_NO_WAKEUP,
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FF:01@0,INT0002:00@2",
+ },
+ },
+ {
+ /*
+ * HP X2 10 models with Bay Trail SoC + AXP288 PMIC use an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FC:02 pin 28, causing spurious wakeups.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+ DMI_MATCH(DMI_BOARD_NAME, "815D"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FC:02@28",
+ },
+ },
+ {
+ /*
+ * HP X2 10 models with Cherry Trail SoC + AXP288 PMIC use an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FF:01 pin 0, causing spurious wakeups.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+ DMI_MATCH(DMI_BOARD_NAME, "813E"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FF:01@0",
+ },
},
{} /* Terminating entry */
};
static int acpi_gpio_setup_params(void)
{
+ const struct acpi_gpiolib_dmi_quirk *quirk = NULL;
const struct dmi_system_id *id;
- long quirks = 0;
id = dmi_first_match(gpiolib_acpi_quirks);
if (id)
- quirks = (long)id->driver_data;
+ quirk = id->driver_data;
if (run_edge_events_on_boot < 0) {
- if (quirks & QUIRK_NO_EDGE_EVENTS_ON_BOOT)
+ if (quirk && quirk->no_edge_events_on_boot)
run_edge_events_on_boot = 0;
else
run_edge_events_on_boot = 1;
}
- if (honor_wakeup < 0) {
- if (quirks & QUIRK_NO_WAKEUP)
- honor_wakeup = 0;
- else
- honor_wakeup = 1;
- }
+ if (ignore_wake == NULL && quirk && quirk->ignore_wake)
+ ignore_wake = quirk->ignore_wake;
return 0;
}
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 4d0106ceeba7..00fb91feba70 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2306,9 +2306,16 @@ static void gpiochip_irq_disable(struct irq_data *d)
{
struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+ /*
+ * Since we override .irq_disable() we need to mimic the
+ * behaviour of __irq_disable() in irq/chip.c.
+ * First call .irq_disable() if it exists, else mimic the
+ * behaviour of mask_irq() which calls .irq_mask() if
+ * it exists.
+ */
if (chip->irq.irq_disable)
chip->irq.irq_disable(d);
- else
+ else if (chip->irq.chip->irq_mask)
chip->irq.chip->irq_mask(d);
gpiochip_disable_irq(chip, d->hwirq);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f24ed9a1a3e5..337d7cdce8e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -781,11 +781,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
ssize_t result = 0;
uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
- if (size & 3 || *pos & 3)
+ if (size > 4096 || size & 3 || *pos & 3)
return -EINVAL;
/* decode offset */
- offset = *pos & GENMASK_ULL(11, 0);
+ offset = (*pos & GENMASK_ULL(11, 0)) >> 2;
se = (*pos & GENMASK_ULL(19, 12)) >> 12;
sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
@@ -823,7 +823,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
while (size) {
uint32_t value;
- value = data[offset++];
+ value = data[result >> 2];
r = put_user(value, (uint32_t *)buf);
if (r) {
result = r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 39cd545976b7..b8975857d60d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3913,6 +3913,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
if (r)
goto out;
+ amdgpu_fbdev_set_suspend(tmp_adev, 0);
+
/* must succeed. */
amdgpu_ras_resume(tmp_adev);
@@ -4086,6 +4088,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*/
amdgpu_unregister_gpu_instance(tmp_adev);
+ amdgpu_fbdev_set_suspend(adev, 1);
+
/* disable ras on ALL IPs */
if (!(in_ras_intr && !use_baco) &&
amdgpu_device_ip_need_full_reset(tmp_adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dee446278417..c6e9885c071f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -974,7 +974,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
/* Map SG to device */
r = -ENOMEM;
nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- if (nents != ttm->sg->nents)
+ if (nents == 0)
goto release_sg;
/* convert SG to linear array of pages and dma addresses */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index ff2e6e1ccde7..6173951db7b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -693,7 +693,7 @@ static int jpeg_v2_0_set_clockgating_state(void *handle,
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
- if (jpeg_v2_0_is_idle(handle))
+ if (!jpeg_v2_0_is_idle(handle))
return -EBUSY;
jpeg_v2_0_enable_clock_gating(adev);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index c6d046df4b70..c04c2078a7c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -477,7 +477,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
continue;
if (enable) {
- if (jpeg_v2_5_is_idle(handle))
+ if (!jpeg_v2_5_is_idle(handle))
return -EBUSY;
jpeg_v2_5_enable_clock_gating(adev, i);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 2b488dfb2f21..d8945c31b622 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -89,6 +89,13 @@
#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK 0x00010000L
#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L
#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0
+
+/* for Vega20/arcturus regiter offset change */
+#define mmROM_INDEX_VG20 0x00e4
+#define mmROM_INDEX_VG20_BASE_IDX 0
+#define mmROM_DATA_VG20 0x00e5
+#define mmROM_DATA_VG20_BASE_IDX 0
+
/*
* Indirect registers accessor
*/
@@ -309,6 +316,8 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,
{
u32 *dw_ptr;
u32 i, length_dw;
+ uint32_t rom_index_offset;
+ uint32_t rom_data_offset;
if (bios == NULL)
return false;
@@ -321,11 +330,23 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,
dw_ptr = (u32 *)bios;
length_dw = ALIGN(length_bytes, 4) / 4;
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX_VG20);
+ rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA_VG20);
+ break;
+ default:
+ rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX);
+ rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA);
+ break;
+ }
+
/* set rom index to 0 */
- WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0);
+ WREG32(rom_index_offset, 0);
/* read out the rom data */
for (i = 0; i < length_dw; i++)
- dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA));
+ dw_ptr[i] = RREG32(rom_data_offset);
return true;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 71f61afdc655..09b0572b838d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1352,7 +1352,7 @@ static int vcn_v1_0_set_clockgating_state(void *handle,
if (enable) {
/* wait for STATUS to clear */
- if (vcn_v1_0_is_idle(handle))
+ if (!vcn_v1_0_is_idle(handle))
return -EBUSY;
vcn_v1_0_enable_clock_gating(adev);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index c387c81f8695..b7f17342bbf0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -1217,7 +1217,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
if (enable) {
/* wait for STATUS to clear */
- if (vcn_v2_0_is_idle(handle))
+ if (!vcn_v2_0_is_idle(handle))
return -EBUSY;
vcn_v2_0_enable_clock_gating(adev);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 2d64ba1adf99..678253d81154 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -1672,7 +1672,7 @@ static int vcn_v2_5_set_clockgating_state(void *handle,
return 0;
if (enable) {
- if (vcn_v2_5_is_idle(handle))
+ if (!vcn_v2_5_is_idle(handle))
return -EBUSY;
vcn_v2_5_enable_clock_gating(adev);
} else {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e997251a8b57..6240259b3a93 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -522,8 +522,9 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
acrtc_state = to_dm_crtc_state(acrtc->base.state);
- DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id,
- amdgpu_dm_vrr_active(acrtc_state));
+ DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
+ amdgpu_dm_vrr_active(acrtc_state),
+ acrtc_state->active_planes);
amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
drm_crtc_handle_vblank(&acrtc->base);
@@ -543,7 +544,18 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
&acrtc_state->vrr_params.adjust);
}
- if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED) {
+ /*
+ * If there aren't any active_planes then DCH HUBP may be clock-gated.
+ * In that case, pageflip completion interrupts won't fire and pageflip
+ * completion events won't get delivered. Prevent this by sending
+ * pending pageflip events from here if a flip is still pending.
+ *
+ * If any planes are enabled, use dm_pflip_high_irq() instead, to
+ * avoid race conditions between flip programming and completion,
+ * which could cause too early flip completion events.
+ */
+ if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
+ acrtc_state->active_planes == 0) {
if (acrtc->event) {
drm_crtc_send_vblank_event(&acrtc->base, acrtc->event);
acrtc->event = NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index cb731c1d30b1..fd9e69634c50 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -3401,6 +3401,17 @@ static bool retrieve_link_cap(struct dc_link *link)
sink_id.ieee_device_id,
sizeof(sink_id.ieee_device_id));
+ /* Quirk Apple MBP 2017 15" Retina panel: Wrong DP_MAX_LINK_RATE */
+ {
+ uint8_t str_mbp_2017[] = { 101, 68, 21, 101, 98, 97 };
+
+ if ((link->dpcd_caps.sink_dev_id == 0x0010fa) &&
+ !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2017,
+ sizeof(str_mbp_2017))) {
+ link->reported_link_cap.link_rate = 0x0c;
+ }
+ }
+
core_link_read_dpcd(
link,
DP_SINK_HW_REVISION_START,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
index d51e02fdab4d..5e640f17d3d4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
@@ -108,7 +108,6 @@ static const struct hwseq_private_funcs dcn20_private_funcs = {
.enable_power_gating_plane = dcn20_enable_power_gating_plane,
.dpp_pg_control = dcn20_dpp_pg_control,
.hubp_pg_control = dcn20_hubp_pg_control,
- .dsc_pg_control = NULL,
.update_odm = dcn20_update_odm,
.dsc_pg_control = dcn20_dsc_pg_control,
.get_surface_visual_confirm_color = dcn10_get_surface_visual_confirm_color,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 85f90f3e24cb..e310d67c399a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -335,6 +335,117 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = {
.use_urgent_burst_bw = 0
};
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 560.0,
+ .fabricclk_mhz = 560.0,
+ .dispclk_mhz = 513.0,
+ .dppclk_mhz = 513.0,
+ .phyclk_mhz = 540.0,
+ .socclk_mhz = 560.0,
+ .dscclk_mhz = 171.0,
+ .dram_speed_mts = 8960.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 694.0,
+ .fabricclk_mhz = 694.0,
+ .dispclk_mhz = 642.0,
+ .dppclk_mhz = 642.0,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 694.0,
+ .dscclk_mhz = 214.0,
+ .dram_speed_mts = 11104.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 875.0,
+ .fabricclk_mhz = 875.0,
+ .dispclk_mhz = 734.0,
+ .dppclk_mhz = 734.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 875.0,
+ .dscclk_mhz = 245.0,
+ .dram_speed_mts = 14000.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 1000.0,
+ .fabricclk_mhz = 1000.0,
+ .dispclk_mhz = 1100.0,
+ .dppclk_mhz = 1100.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1000.0,
+ .dscclk_mhz = 367.0,
+ .dram_speed_mts = 16000.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ /*Extra state, no dispclk ramping*/
+ {
+ .state = 5,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 16000.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 8.6,
+ .sr_enter_plus_exit_time_us = 10.9,
+ .urgent_latency_us = 4.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 40.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 40.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.38,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 131,
+ .urgent_out_of_order_return_per_channel_bytes = 256,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 8,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 404.0,
+ .dummy_pstate_latency_us = 5.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3850,
+ .xfc_bus_transport_time_us = 20,
+ .xfc_xbuf_latency_tolerance_us = 4,
+ .use_urgent_burst_bw = 0
+};
+
struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 };
#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL
@@ -3291,6 +3402,9 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st
static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
uint32_t hw_internal_rev)
{
+ if (ASICREV_IS_NAVI14_M(hw_internal_rev))
+ return &dcn2_0_nv14_soc;
+
if (ASICREV_IS_NAVI12_P(hw_internal_rev))
return &dcn2_0_nv12_soc;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
index 4861aa5c59ae..fddbd59bf4f9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
@@ -116,7 +116,6 @@ static const struct hwseq_private_funcs dcn21_private_funcs = {
.enable_power_gating_plane = dcn20_enable_power_gating_plane,
.dpp_pg_control = dcn20_dpp_pg_control,
.hubp_pg_control = dcn20_hubp_pg_control,
- .dsc_pg_control = NULL,
.update_odm = dcn20_update_odm,
.dsc_pg_control = dcn20_dsc_pg_control,
.get_surface_visual_confirm_color = dcn10_get_surface_visual_confirm_color,
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index ad8e9b5628e4..96e81c7bc266 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -2006,8 +2006,11 @@ int smu_set_watermarks_for_clock_ranges(struct smu_context *smu,
smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
smu_set_watermarks_table(smu, table, clock_ranges);
- smu->watermarks_bitmap |= WATERMARKS_EXIST;
- smu->watermarks_bitmap &= ~WATERMARKS_LOADED;
+
+ if (!(smu->watermarks_bitmap & WATERMARKS_EXIST)) {
+ smu->watermarks_bitmap |= WATERMARKS_EXIST;
+ smu->watermarks_bitmap &= ~WATERMARKS_LOADED;
+ }
}
mutex_unlock(&smu->mutex);
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 0d73a49166af..aed4d6e60907 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -1063,15 +1063,6 @@ static int navi10_display_config_changed(struct smu_context *smu)
int ret = 0;
if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
- !(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
- ret = smu_write_watermarks_table(smu);
- if (ret)
- return ret;
-
- smu->watermarks_bitmap |= WATERMARKS_LOADED;
- }
-
- if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
smu_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
smu_feature_is_supported(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays,
@@ -1493,6 +1484,7 @@ static int navi10_set_watermarks_table(struct smu_context *smu,
*clock_ranges)
{
int i;
+ int ret = 0;
Watermarks_t *table = watermarks;
if (!table || !clock_ranges)
@@ -1544,6 +1536,18 @@ static int navi10_set_watermarks_table(struct smu_context *smu,
clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id;
}
+ smu->watermarks_bitmap |= WATERMARKS_EXIST;
+
+ /* pass data to smu controller */
+ if (!(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
+ ret = smu_write_watermarks_table(smu);
+ if (ret) {
+ pr_err("Failed to update WMTABLE!");
+ return ret;
+ }
+ smu->watermarks_bitmap |= WATERMARKS_LOADED;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
index 568c041c2206..3ad0f4aa3aa3 100644
--- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
@@ -806,9 +806,10 @@ static int renoir_set_watermarks_table(
clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id;
}
+ smu->watermarks_bitmap |= WATERMARKS_EXIST;
+
/* pass data to smu controller */
- if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
- !(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
+ if (!(smu->watermarks_bitmap & WATERMARKS_LOADED)) {
ret = smu_write_watermarks_table(smu);
if (ret) {
pr_err("Failed to update WMTABLE!");
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
index ea5cd1e17304..e7933930a657 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
@@ -146,14 +146,14 @@ static const struct of_device_id komeda_of_match[] = {
MODULE_DEVICE_TABLE(of, komeda_of_match);
-static int komeda_rt_pm_suspend(struct device *dev)
+static int __maybe_unused komeda_rt_pm_suspend(struct device *dev)
{
struct komeda_drv *mdrv = dev_get_drvdata(dev);
return komeda_dev_suspend(mdrv->mdev);
}
-static int komeda_rt_pm_resume(struct device *dev)
+static int __maybe_unused komeda_rt_pm_resume(struct device *dev)
{
struct komeda_drv *mdrv = dev_get_drvdata(dev);
diff --git a/drivers/gpu/drm/bochs/bochs_hw.c b/drivers/gpu/drm/bochs/bochs_hw.c
index b615b7dfdd9d..a4fc4e6aee39 100644
--- a/drivers/gpu/drm/bochs/bochs_hw.c
+++ b/drivers/gpu/drm/bochs/bochs_hw.c
@@ -156,10 +156,8 @@ int bochs_hw_init(struct drm_device *dev)
size = min(size, mem);
}
- if (pci_request_region(pdev, 0, "bochs-drm") != 0) {
- DRM_ERROR("Cannot request framebuffer\n");
- return -EBUSY;
- }
+ if (pci_request_region(pdev, 0, "bochs-drm") != 0)
+ DRM_WARN("Cannot request framebuffer, boot fb still active?\n");
bochs->fb_map = ioremap(addr, size);
if (bochs->fb_map == NULL) {
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index 67fca439bbfb..24965e53d351 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -1624,28 +1624,34 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
frame.colorspace = HDMI_COLORSPACE_RGB;
/* Set up colorimetry */
- switch (hdmi->hdmi_data.enc_out_encoding) {
- case V4L2_YCBCR_ENC_601:
- if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601)
- frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
- else
+ if (!hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) {
+ switch (hdmi->hdmi_data.enc_out_encoding) {
+ case V4L2_YCBCR_ENC_601:
+ if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601)
+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+ else
+ frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
+ frame.extended_colorimetry =
+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
+ break;
+ case V4L2_YCBCR_ENC_709:
+ if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709)
+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+ else
+ frame.colorimetry = HDMI_COLORIMETRY_ITU_709;
+ frame.extended_colorimetry =
+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_709;
+ break;
+ default: /* Carries no data */
frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
+ frame.extended_colorimetry =
+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
+ break;
+ }
+ } else {
+ frame.colorimetry = HDMI_COLORIMETRY_NONE;
frame.extended_colorimetry =
- HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
- break;
- case V4L2_YCBCR_ENC_709:
- if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709)
- frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
- else
- frame.colorimetry = HDMI_COLORIMETRY_ITU_709;
- frame.extended_colorimetry =
- HDMI_EXTENDED_COLORIMETRY_XV_YCC_709;
- break;
- default: /* Carries no data */
- frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
- frame.extended_colorimetry =
- HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
- break;
+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
}
frame.scan_mode = HDMI_SCAN_MODE_NONE;
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index cce0b1bba591..ed0fea2ac322 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1935,7 +1935,7 @@ static u8 drm_dp_calculate_rad(struct drm_dp_mst_port *port,
return parent_lct + 1;
}
-static bool drm_dp_mst_is_dp_mst_end_device(u8 pdt, bool mcs)
+static bool drm_dp_mst_is_end_device(u8 pdt, bool mcs)
{
switch (pdt) {
case DP_PEER_DEVICE_DP_LEGACY_CONV:
@@ -1965,13 +1965,13 @@ drm_dp_port_set_pdt(struct drm_dp_mst_port *port, u8 new_pdt,
/* Teardown the old pdt, if there is one */
if (port->pdt != DP_PEER_DEVICE_NONE) {
- if (drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) {
+ if (drm_dp_mst_is_end_device(port->pdt, port->mcs)) {
/*
* If the new PDT would also have an i2c bus,
* don't bother with reregistering it
*/
if (new_pdt != DP_PEER_DEVICE_NONE &&
- drm_dp_mst_is_dp_mst_end_device(new_pdt, new_mcs)) {
+ drm_dp_mst_is_end_device(new_pdt, new_mcs)) {
port->pdt = new_pdt;
port->mcs = new_mcs;
return 0;
@@ -1991,7 +1991,7 @@ drm_dp_port_set_pdt(struct drm_dp_mst_port *port, u8 new_pdt,
port->mcs = new_mcs;
if (port->pdt != DP_PEER_DEVICE_NONE) {
- if (drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) {
+ if (drm_dp_mst_is_end_device(port->pdt, port->mcs)) {
/* add i2c over sideband */
ret = drm_dp_mst_register_i2c_bus(&port->aux);
} else {
@@ -2172,7 +2172,7 @@ drm_dp_mst_port_add_connector(struct drm_dp_mst_branch *mstb,
}
if (port->pdt != DP_PEER_DEVICE_NONE &&
- drm_dp_mst_is_dp_mst_end_device(port->pdt, port->mcs)) {
+ drm_dp_mst_is_end_device(port->pdt, port->mcs)) {
port->cached_edid = drm_get_edid(port->connector,
&port->aux.ddc);
drm_connector_set_tile_property(port->connector);
@@ -2302,14 +2302,18 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb,
mutex_unlock(&mgr->lock);
}
- if (old_ddps != port->ddps) {
- if (port->ddps) {
- if (!port->input) {
- drm_dp_send_enum_path_resources(mgr, mstb,
- port);
- }
+ /*
+ * Reprobe PBN caps on both hotplug, and when re-probing the link
+ * for our parent mstb
+ */
+ if (old_ddps != port->ddps || !created) {
+ if (port->ddps && !port->input) {
+ ret = drm_dp_send_enum_path_resources(mgr, mstb,
+ port);
+ if (ret == 1)
+ changed = true;
} else {
- port->available_pbn = 0;
+ port->full_pbn = 0;
}
}
@@ -2401,11 +2405,10 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb,
port->ddps = conn_stat->displayport_device_plug_status;
if (old_ddps != port->ddps) {
- if (port->ddps) {
- dowork = true;
- } else {
- port->available_pbn = 0;
- }
+ if (port->ddps && !port->input)
+ drm_dp_send_enum_path_resources(mgr, mstb, port);
+ else
+ port->full_pbn = 0;
}
new_pdt = port->input ? DP_PEER_DEVICE_NONE : conn_stat->peer_device_type;
@@ -2556,13 +2559,6 @@ static int drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *mg
if (port->input || !port->ddps)
continue;
- if (!port->available_pbn) {
- drm_modeset_lock(&mgr->base.lock, NULL);
- drm_dp_send_enum_path_resources(mgr, mstb, port);
- drm_modeset_unlock(&mgr->base.lock);
- changed = true;
- }
-
if (port->mstb)
mstb_child = drm_dp_mst_topology_get_mstb_validated(
mgr, port->mstb);
@@ -2990,6 +2986,7 @@ drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
ret = drm_dp_mst_wait_tx_reply(mstb, txmsg);
if (ret > 0) {
+ ret = 0;
path_res = &txmsg->reply.u.path_resources;
if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) {
@@ -3002,14 +2999,22 @@ drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr,
path_res->port_number,
path_res->full_payload_bw_number,
path_res->avail_payload_bw_number);
- port->available_pbn =
- path_res->avail_payload_bw_number;
+
+ /*
+ * If something changed, make sure we send a
+ * hotplug
+ */
+ if (port->full_pbn != path_res->full_payload_bw_number ||
+ port->fec_capable != path_res->fec_capable)
+ ret = 1;
+
+ port->full_pbn = path_res->full_payload_bw_number;
port->fec_capable = path_res->fec_capable;
}
}
kfree(txmsg);
- return 0;
+ return ret;
}
static struct drm_dp_mst_port *drm_dp_get_last_connected_port_to_mstb(struct drm_dp_mst_branch *mstb)
@@ -3596,13 +3601,9 @@ drm_dp_mst_topology_mgr_invalidate_mstb(struct drm_dp_mst_branch *mstb)
/* The link address will need to be re-sent on resume */
mstb->link_address_sent = false;
- list_for_each_entry(port, &mstb->ports, next) {
- /* The PBN for each port will also need to be re-probed */
- port->available_pbn = 0;
-
+ list_for_each_entry(port, &mstb->ports, next)
if (port->mstb)
drm_dp_mst_topology_mgr_invalidate_mstb(port->mstb);
- }
}
/**
@@ -4829,41 +4830,102 @@ static bool drm_dp_mst_port_downstream_of_branch(struct drm_dp_mst_port *port,
return false;
}
-static inline
-int drm_dp_mst_atomic_check_bw_limit(struct drm_dp_mst_branch *branch,
- struct drm_dp_mst_topology_state *mst_state)
+static int
+drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port,
+ struct drm_dp_mst_topology_state *state);
+
+static int
+drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb,
+ struct drm_dp_mst_topology_state *state)
{
- struct drm_dp_mst_port *port;
struct drm_dp_vcpi_allocation *vcpi;
- int pbn_limit = 0, pbn_used = 0;
+ struct drm_dp_mst_port *port;
+ int pbn_used = 0, ret;
+ bool found = false;
- list_for_each_entry(port, &branch->ports, next) {
- if (port->mstb)
- if (drm_dp_mst_atomic_check_bw_limit(port->mstb, mst_state))
- return -ENOSPC;
+ /* Check that we have at least one port in our state that's downstream
+ * of this branch, otherwise we can skip this branch
+ */
+ list_for_each_entry(vcpi, &state->vcpis, next) {
+ if (!vcpi->pbn ||
+ !drm_dp_mst_port_downstream_of_branch(vcpi->port, mstb))
+ continue;
- if (port->available_pbn > 0)
- pbn_limit = port->available_pbn;
+ found = true;
+ break;
}
- DRM_DEBUG_ATOMIC("[MST BRANCH:%p] branch has %d PBN available\n",
- branch, pbn_limit);
+ if (!found)
+ return 0;
- list_for_each_entry(vcpi, &mst_state->vcpis, next) {
- if (!vcpi->pbn)
- continue;
+ if (mstb->port_parent)
+ DRM_DEBUG_ATOMIC("[MSTB:%p] [MST PORT:%p] Checking bandwidth limits on [MSTB:%p]\n",
+ mstb->port_parent->parent, mstb->port_parent,
+ mstb);
+ else
+ DRM_DEBUG_ATOMIC("[MSTB:%p] Checking bandwidth limits\n",
+ mstb);
+
+ list_for_each_entry(port, &mstb->ports, next) {
+ ret = drm_dp_mst_atomic_check_port_bw_limit(port, state);
+ if (ret < 0)
+ return ret;
- if (drm_dp_mst_port_downstream_of_branch(vcpi->port, branch))
- pbn_used += vcpi->pbn;
+ pbn_used += ret;
}
- DRM_DEBUG_ATOMIC("[MST BRANCH:%p] branch used %d PBN\n",
- branch, pbn_used);
- if (pbn_used > pbn_limit) {
- DRM_DEBUG_ATOMIC("[MST BRANCH:%p] No available bandwidth\n",
- branch);
+ return pbn_used;
+}
+
+static int
+drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port,
+ struct drm_dp_mst_topology_state *state)
+{
+ struct drm_dp_vcpi_allocation *vcpi;
+ int pbn_used = 0;
+
+ if (port->pdt == DP_PEER_DEVICE_NONE)
+ return 0;
+
+ if (drm_dp_mst_is_end_device(port->pdt, port->mcs)) {
+ bool found = false;
+
+ list_for_each_entry(vcpi, &state->vcpis, next) {
+ if (vcpi->port != port)
+ continue;
+ if (!vcpi->pbn)
+ return 0;
+
+ found = true;
+ break;
+ }
+ if (!found)
+ return 0;
+
+ /* This should never happen, as it means we tried to
+ * set a mode before querying the full_pbn
+ */
+ if (WARN_ON(!port->full_pbn))
+ return -EINVAL;
+
+ pbn_used = vcpi->pbn;
+ } else {
+ pbn_used = drm_dp_mst_atomic_check_mstb_bw_limit(port->mstb,
+ state);
+ if (pbn_used <= 0)
+ return pbn_used;
+ }
+
+ if (pbn_used > port->full_pbn) {
+ DRM_DEBUG_ATOMIC("[MSTB:%p] [MST PORT:%p] required PBN of %d exceeds port limit of %d\n",
+ port->parent, port, pbn_used,
+ port->full_pbn);
return -ENOSPC;
}
- return 0;
+
+ DRM_DEBUG_ATOMIC("[MSTB:%p] [MST PORT:%p] uses %d out of %d PBN\n",
+ port->parent, port, pbn_used, port->full_pbn);
+
+ return pbn_used;
}
static inline int
@@ -5061,9 +5123,15 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state)
ret = drm_dp_mst_atomic_check_vcpi_alloc_limit(mgr, mst_state);
if (ret)
break;
- ret = drm_dp_mst_atomic_check_bw_limit(mgr->mst_primary, mst_state);
- if (ret)
+
+ mutex_lock(&mgr->lock);
+ ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary,
+ mst_state);
+ mutex_unlock(&mgr->lock);
+ if (ret < 0)
break;
+ else
+ ret = 0;
}
return ret;
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index b481cafdde28..825abe38201a 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -542,10 +542,12 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
}
DRM_DEBUG_LEASE("Creating lease\n");
+ /* lessee will take the ownership of leases */
lessee = drm_lease_create(lessor, &leases);
if (IS_ERR(lessee)) {
ret = PTR_ERR(lessee);
+ idr_destroy(&leases);
goto out_leases;
}
@@ -580,7 +582,6 @@ out_lessee:
out_leases:
put_unused_fd(fd);
- idr_destroy(&leases);
DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl failed: %d\n", ret);
return ret;
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 86d9b0e45c8c..1de2cde2277c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -967,7 +967,7 @@ int drm_prime_sg_to_page_addr_arrays(struct sg_table *sgt, struct page **pages,
index = 0;
for_each_sg(sgt->sgl, sg, sgt->nents, count) {
- len = sg->length;
+ len = sg_dma_len(sg);
page = sg_page(sg);
addr = sg_dma_address(sg);
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 8428ae12dfa5..1f79bc2a881e 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -55,6 +55,7 @@ static const char * const decon_clks_name[] = {
struct decon_context {
struct device *dev;
struct drm_device *drm_dev;
+ void *dma_priv;
struct exynos_drm_crtc *crtc;
struct exynos_drm_plane planes[WINDOWS_NR];
struct exynos_drm_plane_config configs[WINDOWS_NR];
@@ -644,7 +645,7 @@ static int decon_bind(struct device *dev, struct device *master, void *data)
decon_clear_channels(ctx->crtc);
- return exynos_drm_register_dma(drm_dev, dev);
+ return exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv);
}
static void decon_unbind(struct device *dev, struct device *master, void *data)
@@ -654,7 +655,7 @@ static void decon_unbind(struct device *dev, struct device *master, void *data)
decon_atomic_disable(ctx->crtc);
/* detach this sub driver from iommu mapping if supported. */
- exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
+ exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev, &ctx->dma_priv);
}
static const struct component_ops decon_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index ff59c641fa80..1eed3327999f 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -40,6 +40,7 @@
struct decon_context {
struct device *dev;
struct drm_device *drm_dev;
+ void *dma_priv;
struct exynos_drm_crtc *crtc;
struct exynos_drm_plane planes[WINDOWS_NR];
struct exynos_drm_plane_config configs[WINDOWS_NR];
@@ -127,13 +128,13 @@ static int decon_ctx_initialize(struct decon_context *ctx,
decon_clear_channels(ctx->crtc);
- return exynos_drm_register_dma(drm_dev, ctx->dev);
+ return exynos_drm_register_dma(drm_dev, ctx->dev, &ctx->dma_priv);
}
static void decon_ctx_remove(struct decon_context *ctx)
{
/* detach this sub driver from iommu mapping if supported. */
- exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
+ exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev, &ctx->dma_priv);
}
static u32 decon_calc_clkdiv(struct decon_context *ctx,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c
index 9ebc02768847..619f81435c1b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dma.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c
@@ -58,7 +58,7 @@ static inline void clear_dma_max_seg_size(struct device *dev)
* mapping.
*/
static int drm_iommu_attach_device(struct drm_device *drm_dev,
- struct device *subdrv_dev)
+ struct device *subdrv_dev, void **dma_priv)
{
struct exynos_drm_private *priv = drm_dev->dev_private;
int ret;
@@ -74,7 +74,14 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev,
return ret;
if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
- if (to_dma_iommu_mapping(subdrv_dev))
+ /*
+ * Keep the original DMA mapping of the sub-device and
+ * restore it on Exynos DRM detach, otherwise the DMA
+ * framework considers it as IOMMU-less during the next
+ * probe (in case of deferred probe or modular build)
+ */
+ *dma_priv = to_dma_iommu_mapping(subdrv_dev);
+ if (*dma_priv)
arm_iommu_detach_device(subdrv_dev);
ret = arm_iommu_attach_device(subdrv_dev, priv->mapping);
@@ -98,19 +105,21 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev,
* mapping
*/
static void drm_iommu_detach_device(struct drm_device *drm_dev,
- struct device *subdrv_dev)
+ struct device *subdrv_dev, void **dma_priv)
{
struct exynos_drm_private *priv = drm_dev->dev_private;
- if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
+ if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
arm_iommu_detach_device(subdrv_dev);
- else if (IS_ENABLED(CONFIG_IOMMU_DMA))
+ arm_iommu_attach_device(subdrv_dev, *dma_priv);
+ } else if (IS_ENABLED(CONFIG_IOMMU_DMA))
iommu_detach_device(priv->mapping, subdrv_dev);
clear_dma_max_seg_size(subdrv_dev);
}
-int exynos_drm_register_dma(struct drm_device *drm, struct device *dev)
+int exynos_drm_register_dma(struct drm_device *drm, struct device *dev,
+ void **dma_priv)
{
struct exynos_drm_private *priv = drm->dev_private;
@@ -137,13 +146,14 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev)
priv->mapping = mapping;
}
- return drm_iommu_attach_device(drm, dev);
+ return drm_iommu_attach_device(drm, dev, dma_priv);
}
-void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev)
+void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev,
+ void **dma_priv)
{
if (IS_ENABLED(CONFIG_EXYNOS_IOMMU))
- drm_iommu_detach_device(drm, dev);
+ drm_iommu_detach_device(drm, dev, dma_priv);
}
void exynos_drm_cleanup_dma(struct drm_device *drm)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index d4d21d8cfb90..6ae9056e7a18 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -223,8 +223,10 @@ static inline bool is_drm_iommu_supported(struct drm_device *drm_dev)
return priv->mapping ? true : false;
}
-int exynos_drm_register_dma(struct drm_device *drm, struct device *dev);
-void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev);
+int exynos_drm_register_dma(struct drm_device *drm, struct device *dev,
+ void **dma_priv);
+void exynos_drm_unregister_dma(struct drm_device *drm, struct device *dev,
+ void **dma_priv);
void exynos_drm_cleanup_dma(struct drm_device *drm);
#ifdef CONFIG_DRM_EXYNOS_DPI
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 8ea2e1d77802..29ab8be8604c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -97,6 +97,7 @@ struct fimc_scaler {
struct fimc_context {
struct exynos_drm_ipp ipp;
struct drm_device *drm_dev;
+ void *dma_priv;
struct device *dev;
struct exynos_drm_ipp_task *task;
struct exynos_drm_ipp_formats *formats;
@@ -1133,7 +1134,7 @@ static int fimc_bind(struct device *dev, struct device *master, void *data)
ctx->drm_dev = drm_dev;
ipp->drm_dev = drm_dev;
- exynos_drm_register_dma(drm_dev, dev);
+ exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv);
exynos_drm_ipp_register(dev, ipp, &ipp_funcs,
DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
@@ -1153,7 +1154,7 @@ static void fimc_unbind(struct device *dev, struct device *master,
struct exynos_drm_ipp *ipp = &ctx->ipp;
exynos_drm_ipp_unregister(dev, ipp);
- exynos_drm_unregister_dma(drm_dev, dev);
+ exynos_drm_unregister_dma(drm_dev, dev, &ctx->dma_priv);
}
static const struct component_ops fimc_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 21aec38702fc..bb67cad8371f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -167,6 +167,7 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = {
struct fimd_context {
struct device *dev;
struct drm_device *drm_dev;
+ void *dma_priv;
struct exynos_drm_crtc *crtc;
struct exynos_drm_plane planes[WINDOWS_NR];
struct exynos_drm_plane_config configs[WINDOWS_NR];
@@ -1090,7 +1091,7 @@ static int fimd_bind(struct device *dev, struct device *master, void *data)
if (is_drm_iommu_supported(drm_dev))
fimd_clear_channels(ctx->crtc);
- return exynos_drm_register_dma(drm_dev, dev);
+ return exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv);
}
static void fimd_unbind(struct device *dev, struct device *master,
@@ -1100,7 +1101,7 @@ static void fimd_unbind(struct device *dev, struct device *master,
fimd_atomic_disable(ctx->crtc);
- exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev);
+ exynos_drm_unregister_dma(ctx->drm_dev, ctx->dev, &ctx->dma_priv);
if (ctx->encoder)
exynos_dpi_remove(ctx->encoder);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 2a3382d43bc9..fcee33a43aca 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -232,6 +232,7 @@ struct g2d_runqueue_node {
struct g2d_data {
struct device *dev;
+ void *dma_priv;
struct clk *gate_clk;
void __iomem *regs;
int irq;
@@ -1409,7 +1410,7 @@ static int g2d_bind(struct device *dev, struct device *master, void *data)
return ret;
}
- ret = exynos_drm_register_dma(drm_dev, dev);
+ ret = exynos_drm_register_dma(drm_dev, dev, &g2d->dma_priv);
if (ret < 0) {
dev_err(dev, "failed to enable iommu.\n");
g2d_fini_cmdlist(g2d);
@@ -1434,7 +1435,7 @@ static void g2d_unbind(struct device *dev, struct device *master, void *data)
priv->g2d_dev = NULL;
cancel_work_sync(&g2d->runqueue_work);
- exynos_drm_unregister_dma(g2d->drm_dev, dev);
+ exynos_drm_unregister_dma(g2d->drm_dev, dev, &g2d->dma_priv);
}
static const struct component_ops g2d_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 88b6fcaa20be..45e9aee8366a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -97,6 +97,7 @@ struct gsc_scaler {
struct gsc_context {
struct exynos_drm_ipp ipp;
struct drm_device *drm_dev;
+ void *dma_priv;
struct device *dev;
struct exynos_drm_ipp_task *task;
struct exynos_drm_ipp_formats *formats;
@@ -1169,7 +1170,7 @@ static int gsc_bind(struct device *dev, struct device *master, void *data)
ctx->drm_dev = drm_dev;
ctx->drm_dev = drm_dev;
- exynos_drm_register_dma(drm_dev, dev);
+ exynos_drm_register_dma(drm_dev, dev, &ctx->dma_priv);
exynos_drm_ipp_register(dev, ipp, &ipp_funcs,
DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
@@ -1189,7 +1190,7 @@ static void gsc_unbind(struct device *dev, struct device *master,
struct exynos_drm_ipp *ipp = &ctx->ipp;
exynos_drm_ipp_unregister(dev, ipp);
- exynos_drm_unregister_dma(drm_dev, dev);
+ exynos_drm_unregister_dma(drm_dev, dev, &ctx->dma_priv);
}
static const struct component_ops gsc_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index b98482990d1a..dafa87b82052 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -56,6 +56,7 @@ struct rot_variant {
struct rot_context {
struct exynos_drm_ipp ipp;
struct drm_device *drm_dev;
+ void *dma_priv;
struct device *dev;
void __iomem *regs;
struct clk *clock;
@@ -243,7 +244,7 @@ static int rotator_bind(struct device *dev, struct device *master, void *data)
rot->drm_dev = drm_dev;
ipp->drm_dev = drm_dev;
- exynos_drm_register_dma(drm_dev, dev);
+ exynos_drm_register_dma(drm_dev, dev, &rot->dma_priv);
exynos_drm_ipp_register(dev, ipp, &ipp_funcs,
DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE,
@@ -261,7 +262,7 @@ static void rotator_unbind(struct device *dev, struct device *master,
struct exynos_drm_ipp *ipp = &rot->ipp;
exynos_drm_ipp_unregister(dev, ipp);
- exynos_drm_unregister_dma(rot->drm_dev, rot->dev);
+ exynos_drm_unregister_dma(rot->drm_dev, rot->dev, &rot->dma_priv);
}
static const struct component_ops rotator_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
index 497973e9b2c5..93c43c8d914e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -39,6 +39,7 @@ struct scaler_data {
struct scaler_context {
struct exynos_drm_ipp ipp;
struct drm_device *drm_dev;
+ void *dma_priv;
struct device *dev;
void __iomem *regs;
struct clk *clock[SCALER_MAX_CLK];
@@ -450,7 +451,7 @@ static int scaler_bind(struct device *dev, struct device *master, void *data)
scaler->drm_dev = drm_dev;
ipp->drm_dev = drm_dev;
- exynos_drm_register_dma(drm_dev, dev);
+ exynos_drm_register_dma(drm_dev, dev, &scaler->dma_priv);
exynos_drm_ipp_register(dev, ipp, &ipp_funcs,
DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
@@ -470,7 +471,8 @@ static void scaler_unbind(struct device *dev, struct device *master,
struct exynos_drm_ipp *ipp = &scaler->ipp;
exynos_drm_ipp_unregister(dev, ipp);
- exynos_drm_unregister_dma(scaler->drm_dev, scaler->dev);
+ exynos_drm_unregister_dma(scaler->drm_dev, scaler->dev,
+ &scaler->dma_priv);
}
static const struct component_ops scaler_component_ops = {
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 38ae9c32feef..21b726baedea 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -94,6 +94,7 @@ struct mixer_context {
struct platform_device *pdev;
struct device *dev;
struct drm_device *drm_dev;
+ void *dma_priv;
struct exynos_drm_crtc *crtc;
struct exynos_drm_plane planes[MIXER_WIN_NR];
unsigned long flags;
@@ -894,12 +895,14 @@ static int mixer_initialize(struct mixer_context *mixer_ctx,
}
}
- return exynos_drm_register_dma(drm_dev, mixer_ctx->dev);
+ return exynos_drm_register_dma(drm_dev, mixer_ctx->dev,
+ &mixer_ctx->dma_priv);
}
static void mixer_ctx_remove(struct mixer_context *mixer_ctx)
{
- exynos_drm_unregister_dma(mixer_ctx->drm_dev, mixer_ctx->dev);
+ exynos_drm_unregister_dma(mixer_ctx->drm_dev, mixer_ctx->dev,
+ &mixer_ctx->dma_priv);
}
static int mixer_enable_vblank(struct exynos_drm_crtc *crtc)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 60c984e10c4a..7643a30ba4cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -423,7 +423,8 @@ eb_validate_vma(struct i915_execbuffer *eb,
if (unlikely(entry->flags & eb->invalid_flags))
return -EINVAL;
- if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
+ if (unlikely(entry->alignment &&
+ !is_power_of_2_u64(entry->alignment)))
return -EINVAL;
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index fe8a59aaa629..31455eceeb0c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1600,17 +1600,6 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
spin_unlock(&old->breadcrumbs.irq_lock);
}
-static struct i915_request *
-last_active(const struct intel_engine_execlists *execlists)
-{
- struct i915_request * const *last = READ_ONCE(execlists->active);
-
- while (*last && i915_request_completed(*last))
- last++;
-
- return *last;
-}
-
#define for_each_waiter(p__, rq__) \
list_for_each_entry_lockless(p__, \
&(rq__)->sched.waiters_list, \
@@ -1679,11 +1668,9 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
if (!intel_engine_has_timeslices(engine))
return false;
- if (list_is_last(&rq->sched.link, &engine->active.requests))
- return false;
-
- hint = max(rq_prio(list_next_entry(rq, sched.link)),
- engine->execlists.queue_priority_hint);
+ hint = engine->execlists.queue_priority_hint;
+ if (!list_is_last(&rq->sched.link, &engine->active.requests))
+ hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
return hint >= effective_prio(rq);
}
@@ -1725,16 +1712,26 @@ static void set_timeslice(struct intel_engine_cs *engine)
set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
}
+static void start_timeslice(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ execlists->switch_priority_hint = execlists->queue_priority_hint;
+
+ if (timer_pending(&execlists->timer))
+ return;
+
+ set_timer_ms(&execlists->timer, timeslice(engine));
+}
+
static void record_preemption(struct intel_engine_execlists *execlists)
{
(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}
-static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
+static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
- struct i915_request *rq;
-
- rq = last_active(&engine->execlists);
if (!rq)
return 0;
@@ -1745,13 +1742,14 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
return READ_ONCE(engine->props.preempt_timeout_ms);
}
-static void set_preempt_timeout(struct intel_engine_cs *engine)
+static void set_preempt_timeout(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
if (!intel_engine_has_preempt_reset(engine))
return;
set_timer_ms(&engine->execlists.preempt,
- active_preempt_timeout(engine));
+ active_preempt_timeout(engine, rq));
}
static inline void clear_ports(struct i915_request **ports, int count)
@@ -1764,6 +1762,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request **port = execlists->pending;
struct i915_request ** const last_port = port + execlists->port_mask;
+ struct i915_request * const *active;
struct i915_request *last;
struct rb_node *rb;
bool submit = false;
@@ -1818,7 +1817,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* i.e. we will retrigger preemption following the ack in case
* of trouble.
*/
- last = last_active(execlists);
+ active = READ_ONCE(execlists->active);
+ while ((last = *active) && i915_request_completed(last))
+ active++;
+
if (last) {
if (need_preempt(engine, last, rb)) {
ENGINE_TRACE(engine,
@@ -1888,11 +1890,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- if (!execlists->timer.expires &&
- need_timeslice(engine, last))
- set_timer_ms(&execlists->timer,
- timeslice(engine));
-
+ start_timeslice(engine);
return;
}
}
@@ -1927,7 +1925,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
- return; /* leave this for another */
+ start_timeslice(engine);
+ return; /* leave this for another sibling */
}
ENGINE_TRACE(engine,
@@ -2103,7 +2102,7 @@ done:
* Skip if we ended up with exactly the same set of requests,
* e.g. trying to timeslice a pair of ordered contexts
*/
- if (!memcmp(execlists->active, execlists->pending,
+ if (!memcmp(active, execlists->pending,
(port - execlists->pending + 1) * sizeof(*port))) {
do
execlists_schedule_out(fetch_and_zero(port));
@@ -2114,7 +2113,7 @@ done:
clear_ports(port + 1, last_port - port);
execlists_submit_ports(engine);
- set_preempt_timeout(engine);
+ set_preempt_timeout(engine, *active);
} else {
skip_submit:
ring_set_paused(engine, 0);
@@ -4001,26 +4000,6 @@ static int gen12_emit_flush_render(struct i915_request *request,
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
-
- /*
- * Wa_1604544889:tgl
- */
- if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
- flags = 0;
- flags |= PIPE_CONTROL_CS_STALL;
- flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
-
- flags |= PIPE_CONTROL_STORE_DATA_INDEX;
- flags |= PIPE_CONTROL_QW_WRITE;
-
- cs = intel_ring_begin(request, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- cs = gen8_emit_pipe_control(cs, flags,
- LRC_PPHWSP_SCRATCH_ADDR);
- intel_ring_advance(request, cs);
- }
}
return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 87716529cd2f..d8d9f1179c2b 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -192,11 +192,15 @@ static void cacheline_release(struct intel_timeline_cacheline *cl)
static void cacheline_free(struct intel_timeline_cacheline *cl)
{
+ if (!i915_active_acquire_if_busy(&cl->active)) {
+ __idle_cacheline_free(cl);
+ return;
+ }
+
GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
- if (i915_active_is_idle(&cl->active))
- __idle_cacheline_free(cl);
+ i915_active_release(&cl->active);
}
int intel_timeline_init(struct intel_timeline *timeline,
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 173a7f2d109f..6c2f8462e0f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1529,15 +1529,34 @@ err_obj:
return ERR_PTR(err);
}
+static const struct {
+ u32 start;
+ u32 end;
+} mcr_ranges_gen8[] = {
+ { .start = 0x5500, .end = 0x55ff },
+ { .start = 0x7000, .end = 0x7fff },
+ { .start = 0x9400, .end = 0x97ff },
+ { .start = 0xb000, .end = 0xb3ff },
+ { .start = 0xe000, .end = 0xe7ff },
+ {},
+};
+
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
+ int i;
+
+ if (INTEL_GEN(i915) < 8)
+ return false;
+
/*
- * Registers in this range are affected by the MCR selector
+ * Registers in these ranges are affected by the MCR selector
* which only controls CPU initiated MMIO. Routing does not
* work for CS access so we cannot verify them on this path.
*/
- if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
- return true;
+ for (i = 0; mcr_ranges_gen8[i].start; i++)
+ if (offset >= mcr_ranges_gen8[i].start &&
+ offset <= mcr_ranges_gen8[i].end)
+ return true;
return false;
}
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index e1c313da6c00..a62bdf9be682 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -457,7 +457,8 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected)
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
/* TODO: add more platforms support */
- if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ||
+ IS_COFFEELAKE(dev_priv)) {
if (connected) {
vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
SFUSE_STRAP_DDID_DETECTED;
diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c
index 867e7629025b..33569b910ed5 100644
--- a/drivers/gpu/drm/i915/gvt/opregion.c
+++ b/drivers/gpu/drm/i915/gvt/opregion.c
@@ -147,15 +147,14 @@ static void virt_vbt_generation(struct vbt *v)
/* there's features depending on version! */
v->header.version = 155;
v->header.header_size = sizeof(v->header);
- v->header.vbt_size = sizeof(struct vbt) - sizeof(v->header);
+ v->header.vbt_size = sizeof(struct vbt);
v->header.bdb_offset = offsetof(struct vbt, bdb_header);
strcpy(&v->bdb_header.signature[0], "BIOS_DATA_BLOCK");
v->bdb_header.version = 186; /* child_dev_size = 33 */
v->bdb_header.header_size = sizeof(v->bdb_header);
- v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header)
- - sizeof(struct bdb_header);
+ v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header);
/* general features */
v->general_features_header.id = BDB_GENERAL_FEATURES;
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 487af6ea9972..345c2aa3b491 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -272,10 +272,17 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
{
struct intel_gvt *gvt = vgpu->gvt;
- mutex_lock(&vgpu->vgpu_lock);
-
WARN(vgpu->active, "vGPU is still active!\n");
+ /*
+ * remove idr first so later clean can judge if need to stop
+ * service if no active vgpu.
+ */
+ mutex_lock(&gvt->lock);
+ idr_remove(&gvt->vgpu_idr, vgpu->id);
+ mutex_unlock(&gvt->lock);
+
+ mutex_lock(&vgpu->vgpu_lock);
intel_gvt_debugfs_remove_vgpu(vgpu);
intel_vgpu_clean_sched_policy(vgpu);
intel_vgpu_clean_submission(vgpu);
@@ -290,7 +297,6 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
mutex_unlock(&vgpu->vgpu_lock);
mutex_lock(&gvt->lock);
- idr_remove(&gvt->vgpu_idr, vgpu->id);
if (idr_is_empty(&gvt->vgpu_idr))
intel_gvt_clean_irq(gvt);
intel_gvt_update_vgpu_types(gvt);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index dcaa85a91090..a18b2a244706 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -527,19 +527,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
return NOTIFY_DONE;
}
+static void irq_semaphore_cb(struct irq_work *wrk)
+{
+ struct i915_request *rq =
+ container_of(wrk, typeof(*rq), semaphore_work);
+
+ i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
+ i915_request_put(rq);
+}
+
static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
- struct i915_request *request =
- container_of(fence, typeof(*request), semaphore);
+ struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
switch (state) {
case FENCE_COMPLETE:
- i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
+ if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
+ i915_request_get(rq);
+ init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
+ irq_work_queue(&rq->semaphore_work);
+ }
break;
case FENCE_FREE:
- i915_request_put(request);
+ i915_request_put(rq);
break;
}
@@ -776,8 +788,8 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
struct dma_fence *fence;
int err;
- GEM_BUG_ON(i915_request_timeline(rq) ==
- rcu_access_pointer(signal->timeline));
+ if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
+ return 0;
if (i915_request_started(signal))
return 0;
@@ -821,7 +833,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
return 0;
err = 0;
- if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
+ if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
err = i915_sw_fence_await_dma_fence(&rq->submit,
fence, 0,
I915_FENCE_GFP);
@@ -1318,9 +1330,9 @@ void __i915_request_queue(struct i915_request *rq,
* decide whether to preempt the entire chain so that it is ready to
* run at the earliest possible convenience.
*/
- i915_sw_fence_commit(&rq->semaphore);
if (attr && rq->engine->schedule)
rq->engine->schedule(rq, attr);
+ i915_sw_fence_commit(&rq->semaphore);
i915_sw_fence_commit(&rq->submit);
}
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index f57eadcf3583..fccc339949ec 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -26,6 +26,7 @@
#define I915_REQUEST_H
#include <linux/dma-fence.h>
+#include <linux/irq_work.h>
#include <linux/lockdep.h>
#include "gem/i915_gem_context_types.h"
@@ -208,6 +209,7 @@ struct i915_request {
};
struct list_head execute_cb;
struct i915_sw_fence semaphore;
+ struct irq_work semaphore_work;
/*
* A list of everyone we wait upon, and everyone who waits upon us.
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index b0ade76bec90..d34141f7dcd8 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -234,6 +234,11 @@ static inline u64 ptr_to_u64(const void *ptr)
__idx; \
})
+/*
+ * Return true if @n is non-zero and has exactly one bit set.
+ *
+ * n & (n - 1) clears the lowest set bit of n, so it is zero only when
+ * n has at most one bit set; the n != 0 test rejects zero itself.
+ */
+static inline bool is_power_of_2_u64(u64 n)
+{
+ return (n != 0 && ((n & (n - 1)) == 0));
+}
+
static inline void __list_del_many(struct list_head *head,
struct list_head *first)
{
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 3b92311d30b9..b3380ffab4c2 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -528,7 +528,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm)
r = -ENOMEM;
nents = dma_map_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- if (nents != ttm->sg->nents)
+ if (nents == 0)
goto release_sg;
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 71ce6215956f..60c4c6a1aac6 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -661,7 +661,9 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
trace_drm_sched_process_job(s_fence);
+ dma_fence_get(&s_fence->finished);
drm_sched_fence_finished(s_fence);
+ dma_fence_put(&s_fence->finished);
wake_up_interruptible(&sched->wake_up_worker);
}
diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
index 2aa4ed157aec..85a054f1ce38 100644
--- a/drivers/hid/hid-google-hammer.c
+++ b/drivers/hid/hid-google-hammer.c
@@ -533,6 +533,8 @@ static const struct hid_device_id hammer_devices[] = {
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MASTERBALL) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MOONBALL) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WAND) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 3a400ce603c4..9f2213426556 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -478,6 +478,7 @@
#define USB_DEVICE_ID_GOOGLE_WHISKERS 0x5030
#define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c
#define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d
+#define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044
#define USB_VENDOR_ID_GOTOP 0x08f2
#define USB_DEVICE_ID_SUPER_Q2 0x007f
@@ -726,6 +727,7 @@
#define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085
#define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3
#define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5
+#define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d
#define USB_VENDOR_ID_LG 0x1fd2
#define USB_DEVICE_ID_LG_MULTITOUCH 0x0064
diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c
index a549c42e8c90..33c102a60992 100644
--- a/drivers/hid/hid-picolcd_fb.c
+++ b/drivers/hid/hid-picolcd_fb.c
@@ -458,9 +458,9 @@ static ssize_t picolcd_fb_update_rate_show(struct device *dev,
if (ret >= PAGE_SIZE)
break;
else if (i == fb_update_rate)
- ret += snprintf(buf+ret, PAGE_SIZE-ret, "[%u] ", i);
+ ret += scnprintf(buf+ret, PAGE_SIZE-ret, "[%u] ", i);
else
- ret += snprintf(buf+ret, PAGE_SIZE-ret, "%u ", i);
+ ret += scnprintf(buf+ret, PAGE_SIZE-ret, "%u ", i);
if (ret > 0)
buf[min(ret, (size_t)PAGE_SIZE)-1] = '\n';
return ret;
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 0e7b2d998395..3735546bb524 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -103,6 +103,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C007), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS), HID_QUIRK_NOGET },
diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
index fb827c295842..4d25577a8573 100644
--- a/drivers/hid/hid-sensor-custom.c
+++ b/drivers/hid/hid-sensor-custom.c
@@ -313,7 +313,7 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr,
while (i < ret) {
if (i + attribute->size > ret) {
- len += snprintf(&buf[len],
+ len += scnprintf(&buf[len],
PAGE_SIZE - len,
"%d ", values[i]);
break;
@@ -336,10 +336,10 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr,
++i;
break;
}
- len += snprintf(&buf[len], PAGE_SIZE - len,
+ len += scnprintf(&buf[len], PAGE_SIZE - len,
"%lld ", value);
}
- len += snprintf(&buf[len], PAGE_SIZE - len, "\n");
+ len += scnprintf(&buf[len], PAGE_SIZE - len, "\n");
return len;
} else if (input)
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index 8e48c7458aa3..255f8f41c8ff 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -718,9 +718,6 @@ static int msc_win_set_lockout(struct msc_window *win,
if (old != expect) {
ret = -EINVAL;
- dev_warn_ratelimited(msc_dev(win->msc),
- "expected lockout state %d, got %d\n",
- expect, old);
goto unlock;
}
@@ -741,6 +738,10 @@ unlock:
/* from intel_th_msc_window_unlock(), don't warn if not locked */
if (expect == WIN_LOCKED && old == new)
return 0;
+
+ dev_warn_ratelimited(msc_dev(win->msc),
+ "expected lockout state %d, got %d\n",
+ expect, old);
}
return ret;
@@ -760,7 +761,7 @@ static int msc_configure(struct msc *msc)
lockdep_assert_held(&msc->buf_mutex);
if (msc->mode > MSC_MODE_MULTI)
- return -ENOTSUPP;
+ return -EINVAL;
if (msc->mode == MSC_MODE_MULTI) {
if (msc_win_set_lockout(msc->cur_win, WIN_READY, WIN_INUSE))
@@ -1294,7 +1295,7 @@ static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages,
} else if (msc->mode == MSC_MODE_MULTI) {
ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins);
} else {
- ret = -ENOTSUPP;
+ ret = -EINVAL;
}
if (!ret) {
@@ -1530,7 +1531,7 @@ static ssize_t intel_th_msc_read(struct file *file, char __user *buf,
if (ret >= 0)
*ppos = iter->offset;
} else {
- ret = -ENOTSUPP;
+ ret = -EINVAL;
}
put_count:
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
index e9d90b53bbc4..86aa6a46bcba 100644
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -235,6 +235,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
.driver_data = (kernel_ulong_t)&intel_th_2x,
},
{
+ /* Elkhart Lake CPU */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4529),
+ .driver_data = (kernel_ulong_t)&intel_th_2x,
+ },
+ {
/* Elkhart Lake */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26),
.driver_data = (kernel_ulong_t)&intel_th_2x,
diff --git a/drivers/hwtracing/stm/p_sys-t.c b/drivers/hwtracing/stm/p_sys-t.c
index b178a5495b67..360b5c03df95 100644
--- a/drivers/hwtracing/stm/p_sys-t.c
+++ b/drivers/hwtracing/stm/p_sys-t.c
@@ -238,7 +238,7 @@ static struct configfs_attribute *sys_t_policy_attrs[] = {
static inline bool sys_t_need_ts(struct sys_t_output *op)
{
if (op->node.ts_interval &&
- time_after(op->ts_jiffies + op->node.ts_interval, jiffies)) {
+ time_after(jiffies, op->ts_jiffies + op->node.ts_interval)) {
op->ts_jiffies = jiffies;
return true;
@@ -250,8 +250,8 @@ static inline bool sys_t_need_ts(struct sys_t_output *op)
static bool sys_t_need_clock_sync(struct sys_t_output *op)
{
if (op->node.clocksync_interval &&
- time_after(op->clocksync_jiffies + op->node.clocksync_interval,
- jiffies)) {
+ time_after(jiffies,
+ op->clocksync_jiffies + op->node.clocksync_interval)) {
op->clocksync_jiffies = jiffies;
return true;
diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
index 050adda7c1bd..05b35ac33ce3 100644
--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
+++ b/drivers/i2c/busses/i2c-designware-pcidrv.c
@@ -313,6 +313,7 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev)
pm_runtime_get_noresume(&pdev->dev);
i2c_del_adapter(&dev->adapter);
+ devm_free_irq(&pdev->dev, dev->irq, dev);
pci_free_irq_vectors(pdev);
}
diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c
index 3a9e840a3546..a4a6825c8758 100644
--- a/drivers/i2c/busses/i2c-gpio.c
+++ b/drivers/i2c/busses/i2c-gpio.c
@@ -348,7 +348,7 @@ static struct gpio_desc *i2c_gpio_get_desc(struct device *dev,
if (ret == -ENOENT)
retdesc = ERR_PTR(-EPROBE_DEFER);
- if (ret != -EPROBE_DEFER)
+ if (PTR_ERR(retdesc) != -EPROBE_DEFER)
dev_err(dev, "error trying to get descriptor: %d\n", ret);
return retdesc;
diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c
index 8497c7a95dd4..224f830f77f9 100644
--- a/drivers/i2c/busses/i2c-hix5hd2.c
+++ b/drivers/i2c/busses/i2c-hix5hd2.c
@@ -477,6 +477,7 @@ static int hix5hd2_i2c_remove(struct platform_device *pdev)
i2c_del_adapter(&priv->adap);
pm_runtime_disable(priv->dev);
pm_runtime_set_suspended(priv->dev);
+ clk_disable_unprepare(priv->clk);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index ca4f096fef74..a9c03f5c3482 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -132,11 +132,6 @@
#define TCOBASE 0x050
#define TCOCTL 0x054
-#define ACPIBASE 0x040
-#define ACPIBASE_SMI_OFF 0x030
-#define ACPICTRL 0x044
-#define ACPICTRL_EN 0x080
-
#define SBREG_BAR 0x10
#define SBREG_SMBCTRL 0xc6000c
#define SBREG_SMBCTRL_DNV 0xcf000c
@@ -1553,7 +1548,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
pci_bus_write_config_byte(pci_dev->bus, devfn, 0xe1, hidden);
spin_unlock(&p2sb_spinlock);
- res = &tco_res[ICH_RES_MEM_OFF];
+ res = &tco_res[1];
if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS)
res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL_DNV;
else
@@ -1563,7 +1558,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
res->flags = IORESOURCE_MEM;
return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
- tco_res, 3, &spt_tco_platform_data,
+ tco_res, 2, &spt_tco_platform_data,
sizeof(spt_tco_platform_data));
}
@@ -1576,17 +1571,16 @@ static struct platform_device *
i801_add_tco_cnl(struct i801_priv *priv, struct pci_dev *pci_dev,
struct resource *tco_res)
{
- return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
- tco_res, 2, &cnl_tco_platform_data,
- sizeof(cnl_tco_platform_data));
+ return platform_device_register_resndata(&pci_dev->dev,
+ "iTCO_wdt", -1, tco_res, 1, &cnl_tco_platform_data,
+ sizeof(cnl_tco_platform_data));
}
static void i801_add_tco(struct i801_priv *priv)
{
- u32 base_addr, tco_base, tco_ctl, ctrl_val;
struct pci_dev *pci_dev = priv->pci_dev;
- struct resource tco_res[3], *res;
- unsigned int devfn;
+ struct resource tco_res[2], *res;
+ u32 tco_base, tco_ctl;
/* If we have ACPI based watchdog use that instead */
if (acpi_has_watchdog())
@@ -1601,30 +1595,15 @@ static void i801_add_tco(struct i801_priv *priv)
return;
memset(tco_res, 0, sizeof(tco_res));
-
- res = &tco_res[ICH_RES_IO_TCO];
- res->start = tco_base & ~1;
- res->end = res->start + 32 - 1;
- res->flags = IORESOURCE_IO;
-
/*
- * Power Management registers.
+ * Always populate the main iTCO IO resource here. The second entry
+ * for NO_REBOOT MMIO is filled by the SPT specific function.
*/
- devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 2);
- pci_bus_read_config_dword(pci_dev->bus, devfn, ACPIBASE, &base_addr);
-
- res = &tco_res[ICH_RES_IO_SMI];
- res->start = (base_addr & ~1) + ACPIBASE_SMI_OFF;
- res->end = res->start + 3;
+ res = &tco_res[0];
+ res->start = tco_base & ~1;
+ res->end = res->start + 32 - 1;
res->flags = IORESOURCE_IO;
- /*
- * Enable the ACPI I/O space.
- */
- pci_bus_read_config_dword(pci_dev->bus, devfn, ACPICTRL, &ctrl_val);
- ctrl_val |= ACPICTRL_EN;
- pci_bus_write_config_dword(pci_dev->bus, devfn, ACPICTRL, ctrl_val);
-
if (priv->features & FEATURE_TCO_CNL)
priv->tco_pdev = i801_add_tco_cnl(priv, pci_dev, tco_res);
else
diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c
index 62e18b4db0ed..f5d25ce00f03 100644
--- a/drivers/i2c/busses/i2c-nvidia-gpu.c
+++ b/drivers/i2c/busses/i2c-nvidia-gpu.c
@@ -8,6 +8,7 @@
#include <linux/delay.h>
#include <linux/i2c.h>
#include <linux/interrupt.h>
+#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
@@ -75,20 +76,15 @@ static void gpu_enable_i2c_bus(struct gpu_i2c_dev *i2cd)
static int gpu_i2c_check_status(struct gpu_i2c_dev *i2cd)
{
- unsigned long target = jiffies + msecs_to_jiffies(1000);
u32 val;
+ int ret;
- do {
- val = readl(i2cd->regs + I2C_MST_CNTL);
- if (!(val & I2C_MST_CNTL_CYCLE_TRIGGER))
- break;
- if ((val & I2C_MST_CNTL_STATUS) !=
- I2C_MST_CNTL_STATUS_BUS_BUSY)
- break;
- usleep_range(500, 600);
- } while (time_is_after_jiffies(target));
-
- if (time_is_before_jiffies(target)) {
+ ret = readl_poll_timeout(i2cd->regs + I2C_MST_CNTL, val,
+ !(val & I2C_MST_CNTL_CYCLE_TRIGGER) ||
+ (val & I2C_MST_CNTL_STATUS) != I2C_MST_CNTL_STATUS_BUS_BUSY,
+ 500, 1000 * USEC_PER_MSEC);
+
+ if (ret) {
dev_err(i2cd->dev, "i2c timeout error %x\n", val);
return -ETIMEDOUT;
}
diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index a7a81846d5b1..635dd697ac0b 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -140,7 +140,7 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
int ret = 0;
int irq;
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
/* If irq is 0, we do polling. */
if (irq < 0)
irq = 0;
diff --git a/drivers/i2c/busses/i2c-st.c b/drivers/i2c/busses/i2c-st.c
index 54e1fc8a495e..f7f7b5b64720 100644
--- a/drivers/i2c/busses/i2c-st.c
+++ b/drivers/i2c/busses/i2c-st.c
@@ -434,6 +434,7 @@ static void st_i2c_wr_fill_tx_fifo(struct st_i2c_dev *i2c_dev)
/**
* st_i2c_rd_fill_tx_fifo() - Fill the Tx FIFO in read mode
* @i2c_dev: Controller's private data
+ * @max: Maximum amount of data to fill into the Tx FIFO
*
* This functions fills the Tx FIFO with fixed pattern when
* in read mode to trigger clock.
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 8f3dbc97a057..8b0ff780919b 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -394,9 +394,17 @@ EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle);
static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
{
struct device *dev;
+ struct i2c_client *client;
dev = bus_find_device_by_acpi_dev(&i2c_bus_type, adev);
- return dev ? i2c_verify_client(dev) : NULL;
+ if (!dev)
+ return NULL;
+
+ client = i2c_verify_client(dev);
+ if (!client)
+ put_device(dev);
+
+ return client;
}
static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
diff --git a/drivers/i3c/device.c b/drivers/i3c/device.c
index 9e2e1406f85e..bb8e60dff988 100644
--- a/drivers/i3c/device.c
+++ b/drivers/i3c/device.c
@@ -213,40 +213,34 @@ i3c_device_match_id(struct i3c_device *i3cdev,
{
struct i3c_device_info devinfo;
const struct i3c_device_id *id;
+ u16 manuf, part, ext_info;
+ bool rndpid;
i3c_device_get_info(i3cdev, &devinfo);
- /*
- * The lower 32bits of the provisional ID is just filled with a random
- * value, try to match using DCR info.
- */
- if (!I3C_PID_RND_LOWER_32BITS(devinfo.pid)) {
- u16 manuf = I3C_PID_MANUF_ID(devinfo.pid);
- u16 part = I3C_PID_PART_ID(devinfo.pid);
- u16 ext_info = I3C_PID_EXTRA_INFO(devinfo.pid);
-
- /* First try to match by manufacturer/part ID. */
- for (id = id_table; id->match_flags != 0; id++) {
- if ((id->match_flags & I3C_MATCH_MANUF_AND_PART) !=
- I3C_MATCH_MANUF_AND_PART)
- continue;
-
- if (manuf != id->manuf_id || part != id->part_id)
- continue;
-
- if ((id->match_flags & I3C_MATCH_EXTRA_INFO) &&
- ext_info != id->extra_info)
- continue;
-
- return id;
- }
- }
+ manuf = I3C_PID_MANUF_ID(devinfo.pid);
+ part = I3C_PID_PART_ID(devinfo.pid);
+ ext_info = I3C_PID_EXTRA_INFO(devinfo.pid);
+ rndpid = I3C_PID_RND_LOWER_32BITS(devinfo.pid);
- /* Fallback to DCR match. */
for (id = id_table; id->match_flags != 0; id++) {
if ((id->match_flags & I3C_MATCH_DCR) &&
- id->dcr == devinfo.dcr)
- return id;
+ id->dcr != devinfo.dcr)
+ continue;
+
+ if ((id->match_flags & I3C_MATCH_MANUF) &&
+ id->manuf_id != manuf)
+ continue;
+
+ if ((id->match_flags & I3C_MATCH_PART) &&
+ (rndpid || id->part_id != part))
+ continue;
+
+ if ((id->match_flags & I3C_MATCH_EXTRA_INFO) &&
+ (rndpid || id->extra_info != ext_info))
+ continue;
+
+ return id;
}
return NULL;
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index 7f8f896fa0c3..d79cd6d54b3a 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -241,12 +241,34 @@ out:
}
static DEVICE_ATTR_RO(hdrcap);
+/*
+ * Show handler for the "modalias" device attribute.
+ *
+ * Writes the device's modalias string into @buf and returns the value
+ * returned by sprintf(). No trailing newline is written.
+ *
+ * When I3C_PID_RND_LOWER_32BITS() is true for the device PID, only the
+ * DCR and manufacturer ID are emitted ("i3c:dcrXXmanufXXXX"); otherwise
+ * the part ID and extra info are appended as well
+ * ("i3c:dcrXXmanufXXXXpartXXXXextXXXX"). All fields are printed as
+ * upper-case, zero-padded hex.
+ */
+static ssize_t modalias_show(struct device *dev,
+ struct device_attribute *da, char *buf)
+{
+ struct i3c_device *i3c = dev_to_i3cdev(dev);
+ struct i3c_device_info devinfo;
+ u16 manuf, part, ext;
+
+ i3c_device_get_info(i3c, &devinfo);
+ manuf = I3C_PID_MANUF_ID(devinfo.pid);
+ part = I3C_PID_PART_ID(devinfo.pid);
+ ext = I3C_PID_EXTRA_INFO(devinfo.pid);
+
+ if (I3C_PID_RND_LOWER_32BITS(devinfo.pid))
+ return sprintf(buf, "i3c:dcr%02Xmanuf%04X", devinfo.dcr,
+ manuf);
+
+ return sprintf(buf, "i3c:dcr%02Xmanuf%04Xpart%04Xext%04X",
+ devinfo.dcr, manuf, part, ext);
+}
+static DEVICE_ATTR_RO(modalias);
+
static struct attribute *i3c_device_attrs[] = {
&dev_attr_bcr.attr,
&dev_attr_dcr.attr,
&dev_attr_pid.attr,
&dev_attr_dynamic_address.attr,
&dev_attr_hdrcap.attr,
+ &dev_attr_modalias.attr,
NULL,
};
ATTRIBUTE_GROUPS(i3c_device);
@@ -267,7 +289,7 @@ static int i3c_device_uevent(struct device *dev, struct kobj_uevent_env *env)
devinfo.dcr, manuf);
return add_uevent_var(env,
- "MODALIAS=i3c:dcr%02Xmanuf%04Xpart%04xext%04x",
+ "MODALIAS=i3c:dcr%02Xmanuf%04Xpart%04Xext%04X",
devinfo.dcr, manuf, part, ext);
}
@@ -1953,7 +1975,7 @@ of_i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master,
* DEFSLVS command.
*/
if (boardinfo->base.flags & I2C_CLIENT_TEN) {
- dev_err(&master->dev, "I2C device with 10 bit address not supported.");
+ dev_err(dev, "I2C device with 10 bit address not supported.");
return -ENOTSUPP;
}
@@ -2138,7 +2160,7 @@ static int i3c_master_i2c_adapter_init(struct i3c_master_controller *master)
* correctly even if one or more i2c devices are not registered.
*/
i3c_bus_for_each_i2cdev(&master->bus, i2cdev)
- i2cdev->dev = i2c_new_device(adap, &i2cdev->boardinfo->base);
+ i2cdev->dev = i2c_new_client_device(adap, &i2cdev->boardinfo->base);
return 0;
}
diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c
index bd26c3b9634e..5c5306cd50ec 100644
--- a/drivers/i3c/master/dw-i3c-master.c
+++ b/drivers/i3c/master/dw-i3c-master.c
@@ -221,7 +221,7 @@ struct dw_i3c_xfer {
struct completion comp;
int ret;
unsigned int ncmds;
- struct dw_i3c_cmd cmds[0];
+ struct dw_i3c_cmd cmds[];
};
struct dw_i3c_master {
diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c
index 54712793709e..3fee8bd7fe20 100644
--- a/drivers/i3c/master/i3c-master-cdns.c
+++ b/drivers/i3c/master/i3c-master-cdns.c
@@ -388,7 +388,7 @@ struct cdns_i3c_xfer {
struct completion comp;
int ret;
unsigned int ncmds;
- struct cdns_i3c_cmd cmds[0];
+ struct cdns_i3c_cmd cmds[];
};
struct cdns_i3c_data {
diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c
index 67b8817995c0..60daf04ce188 100644
--- a/drivers/iio/accel/adxl372.c
+++ b/drivers/iio/accel/adxl372.c
@@ -237,6 +237,7 @@ static const struct adxl372_axis_lookup adxl372_axis_lookup_table[] = {
.realbits = 12, \
.storagebits = 16, \
.shift = 4, \
+ .endianness = IIO_BE, \
}, \
}
diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c
index 633955d764cc..849cf74153c4 100644
--- a/drivers/iio/accel/st_accel_i2c.c
+++ b/drivers/iio/accel/st_accel_i2c.c
@@ -110,7 +110,7 @@ MODULE_DEVICE_TABLE(of, st_accel_of_match);
#ifdef CONFIG_ACPI
static const struct acpi_device_id st_accel_acpi_match[] = {
- {"SMO8840", (kernel_ulong_t)LNG2DM_ACCEL_DEV_NAME},
+ {"SMO8840", (kernel_ulong_t)LIS2DH12_ACCEL_DEV_NAME},
{"SMO8A90", (kernel_ulong_t)LNG2DM_ACCEL_DEV_NAME},
{ },
};
diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c
index a5c7771227d5..9d96f7d08b95 100644
--- a/drivers/iio/adc/at91-sama5d2_adc.c
+++ b/drivers/iio/adc/at91-sama5d2_adc.c
@@ -723,6 +723,7 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state)
for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) {
struct iio_chan_spec const *chan = at91_adc_chan_get(indio, bit);
+ u32 cor;
if (!chan)
continue;
@@ -732,6 +733,20 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state)
continue;
if (state) {
+ cor = at91_adc_readl(st, AT91_SAMA5D2_COR);
+
+ if (chan->differential)
+ cor |= (BIT(chan->channel) |
+ BIT(chan->channel2)) <<
+ AT91_SAMA5D2_COR_DIFF_OFFSET;
+ else
+ cor &= ~(BIT(chan->channel) <<
+ AT91_SAMA5D2_COR_DIFF_OFFSET);
+
+ at91_adc_writel(st, AT91_SAMA5D2_COR, cor);
+ }
+
+ if (state) {
at91_adc_writel(st, AT91_SAMA5D2_CHER,
BIT(chan->channel));
/* enable irq only if not using DMA */
diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c
index 2aad2cda6943..76a60d93fe23 100644
--- a/drivers/iio/adc/stm32-dfsdm-adc.c
+++ b/drivers/iio/adc/stm32-dfsdm-adc.c
@@ -842,31 +842,6 @@ static inline void stm32_dfsdm_process_data(struct stm32_dfsdm_adc *adc,
}
}
-static irqreturn_t stm32_dfsdm_adc_trigger_handler(int irq, void *p)
-{
- struct iio_poll_func *pf = p;
- struct iio_dev *indio_dev = pf->indio_dev;
- struct stm32_dfsdm_adc *adc = iio_priv(indio_dev);
- int available = stm32_dfsdm_adc_dma_residue(adc);
-
- while (available >= indio_dev->scan_bytes) {
- s32 *buffer = (s32 *)&adc->rx_buf[adc->bufi];
-
- stm32_dfsdm_process_data(adc, buffer);
-
- iio_push_to_buffers_with_timestamp(indio_dev, buffer,
- pf->timestamp);
- available -= indio_dev->scan_bytes;
- adc->bufi += indio_dev->scan_bytes;
- if (adc->bufi >= adc->buf_sz)
- adc->bufi = 0;
- }
-
- iio_trigger_notify_done(indio_dev->trig);
-
- return IRQ_HANDLED;
-}
-
static void stm32_dfsdm_dma_buffer_done(void *data)
{
struct iio_dev *indio_dev = data;
@@ -874,11 +849,6 @@ static void stm32_dfsdm_dma_buffer_done(void *data)
int available = stm32_dfsdm_adc_dma_residue(adc);
size_t old_pos;
- if (indio_dev->currentmode & INDIO_BUFFER_TRIGGERED) {
- iio_trigger_poll_chained(indio_dev->trig);
- return;
- }
-
/*
* FIXME: In Kernel interface does not support cyclic DMA buffer,and
* offers only an interface to push data samples per samples.
@@ -906,7 +876,15 @@ static void stm32_dfsdm_dma_buffer_done(void *data)
adc->bufi = 0;
old_pos = 0;
}
- /* regular iio buffer without trigger */
+ /*
+ * In DMA mode the trigger services of IIO are not used
+ * (e.g. no call to iio_trigger_poll).
+ * Calling irq handler associated to the hardware trigger is not
+ * relevant as the conversions have already been done. Data
+ * transfers are performed directly in DMA callback instead.
+ * This implementation avoids calling the trigger irq handler, which
+ * may sleep, from atomic context (the DMA irq handler context).
+ */
if (adc->dev_data->type == DFSDM_IIO)
iio_push_to_buffers(indio_dev, buffer);
}
@@ -1536,8 +1514,7 @@ static int stm32_dfsdm_adc_init(struct iio_dev *indio_dev)
}
ret = iio_triggered_buffer_setup(indio_dev,
- &iio_pollfunc_store_time,
- &stm32_dfsdm_adc_trigger_handler,
+ &iio_pollfunc_store_time, NULL,
&stm32_dfsdm_buffer_setup_ops);
if (ret) {
stm32_dfsdm_dma_release(indio_dev);
diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig
index 0b91de4df8f4..a7e65a59bf42 100644
--- a/drivers/iio/chemical/Kconfig
+++ b/drivers/iio/chemical/Kconfig
@@ -91,6 +91,8 @@ config SPS30
tristate "SPS30 particulate matter sensor"
depends on I2C
select CRC8
+ select IIO_BUFFER
+ select IIO_TRIGGERED_BUFFER
help
Say Y here to build support for the Sensirion SPS30 particulate
matter sensor.
diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c
index b0e241aaefb4..e5b00a6611ac 100644
--- a/drivers/iio/light/vcnl4000.c
+++ b/drivers/iio/light/vcnl4000.c
@@ -167,16 +167,17 @@ static int vcnl4200_init(struct vcnl4000_data *data)
data->vcnl4200_ps.reg = VCNL4200_PS_DATA;
switch (id) {
case VCNL4200_PROD_ID:
- /* Integration time is 50ms, but the experiments */
- /* show 54ms in total. */
- data->vcnl4200_al.sampling_rate = ktime_set(0, 54000 * 1000);
- data->vcnl4200_ps.sampling_rate = ktime_set(0, 4200 * 1000);
+ /* Default wait time is 50ms, add 20% tolerance. */
+ data->vcnl4200_al.sampling_rate = ktime_set(0, 60000 * 1000);
+ /* Default wait time is 4.8ms, add 20% tolerance. */
+ data->vcnl4200_ps.sampling_rate = ktime_set(0, 5760 * 1000);
data->al_scale = 24000;
break;
case VCNL4040_PROD_ID:
- /* Integration time is 80ms, add 10ms. */
- data->vcnl4200_al.sampling_rate = ktime_set(0, 100000 * 1000);
- data->vcnl4200_ps.sampling_rate = ktime_set(0, 100000 * 1000);
+ /* Default wait time is 80ms, add 20% tolerance. */
+ data->vcnl4200_al.sampling_rate = ktime_set(0, 96000 * 1000);
+ /* Default wait time is 5ms, add 20% tolerance. */
+ data->vcnl4200_ps.sampling_rate = ktime_set(0, 6000 * 1000);
data->al_scale = 120000;
break;
}
diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c
index fc7e910f8e8b..d32996702110 100644
--- a/drivers/iio/magnetometer/ak8974.c
+++ b/drivers/iio/magnetometer/ak8974.c
@@ -564,7 +564,7 @@ static int ak8974_read_raw(struct iio_dev *indio_dev,
* We read all axes and discard all but one, for optimized
* reading, use the triggered buffer.
*/
- *val = le16_to_cpu(hw_values[chan->address]);
+ *val = (s16)le16_to_cpu(hw_values[chan->address]);
ret = IIO_VAL_INT;
}
diff --git a/drivers/iio/proximity/ping.c b/drivers/iio/proximity/ping.c
index 34aff108dff5..12b893c5b0ee 100644
--- a/drivers/iio/proximity/ping.c
+++ b/drivers/iio/proximity/ping.c
@@ -269,7 +269,7 @@ static const struct iio_chan_spec ping_chan_spec[] = {
static const struct of_device_id of_ping_match[] = {
{ .compatible = "parallax,ping", .data = &pa_ping_cfg},
- { .compatible = "parallax,laserping", .data = &pa_ping_cfg},
+ { .compatible = "parallax,laserping", .data = &pa_laser_ping_cfg},
{},
};
diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
index 2e0d32aa8436..2f82e8c32186 100644
--- a/drivers/iio/trigger/stm32-timer-trigger.c
+++ b/drivers/iio/trigger/stm32-timer-trigger.c
@@ -161,7 +161,8 @@ static int stm32_timer_start(struct stm32_timer_trigger *priv,
return 0;
}
-static void stm32_timer_stop(struct stm32_timer_trigger *priv)
+static void stm32_timer_stop(struct stm32_timer_trigger *priv,
+ struct iio_trigger *trig)
{
u32 ccer, cr1;
@@ -179,6 +180,12 @@ static void stm32_timer_stop(struct stm32_timer_trigger *priv)
regmap_write(priv->regmap, TIM_PSC, 0);
regmap_write(priv->regmap, TIM_ARR, 0);
+ /* Force disable master mode */
+ if (stm32_timer_is_trgo2_name(trig->name))
+ regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS2, 0);
+ else
+ regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS, 0);
+
/* Make sure that registers are updated */
regmap_update_bits(priv->regmap, TIM_EGR, TIM_EGR_UG, TIM_EGR_UG);
}
@@ -197,7 +204,7 @@ static ssize_t stm32_tt_store_frequency(struct device *dev,
return ret;
if (freq == 0) {
- stm32_timer_stop(priv);
+ stm32_timer_stop(priv, trig);
} else {
ret = stm32_timer_start(priv, trig, freq);
if (ret)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index f6c255202d7f..d0b3d35ad3e4 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -896,7 +896,9 @@ static int add_one_compat_dev(struct ib_device *device,
cdev->dev.parent = device->dev.parent;
rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
cdev->dev.release = compatdev_release;
- dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ if (ret)
+ goto add_err;
ret = device_add(&cdev->dev);
if (ret)
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index e0b0a91da696..9eec26d10d7b 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -918,6 +918,10 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
+ if (strlen(name) == 0) {
+ err = -EINVAL;
+ goto done;
+ }
err = ib_device_rename(device, name);
goto done;
}
@@ -1514,7 +1518,7 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
sizeof(ibdev_name));
- if (strchr(ibdev_name, '%'))
+ if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
return -EINVAL;
nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 2d5608315dc8..75e7ec017836 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -349,16 +349,11 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
else if (qp_pps)
new_pps->main.pkey_index = qp_pps->main.pkey_index;
- if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT))
+ if (((qp_attr_mask & IB_QP_PKEY_INDEX) &&
+ (qp_attr_mask & IB_QP_PORT)) ||
+ (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID))
new_pps->main.state = IB_PORT_PKEY_VALID;
- if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) {
- new_pps->main.port_num = qp_pps->main.port_num;
- new_pps->main.pkey_index = qp_pps->main.pkey_index;
- if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
- new_pps->main.state = IB_PORT_PKEY_VALID;
- }
-
if (qp_attr_mask & IB_QP_ALT_PATH) {
new_pps->alt.port_num = qp_attr->alt_port_num;
new_pps->alt.pkey_index = qp_attr->alt_pkey_index;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index cd656ad4953b..3b1e627d9a8d 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -275,8 +275,8 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
mmu_interval_notifier_remove(&umem_odp->notifier);
kvfree(umem_odp->dma_list);
kvfree(umem_odp->page_list);
- put_pid(umem_odp->tgid);
}
+ put_pid(umem_odp->tgid);
kfree(umem_odp);
}
EXPORT_SYMBOL(ib_umem_odp_release);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1235ffb2389b..da229eab5903 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1129,17 +1129,30 @@ static const struct file_operations umad_sm_fops = {
.llseek = no_llseek,
};
+static struct ib_umad_port *get_port(struct ib_device *ibdev,
+ struct ib_umad_device *umad_dev,
+ unsigned int port)
+{
+ if (!umad_dev)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (!rdma_is_port_valid(ibdev, port))
+ return ERR_PTR(-EINVAL);
+ if (!rdma_cap_ib_mad(ibdev, port))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return &umad_dev->ports[port - rdma_start_port(ibdev)];
+}
+
static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data,
struct ib_client_nl_info *res)
{
- struct ib_umad_device *umad_dev = client_data;
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
- if (!rdma_is_port_valid(ibdev, res->port))
- return -EINVAL;
+ if (IS_ERR(port))
+ return PTR_ERR(port);
res->abi = IB_USER_MAD_ABI_VERSION;
- res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].dev;
-
+ res->cdev = &port->dev;
return 0;
}
@@ -1154,15 +1167,13 @@ MODULE_ALIAS_RDMA_CLIENT("umad");
static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data,
struct ib_client_nl_info *res)
{
- struct ib_umad_device *umad_dev =
- ib_get_client_data(ibdev, &umad_client);
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
- if (!rdma_is_port_valid(ibdev, res->port))
- return -EINVAL;
+ if (IS_ERR(port))
+ return PTR_ERR(port);
res->abi = IB_USER_MAD_ABI_VERSION;
- res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].sm_dev;
-
+ res->cdev = &port->sm_dev;
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index c2f0d9ba93de..13e4203497b3 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -141,6 +141,7 @@ static int defer_packet_queue(
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
if (list_empty(&pq->busy.list)) {
+ pq->busy.lock = &sde->waitlock;
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
@@ -155,6 +156,7 @@ static void activate_packet_queue(struct iowait *wait, int reason)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+ pq->busy.lock = NULL;
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
wake_up(&wait->wait_dma);
};
@@ -256,6 +258,21 @@ pq_reqs_nomem:
return ret;
}
+static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
+{
+ unsigned long flags;
+ seqlock_t *lock = pq->busy.lock;
+
+ if (!lock)
+ return;
+ write_seqlock_irqsave(lock, flags);
+ if (!list_empty(&pq->busy.list)) {
+ list_del_init(&pq->busy.list);
+ pq->busy.lock = NULL;
+ }
+ write_sequnlock_irqrestore(lock, flags);
+}
+
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
@@ -281,6 +298,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
kfree(pq->reqs);
kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
+ flush_pq_iowait(pq);
kfree(pq);
} else {
spin_unlock(&fd->pq_rcu_lock);
@@ -587,11 +605,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
if (ret < 0) {
if (ret != -EBUSY)
goto free_req;
- wait_event_interruptible_timeout(
+ if (wait_event_interruptible_timeout(
pq->busy.wait_dma,
- (pq->state == SDMA_PKT_Q_ACTIVE),
+ pq->state == SDMA_PKT_Q_ACTIVE,
msecs_to_jiffies(
- SDMA_IOWAIT_TIMEOUT));
+ SDMA_IOWAIT_TIMEOUT)) <= 0)
+ flush_pq_iowait(pq);
}
}
*count += idx;
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 367a71bc5f4b..3dec3de903b7 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -330,6 +330,22 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
dump_cqe(dev, cqe);
}
+static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+ u16 tail, u16 head)
+{
+ u16 idx;
+
+ do {
+ idx = tail & (qp->sq.wqe_cnt - 1);
+ if (idx == head)
+ break;
+
+ tail = qp->sq.w_list[idx].next;
+ } while (1);
+ tail = qp->sq.w_list[idx].next;
+ qp->sq.last_poll = tail;
+}
+
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
@@ -368,7 +384,7 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
}
static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
- int *npolled, int is_send)
+ int *npolled, bool is_send)
{
struct mlx5_ib_wq *wq;
unsigned int cur;
@@ -383,10 +399,16 @@ static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
return;
for (i = 0; i < cur && np < num_entries; i++) {
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ unsigned int idx;
+
+ idx = (is_send) ? wq->last_poll : wq->tail;
+ idx &= (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
wc->status = IB_WC_WR_FLUSH_ERR;
wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
wq->tail++;
+ if (is_send)
+ wq->last_poll = wq->w_list[idx].next;
np++;
wc->qp = &qp->ibqp;
wc++;
@@ -473,6 +495,7 @@ repoll:
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
idx = wqe_ctr & (wq->wqe_cnt - 1);
handle_good_req(wc, cqe64, wq, idx);
+ handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
wc->wr_id = wq->wrid[idx];
wq->tail = wq->wqe_head[idx] + 1;
wc->status = IB_WC_SUCCESS;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e4bcfa81b70a..ffa7c2100edb 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5722,9 +5722,10 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
const struct mlx5_ib_counters *cnts =
get_counters(dev, counter->port - 1);
- /* Q counters are in the beginning of all counters */
return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bb78142bca5e..f3bdbd5e5096 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -288,6 +288,7 @@ struct mlx5_ib_wq {
unsigned head;
unsigned tail;
u16 cur_post;
+ u16 last_poll;
void *cur_edge;
};
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 957f3a52589b..8fe149e808af 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3775,6 +3775,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->sq.cur_post = 0;
if (qp->sq.wqe_cnt)
qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
+ qp->sq.last_poll = 0;
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
}
@@ -6204,6 +6205,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
+ if (!capable(CAP_SYS_RAWIO) &&
+ init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP)
+ return ERR_PTR(-EPERM);
+
dev = to_mdev(pd->device);
switch (init_attr->wq_type) {
case IB_WQT_RQ:
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 13d7f66eadab..5724cbbe38b1 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -327,7 +327,7 @@ void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
if (cq->ip)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
- vfree(cq->queue);
+ vfree(cq->kqueue);
}
/**
diff --git a/drivers/input/input.c b/drivers/input/input.c
index fce43e62dd45..3cfd2c18eebd 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -190,6 +190,7 @@ static void input_repeat_key(struct timer_list *t)
input_value_sync
};
+ input_set_timestamp(dev, ktime_get());
input_pass_values(dev, vals, ARRAY_SIZE(vals));
if (dev->rep[REP_PERIOD])
diff --git a/drivers/input/keyboard/tm2-touchkey.c b/drivers/input/keyboard/tm2-touchkey.c
index 14b55bacdd0f..fb078e049413 100644
--- a/drivers/input/keyboard/tm2-touchkey.c
+++ b/drivers/input/keyboard/tm2-touchkey.c
@@ -75,6 +75,14 @@ static struct touchkey_variant aries_touchkey_variant = {
.cmd_led_off = ARIES_TOUCHKEY_CMD_LED_OFF,
};
+static const struct touchkey_variant tc360_touchkey_variant = {
+ .keycode_reg = 0x00,
+ .base_reg = 0x00,
+ .fixed_regulator = true,
+ .cmd_led_on = TM2_TOUCHKEY_CMD_LED_ON,
+ .cmd_led_off = TM2_TOUCHKEY_CMD_LED_OFF,
+};
+
static int tm2_touchkey_led_brightness_set(struct led_classdev *led_dev,
enum led_brightness brightness)
{
@@ -327,6 +335,9 @@ static const struct of_device_id tm2_touchkey_of_match[] = {
}, {
.compatible = "cypress,aries-touchkey",
.data = &aries_touchkey_variant,
+ }, {
+ .compatible = "coreriver,tc360-touchkey",
+ .data = &tc360_touchkey_variant,
},
{ },
};
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 2c666fb34625..4d2036209b45 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -186,6 +186,7 @@ static const char * const smbus_pnp_ids[] = {
"SYN3052", /* HP EliteBook 840 G4 */
"SYN3221", /* HP 15-ay000 */
"SYN323d", /* HP Spectre X360 13-w013dx */
+ "SYN3257", /* HP Envy 13-ad105ng */
NULL
};
diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c
index 6adea8a3e8fb..ffa39ab153f2 100644
--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -1203,8 +1203,8 @@ static int rmi_f11_initialize(struct rmi_function *fn)
* If distance threshold values are set, switch to reduced reporting
* mode so they actually get used by the controller.
*/
- if (ctrl->ctrl0_11[RMI_F11_DELTA_X_THRESHOLD] ||
- ctrl->ctrl0_11[RMI_F11_DELTA_Y_THRESHOLD]) {
+ if (sensor->axis_align.delta_x_threshold ||
+ sensor->axis_align.delta_y_threshold) {
ctrl->ctrl0_11[0] &= ~RMI_F11_REPORT_MODE_MASK;
ctrl->ctrl0_11[0] |= RMI_F11_REPORT_MODE_REDUCED;
}
diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c
index 6ed9f22e6401..fe245439adee 100644
--- a/drivers/input/touchscreen/raydium_i2c_ts.c
+++ b/drivers/input/touchscreen/raydium_i2c_ts.c
@@ -432,7 +432,7 @@ static int raydium_i2c_write_object(struct i2c_client *client,
return 0;
}
-static bool raydium_i2c_boot_trigger(struct i2c_client *client)
+static int raydium_i2c_boot_trigger(struct i2c_client *client)
{
static const u8 cmd[7][6] = {
{ 0x08, 0x0C, 0x09, 0x00, 0x50, 0xD7 },
@@ -457,10 +457,10 @@ static bool raydium_i2c_boot_trigger(struct i2c_client *client)
}
}
- return false;
+ return 0;
}
-static bool raydium_i2c_fw_trigger(struct i2c_client *client)
+static int raydium_i2c_fw_trigger(struct i2c_client *client)
{
static const u8 cmd[5][11] = {
{ 0, 0x09, 0x71, 0x0C, 0x09, 0x00, 0x50, 0xD7, 0, 0, 0 },
@@ -483,7 +483,7 @@ static bool raydium_i2c_fw_trigger(struct i2c_client *client)
}
}
- return false;
+ return 0;
}
static int raydium_i2c_check_path(struct i2c_client *client)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index aac132bd1ef0..20cce366e951 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3826,7 +3826,7 @@ int amd_iommu_activate_guest_mode(void *data)
entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
return modify_irte_ga(ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry, NULL);
+ ir_data->irq_2_irte.index, entry, ir_data);
}
EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
@@ -3852,7 +3852,7 @@ int amd_iommu_deactivate_guest_mode(void *data)
APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
return modify_irte_ga(ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry, NULL);
+ ir_data->irq_2_irte.index, entry, ir_data);
}
EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a2e96a5fd9a7..ba128d1cdaee 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -177,15 +177,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
start -= iova_offset(iovad, start);
num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
- msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL);
- if (!msi_page)
- return -ENOMEM;
-
for (i = 0; i < num_pages; i++) {
- msi_page[i].phys = start;
- msi_page[i].iova = start;
- INIT_LIST_HEAD(&msi_page[i].list);
- list_add(&msi_page[i].list, &cookie->msi_page_list);
+ msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
+ if (!msi_page)
+ return -ENOMEM;
+
+ msi_page->phys = start;
+ msi_page->iova = start;
+ INIT_LIST_HEAD(&msi_page->list);
+ list_add(&msi_page->list, &cookie->msi_page_list);
start += iovad->granule;
}
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 071bb42bbbc5..f77dae7ba7d4 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/numa.h>
+#include <linux/limits.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>
@@ -128,6 +129,13 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
BUG_ON(dev->is_virtfn);
+ /*
+ * Ignore devices that have a domain number higher than what can
+ * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
+ */
+ if (pci_domain_nr(dev->bus) > U16_MAX)
+ return NULL;
+
/* Only generate path[] for device addition event */
if (event == BUS_NOTIFY_ADD_DEVICE)
for (tmp = dev; tmp; tmp = tmp->bus->self)
@@ -363,7 +371,8 @@ dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
{
struct dmar_drhd_unit *dmaru;
- list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list)
+ list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
+ dmar_rcu_check())
if (dmaru->segment == drhd->segment &&
dmaru->reg_base_addr == drhd->address)
return dmaru;
@@ -440,12 +449,13 @@ static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
/* Check for NUL termination within the designated length */
if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
- WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
+ pr_warn(FW_BUG
"Your BIOS is broken; ANDD object name is not NUL-terminated\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
return -EINVAL;
}
pr_info("ANDD device: %x name: %s\n", andd->device_number,
@@ -471,14 +481,14 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
return 0;
}
}
- WARN_TAINT(
- 1, TAINT_FIRMWARE_WORKAROUND,
+ pr_warn(FW_BUG
"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
- drhd->reg_base_addr,
+ rhsa->base_address,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
return 0;
}
@@ -827,14 +837,14 @@ int __init dmar_table_init(void)
static void warn_invalid_dmar(u64 addr, const char *message)
{
- WARN_TAINT_ONCE(
- 1, TAINT_FIRMWARE_WORKAROUND,
+ pr_warn_once(FW_BUG
"Your BIOS is broken; DMAR reported at address %llx%s!\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
addr, message,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
}
static int __ref
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
index c1257bef553c..3eb1fe240fb0 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -33,38 +33,42 @@ struct iommu_regset {
#define IOMMU_REGSET_ENTRY(_reg_) \
{ DMAR_##_reg_##_REG, __stringify(_reg_) }
-static const struct iommu_regset iommu_regs[] = {
+
+static const struct iommu_regset iommu_regs_32[] = {
IOMMU_REGSET_ENTRY(VER),
- IOMMU_REGSET_ENTRY(CAP),
- IOMMU_REGSET_ENTRY(ECAP),
IOMMU_REGSET_ENTRY(GCMD),
IOMMU_REGSET_ENTRY(GSTS),
- IOMMU_REGSET_ENTRY(RTADDR),
- IOMMU_REGSET_ENTRY(CCMD),
IOMMU_REGSET_ENTRY(FSTS),
IOMMU_REGSET_ENTRY(FECTL),
IOMMU_REGSET_ENTRY(FEDATA),
IOMMU_REGSET_ENTRY(FEADDR),
IOMMU_REGSET_ENTRY(FEUADDR),
- IOMMU_REGSET_ENTRY(AFLOG),
IOMMU_REGSET_ENTRY(PMEN),
IOMMU_REGSET_ENTRY(PLMBASE),
IOMMU_REGSET_ENTRY(PLMLIMIT),
+ IOMMU_REGSET_ENTRY(ICS),
+ IOMMU_REGSET_ENTRY(PRS),
+ IOMMU_REGSET_ENTRY(PECTL),
+ IOMMU_REGSET_ENTRY(PEDATA),
+ IOMMU_REGSET_ENTRY(PEADDR),
+ IOMMU_REGSET_ENTRY(PEUADDR),
+};
+
+static const struct iommu_regset iommu_regs_64[] = {
+ IOMMU_REGSET_ENTRY(CAP),
+ IOMMU_REGSET_ENTRY(ECAP),
+ IOMMU_REGSET_ENTRY(RTADDR),
+ IOMMU_REGSET_ENTRY(CCMD),
+ IOMMU_REGSET_ENTRY(AFLOG),
IOMMU_REGSET_ENTRY(PHMBASE),
IOMMU_REGSET_ENTRY(PHMLIMIT),
IOMMU_REGSET_ENTRY(IQH),
IOMMU_REGSET_ENTRY(IQT),
IOMMU_REGSET_ENTRY(IQA),
- IOMMU_REGSET_ENTRY(ICS),
IOMMU_REGSET_ENTRY(IRTA),
IOMMU_REGSET_ENTRY(PQH),
IOMMU_REGSET_ENTRY(PQT),
IOMMU_REGSET_ENTRY(PQA),
- IOMMU_REGSET_ENTRY(PRS),
- IOMMU_REGSET_ENTRY(PECTL),
- IOMMU_REGSET_ENTRY(PEDATA),
- IOMMU_REGSET_ENTRY(PEADDR),
- IOMMU_REGSET_ENTRY(PEUADDR),
IOMMU_REGSET_ENTRY(MTRRCAP),
IOMMU_REGSET_ENTRY(MTRRDEF),
IOMMU_REGSET_ENTRY(MTRR_FIX64K_00000),
@@ -127,10 +131,16 @@ static int iommu_regset_show(struct seq_file *m, void *unused)
* by adding the offset to the pointer (virtual address).
*/
raw_spin_lock_irqsave(&iommu->register_lock, flag);
- for (i = 0 ; i < ARRAY_SIZE(iommu_regs); i++) {
- value = dmar_readq(iommu->reg + iommu_regs[i].offset);
+ for (i = 0 ; i < ARRAY_SIZE(iommu_regs_32); i++) {
+ value = dmar_readl(iommu->reg + iommu_regs_32[i].offset);
+ seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
+ iommu_regs_32[i].regs, iommu_regs_32[i].offset,
+ value);
+ }
+ for (i = 0 ; i < ARRAY_SIZE(iommu_regs_64); i++) {
+ value = dmar_readq(iommu->reg + iommu_regs_64[i].offset);
seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
- iommu_regs[i].regs, iommu_regs[i].offset,
+ iommu_regs_64[i].regs, iommu_regs_64[i].offset,
value);
}
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
@@ -272,9 +282,16 @@ static int dmar_translation_struct_show(struct seq_file *m, void *unused)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
+ u32 sts;
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
+ sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_TES)) {
+ seq_printf(m, "DMA Remapping is not enabled on %s\n",
+ iommu->name);
+ continue;
+ }
root_tbl_walk(m, iommu);
seq_putc(m, '\n');
}
@@ -415,6 +432,7 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused)
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
u64 irta;
+ u32 sts;
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
@@ -424,7 +442,8 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused)
seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n",
iommu->name);
- if (iommu->ir_table) {
+ sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+ if (iommu->ir_table && (sts & DMA_GSTS_IRES)) {
irta = virt_to_phys(iommu->ir_table->base);
seq_printf(m, " IR table address:%llx\n", irta);
ir_tbl_remap_entry_show(m, iommu);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 6fa6de2b6ad5..4be549478691 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4261,10 +4261,11 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
/* we know that the this iommu should be at offset 0xa000 from vtbar */
drhd = dmar_find_matched_drhd_unit(pdev);
- if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
- TAINT_FIRMWARE_WORKAROUND,
- "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
+ if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
+ pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+ }
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
@@ -4460,14 +4461,16 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
struct dmar_rmrr_unit *rmrru;
rmrr = (struct acpi_dmar_reserved_memory *)header;
- if (rmrr_sanity_check(rmrr))
- WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
+ if (rmrr_sanity_check(rmrr)) {
+ pr_warn(FW_BUG
"Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
rmrr->base_address, rmrr->end_address,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ }
rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
if (!rmrru)
@@ -5130,6 +5133,9 @@ int __init intel_iommu_init(void)
down_write(&dmar_global_lock);
+ if (!no_iommu)
+ intel_iommu_debugfs_init();
+
if (no_iommu || dmar_disabled) {
/*
* We exit the function here to ensure IOMMU's remapping and
@@ -5193,6 +5199,7 @@ int __init intel_iommu_init(void)
init_iommu_pm_ops();
+ down_read(&dmar_global_lock);
for_each_active_iommu(iommu, drhd) {
iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
@@ -5200,6 +5207,7 @@ int __init intel_iommu_init(void)
iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
iommu_device_register(&iommu->iommu);
}
+ up_read(&dmar_global_lock);
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
if (si_domain && !hw_pass_through)
@@ -5210,7 +5218,6 @@ int __init intel_iommu_init(void)
down_read(&dmar_global_lock);
if (probe_acpi_namespace_devices())
pr_warn("ACPI name space devices didn't probe correctly\n");
- up_read(&dmar_global_lock);
/* Finally, we enable the DMA remapping hardware. */
for_each_iommu(iommu, drhd) {
@@ -5219,10 +5226,11 @@ int __init intel_iommu_init(void)
iommu_disable_protect_mem_regions(iommu);
}
+ up_read(&dmar_global_lock);
+
pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
intel_iommu_enabled = 1;
- intel_iommu_debugfs_init();
return 0;
@@ -5700,8 +5708,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
u64 phys = 0;
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
- if (pte)
- phys = dma_pte_addr(pte);
+ if (pte && dma_pte_present(pte))
+ phys = dma_pte_addr(pte) +
+ (iova & (BIT_MASK(level_to_offset_bits(level) +
+ VTD_PAGE_SHIFT) - 1));
return phys;
}
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 983b08477e64..04fbd4bf0ff9 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -468,7 +468,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
arm_lpae_iopte *ptep = data->pgd;
int ret, lvl = data->start_level;
arm_lpae_iopte prot;
- long iaext = (long)iova >> cfg->ias;
+ long iaext = (s64)iova >> cfg->ias;
/* If no access, then nothing to do */
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
@@ -645,7 +645,7 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte *ptep = data->pgd;
- long iaext = (long)iova >> cfg->ias;
+ long iaext = (s64)iova >> cfg->ias;
if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
return 0;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index c1f7af9d9ae7..1eec9d4649d5 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -34,6 +34,7 @@
#define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80)
#define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0)
+#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1)
struct redist_region {
void __iomem *redist_base;
@@ -1464,6 +1465,15 @@ static bool gic_enable_quirk_msm8996(void *data)
return true;
}
+static bool gic_enable_quirk_cavium_38539(void *data)
+{
+ struct gic_chip_data *d = data;
+
+ d->flags |= FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539;
+
+ return true;
+}
+
static bool gic_enable_quirk_hip06_07(void *data)
{
struct gic_chip_data *d = data;
@@ -1503,6 +1513,19 @@ static const struct gic_quirk gic_quirks[] = {
.init = gic_enable_quirk_hip06_07,
},
{
+ /*
+ * Reserved register accesses generate a Synchronous
+ * External Abort. This erratum applies to:
+ * - ThunderX: CN88xx
+ * - OCTEON TX: CN83xx, CN81xx
+ * - OCTEON TX2: CN93xx, CN96xx, CN98xx, CNF95xx*
+ */
+ .desc = "GICv3: Cavium erratum 38539",
+ .iidr = 0xa000034c,
+ .mask = 0xe8f00fff,
+ .init = gic_enable_quirk_cavium_38539,
+ },
+ {
}
};
@@ -1577,7 +1600,12 @@ static int __init gic_init_bases(void __iomem *dist_base,
pr_info("%d SPIs implemented\n", GIC_LINE_NR - 32);
pr_info("%d Extended SPIs implemented\n", GIC_ESPI_NR);
- gic_data.rdists.gicd_typer2 = readl_relaxed(gic_data.dist_base + GICD_TYPER2);
+ /*
+ * ThunderX1 explodes on reading GICD_TYPER2, in violation of the
+ * architecture spec (which says that reserved registers are RES0).
+ */
+ if (!(gic_data.flags & FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539))
+ gic_data.rdists.gicd_typer2 = readl_relaxed(gic_data.dist_base + GICD_TYPER2);
gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops,
&gic_data);
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 7543e395a2c6..db38a68abb6c 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -380,12 +380,11 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
goto err_dev;
}
- tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
+ tqueue = blk_alloc_queue(tt->make_rq, dev->q->node);
if (!tqueue) {
ret = -ENOMEM;
goto err_disk;
}
- blk_queue_make_request(tqueue, tt->make_rq);
strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
tdisk->flags = GENHD_FL_EXT_DEVT;
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index 7d8958df9472..6387302b03f2 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -37,7 +37,7 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
active = 0;
up(&rlun->wr_sem);
}
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"pblk: pos:%d, ch:%d, lun:%d - %d\n",
i,
rlun->bppa.a.ch,
@@ -120,7 +120,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf;
- sz = snprintf(page, PAGE_SIZE,
+ sz = scnprintf(page, PAGE_SIZE,
"g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
pblk->addrf_len,
ppaf->blk_offset, ppaf->blk_len,
@@ -130,7 +130,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
ppaf->pln_offset, ppaf->pln_len,
ppaf->sec_offset, ppaf->sec_len);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
gppaf->blk_offset, gppaf->blk_len,
gppaf->pg_offset, gppaf->pg_len,
@@ -142,7 +142,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
struct nvm_addrf *ppaf = &pblk->addrf;
struct nvm_addrf *gppaf = &geo->addrf;
- sz = snprintf(page, PAGE_SIZE,
+ sz = scnprintf(page, PAGE_SIZE,
"pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
pblk->addrf_len,
ppaf->ch_offset, ppaf->ch_len,
@@ -150,7 +150,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
ppaf->chk_offset, ppaf->chk_len,
ppaf->sec_offset, ppaf->sec_len);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
gppaf->ch_offset, gppaf->ch_len,
gppaf->lun_offset, gppaf->lun_len,
@@ -278,11 +278,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
pblk_err(pblk, "corrupted free line list:%d/%d\n",
nr_free_lines, free_line_cnt);
- sz = snprintf(page, PAGE_SIZE - sz,
+ sz = scnprintf(page, PAGE_SIZE - sz,
"line: nluns:%d, nblks:%d, nsecs:%d\n",
geo->all_luns, lm->blk_per_line, lm->sec_per_line);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
cur_data, cur_log,
nr_free_lines,
@@ -292,12 +292,12 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
d_line_cnt, l_line_cnt,
l_mg->nr_lines);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
atomic_read(&pblk->gc.read_inflight_gc));
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
cur_data, cur_sec, msecs, vsc, sec_in_line,
map_weight, lm->sec_per_line,
@@ -313,19 +313,19 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
struct pblk_line_meta *lm = &pblk->lm;
ssize_t sz = 0;
- sz = snprintf(page, PAGE_SIZE - sz,
+ sz = scnprintf(page, PAGE_SIZE - sz,
"smeta - len:%d, secs:%d\n",
lm->smeta_len, lm->smeta_sec);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"emeta - len:%d, sec:%d, bb_start:%d\n",
lm->emeta_len[0], lm->emeta_sec[0],
lm->emeta_bb);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
lm->sec_bitmap_len,
lm->blk_bitmap_len,
lm->lun_bitmap_len);
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"blk_line:%d, sec_line:%d, sec_blk:%d\n",
lm->blk_per_line,
lm->sec_per_line,
@@ -344,12 +344,12 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
{
int sz;
- sz = snprintf(page, PAGE_SIZE,
+ sz = scnprintf(page, PAGE_SIZE,
"user:%lld gc:%lld pad:%lld WA:",
user, gc, pad);
if (!user) {
- sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
} else {
u64 wa_int;
u32 wa_frac;
@@ -358,7 +358,7 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
wa_int = div64_u64(wa_int, user);
wa_int = div_u64_rem(wa_int, 100000, &wa_frac);
- sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
wa_int, wa_frac);
}
@@ -401,9 +401,9 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
if (!total) {
for (i = 0; i < (buckets + 1); i++)
- sz += snprintf(page + sz, PAGE_SIZE - sz,
+ sz += scnprintf(page + sz, PAGE_SIZE - sz,
"%d:0 ", i);
- sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
return sz;
}
@@ -411,7 +411,7 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
for (i = 0; i < buckets; i++)
total_buckets += atomic64_read(&pblk->pad_dist[i]);
- sz += snprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
bucket_percentage(total - total_buckets, total));
for (i = 0; i < buckets; i++) {
@@ -419,10 +419,10 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]),
total);
- sz += snprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
i + 1, p);
}
- sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
+ sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
return sz;
}
diff --git a/drivers/macintosh/windfarm_ad7417_sensor.c b/drivers/macintosh/windfarm_ad7417_sensor.c
index 125605987b44..e7dec328c7cf 100644
--- a/drivers/macintosh/windfarm_ad7417_sensor.c
+++ b/drivers/macintosh/windfarm_ad7417_sensor.c
@@ -312,9 +312,16 @@ static const struct i2c_device_id wf_ad7417_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_ad7417_id);
+static const struct of_device_id wf_ad7417_of_id[] = {
+ { .compatible = "ad7417", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_ad7417_of_id);
+
static struct i2c_driver wf_ad7417_driver = {
.driver = {
.name = "wf_ad7417",
+ .of_match_table = wf_ad7417_of_id,
},
.probe = wf_ad7417_probe,
.remove = wf_ad7417_remove,
diff --git a/drivers/macintosh/windfarm_fcu_controls.c b/drivers/macintosh/windfarm_fcu_controls.c
index 67daeec94b44..2470e5a725c8 100644
--- a/drivers/macintosh/windfarm_fcu_controls.c
+++ b/drivers/macintosh/windfarm_fcu_controls.c
@@ -580,9 +580,16 @@ static const struct i2c_device_id wf_fcu_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_fcu_id);
+static const struct of_device_id wf_fcu_of_id[] = {
+ { .compatible = "fcu", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_fcu_of_id);
+
static struct i2c_driver wf_fcu_driver = {
.driver = {
.name = "wf_fcu",
+ .of_match_table = wf_fcu_of_id,
},
.probe = wf_fcu_probe,
.remove = wf_fcu_remove,
diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index 282c28a17ea1..1e5fa09845e7 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/i2c.h>
+#include <linux/of_device.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/io.h>
@@ -91,9 +92,14 @@ static int wf_lm75_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
struct wf_lm75_sensor *lm;
- int rc, ds1775 = id->driver_data;
+ int rc, ds1775;
const char *name, *loc;
+ if (id)
+ ds1775 = id->driver_data;
+ else
+ ds1775 = !!of_device_get_match_data(&client->dev);
+
DBG("wf_lm75: creating %s device at address 0x%02x\n",
ds1775 ? "ds1775" : "lm75", client->addr);
@@ -164,9 +170,17 @@ static const struct i2c_device_id wf_lm75_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_lm75_id);
+static const struct of_device_id wf_lm75_of_id[] = {
+ { .compatible = "lm75", .data = (void *)0},
+ { .compatible = "ds1775", .data = (void *)1 },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_lm75_of_id);
+
static struct i2c_driver wf_lm75_driver = {
.driver = {
.name = "wf_lm75",
+ .of_match_table = wf_lm75_of_id,
},
.probe = wf_lm75_probe,
.remove = wf_lm75_remove,
diff --git a/drivers/macintosh/windfarm_lm87_sensor.c b/drivers/macintosh/windfarm_lm87_sensor.c
index b03a33b803b7..d011899c0a8a 100644
--- a/drivers/macintosh/windfarm_lm87_sensor.c
+++ b/drivers/macintosh/windfarm_lm87_sensor.c
@@ -166,9 +166,16 @@ static const struct i2c_device_id wf_lm87_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_lm87_id);
+static const struct of_device_id wf_lm87_of_id[] = {
+ { .compatible = "lm87cimt", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_lm87_of_id);
+
static struct i2c_driver wf_lm87_driver = {
.driver = {
.name = "wf_lm87",
+ .of_match_table = wf_lm87_of_id,
},
.probe = wf_lm87_probe,
.remove = wf_lm87_remove,
diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c
index e666cc020683..1e7b03d44ad9 100644
--- a/drivers/macintosh/windfarm_max6690_sensor.c
+++ b/drivers/macintosh/windfarm_max6690_sensor.c
@@ -120,9 +120,16 @@ static const struct i2c_device_id wf_max6690_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_max6690_id);
+static const struct of_device_id wf_max6690_of_id[] = {
+ { .compatible = "max6690", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_max6690_of_id);
+
static struct i2c_driver wf_max6690_driver = {
.driver = {
.name = "wf_max6690",
+ .of_match_table = wf_max6690_of_id,
},
.probe = wf_max6690_probe,
.remove = wf_max6690_remove,
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index c84ec49c3741..cb75dc035616 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -341,9 +341,16 @@ static const struct i2c_device_id wf_sat_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wf_sat_id);
+static const struct of_device_id wf_sat_of_id[] = {
+ { .compatible = "smu-sat", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, wf_sat_of_id);
+
static struct i2c_driver wf_sat_driver = {
.driver = {
.name = "wf_smu_sat",
+ .of_match_table = wf_sat_of_id,
},
.probe = wf_sat_probe,
.remove = wf_sat_remove,
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fa872df4e770..72856e5f23a3 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -101,64 +101,6 @@
#define insert_lock(s, b) ((b)->level <= (s)->lock)
-/*
- * These macros are for recursing down the btree - they handle the details of
- * locking and looking up nodes in the cache for you. They're best treated as
- * mere syntax when reading code that uses them.
- *
- * op->lock determines whether we take a read or a write lock at a given depth.
- * If you've got a read lock and find that you need a write lock (i.e. you're
- * going to have to split), set op->lock and return -EINTR; btree_root() will
- * call you again and you'll have the correct lock.
- */
-
-/**
- * btree - recurse down the btree on a specified key
- * @fn: function to call, which will be passed the child node
- * @key: key to recurse on
- * @b: parent btree node
- * @op: pointer to struct btree_op
- */
-#define btree(fn, key, b, op, ...) \
-({ \
- int _r, l = (b)->level - 1; \
- bool _w = l <= (op)->lock; \
- struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
- _w, b); \
- if (!IS_ERR(_child)) { \
- _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
- rw_unlock(_w, _child); \
- } else \
- _r = PTR_ERR(_child); \
- _r; \
-})
-
-/**
- * btree_root - call a function on the root of the btree
- * @fn: function to call, which will be passed the child node
- * @c: cache set
- * @op: pointer to struct btree_op
- */
-#define btree_root(fn, c, op, ...) \
-({ \
- int _r = -EINTR; \
- do { \
- struct btree *_b = (c)->root; \
- bool _w = insert_lock(op, _b); \
- rw_lock(_w, _b, _b->level); \
- if (_b == (c)->root && \
- _w == insert_lock(op, _b)) { \
- _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
- } \
- rw_unlock(_w, _b); \
- bch_cannibalize_unlock(c); \
- if (_r == -EINTR) \
- schedule(); \
- } while (_r == -EINTR); \
- \
- finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
- _r; \
-})
static inline struct bset *write_block(struct btree *b)
{
@@ -1848,7 +1790,7 @@ static void bch_btree_gc(struct cache_set *c)
/* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
do {
- ret = btree_root(gc_root, c, &op, &writes, &stats);
+ ret = bcache_btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
cond_resched();
@@ -1946,7 +1888,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
}
if (p)
- ret = btree(check_recurse, p, b, op);
+ ret = bcache_btree(check_recurse, p, b, op);
p = k;
} while (p && !ret);
@@ -1955,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
return ret;
}
+
+static int bch_btree_check_thread(void *arg)
+{
+ int ret;
+ struct btree_check_info *info = arg;
+ struct btree_check_state *check_state = info->state;
+ struct cache_set *c = check_state->c;
+ struct btree_iter iter;
+ struct bkey *k, *p;
+ int cur_idx, prev_idx, skip_nr;
+ int i, n;
+
+ k = p = NULL;
+ i = n = 0;
+ cur_idx = prev_idx = 0;
+ ret = 0;
+
+ /* root node keys are checked before thread created */
+ bch_btree_iter_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ BUG_ON(!k);
+
+ p = k;
+ while (k) {
+ /*
+ * Fetch a root node key index, skip the keys which
+ * should be fetched by other threads, then check the
+ * sub-tree indexed by the fetched key.
+ */
+ spin_lock(&check_state->idx_lock);
+ cur_idx = check_state->key_idx;
+ check_state->key_idx++;
+ spin_unlock(&check_state->idx_lock);
+
+ skip_nr = cur_idx - prev_idx;
+
+ while (skip_nr) {
+ k = bch_btree_iter_next_filter(&iter,
+ &c->root->keys,
+ bch_ptr_bad);
+ if (k)
+ p = k;
+ else {
+ /*
+ * No more keys to check in root node,
+ * current checking threads are enough,
+ * stop creating more.
+ */
+ atomic_set(&check_state->enough, 1);
+ /* Update check_state->enough earlier */
+ smp_mb__after_atomic();
+ goto out;
+ }
+ skip_nr--;
+ cond_resched();
+ }
+
+ if (p) {
+ struct btree_op op;
+
+ btree_node_prefetch(c->root, p);
+ c->gc_stats.nodes++;
+ bch_btree_op_init(&op, 0);
+ ret = bcache_btree(check_recurse, p, c->root, &op);
+ if (ret)
+ goto out;
+ }
+ p = NULL;
+ prev_idx = cur_idx;
+ cond_resched();
+ }
+
+out:
+ info->result = ret;
+ /* update check_state->started among all CPUs */
+ smp_mb__before_atomic();
+ if (atomic_dec_and_test(&check_state->started))
+ wake_up(&check_state->wait);
+
+ return ret;
+}
+
+
+
+static int bch_btree_chkthread_nr(void)
+{
+ int n = num_online_cpus()/2;
+
+ if (n == 0)
+ n = 1;
+ else if (n > BCH_BTR_CHKTHREAD_MAX)
+ n = BCH_BTR_CHKTHREAD_MAX;
+
+ return n;
+}
+
int bch_btree_check(struct cache_set *c)
{
- struct btree_op op;
+ int ret = 0;
+ int i;
+ struct bkey *k = NULL;
+ struct btree_iter iter;
+ struct btree_check_state *check_state;
+ char name[32];
- bch_btree_op_init(&op, SHRT_MAX);
+ /* check and mark root node keys */
+ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
+ bch_initial_mark_key(c, c->root->level, k);
+
+ bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
+
+ if (c->root->level == 0)
+ return 0;
+
+ check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
+ if (!check_state)
+ return -ENOMEM;
- return btree_root(check_recurse, c, &op);
+ check_state->c = c;
+ check_state->total_threads = bch_btree_chkthread_nr();
+ check_state->key_idx = 0;
+ spin_lock_init(&check_state->idx_lock);
+ atomic_set(&check_state->started, 0);
+ atomic_set(&check_state->enough, 0);
+ init_waitqueue_head(&check_state->wait);
+
+ /*
+ * Run multiple threads to check btree nodes in parallel,
+ * if check_state->enough is non-zero, it means current
+ * running check threads are enough, unnecessary to create
+ * more.
+ */
+ for (i = 0; i < check_state->total_threads; i++) {
+ /* fetch latest check_state->enough earlier */
+ smp_mb__before_atomic();
+ if (atomic_read(&check_state->enough))
+ break;
+
+ check_state->infos[i].result = 0;
+ check_state->infos[i].state = check_state;
+ snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
+ atomic_inc(&check_state->started);
+
+ check_state->infos[i].thread =
+ kthread_run(bch_btree_check_thread,
+ &check_state->infos[i],
+ name);
+ if (IS_ERR(check_state->infos[i].thread)) {
+ pr_err("fails to run thread bch_btrchk[%d]", i);
+ for (--i; i >= 0; i--)
+ kthread_stop(check_state->infos[i].thread);
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ wait_event_interruptible(check_state->wait,
+ atomic_read(&check_state->started) == 0 ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+
+ for (i = 0; i < check_state->total_threads; i++) {
+ if (check_state->infos[i].result) {
+ ret = check_state->infos[i].result;
+ goto out;
+ }
+ }
+
+out:
+ kfree(check_state);
+ return ret;
}
void bch_initial_gc_finish(struct cache_set *c)
@@ -2401,7 +2506,7 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad))) {
- ret = btree(map_nodes_recurse, k, b,
+ ret = bcache_btree(map_nodes_recurse, k, b,
op, from, fn, flags);
from = NULL;
@@ -2419,10 +2524,10 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_nodes_fn *fn, int flags)
{
- return btree_root(map_nodes_recurse, c, op, from, fn, flags);
+ return bcache_btree_root(map_nodes_recurse, c, op, from, fn, flags);
}
-static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
+int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
struct bkey *from, btree_map_keys_fn *fn,
int flags)
{
@@ -2435,7 +2540,8 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
ret = !b->level
? fn(op, b, k)
- : btree(map_keys_recurse, k, b, op, from, fn, flags);
+ : bcache_btree(map_keys_recurse, k,
+ b, op, from, fn, flags);
from = NULL;
if (ret != MAP_CONTINUE)
@@ -2452,7 +2558,7 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_keys_fn *fn, int flags)
{
- return btree_root(map_keys_recurse, c, op, from, fn, flags);
+ return bcache_btree_root(map_keys_recurse, c, op, from, fn, flags);
}
/* Keybuf code */
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index f4dcca449391..257969980c49 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -145,6 +145,9 @@ struct btree {
struct bio *bio;
};
+
+
+
#define BTREE_FLAG(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
@@ -216,6 +219,25 @@ struct btree_op {
unsigned int insert_collision:1;
};
+struct btree_check_state;
+struct btree_check_info {
+ struct btree_check_state *state;
+ struct task_struct *thread;
+ int result;
+};
+
+#define BCH_BTR_CHKTHREAD_MAX 64
+struct btree_check_state {
+ struct cache_set *c;
+ int total_threads;
+ int key_idx;
+ spinlock_t idx_lock;
+ atomic_t started;
+ atomic_t enough;
+ wait_queue_head_t wait;
+ struct btree_check_info infos[BCH_BTR_CHKTHREAD_MAX];
+};
+
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
{
memset(op, 0, sizeof(struct btree_op));
@@ -284,6 +306,65 @@ static inline void force_wake_up_gc(struct cache_set *c)
wake_up_gc(c);
}
+/*
+ * These macros are for recursing down the btree - they handle the details of
+ * locking and looking up nodes in the cache for you. They're best treated as
+ * mere syntax when reading code that uses them.
+ *
+ * op->lock determines whether we take a read or a write lock at a given depth.
+ * If you've got a read lock and find that you need a write lock (i.e. you're
+ * going to have to split), set op->lock and return -EINTR; bcache_btree_root() will
+ * call you again and you'll have the correct lock.
+ */
+
+/**
+ * bcache_btree - recurse down the btree on a specified key
+ * @fn: function to call, which will be passed the child node
+ * @key: key to recurse on
+ * @b: parent btree node
+ * @op: pointer to struct btree_op
+ */
+#define bcache_btree(fn, key, b, op, ...) \
+({ \
+ int _r, l = (b)->level - 1; \
+ bool _w = l <= (op)->lock; \
+ struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
+ _w, b); \
+ if (!IS_ERR(_child)) { \
+ _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
+ rw_unlock(_w, _child); \
+ } else \
+ _r = PTR_ERR(_child); \
+ _r; \
+})
+
+/**
+ * bcache_btree_root - call a function on the root of the btree
+ * @fn: function to call, which will be passed the child node
+ * @c: cache set
+ * @op: pointer to struct btree_op
+ */
+#define bcache_btree_root(fn, c, op, ...) \
+({ \
+ int _r = -EINTR; \
+ do { \
+ struct btree *_b = (c)->root; \
+ bool _w = insert_lock(op, _b); \
+ rw_lock(_w, _b, _b->level); \
+ if (_b == (c)->root && \
+ _w == insert_lock(op, _b)) { \
+ _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
+ } \
+ rw_unlock(_w, _b); \
+ bch_cannibalize_unlock(c); \
+ if (_r == -EINTR) \
+ schedule(); \
+ } while (_r == -EINTR); \
+ \
+ finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
+ _r; \
+})
+
#define MAP_DONE 0
#define MAP_CONTINUE 1
@@ -314,6 +395,9 @@ typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b,
struct bkey *k);
int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_keys_fn *fn, int flags);
+int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
+ struct bkey *from, btree_map_keys_fn *fn,
+ int flags);
typedef bool (keybuf_pred_fn)(struct keybuf *buf, struct bkey *k);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 820d8402a1dc..71a90fbec314 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1161,8 +1161,7 @@ static void quit_max_writeback_rate(struct cache_set *c,
/* Cached devices - read & write stuff */
-static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct bio *bio)
+blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
{
struct search *s;
struct bcache_device *d = bio->bi_disk->private_data;
@@ -1266,7 +1265,6 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
{
struct gendisk *g = dc->disk.disk;
- g->queue->make_request_fn = cached_dev_make_request;
g->queue->backing_dev_info->congested_fn = cached_dev_congested;
dc->disk.cache_miss = cached_dev_cache_miss;
dc->disk.ioctl = cached_dev_ioctl;
@@ -1301,8 +1299,7 @@ static void flash_dev_nodata(struct closure *cl)
continue_at(cl, search_free, NULL);
}
-static blk_qc_t flash_dev_make_request(struct request_queue *q,
- struct bio *bio)
+blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
{
struct search *s;
struct closure *cl;
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index c64dbd7a91aa..bb005c93dd72 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -37,7 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c);
void bch_data_insert(struct closure *cl);
void bch_cached_dev_request_init(struct cached_dev *dc);
+blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio);
+
void bch_flash_dev_request_init(struct bcache_device *d);
+blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio);
extern struct kmem_cache *bch_search_cache;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 0c3c5419c52b..d98354fa28e3 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -816,7 +816,7 @@ static void bcache_device_free(struct bcache_device *d)
}
static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
- sector_t sectors)
+ sector_t sectors, make_request_fn make_request_fn)
{
struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
@@ -866,11 +866,10 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->disk->fops = &bcache_ops;
d->disk->private_data = d;
- q = blk_alloc_queue(GFP_KERNEL);
+ q = blk_alloc_queue(make_request_fn, NUMA_NO_NODE);
if (!q)
return -ENOMEM;
- blk_queue_make_request(q, NULL);
d->disk->queue = q;
q->queuedata = d;
q->backing_dev_info->congested_data = d;
@@ -1339,7 +1338,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
q->limits.raid_partial_stripes_expensive;
ret = bcache_device_init(&dc->disk, block_size,
- dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
+ dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
+ cached_dev_make_request);
if (ret)
return ret;
@@ -1451,7 +1451,8 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
kobject_init(&d->kobj, &bch_flash_dev_ktype);
- if (bcache_device_init(d, block_bytes(c), u->sectors))
+ if (bcache_device_init(d, block_bytes(c), u->sectors,
+ flash_dev_make_request))
goto err;
bcache_device_attach(d, c, u - c->uuids);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 3470fae4eabc..323276994aab 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -154,7 +154,7 @@ static ssize_t bch_snprint_string_list(char *buf,
size_t i;
for (i = 0; list[i]; i++)
- out += snprintf(out, buf + size - out,
+ out += scnprintf(out, buf + size - out,
i == selected ? "[%s] " : "%s ", list[i]);
out[-1] = '\n';
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 4a40f9eadeaf..3f7641fb28d5 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -183,7 +183,7 @@ static void update_writeback_rate(struct work_struct *work)
*/
set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
+ smp_mb__after_atomic();
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -193,7 +193,7 @@ static void update_writeback_rate(struct work_struct *work)
test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
+ smp_mb__after_atomic();
return;
}
@@ -229,7 +229,7 @@ static void update_writeback_rate(struct work_struct *work)
*/
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
+ smp_mb__after_atomic();
}
static unsigned int writeback_delay(struct cached_dev *dc,
@@ -785,7 +785,9 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
return MAP_CONTINUE;
}
-void bch_sectors_dirty_init(struct bcache_device *d)
+static int bch_root_node_dirty_init(struct cache_set *c,
+ struct bcache_device *d,
+ struct bkey *k)
{
struct sectors_dirty_init op;
int ret;
@@ -796,8 +798,13 @@ void bch_sectors_dirty_init(struct bcache_device *d)
op.start = KEY(op.inode, 0, 0);
do {
- ret = bch_btree_map_keys(&op.op, d->c, &op.start,
- sectors_dirty_init_fn, 0);
+ ret = bcache_btree(map_keys_recurse,
+ k,
+ c->root,
+ &op.op,
+ &op.start,
+ sectors_dirty_init_fn,
+ 0);
if (ret == -EAGAIN)
schedule_timeout_interruptible(
msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
@@ -806,6 +813,151 @@ void bch_sectors_dirty_init(struct bcache_device *d)
break;
}
} while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static int bch_dirty_init_thread(void *arg)
+{
+ struct dirty_init_thrd_info *info = arg;
+ struct bch_dirty_init_state *state = info->state;
+ struct cache_set *c = state->c;
+ struct btree_iter iter;
+ struct bkey *k, *p;
+ int cur_idx, prev_idx, skip_nr;
+ int i;
+
+ k = p = NULL;
+ i = 0;
+ cur_idx = prev_idx = 0;
+
+ bch_btree_iter_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ BUG_ON(!k);
+
+ p = k;
+
+ while (k) {
+ spin_lock(&state->idx_lock);
+ cur_idx = state->key_idx;
+ state->key_idx++;
+ spin_unlock(&state->idx_lock);
+
+ skip_nr = cur_idx - prev_idx;
+
+ while (skip_nr) {
+ k = bch_btree_iter_next_filter(&iter,
+ &c->root->keys,
+ bch_ptr_bad);
+ if (k)
+ p = k;
+ else {
+ atomic_set(&state->enough, 1);
+ /* Update state->enough earlier */
+ smp_mb__after_atomic();
+ goto out;
+ }
+ skip_nr--;
+ cond_resched();
+ }
+
+ if (p) {
+ if (bch_root_node_dirty_init(c, state->d, p) < 0)
+ goto out;
+ }
+
+ p = NULL;
+ prev_idx = cur_idx;
+ cond_resched();
+ }
+
+out:
+ /* In order to wake up state->wait in time */
+ smp_mb__before_atomic();
+ if (atomic_dec_and_test(&state->started))
+ wake_up(&state->wait);
+
+ return 0;
+}
+
+static int bch_btre_dirty_init_thread_nr(void)
+{
+ int n = num_online_cpus()/2;
+
+ if (n == 0)
+ n = 1;
+ else if (n > BCH_DIRTY_INIT_THRD_MAX)
+ n = BCH_DIRTY_INIT_THRD_MAX;
+
+ return n;
+}
+
+void bch_sectors_dirty_init(struct bcache_device *d)
+{
+ int i;
+ struct bkey *k = NULL;
+ struct btree_iter iter;
+ struct sectors_dirty_init op;
+ struct cache_set *c = d->c;
+ struct bch_dirty_init_state *state;
+ char name[32];
+
+ /* Just count root keys if no leaf node */
+ if (c->root->level == 0) {
+ bch_btree_op_init(&op.op, -1);
+ op.inode = d->id;
+ op.count = 0;
+ op.start = KEY(op.inode, 0, 0);
+
+ for_each_key_filter(&c->root->keys,
+ k, &iter, bch_ptr_invalid)
+ sectors_dirty_init_fn(&op.op, c->root, k);
+ return;
+ }
+
+ state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
+ if (!state) {
+ pr_warn("sectors dirty init failed: cannot allocate memory");
+ return;
+ }
+
+ state->c = c;
+ state->d = d;
+ state->total_threads = bch_btre_dirty_init_thread_nr();
+ state->key_idx = 0;
+ spin_lock_init(&state->idx_lock);
+ atomic_set(&state->started, 0);
+ atomic_set(&state->enough, 0);
+ init_waitqueue_head(&state->wait);
+
+ for (i = 0; i < state->total_threads; i++) {
+ /* Fetch latest state->enough earlier */
+ smp_mb__before_atomic();
+ if (atomic_read(&state->enough))
+ break;
+
+ state->infos[i].state = state;
+ atomic_inc(&state->started);
+ snprintf(name, sizeof(name), "bch_dirty_init[%d]", i);
+
+ state->infos[i].thread =
+ kthread_run(bch_dirty_init_thread,
+ &state->infos[i],
+ name);
+ if (IS_ERR(state->infos[i].thread)) {
+ pr_err("fails to run thread bch_dirty_init[%d]", i);
+ for (--i; i >= 0; i--)
+ kthread_stop(state->infos[i].thread);
+ goto out;
+ }
+ }
+
+ wait_event_interruptible(state->wait,
+ atomic_read(&state->started) == 0 ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+
+out:
+ kfree(state);
}
void bch_cached_dev_writeback_init(struct cached_dev *dc)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 4e4c6810dc3c..b029843ce5b6 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -16,6 +16,7 @@
#define BCH_AUTO_GC_DIRTY_THRESHOLD 50
+#define BCH_DIRTY_INIT_THRD_MAX 64
/*
* 14 (16384ths) is chosen here as something that each backing device
* should be a reasonable fraction of the share, and not to blow up
@@ -23,6 +24,24 @@
*/
#define WRITEBACK_SHARE_SHIFT 14
+struct bch_dirty_init_state;
+struct dirty_init_thrd_info {
+ struct bch_dirty_init_state *state;
+ struct task_struct *thread;
+};
+
+struct bch_dirty_init_state {
+ struct cache_set *c;
+ struct bcache_device *d;
+ int total_threads;
+ int key_idx;
+ spinlock_t idx_lock;
+ atomic_t started;
+ atomic_t enough;
+ wait_queue_head_t wait;
+ struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX];
+};
+
static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
{
uint64_t i, ret = 0;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0413018c8305..753302e83910 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -25,6 +25,7 @@
#include <linux/wait.h>
#include <linux/pr.h>
#include <linux/refcount.h>
+#include <linux/part_stat.h>
#define DM_MSG_PREFIX "core"
@@ -1938,16 +1939,15 @@ static struct mapped_device *alloc_dev(int minor)
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
- md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
- if (!md->queue)
- goto bad;
- md->queue->queuedata = md;
/*
* default to bio-based required ->make_request_fn until DM
* table is loaded and md->type established. If request-based
* table is loaded: blk-mq will override accordingly.
*/
- blk_queue_make_request(md->queue, dm_make_request);
+ md->queue = blk_alloc_queue(dm_make_request, numa_node_id);
+ if (!md->queue)
+ goto bad;
+ md->queue->queuedata = md;
md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 469f551863be..271e8a587354 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -58,8 +58,10 @@
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
+#include <linux/raid/detect.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
+#include <linux/part_stat.h>
#include <trace/events/block.h>
#include "md.h"
@@ -2491,12 +2493,12 @@ static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
{
int err = 0;
struct block_device *bdev;
- char b[BDEVNAME_SIZE];
bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
shared ? (struct md_rdev *)lock_rdev : rdev);
if (IS_ERR(bdev)) {
- pr_warn("md: could not open %s.\n", __bdevname(dev, b));
+ pr_warn("md: could not open device unknown-block(%u,%u).\n",
+ MAJOR(dev), MINOR(dev));
return PTR_ERR(bdev);
}
rdev->bdev = bdev;
@@ -5621,12 +5623,11 @@ static int md_alloc(dev_t dev, char *name)
mddev->hold_active = UNTIL_STOP;
error = -ENOMEM;
- mddev->queue = blk_alloc_queue(GFP_KERNEL);
+ mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
if (!mddev->queue)
goto abort;
mddev->queue->queuedata = mddev;
- blk_queue_make_request(mddev->queue, md_make_request);
blk_set_stacking_limits(&mddev->queue->limits);
disk = alloc_disk(1 << shift);
@@ -6184,7 +6185,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
static void mddev_detach(struct mddev *mddev)
{
md_bitmap_wait_behind_writes(mddev);
- if (mddev->pers && mddev->pers->quiesce) {
+ if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
}
diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c
index 4feed296a327..423fecc19fc4 100644
--- a/drivers/misc/cardreader/rts5227.c
+++ b/drivers/misc/cardreader/rts5227.c
@@ -394,7 +394,7 @@ static const struct pcr_ops rts522a_pcr_ops = {
void rts522a_init_params(struct rtsx_pcr *pcr)
{
rts5227_init_params(pcr);
-
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11);
pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3;
pcr->option.ocp_en = 1;
diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c
index db936e4d6e56..1a81cda948c1 100644
--- a/drivers/misc/cardreader/rts5249.c
+++ b/drivers/misc/cardreader/rts5249.c
@@ -618,6 +618,7 @@ static const struct pcr_ops rts524a_pcr_ops = {
void rts524a_init_params(struct rtsx_pcr *pcr)
{
rts5249_init_params(pcr);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
pcr->option.ltr_l1off_snooze_sspwrgate =
LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF;
@@ -733,6 +734,7 @@ static const struct pcr_ops rts525a_pcr_ops = {
void rts525a_init_params(struct rtsx_pcr *pcr)
{
rts5249_init_params(pcr);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11);
pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
pcr->option.ltr_l1off_snooze_sspwrgate =
LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF;
diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c
index 4214f02a17fd..711054ebad74 100644
--- a/drivers/misc/cardreader/rts5260.c
+++ b/drivers/misc/cardreader/rts5260.c
@@ -662,7 +662,7 @@ void rts5260_init_params(struct rtsx_pcr *pcr)
pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
pcr->aspm_en = ASPM_L1_EN;
- pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
pcr->ic_version = rts5260_get_ic_version(pcr);
diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
index bc4967a6efa1..78c3b1d424c3 100644
--- a/drivers/misc/cardreader/rts5261.c
+++ b/drivers/misc/cardreader/rts5261.c
@@ -764,7 +764,7 @@ void rts5261_init_params(struct rtsx_pcr *pcr)
pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
pcr->aspm_en = ASPM_L1_EN;
- pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16);
+ pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 11);
pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
pcr->ic_version = rts5261_get_ic_version(pcr);
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 031eb64549af..282c9ef68ed2 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -712,13 +712,14 @@ static int at24_probe(struct i2c_client *client)
* chip is functional.
*/
err = at24_read(at24, 0, &test_byte, 1);
- pm_runtime_idle(dev);
if (err) {
pm_runtime_disable(dev);
regulator_disable(at24->vcc_reg);
return -ENODEV;
}
+ pm_runtime_idle(dev);
+
if (writable)
dev_info(dev, "%u byte %s EEPROM, writable, %u bytes/write\n",
byte_len, client->name, at24->write_max);
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index aa54d359dab7..a971c4bcc442 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1732,8 +1732,11 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
* the erase operation does not exceed the max_busy_timeout, we should
* use R1B response. Or we need to prevent the host from doing hw busy
* detection, which is done by converting to a R1 response instead.
+ * Note, some hosts require R1B, which also means they are on their own
+ * when it comes to dealing with the busy timeout.
*/
- if (card->host->max_busy_timeout &&
+ if (!(card->host->caps & MMC_CAP_NEED_RSP_BUSY) &&
+ card->host->max_busy_timeout &&
busy_timeout > card->host->max_busy_timeout) {
cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
} else {
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index f6912ded652d..de14b5845f52 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1910,9 +1910,12 @@ static int mmc_sleep(struct mmc_host *host)
* If the max_busy_timeout of the host is specified, validate it against
* the sleep cmd timeout. A failure means we need to prevent the host
* from doing hw busy detection, which is done by converting to a R1
- * response instead of a R1B.
+ * response instead of a R1B. Note, some hosts require R1B, which also
+ * means they are on their own when it comes to dealing with the busy
+ * timeout.
*/
- if (host->max_busy_timeout && (timeout_ms > host->max_busy_timeout)) {
+ if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout &&
+ (timeout_ms > host->max_busy_timeout)) {
cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
} else {
cmd.flags = MMC_RSP_R1B | MMC_CMD_AC;
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index da425ee2d9bf..e025604e17d4 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -542,9 +542,11 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
* If the max_busy_timeout of the host is specified, make sure it's
* enough to fit the used timeout_ms. In case it's not, let's instruct
* the host to avoid HW busy detection, by converting to a R1 response
- * instead of a R1B.
+ * instead of a R1B. Note, some hosts require R1B, which also means they
+ * are on their own when it comes to dealing with the busy timeout.
*/
- if (host->max_busy_timeout && (timeout_ms > host->max_busy_timeout))
+ if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout &&
+ (timeout_ms > host->max_busy_timeout))
use_r1b_resp = false;
cmd.opcode = MMC_SWITCH;
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index bd50935dc37d..11087976ab19 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -606,19 +606,22 @@ static int sd_change_phase(struct realtek_pci_sdmmc *host,
u8 sample_point, bool rx)
{
struct rtsx_pcr *pcr = host->pcr;
-
+ u16 SD_VP_CTL = 0;
dev_dbg(sdmmc_dev(host), "%s(%s): sample_point = %d\n",
__func__, rx ? "RX" : "TX", sample_point);
rtsx_pci_write_register(pcr, CLK_CTL, CHANGE_CLK, CHANGE_CLK);
- if (rx)
+ if (rx) {
+ SD_VP_CTL = SD_VPRX_CTL;
rtsx_pci_write_register(pcr, SD_VPRX_CTL,
PHASE_SELECT_MASK, sample_point);
- else
+ } else {
+ SD_VP_CTL = SD_VPTX_CTL;
rtsx_pci_write_register(pcr, SD_VPTX_CTL,
PHASE_SELECT_MASK, sample_point);
- rtsx_pci_write_register(pcr, SD_VPCLK0_CTL, PHASE_NOT_RESET, 0);
- rtsx_pci_write_register(pcr, SD_VPCLK0_CTL, PHASE_NOT_RESET,
+ }
+ rtsx_pci_write_register(pcr, SD_VP_CTL, PHASE_NOT_RESET, 0);
+ rtsx_pci_write_register(pcr, SD_VP_CTL, PHASE_NOT_RESET,
PHASE_NOT_RESET);
rtsx_pci_write_register(pcr, CLK_CTL, CHANGE_CLK, 0);
rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0);
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 9651dca6863e..2a2173d953f5 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -23,6 +23,7 @@
#include <linux/pm.h>
#include <linux/pm_runtime.h>
#include <linux/delay.h>
+#include <linux/dmi.h>
#include <linux/mmc/host.h>
#include <linux/mmc/pm.h>
@@ -72,9 +73,16 @@ struct sdhci_acpi_host {
const struct sdhci_acpi_slot *slot;
struct platform_device *pdev;
bool use_runtime_pm;
+ bool is_intel;
+ bool reset_signal_volt_on_suspend;
unsigned long private[0] ____cacheline_aligned;
};
+enum {
+ DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP = BIT(0),
+ DMI_QUIRK_SD_NO_WRITE_PROTECT = BIT(1),
+};
+
static inline void *sdhci_acpi_priv(struct sdhci_acpi_host *c)
{
return (void *)c->private;
@@ -391,6 +399,8 @@ static int intel_probe_slot(struct platform_device *pdev, struct acpi_device *ad
host->mmc_host_ops.start_signal_voltage_switch =
intel_start_signal_voltage_switch;
+ c->is_intel = true;
+
return 0;
}
@@ -647,6 +657,36 @@ static const struct acpi_device_id sdhci_acpi_ids[] = {
};
MODULE_DEVICE_TABLE(acpi, sdhci_acpi_ids);
+static const struct dmi_system_id sdhci_acpi_quirks[] = {
+ {
+ /*
+ * The Lenovo Miix 320-10ICR has a bug in the _PS0 method of
+ * the SHC1 ACPI device, this bug causes it to reprogram the
+ * wrong LDO (DLDO3) to 1.8V if 1.8V modes are used and the
+ * card is (runtime) suspended + resumed. DLDO3 is used for
+ * the LCD and setting it to 1.8V causes the LCD to go black.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"),
+ },
+ .driver_data = (void *)DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP,
+ },
+ {
+ /*
+ * The Acer Aspire Switch 10 (SW5-012) microSD slot always
+ * reports the card being write-protected even though microSD
+ * cards do not have a write-protect switch at all.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"),
+ },
+ .driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT,
+ },
+ {} /* Terminating entry */
+};
+
static const struct sdhci_acpi_slot *sdhci_acpi_get_slot(struct acpi_device *adev)
{
const struct sdhci_acpi_uid_slot *u;
@@ -663,17 +703,23 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
const struct sdhci_acpi_slot *slot;
struct acpi_device *device, *child;
+ const struct dmi_system_id *id;
struct sdhci_acpi_host *c;
struct sdhci_host *host;
struct resource *iomem;
resource_size_t len;
size_t priv_size;
+ int quirks = 0;
int err;
device = ACPI_COMPANION(dev);
if (!device)
return -ENODEV;
+ id = dmi_first_match(sdhci_acpi_quirks);
+ if (id)
+ quirks = (long)id->driver_data;
+
slot = sdhci_acpi_get_slot(device);
/* Power on the SDHCI controller and its children */
@@ -759,6 +805,12 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
dev_warn(dev, "failed to setup card detect gpio\n");
c->use_runtime_pm = false;
}
+
+ if (quirks & DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP)
+ c->reset_signal_volt_on_suspend = true;
+
+ if (quirks & DMI_QUIRK_SD_NO_WRITE_PROTECT)
+ host->mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT;
}
err = sdhci_setup_host(host);
@@ -823,17 +875,39 @@ static int sdhci_acpi_remove(struct platform_device *pdev)
return 0;
}
+static void __maybe_unused sdhci_acpi_reset_signal_voltage_if_needed(
+ struct device *dev)
+{
+ struct sdhci_acpi_host *c = dev_get_drvdata(dev);
+ struct sdhci_host *host = c->host;
+
+ if (c->is_intel && c->reset_signal_volt_on_suspend &&
+ host->mmc->ios.signal_voltage != MMC_SIGNAL_VOLTAGE_330) {
+ struct intel_host *intel_host = sdhci_acpi_priv(c);
+ unsigned int fn = INTEL_DSM_V33_SWITCH;
+ u32 result = 0;
+
+ intel_dsm(intel_host, dev, fn, &result);
+ }
+}
+
#ifdef CONFIG_PM_SLEEP
static int sdhci_acpi_suspend(struct device *dev)
{
struct sdhci_acpi_host *c = dev_get_drvdata(dev);
struct sdhci_host *host = c->host;
+ int ret;
if (host->tuning_mode != SDHCI_TUNING_MODE_3)
mmc_retune_needed(host->mmc);
- return sdhci_suspend_host(host);
+ ret = sdhci_suspend_host(host);
+ if (ret)
+ return ret;
+
+ sdhci_acpi_reset_signal_voltage_if_needed(dev);
+ return 0;
}
static int sdhci_acpi_resume(struct device *dev)
@@ -853,11 +927,17 @@ static int sdhci_acpi_runtime_suspend(struct device *dev)
{
struct sdhci_acpi_host *c = dev_get_drvdata(dev);
struct sdhci_host *host = c->host;
+ int ret;
if (host->tuning_mode != SDHCI_TUNING_MODE_3)
mmc_retune_needed(host->mmc);
- return sdhci_runtime_suspend_host(host);
+ ret = sdhci_runtime_suspend_host(host);
+ if (ret)
+ return ret;
+
+ sdhci_acpi_reset_signal_voltage_if_needed(dev);
+ return 0;
}
static int sdhci_acpi_runtime_resume(struct device *dev)
diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index 5827d3751b81..e573495f8726 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -11,6 +11,7 @@
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include "sdhci-pltfm.h"
@@ -235,6 +236,11 @@ static const struct sdhci_ops sdhci_cdns_ops = {
.set_uhs_signaling = sdhci_cdns_set_uhs_signaling,
};
+static const struct sdhci_pltfm_data sdhci_cdns_uniphier_pltfm_data = {
+ .ops = &sdhci_cdns_ops,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+};
+
static const struct sdhci_pltfm_data sdhci_cdns_pltfm_data = {
.ops = &sdhci_cdns_ops,
};
@@ -334,6 +340,7 @@ static void sdhci_cdns_hs400_enhanced_strobe(struct mmc_host *mmc,
static int sdhci_cdns_probe(struct platform_device *pdev)
{
struct sdhci_host *host;
+ const struct sdhci_pltfm_data *data;
struct sdhci_pltfm_host *pltfm_host;
struct sdhci_cdns_priv *priv;
struct clk *clk;
@@ -350,8 +357,12 @@ static int sdhci_cdns_probe(struct platform_device *pdev)
if (ret)
return ret;
+ data = of_device_get_match_data(dev);
+ if (!data)
+ data = &sdhci_cdns_pltfm_data;
+
nr_phy_params = sdhci_cdns_phy_param_count(dev->of_node);
- host = sdhci_pltfm_init(pdev, &sdhci_cdns_pltfm_data,
+ host = sdhci_pltfm_init(pdev, data,
struct_size(priv, phy_params, nr_phy_params));
if (IS_ERR(host)) {
ret = PTR_ERR(host);
@@ -431,7 +442,10 @@ static const struct dev_pm_ops sdhci_cdns_pm_ops = {
};
static const struct of_device_id sdhci_cdns_match[] = {
- { .compatible = "socionext,uniphier-sd4hc" },
+ {
+ .compatible = "socionext,uniphier-sd4hc",
+ .data = &sdhci_cdns_uniphier_pltfm_data,
+ },
{ .compatible = "cdns,sd4hc" },
{ /* sentinel */ }
};
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index c3a160c18047..3955fa5db43c 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -1590,7 +1590,7 @@ static u32 sdhci_msm_cqe_irq(struct sdhci_host *host, u32 intmask)
return 0;
}
-void sdhci_msm_cqe_disable(struct mmc_host *mmc, bool recovery)
+static void sdhci_msm_cqe_disable(struct mmc_host *mmc, bool recovery)
{
struct sdhci_host *host = mmc_priv(mmc);
unsigned long flags;
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index ab2bd314a390..fcef5c0d0908 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -132,7 +132,8 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask)
sdhci_reset(host, mask);
- if (host->mmc->caps & MMC_CAP_NONREMOVABLE)
+ if ((host->mmc->caps & MMC_CAP_NONREMOVABLE)
+ || mmc_gpio_get_cd(host->mmc) >= 0)
sdhci_at91_set_force_card_detect(host);
if (priv->cal_always_on && (mask & SDHCI_RESET_ALL))
@@ -427,8 +428,11 @@ static int sdhci_at91_probe(struct platform_device *pdev)
* detection procedure using the SDMCC_CD signal is bypassed.
* This bit is reset when a software reset for all command is performed
* so we need to implement our own reset function to set back this bit.
+ *
+ * WA: SAMA5D2 doesn't drive CMD if using CD GPIO line.
*/
- if (host->mmc->caps & MMC_CAP_NONREMOVABLE)
+ if ((host->mmc->caps & MMC_CAP_NONREMOVABLE)
+ || mmc_gpio_get_cd(host->mmc) >= 0)
sdhci_at91_set_force_card_detect(host);
pm_runtime_put_autosuspend(&pdev->dev);
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 882053151a47..c4978177ef88 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -1192,6 +1192,9 @@ static int sdhci_omap_probe(struct platform_device *pdev)
if (of_find_property(dev->of_node, "dmas", NULL))
sdhci_switch_external_dma(host, true);
+ /* R1B responses are required to properly manage HW busy detection. */
+ mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
+
ret = sdhci_setup_host(host);
if (ret)
goto err_put_sync;
diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index 5eea8d70a85d..ce15a05f23d4 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -262,10 +262,26 @@ static int gl9750_execute_tuning(struct sdhci_host *host, u32 opcode)
return 0;
}
+static void gli_pcie_enable_msi(struct sdhci_pci_slot *slot)
+{
+ int ret;
+
+ ret = pci_alloc_irq_vectors(slot->chip->pdev, 1, 1,
+ PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ if (ret < 0) {
+ pr_warn("%s: enable PCI MSI failed, error=%d\n",
+ mmc_hostname(slot->host->mmc), ret);
+ return;
+ }
+
+ slot->host->irq = pci_irq_vector(slot->chip->pdev, 0);
+}
+
static int gli_probe_slot_gl9750(struct sdhci_pci_slot *slot)
{
struct sdhci_host *host = slot->host;
+ gli_pcie_enable_msi(slot);
slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO;
sdhci_enable_v4_mode(host);
@@ -276,6 +292,7 @@ static int gli_probe_slot_gl9755(struct sdhci_pci_slot *slot)
{
struct sdhci_host *host = slot->host;
+ gli_pcie_enable_msi(slot);
slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO;
sdhci_enable_v4_mode(host);
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 403ac44a7378..a25c3a4d3f6c 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -1552,6 +1552,9 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
if (tegra_host->soc_data->nvquirks & NVQUIRK_ENABLE_DDR50)
host->mmc->caps |= MMC_CAP_1_8V_DDR;
+ /* R1B responses are required to properly manage HW busy detection. */
+ host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
+
tegra_sdhci_parse_dt(host);
tegra_host->power_gpio = devm_gpiod_get_optional(&pdev->dev, "power",
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 25a8f9387d5a..db8884ad6d40 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,7 @@ config NET_FC
config IFB
tristate "Intermediate Functional Block support"
depends on NET_CLS_ACT
+ select NET_REDIRECT
---help---
This is an intermediate driver that allows sharing of
resources.
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 1cc2cd894f87..c81698550e5a 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -50,11 +50,6 @@ struct arp_pkt {
};
#pragma pack()
-static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
-{
- return (struct arp_pkt *)skb_network_header(skb);
-}
-
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
bool strict_match);
@@ -553,10 +548,11 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
spin_unlock(&bond->mode_lock);
}
-static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond)
+static struct slave *rlb_choose_channel(struct sk_buff *skb,
+ struct bonding *bond,
+ const struct arp_pkt *arp)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct arp_pkt *arp = arp_pkt(skb);
struct slave *assigned_slave, *curr_active_slave;
struct rlb_client_info *client_info;
u32 hash_index = 0;
@@ -653,8 +649,12 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
*/
static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
{
- struct arp_pkt *arp = arp_pkt(skb);
struct slave *tx_slave = NULL;
+ struct arp_pkt *arp;
+
+ if (!pskb_network_may_pull(skb, sizeof(*arp)))
+ return NULL;
+ arp = (struct arp_pkt *)skb_network_header(skb);
/* Don't modify or load balance ARPs that do not originate locally
* (e.g.,arrive via a bridge).
@@ -664,7 +664,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
if (arp->op_code == htons(ARPOP_REPLY)) {
/* the arp must be sent on the selected rx channel */
- tx_slave = rlb_choose_channel(skb, bond);
+ tx_slave = rlb_choose_channel(skb, bond, arp);
if (tx_slave)
bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr,
tx_slave->dev->addr_len);
@@ -676,7 +676,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
* When the arp reply is received the entry will be updated
* with the correct unicast address of the client.
*/
- tx_slave = rlb_choose_channel(skb, bond);
+ tx_slave = rlb_choose_channel(skb, bond, arp);
/* The ARP reply packets must be delayed so that
* they can cancel out the influence of the ARP request.
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index 8e81bdf98ac6..63f2548f5b1b 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -141,29 +141,29 @@ static ssize_t dbgfs_state(struct file *file, char __user *user_buf,
return 0;
/* Print out debug information. */
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "CAIF SPI debug information:\n");
-
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), FLAVOR);
-
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "STATE: %d\n", cfspi->dbg_state);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Previous CMD: 0x%x\n", cfspi->pcmd);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Current CMD: 0x%x\n", cfspi->cmd);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Previous TX len: %d\n", cfspi->tx_ppck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Previous RX len: %d\n", cfspi->rx_ppck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Current TX len: %d\n", cfspi->tx_cpck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Current RX len: %d\n", cfspi->rx_cpck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Next TX len: %d\n", cfspi->tx_npck_len);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Next RX len: %d\n", cfspi->rx_npck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "CAIF SPI debug information:\n");
+
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), FLAVOR);
+
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "STATE: %d\n", cfspi->dbg_state);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Previous CMD: 0x%x\n", cfspi->pcmd);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Current CMD: 0x%x\n", cfspi->cmd);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Previous TX len: %d\n", cfspi->tx_ppck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Previous RX len: %d\n", cfspi->rx_ppck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Current TX len: %d\n", cfspi->tx_cpck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Current RX len: %d\n", cfspi->rx_cpck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Next TX len: %d\n", cfspi->tx_npck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Next RX len: %d\n", cfspi->rx_npck_len);
if (len > DEBUGFS_BUF_SIZE)
len = DEBUGFS_BUF_SIZE;
@@ -180,23 +180,23 @@ static ssize_t print_frame(char *buf, size_t size, char *frm,
int len = 0;
int i;
for (i = 0; i < count; i++) {
- len += snprintf((buf + len), (size - len),
+ len += scnprintf((buf + len), (size - len),
"[0x" BYTE_HEX_FMT "]",
frm[i]);
if ((i == cut) && (count > (cut * 2))) {
/* Fast forward. */
i = count - cut;
- len += snprintf((buf + len), (size - len),
- "--- %zu bytes skipped ---\n",
- count - (cut * 2));
+ len += scnprintf((buf + len), (size - len),
+ "--- %zu bytes skipped ---\n",
+ count - (cut * 2));
}
if ((!(i % 10)) && i) {
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "\n");
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "\n");
}
}
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), "\n");
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), "\n");
return len;
}
@@ -214,18 +214,18 @@ static ssize_t dbgfs_frame(struct file *file, char __user *user_buf,
return 0;
/* Print out debug information. */
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Current frame:\n");
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Current frame:\n");
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Tx data (Len: %d):\n", cfspi->tx_cpck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Tx data (Len: %d):\n", cfspi->tx_cpck_len);
len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len),
cfspi->xfer.va_tx[0],
(cfspi->tx_cpck_len + SPI_CMD_SZ), 100);
- len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
- "Rx data (Len: %d):\n", cfspi->rx_cpck_len);
+ len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+ "Rx data (Len: %d):\n", cfspi->rx_cpck_len);
len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len),
cfspi->xfer.va_rx,
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 6ee06a49fb4c..68834a2853c9 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -883,6 +883,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = {
= { .len = sizeof(struct can_bittiming) },
[IFLA_CAN_DATA_BITTIMING_CONST]
= { .len = sizeof(struct can_bittiming_const) },
+ [IFLA_CAN_TERMINATION] = { .type = NLA_U16 },
};
static int can_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index 2f5c287eac95..a3664281a33f 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -625,7 +625,10 @@ err_free_chan:
tty->disc_data = NULL;
clear_bit(SLF_INUSE, &sl->flags);
slc_free_netdev(sl->dev);
+ /* do not call free_netdev before rtnl_unlock */
+ rtnl_unlock();
free_netdev(sl->dev);
+ return err;
err_exit:
rtnl_unlock();
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 022466ca1c19..7cbd1bd4c5a6 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -566,7 +566,7 @@ mt7530_mib_reset(struct dsa_switch *ds)
static void
mt7530_port_set_status(struct mt7530_priv *priv, int port, int enable)
{
- u32 mask = PMCR_TX_EN | PMCR_RX_EN;
+ u32 mask = PMCR_TX_EN | PMCR_RX_EN | PMCR_FORCE_LNK;
if (enable)
mt7530_set(priv, MT7530_PMCR_P(port), mask);
@@ -1444,7 +1444,7 @@ static void mt7530_phylink_mac_config(struct dsa_switch *ds, int port,
mcr_new &= ~(PMCR_FORCE_SPEED_1000 | PMCR_FORCE_SPEED_100 |
PMCR_FORCE_FDX | PMCR_TX_FC_EN | PMCR_RX_FC_EN);
mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN |
- PMCR_BACKPR_EN | PMCR_FORCE_MODE | PMCR_FORCE_LNK;
+ PMCR_BACKPR_EN | PMCR_FORCE_MODE;
/* Are we connected to external phy */
if (port == 5 && dsa_is_user_port(ds, 5))
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 8c9289549688..2f993e673ec7 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2769,6 +2769,8 @@ static u64 mv88e6xxx_devlink_atu_bin_get(struct mv88e6xxx_chip *chip,
goto unlock;
}
+ occupancy &= MV88E6XXX_G2_ATU_STATS_MASK;
+
unlock:
mv88e6xxx_reg_unlock(chip);
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 01503014b1ee..8fd483020c5b 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -1099,6 +1099,13 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
{
int err, irq, virq;
+ chip->g2_irq.masked = ~0;
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6xxx_g2_int_mask(chip, ~chip->g2_irq.masked);
+ mv88e6xxx_reg_unlock(chip);
+ if (err)
+ return err;
+
chip->g2_irq.domain = irq_domain_add_simple(
chip->dev->of_node, 16, 0, &mv88e6xxx_g2_irq_domain_ops, chip);
if (!chip->g2_irq.domain)
@@ -1108,7 +1115,6 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
irq_create_mapping(chip->g2_irq.domain, irq);
chip->g2_irq.chip = mv88e6xxx_g2_irq_chip;
- chip->g2_irq.masked = ~0;
chip->device_irq = irq_find_mapping(chip->g1_irq.domain,
MV88E6XXX_G1_STS_IRQ_DEVICE);
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 03ba6d25f7fe..7edea5741a5f 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1741,7 +1741,8 @@ static void sja1105_teardown(struct dsa_switch *ds)
if (!dsa_is_user_port(ds, port))
continue;
- kthread_destroy_worker(sp->xmit_worker);
+ if (sp->xmit_worker)
+ kthread_destroy_worker(sp->xmit_worker);
}
sja1105_tas_teardown(ds);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 0b2fd96b93d7..cada6e7e30f4 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1018,13 +1018,9 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
struct ena_rx_buffer *rx_info;
req_id = rx_ring->free_ids[next_to_use];
- rc = validate_rx_req_id(rx_ring, req_id);
- if (unlikely(rc < 0))
- break;
rx_info = &rx_ring->rx_buffer_info[req_id];
-
rc = ena_alloc_rx_page(rx_ring, rx_info,
GFP_ATOMIC | __GFP_COMP);
if (unlikely(rc < 0)) {
@@ -1379,9 +1375,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
struct ena_rx_buffer *rx_info;
u16 len, req_id, buf = 0;
void *va;
+ int rc;
len = ena_bufs[buf].len;
req_id = ena_bufs[buf].req_id;
+
+ rc = validate_rx_req_id(rx_ring, req_id);
+ if (unlikely(rc < 0))
+ return NULL;
+
rx_info = &rx_ring->rx_buffer_info[req_id];
if (unlikely(!rx_info->page)) {
@@ -1454,6 +1456,11 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
buf++;
len = ena_bufs[buf].len;
req_id = ena_bufs[buf].req_id;
+
+ rc = validate_rx_req_id(rx_ring, req_id);
+ if (unlikely(rc < 0))
+ return NULL;
+
rx_info = &rx_ring->rx_buffer_info[req_id];
} while (1);
@@ -1968,7 +1975,7 @@ static int ena_enable_msix(struct ena_adapter *adapter)
}
/* Reserved the max msix vectors we might need */
- msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_io_queues);
+ msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
netif_dbg(adapter, probe, adapter->netdev,
"trying to enable MSI-X, vectors %d\n", msix_vecs);
@@ -2068,6 +2075,7 @@ static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
static int ena_request_io_irq(struct ena_adapter *adapter)
{
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
unsigned long flags = 0;
struct ena_irq *irq;
int rc = 0, i, k;
@@ -2078,7 +2086,7 @@ static int ena_request_io_irq(struct ena_adapter *adapter)
return -EINVAL;
}
- for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
irq = &adapter->irq_tbl[i];
rc = request_irq(irq->vector, irq->handler, flags, irq->name,
irq->data);
@@ -2119,6 +2127,7 @@ static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
static void ena_free_io_irq(struct ena_adapter *adapter)
{
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
struct ena_irq *irq;
int i;
@@ -2129,7 +2138,7 @@ static void ena_free_io_irq(struct ena_adapter *adapter)
}
#endif /* CONFIG_RFS_ACCEL */
- for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
irq = &adapter->irq_tbl[i];
irq_set_affinity_hint(irq->vector, NULL);
free_irq(irq->vector, irq->data);
@@ -2144,12 +2153,13 @@ static void ena_disable_msix(struct ena_adapter *adapter)
static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
{
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
int i;
if (!netif_running(adapter->netdev))
return;
- for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++)
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
synchronize_irq(adapter->irq_tbl[i].vector);
}
@@ -3476,6 +3486,7 @@ static int ena_restore_device(struct ena_adapter *adapter)
netif_carrier_on(adapter->netdev);
mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
+ adapter->last_keep_alive_jiffies = jiffies;
dev_err(&pdev->dev,
"Device reset completed successfully, Driver info: %s\n",
version);
@@ -4325,13 +4336,15 @@ err_disable_device:
/*****************************************************************************/
-/* ena_remove - Device Removal Routine
+/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
* @pdev: PCI device information struct
+ * @shutdown: Is it a shutdown operation? If false, means it is a removal
*
- * ena_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device.
+ * __ena_shutoff is a helper routine that does the real work on shutdown and
+ * removal paths; the difference between those paths is whether to detach
+ * or unregister the netdevice.
*/
-static void ena_remove(struct pci_dev *pdev)
+static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
{
struct ena_adapter *adapter = pci_get_drvdata(pdev);
struct ena_com_dev *ena_dev;
@@ -4350,13 +4363,17 @@ static void ena_remove(struct pci_dev *pdev)
cancel_work_sync(&adapter->reset_task);
- rtnl_lock();
+ rtnl_lock(); /* lock released inside the below if-else block */
ena_destroy_device(adapter, true);
- rtnl_unlock();
-
- unregister_netdev(netdev);
-
- free_netdev(netdev);
+ if (shutdown) {
+ netif_device_detach(netdev);
+ dev_close(netdev);
+ rtnl_unlock();
+ } else {
+ rtnl_unlock();
+ unregister_netdev(netdev);
+ free_netdev(netdev);
+ }
ena_com_rss_destroy(ena_dev);
@@ -4371,6 +4388,30 @@ static void ena_remove(struct pci_dev *pdev)
vfree(ena_dev);
}
+/* ena_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ena_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+
+static void ena_remove(struct pci_dev *pdev)
+{
+ __ena_shutoff(pdev, false);
+}
+
+/* ena_shutdown - Device Shutdown Routine
+ * @pdev: PCI device information struct
+ *
+ * ena_shutdown is called by the PCI subsystem to alert the driver that
+ * a shutdown/reboot (or kexec) is happening and device must be disabled.
+ */
+
+static void ena_shutdown(struct pci_dev *pdev)
+{
+ __ena_shutoff(pdev, true);
+}
+
#ifdef CONFIG_PM
/* ena_suspend - PM suspend callback
* @pdev: PCI device information struct
@@ -4420,6 +4461,7 @@ static struct pci_driver ena_pci_driver = {
.id_table = ena_pci_tbl,
.probe = ena_probe,
.remove = ena_remove,
+ .shutdown = ena_shutdown,
#ifdef CONFIG_PM
.suspend = ena_suspend,
.resume = ena_resume,
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index e0611cba87f9..15b31cddc054 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2135,7 +2135,7 @@ static int bcm_sysport_rule_set(struct bcm_sysport_priv *priv,
return -ENOSPC;
index = find_first_zero_bit(priv->filters, RXCHK_BRCM_TAG_MAX);
- if (index > RXCHK_BRCM_TAG_MAX)
+ if (index >= RXCHK_BRCM_TAG_MAX)
return -ENOSPC;
/* Location is the classification ID, and index is the position
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f9a8151f092c..d28b406a26b1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6880,12 +6880,12 @@ skip_rdma:
}
ena |= FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES;
rc = bnxt_hwrm_func_backing_store_cfg(bp, ena);
- if (rc)
+ if (rc) {
netdev_err(bp->dev, "Failed configuring context mem, rc = %d.\n",
rc);
- else
- ctx->flags |= BNXT_CTX_FLAG_INITED;
-
+ return rc;
+ }
+ ctx->flags |= BNXT_CTX_FLAG_INITED;
return 0;
}
@@ -7406,14 +7406,22 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp)
pri2cos = &resp2->pri0_cos_queue_id;
for (i = 0; i < 8; i++) {
u8 queue_id = pri2cos[i];
+ u8 queue_idx;
+ /* Per port queue IDs start from 0, 10, 20, etc */
+ queue_idx = queue_id % 10;
+ if (queue_idx > BNXT_MAX_QUEUE) {
+ bp->pri2cos_valid = false;
+ goto qstats_done;
+ }
for (j = 0; j < bp->max_q; j++) {
if (bp->q_ids[j] == queue_id)
- bp->pri2cos[i] = j;
+ bp->pri2cos_idx[i] = queue_idx;
}
}
bp->pri2cos_valid = 1;
}
+qstats_done:
mutex_unlock(&bp->hwrm_cmd_lock);
return rc;
}
@@ -10982,13 +10990,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu)
struct bnxt *bp = netdev_priv(dev);
if (netif_running(dev))
- bnxt_close_nic(bp, false, false);
+ bnxt_close_nic(bp, true, false);
dev->mtu = new_mtu;
bnxt_set_ring_params(bp);
if (netif_running(dev))
- return bnxt_open_nic(bp, false, false);
+ return bnxt_open_nic(bp, true, false);
return 0;
}
@@ -11669,6 +11677,10 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
bp->rx_nr_rings++;
bp->cp_nr_rings++;
}
+ if (rc) {
+ bp->tx_nr_rings = 0;
+ bp->rx_nr_rings = 0;
+ }
return rc;
}
@@ -11962,12 +11974,12 @@ init_err_pci_clean:
bnxt_hwrm_func_drv_unrgtr(bp);
bnxt_free_hwrm_short_cmd_req(bp);
bnxt_free_hwrm_resources(bp);
- bnxt_free_ctx_mem(bp);
- kfree(bp->ctx);
- bp->ctx = NULL;
kfree(bp->fw_health);
bp->fw_health = NULL;
bnxt_cleanup_pci(bp);
+ bnxt_free_ctx_mem(bp);
+ kfree(bp->ctx);
+ bp->ctx = NULL;
init_err_free:
free_netdev(dev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index cabef0b4f5fb..63b170658532 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1716,7 +1716,7 @@ struct bnxt {
u16 fw_rx_stats_ext_size;
u16 fw_tx_stats_ext_size;
u16 hw_ring_stats_size;
- u8 pri2cos[8];
+ u8 pri2cos_idx[8];
u8 pri2cos_valid;
u16 hwrm_max_req_len;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index fb6f30d0d1d0..b1511bcffb1b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -479,24 +479,26 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets)
{
struct bnxt *bp = netdev_priv(dev);
struct ieee_ets *my_ets = bp->ieee_ets;
+ int rc;
ets->ets_cap = bp->max_tc;
if (!my_ets) {
- int rc;
-
if (bp->dcbx_cap & DCB_CAP_DCBX_HOST)
return 0;
my_ets = kzalloc(sizeof(*my_ets), GFP_KERNEL);
if (!my_ets)
- return 0;
+ return -ENOMEM;
rc = bnxt_hwrm_queue_cos2bw_qcfg(bp, my_ets);
if (rc)
- return 0;
+ goto error;
rc = bnxt_hwrm_queue_pri2cos_qcfg(bp, my_ets);
if (rc)
- return 0;
+ goto error;
+
+ /* cache result */
+ bp->ieee_ets = my_ets;
}
ets->cbs = my_ets->cbs;
@@ -505,6 +507,9 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets)
memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa));
memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc));
return 0;
+error:
+ kfree(my_ets);
+ return rc;
}
static int bnxt_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index e8fc1671c581..3f8a1ded662a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -589,25 +589,25 @@ skip_ring_stats:
if (bp->pri2cos_valid) {
for (i = 0; i < 8; i++, j++) {
long n = bnxt_rx_bytes_pri_arr[i].base_off +
- bp->pri2cos[i];
+ bp->pri2cos_idx[i];
buf[j] = le64_to_cpu(*(rx_port_stats_ext + n));
}
for (i = 0; i < 8; i++, j++) {
long n = bnxt_rx_pkts_pri_arr[i].base_off +
- bp->pri2cos[i];
+ bp->pri2cos_idx[i];
buf[j] = le64_to_cpu(*(rx_port_stats_ext + n));
}
for (i = 0; i < 8; i++, j++) {
long n = bnxt_tx_bytes_pri_arr[i].base_off +
- bp->pri2cos[i];
+ bp->pri2cos_idx[i];
buf[j] = le64_to_cpu(*(tx_port_stats_ext + n));
}
for (i = 0; i < 8; i++, j++) {
long n = bnxt_tx_pkts_pri_arr[i].base_off +
- bp->pri2cos[i];
+ bp->pri2cos_idx[i];
buf[j] = le64_to_cpu(*(tx_port_stats_ext + n));
}
@@ -2007,8 +2007,8 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr;
struct hwrm_nvm_install_update_input install = {0};
const struct firmware *fw;
- int rc, hwrm_err = 0;
u32 item_len;
+ int rc = 0;
u16 index;
bnxt_hwrm_fw_set_time(bp);
@@ -2052,15 +2052,14 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
memcpy(kmem, fw->data, fw->size);
modify.host_src_addr = cpu_to_le64(dma_handle);
- hwrm_err = hwrm_send_message(bp, &modify,
- sizeof(modify),
- FLASH_PACKAGE_TIMEOUT);
+ rc = hwrm_send_message(bp, &modify, sizeof(modify),
+ FLASH_PACKAGE_TIMEOUT);
dma_free_coherent(&bp->pdev->dev, fw->size, kmem,
dma_handle);
}
}
release_firmware(fw);
- if (rc || hwrm_err)
+ if (rc)
goto err_exit;
if ((install_type & 0xffff) == 0)
@@ -2069,20 +2068,19 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
install.install_type = cpu_to_le32(install_type);
mutex_lock(&bp->hwrm_cmd_lock);
- hwrm_err = _hwrm_send_message(bp, &install, sizeof(install),
- INSTALL_PACKAGE_TIMEOUT);
- if (hwrm_err) {
+ rc = _hwrm_send_message(bp, &install, sizeof(install),
+ INSTALL_PACKAGE_TIMEOUT);
+ if (rc) {
u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err;
if (resp->error_code && error_code ==
NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
install.flags |= cpu_to_le16(
NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
- hwrm_err = _hwrm_send_message(bp, &install,
- sizeof(install),
- INSTALL_PACKAGE_TIMEOUT);
+ rc = _hwrm_send_message(bp, &install, sizeof(install),
+ INSTALL_PACKAGE_TIMEOUT);
}
- if (hwrm_err)
+ if (rc)
goto flash_pkg_exit;
}
@@ -2094,7 +2092,7 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
flash_pkg_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
err_exit:
- if (hwrm_err == -EACCES)
+ if (rc == -EACCES)
bnxt_print_admin_err(bp);
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index e50a15397e11..1d678bee2cc9 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -94,12 +94,6 @@ static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv,
bcmgenet_writel(value, d + DMA_DESC_LENGTH_STATUS);
}
-static inline u32 dmadesc_get_length_status(struct bcmgenet_priv *priv,
- void __iomem *d)
-{
- return bcmgenet_readl(d + DMA_DESC_LENGTH_STATUS);
-}
-
static inline void dmadesc_set_addr(struct bcmgenet_priv *priv,
void __iomem *d,
dma_addr_t addr)
@@ -508,61 +502,6 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev,
return phy_ethtool_ksettings_set(dev->phydev, cmd);
}
-static void bcmgenet_set_rx_csum(struct net_device *dev,
- netdev_features_t wanted)
-{
- struct bcmgenet_priv *priv = netdev_priv(dev);
- u32 rbuf_chk_ctrl;
- bool rx_csum_en;
-
- rx_csum_en = !!(wanted & NETIF_F_RXCSUM);
-
- rbuf_chk_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CHK_CTRL);
-
- /* enable rx checksumming */
- if (rx_csum_en)
- rbuf_chk_ctrl |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS;
- else
- rbuf_chk_ctrl &= ~RBUF_RXCHK_EN;
- priv->desc_rxchk_en = rx_csum_en;
-
- /* If UniMAC forwards CRC, we need to skip over it to get
- * a valid CHK bit to be set in the per-packet status word
- */
- if (rx_csum_en && priv->crc_fwd_en)
- rbuf_chk_ctrl |= RBUF_SKIP_FCS;
- else
- rbuf_chk_ctrl &= ~RBUF_SKIP_FCS;
-
- bcmgenet_rbuf_writel(priv, rbuf_chk_ctrl, RBUF_CHK_CTRL);
-}
-
-static void bcmgenet_set_tx_csum(struct net_device *dev,
- netdev_features_t wanted)
-{
- struct bcmgenet_priv *priv = netdev_priv(dev);
- bool desc_64b_en;
- u32 tbuf_ctrl, rbuf_ctrl;
-
- tbuf_ctrl = bcmgenet_tbuf_ctrl_get(priv);
- rbuf_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CTRL);
-
- desc_64b_en = !!(wanted & NETIF_F_HW_CSUM);
-
- /* enable 64 bytes descriptor in both directions (RBUF and TBUF) */
- if (desc_64b_en) {
- tbuf_ctrl |= RBUF_64B_EN;
- rbuf_ctrl |= RBUF_64B_EN;
- } else {
- tbuf_ctrl &= ~RBUF_64B_EN;
- rbuf_ctrl &= ~RBUF_64B_EN;
- }
- priv->desc_64b_en = desc_64b_en;
-
- bcmgenet_tbuf_ctrl_set(priv, tbuf_ctrl);
- bcmgenet_rbuf_writel(priv, rbuf_ctrl, RBUF_CTRL);
-}
-
static int bcmgenet_set_features(struct net_device *dev,
netdev_features_t features)
{
@@ -578,9 +517,6 @@ static int bcmgenet_set_features(struct net_device *dev,
reg = bcmgenet_umac_readl(priv, UMAC_CMD);
priv->crc_fwd_en = !!(reg & CMD_CRC_FWD);
- bcmgenet_set_tx_csum(dev, features);
- bcmgenet_set_rx_csum(dev, features);
-
clk_disable_unprepare(priv->clk);
return ret;
@@ -1475,8 +1411,8 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev)
/* Reallocate the SKB to put enough headroom in front of it and insert
* the transmit checksum offsets in the descriptors
*/
-static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev,
- struct sk_buff *skb)
+static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev,
+ struct sk_buff *skb)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
struct status_64 *status = NULL;
@@ -1590,13 +1526,11 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
*/
GENET_CB(skb)->bytes_sent = skb->len;
- /* set the SKB transmit checksum */
- if (priv->desc_64b_en) {
- skb = bcmgenet_put_tx_csum(dev, skb);
- if (!skb) {
- ret = NETDEV_TX_OK;
- goto out;
- }
+ /* add the Transmit Status Block */
+ skb = bcmgenet_add_tsb(dev, skb);
+ if (!skb) {
+ ret = NETDEV_TX_OK;
+ goto out;
}
for (i = 0; i <= nr_frags; i++) {
@@ -1775,6 +1709,9 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
while ((rxpktprocessed < rxpkttoprocess) &&
(rxpktprocessed < budget)) {
+ struct status_64 *status;
+ __be16 rx_csum;
+
cb = &priv->rx_cbs[ring->read_ptr];
skb = bcmgenet_rx_refill(priv, cb);
@@ -1783,20 +1720,12 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
goto next;
}
- if (!priv->desc_64b_en) {
- dma_length_status =
- dmadesc_get_length_status(priv, cb->bd_addr);
- } else {
- struct status_64 *status;
- __be16 rx_csum;
-
- status = (struct status_64 *)skb->data;
- dma_length_status = status->length_status;
+ status = (struct status_64 *)skb->data;
+ dma_length_status = status->length_status;
+ if (dev->features & NETIF_F_RXCSUM) {
rx_csum = (__force __be16)(status->rx_csum & 0xffff);
- if (priv->desc_rxchk_en) {
- skb->csum = (__force __wsum)ntohs(rx_csum);
- skb->ip_summed = CHECKSUM_COMPLETE;
- }
+ skb->csum = (__force __wsum)ntohs(rx_csum);
+ skb->ip_summed = CHECKSUM_COMPLETE;
}
/* DMA flags and length are still valid no matter how
@@ -1840,14 +1769,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
} /* error packet */
skb_put(skb, len);
- if (priv->desc_64b_en) {
- skb_pull(skb, 64);
- len -= 64;
- }
- /* remove hardware 2bytes added for IP alignment */
- skb_pull(skb, 2);
- len -= 2;
+ /* remove RSB and hardware 2bytes added for IP alignment */
+ skb_pull(skb, 66);
+ len -= 66;
if (priv->crc_fwd_en) {
skb_trim(skb, len - ETH_FCS_LEN);
@@ -1965,6 +1890,8 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable)
u32 reg;
reg = bcmgenet_umac_readl(priv, UMAC_CMD);
+ if (reg & CMD_SW_RESET)
+ return;
if (enable)
reg |= mask;
else
@@ -1984,11 +1911,9 @@ static void reset_umac(struct bcmgenet_priv *priv)
bcmgenet_rbuf_ctrl_set(priv, 0);
udelay(10);
- /* disable MAC while updating its registers */
- bcmgenet_umac_writel(priv, 0, UMAC_CMD);
-
- /* issue soft reset with (rg)mii loopback to ensure a stable rxclk */
- bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD);
+ /* issue soft reset and disable MAC while updating its registers */
+ bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD);
+ udelay(2);
}
static void bcmgenet_intr_disable(struct bcmgenet_priv *priv)
@@ -2038,11 +1963,28 @@ static void init_umac(struct bcmgenet_priv *priv)
bcmgenet_umac_writel(priv, ENET_MAX_MTU_SIZE, UMAC_MAX_FRAME_LEN);
- /* init rx registers, enable ip header optimization */
+ /* init tx registers, enable TSB */
+ reg = bcmgenet_tbuf_ctrl_get(priv);
+ reg |= TBUF_64B_EN;
+ bcmgenet_tbuf_ctrl_set(priv, reg);
+
+ /* init rx registers, enable ip header optimization and RSB */
reg = bcmgenet_rbuf_readl(priv, RBUF_CTRL);
- reg |= RBUF_ALIGN_2B;
+ reg |= RBUF_ALIGN_2B | RBUF_64B_EN;
bcmgenet_rbuf_writel(priv, reg, RBUF_CTRL);
+ /* enable rx checksumming */
+ reg = bcmgenet_rbuf_readl(priv, RBUF_CHK_CTRL);
+ reg |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS;
+ /* If UniMAC forwards CRC, we need to skip over it to get
+ * a valid CHK bit to be set in the per-packet status word
+ */
+ if (priv->crc_fwd_en)
+ reg |= RBUF_SKIP_FCS;
+ else
+ reg &= ~RBUF_SKIP_FCS;
+ bcmgenet_rbuf_writel(priv, reg, RBUF_CHK_CTRL);
+
if (!GENET_IS_V1(priv) && !GENET_IS_V2(priv))
bcmgenet_rbuf_writel(priv, 1, RBUF_TBUF_SIZE_CTRL);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 61a6fe9f4cec..daf8fb2c39b6 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -273,6 +273,7 @@ struct bcmgenet_mib_counters {
#define RBUF_FLTR_LEN_SHIFT 8
#define TBUF_CTRL 0x00
+#define TBUF_64B_EN (1 << 0)
#define TBUF_BP_MC 0x0C
#define TBUF_ENERGY_CTRL 0x14
#define TBUF_EEE_EN (1 << 0)
@@ -662,8 +663,6 @@ struct bcmgenet_priv {
unsigned int irq0_stat;
/* HW descriptors/checksum variables */
- bool desc_64b_en;
- bool desc_rxchk_en;
bool crc_fwd_en;
u32 dma_max_burst_length;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index ea20d94bd050..c9a43695b182 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -132,8 +132,12 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
return -EINVAL;
}
- /* disable RX */
+ /* Can't suspend with WoL if MAC is still in reset */
reg = bcmgenet_umac_readl(priv, UMAC_CMD);
+ if (reg & CMD_SW_RESET)
+ reg &= ~CMD_SW_RESET;
+
+ /* disable RX */
reg &= ~CMD_RX_EN;
bcmgenet_umac_writel(priv, reg, UMAC_CMD);
mdelay(10);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 10244941a7a6..b5930f80039d 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -95,6 +95,12 @@ void bcmgenet_mii_setup(struct net_device *dev)
CMD_HD_EN |
CMD_RX_PAUSE_IGNORE | CMD_TX_PAUSE_IGNORE);
reg |= cmd_bits;
+ if (reg & CMD_SW_RESET) {
+ reg &= ~CMD_SW_RESET;
+ bcmgenet_umac_writel(priv, reg, UMAC_CMD);
+ udelay(2);
+ reg |= CMD_TX_EN | CMD_RX_EN;
+ }
bcmgenet_umac_writel(priv, reg, UMAC_CMD);
} else {
/* done if nothing has changed */
@@ -181,38 +187,8 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
const char *phy_name = NULL;
u32 id_mode_dis = 0;
u32 port_ctrl;
- int bmcr = -1;
- int ret;
u32 reg;
- /* MAC clocking workaround during reset of umac state machines */
- reg = bcmgenet_umac_readl(priv, UMAC_CMD);
- if (reg & CMD_SW_RESET) {
- /* An MII PHY must be isolated to prevent TXC contention */
- if (priv->phy_interface == PHY_INTERFACE_MODE_MII) {
- ret = phy_read(phydev, MII_BMCR);
- if (ret >= 0) {
- bmcr = ret;
- ret = phy_write(phydev, MII_BMCR,
- bmcr | BMCR_ISOLATE);
- }
- if (ret) {
- netdev_err(dev, "failed to isolate PHY\n");
- return ret;
- }
- }
- /* Switch MAC clocking to RGMII generated clock */
- bcmgenet_sys_writel(priv, PORT_MODE_EXT_GPHY, SYS_PORT_CTRL);
- /* Ensure 5 clks with Rx disabled
- * followed by 5 clks with Reset asserted
- */
- udelay(4);
- reg &= ~(CMD_SW_RESET | CMD_LCL_LOOP_EN);
- bcmgenet_umac_writel(priv, reg, UMAC_CMD);
- /* Ensure 5 more clocks before Rx is enabled */
- udelay(2);
- }
-
switch (priv->phy_interface) {
case PHY_INTERFACE_MODE_INTERNAL:
phy_name = "internal PHY";
@@ -282,10 +258,6 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
bcmgenet_sys_writel(priv, port_ctrl, SYS_PORT_CTRL);
- /* Restore the MII PHY after isolation */
- if (bmcr >= 0)
- phy_write(phydev, MII_BMCR, bmcr);
-
priv->ext_phy = !priv->internal_phy &&
(priv->phy_interface != PHY_INTERFACE_MODE_MOCA);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 2a2938bbb93a..fc05248984fc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -902,7 +902,7 @@ void clear_all_filters(struct adapter *adapter)
adapter->tids.tid_tab[i];
if (f && (f->valid || f->pending))
- cxgb4_del_filter(dev, i, &f->fs);
+ cxgb4_del_filter(dev, f->tid, &f->fs);
}
sb = t4_read_reg(adapter, LE_DB_SRVR_START_INDEX_A);
@@ -910,7 +910,7 @@ void clear_all_filters(struct adapter *adapter)
f = (struct filter_entry *)adapter->tids.tid_tab[i];
if (f && (f->valid || f->pending))
- cxgb4_del_filter(dev, i, &f->fs);
+ cxgb4_del_filter(dev, f->tid, &f->fs);
}
}
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 649842a8aa28..97f90edbc068 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -5381,12 +5381,11 @@ static inline bool is_x_10g_port(const struct link_config *lc)
static int cfg_queues(struct adapter *adap)
{
u32 avail_qsets, avail_eth_qsets, avail_uld_qsets;
+ u32 i, n10g = 0, qidx = 0, n1g = 0;
+ u32 ncpus = num_online_cpus();
u32 niqflint, neq, num_ulds;
struct sge *s = &adap->sge;
- u32 i, n10g = 0, qidx = 0;
-#ifndef CONFIG_CHELSIO_T4_DCB
- int q10g = 0;
-#endif
+ u32 q10g = 0, q1g;
/* Reduce memory usage in kdump environment, disable all offload. */
if (is_kdump_kernel() || (is_uld(adap) && t4_uld_mem_alloc(adap))) {
@@ -5424,44 +5423,50 @@ static int cfg_queues(struct adapter *adap)
n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
avail_eth_qsets = min_t(u32, avail_qsets, MAX_ETH_QSETS);
+
+ /* We default to 1 queue per non-10G port and up to # of cores queues
+ * per 10G port.
+ */
+ if (n10g)
+ q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g;
+
+ n1g = adap->params.nports - n10g;
#ifdef CONFIG_CHELSIO_T4_DCB
/* For Data Center Bridging support we need to be able to support up
* to 8 Traffic Priorities; each of which will be assigned to its
* own TX Queue in order to prevent Head-Of-Line Blocking.
*/
+ q1g = 8;
if (adap->params.nports * 8 > avail_eth_qsets) {
dev_err(adap->pdev_dev, "DCB avail_eth_qsets=%d < %d!\n",
avail_eth_qsets, adap->params.nports * 8);
return -ENOMEM;
}
- for_each_port(adap, i) {
- struct port_info *pi = adap2pinfo(adap, i);
+ if (adap->params.nports * ncpus < avail_eth_qsets)
+ q10g = max(8U, ncpus);
+ else
+ q10g = max(8U, q10g);
- pi->first_qset = qidx;
- pi->nqsets = is_kdump_kernel() ? 1 : 8;
- qidx += pi->nqsets;
- }
-#else /* !CONFIG_CHELSIO_T4_DCB */
- /* We default to 1 queue per non-10G port and up to # of cores queues
- * per 10G port.
- */
- if (n10g)
- q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g;
- if (q10g > netif_get_num_default_rss_queues())
- q10g = netif_get_num_default_rss_queues();
+ while ((q10g * n10g) > (avail_eth_qsets - n1g * q1g))
+ q10g--;
- if (is_kdump_kernel())
+#else /* !CONFIG_CHELSIO_T4_DCB */
+ q1g = 1;
+ q10g = min(q10g, ncpus);
+#endif /* !CONFIG_CHELSIO_T4_DCB */
+ if (is_kdump_kernel()) {
q10g = 1;
+ q1g = 1;
+ }
for_each_port(adap, i) {
struct port_info *pi = adap2pinfo(adap, i);
pi->first_qset = qidx;
- pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
+ pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : q1g;
qidx += pi->nqsets;
}
-#endif /* !CONFIG_CHELSIO_T4_DCB */
s->ethqsets = qidx;
s->max_ethqsets = qidx; /* MSI-X may lower it later */
@@ -5473,7 +5478,7 @@ static int cfg_queues(struct adapter *adap)
* capped by the number of available cores.
*/
num_ulds = adap->num_uld + adap->num_ofld_uld;
- i = min_t(u32, MAX_OFLD_QSETS, num_online_cpus());
+ i = min_t(u32, MAX_OFLD_QSETS, ncpus);
avail_uld_qsets = roundup(i, adap->params.nports);
if (avail_qsets < num_ulds * adap->params.nports) {
adap->params.offload = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
index 58a039c3224a..af1f40cbccc8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
@@ -246,6 +246,9 @@ static int cxgb4_ptp_fineadjtime(struct adapter *adapter, s64 delta)
FW_PTP_CMD_PORTID_V(0));
c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16));
c.u.ts.sc = FW_PTP_SC_ADJ_FTIME;
+ c.u.ts.sign = (delta < 0) ? 1 : 0;
+ if (delta < 0)
+ delta = -delta;
c.u.ts.tm = cpu_to_be64(delta);
err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), NULL);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 97cda501e7e8..cab3d17e0e1a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1307,8 +1307,9 @@ static inline void *write_tso_wr(struct adapter *adap, struct sk_buff *skb,
int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
int maxreclaim)
{
+ unsigned int reclaimed, hw_cidx;
struct sge_txq *q = &eq->q;
- unsigned int reclaimed;
+ int hw_in_use;
if (!q->in_use || !__netif_tx_trylock(eq->txq))
return 0;
@@ -1316,12 +1317,17 @@ int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
/* Reclaim pending completed TX Descriptors. */
reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true);
+ hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
+ hw_in_use = q->pidx - hw_cidx;
+ if (hw_in_use < 0)
+ hw_in_use += q->size;
+
/* If the TX Queue is currently stopped and there's now more than half
* the queue available, restart it. Otherwise bail out since the rest
* of what we want do here is with the possibility of shipping any
* currently buffered Coalesced TX Work Request.
*/
- if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) {
+ if (netif_tx_queue_stopped(eq->txq) && hw_in_use < (q->size / 2)) {
netif_tx_wake_queue(eq->txq);
eq->q.restarts++;
}
@@ -1486,16 +1492,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
* has opened up.
*/
eth_txq_stop(q);
-
- /* If we're using the SGE Doorbell Queue Timer facility, we
- * don't need to ask the Firmware to send us Egress Queue CIDX
- * Updates: the Hardware will do this automatically. And
- * since we send the Ingress Queue CIDX Updates to the
- * corresponding Ethernet Response Queue, we'll get them very
- * quickly.
- */
- if (!q->dbqt)
- wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
}
wr = (void *)&q->q.desc[q->q.pidx];
@@ -1805,16 +1802,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
* has opened up.
*/
eth_txq_stop(txq);
-
- /* If we're using the SGE Doorbell Queue Timer facility, we
- * don't need to ask the Firmware to send us Egress Queue CIDX
- * Updates: the Hardware will do this automatically. And
- * since we send the Ingress Queue CIDX Updates to the
- * corresponding Ethernet Response Queue, we'll get them very
- * quickly.
- */
- if (!txq->dbqt)
- wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
}
/* Start filling in our Work Request. Note that we do _not_ handle
@@ -3370,26 +3358,6 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq,
}
txq = &s->ethtxq[pi->first_qset + rspq->idx];
-
- /* We've got the Hardware Consumer Index Update in the Egress Update
- * message. If we're using the SGE Doorbell Queue Timer mechanism,
- * these Egress Update messages will be our sole CIDX Updates we get
- * since we don't want to chew up PCIe bandwidth for both Ingress
- * Messages and Status Page writes. However, The code which manages
- * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value
- * stored in the Status Page at the end of the TX Queue. It's easiest
- * to simply copy the CIDX Update value from the Egress Update message
- * to the Status Page. Also note that no Endian issues need to be
- * considered here since both are Big Endian and we're just copying
- * bytes consistently ...
- */
- if (txq->dbqt) {
- struct cpl_sge_egr_update *egr;
-
- egr = (struct cpl_sge_egr_update *)rsp;
- WRITE_ONCE(txq->q.stat->cidx, egr->cidx);
- }
-
t4_sge_eth_txq_egress_update(adapter, txq, -1);
}
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index fd93d542f497..ca74a684a904 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -1,4 +1,5 @@
/* Copyright 2008 - 2016 Freescale Semiconductor Inc.
+ * Copyright 2020 NXP
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -123,7 +124,22 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
#define FSL_QMAN_MAX_OAL 127
/* Default alignment for start of data in an Rx FD */
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+/* aligning data start to 64 avoids DMA transaction splits, unless the buffer
+ * is crossing a 4k page boundary
+ */
+#define DPAA_FD_DATA_ALIGNMENT (fman_has_errata_a050385() ? 64 : 16)
+/* aligning to 256 avoids DMA transaction splits caused by 4k page boundary
+ * crossings; also, all SG fragments except the last must have a size multiple
+ * of 256 to avoid DMA transaction splits
+ */
+#define DPAA_A050385_ALIGN 256
+#define DPAA_FD_RX_DATA_ALIGNMENT (fman_has_errata_a050385() ? \
+ DPAA_A050385_ALIGN : 16)
+#else
#define DPAA_FD_DATA_ALIGNMENT 16
+#define DPAA_FD_RX_DATA_ALIGNMENT DPAA_FD_DATA_ALIGNMENT
+#endif
/* The DPAA requires 256 bytes reserved and mapped for the SGT */
#define DPAA_SGT_SIZE 256
@@ -158,8 +174,13 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
#define DPAA_PARSE_RESULTS_SIZE sizeof(struct fman_prs_result)
#define DPAA_TIME_STAMP_SIZE 8
#define DPAA_HASH_RESULTS_SIZE 8
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+#define DPAA_RX_PRIV_DATA_SIZE (DPAA_A050385_ALIGN - (DPAA_PARSE_RESULTS_SIZE\
+ + DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE))
+#else
#define DPAA_RX_PRIV_DATA_SIZE (u16)(DPAA_TX_PRIV_DATA_SIZE + \
dpaa_rx_extra_headroom)
+#endif
#define DPAA_ETH_PCD_RXQ_NUM 128
@@ -180,7 +201,12 @@ static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS];
#define DPAA_BP_RAW_SIZE 4096
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+#define dpaa_bp_size(raw_size) (SKB_WITH_OVERHEAD(raw_size) & \
+ ~(DPAA_A050385_ALIGN - 1))
+#else
#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD(raw_size)
+#endif
static int dpaa_max_frm;
@@ -1192,7 +1218,7 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp *bp,
buf_prefix_content.pass_prs_result = true;
buf_prefix_content.pass_hash_result = true;
buf_prefix_content.pass_time_stamp = true;
- buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT;
+ buf_prefix_content.data_align = DPAA_FD_RX_DATA_ALIGNMENT;
rx_p = &params.specific_params.rx_params;
rx_p->err_fqid = errq->fqid;
@@ -1662,6 +1688,8 @@ static u8 rx_csum_offload(const struct dpaa_priv *priv, const struct qm_fd *fd)
return CHECKSUM_NONE;
}
+#define PTR_IS_ALIGNED(x, a) (IS_ALIGNED((unsigned long)(x), (a)))
+
/* Build a linear skb around the received buffer.
* We are guaranteed there is enough room at the end of the data buffer to
* accommodate the shared info area of the skb.
@@ -1733,8 +1761,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
sg_addr = qm_sg_addr(&sgt[i]);
sg_vaddr = phys_to_virt(sg_addr);
- WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr,
- SMP_CACHE_BYTES));
+ WARN_ON(!PTR_IS_ALIGNED(sg_vaddr, SMP_CACHE_BYTES));
dma_unmap_page(priv->rx_dma_dev, sg_addr,
DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
@@ -2022,6 +2049,75 @@ static inline int dpaa_xmit(struct dpaa_priv *priv,
return 0;
}
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+int dpaa_a050385_wa(struct net_device *net_dev, struct sk_buff **s)
+{
+ struct dpaa_priv *priv = netdev_priv(net_dev);
+ struct sk_buff *new_skb, *skb = *s;
+ unsigned char *start, i;
+
+ /* check linear buffer alignment */
+ if (!PTR_IS_ALIGNED(skb->data, DPAA_A050385_ALIGN))
+ goto workaround;
+
+ /* linear buffers just need to have an aligned start */
+ if (!skb_is_nonlinear(skb))
+ return 0;
+
+ /* linear data size for nonlinear skbs needs to be aligned */
+ if (!IS_ALIGNED(skb_headlen(skb), DPAA_A050385_ALIGN))
+ goto workaround;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ /* all fragments need to have aligned start addresses */
+ if (!IS_ALIGNED(skb_frag_off(frag), DPAA_A050385_ALIGN))
+ goto workaround;
+
+ /* all but last fragment need to have aligned sizes */
+ if (!IS_ALIGNED(skb_frag_size(frag), DPAA_A050385_ALIGN) &&
+ (i < skb_shinfo(skb)->nr_frags - 1))
+ goto workaround;
+ }
+
+ return 0;
+
+workaround:
+ /* copy all the skb content into a new linear buffer */
+ new_skb = netdev_alloc_skb(net_dev, skb->len + DPAA_A050385_ALIGN - 1 +
+ priv->tx_headroom);
+ if (!new_skb)
+ return -ENOMEM;
+
+ /* NET_SKB_PAD bytes already reserved, adding up to tx_headroom */
+ skb_reserve(new_skb, priv->tx_headroom - NET_SKB_PAD);
+
+ /* Workaround for DPAA_A050385 requires data start to be aligned */
+ start = PTR_ALIGN(new_skb->data, DPAA_A050385_ALIGN);
+ if (start - new_skb->data != 0)
+ skb_reserve(new_skb, start - new_skb->data);
+
+ skb_put(new_skb, skb->len);
+ skb_copy_bits(skb, 0, new_skb->data, skb->len);
+ skb_copy_header(new_skb, skb);
+ new_skb->dev = skb->dev;
+
+ /* We move the headroom when we align it so we have to reset the
+ * network and transport header offsets relative to the new data
+ * pointer. The checksum offload relies on these offsets.
+ */
+ skb_set_network_header(new_skb, skb_network_offset(skb));
+ skb_set_transport_header(new_skb, skb_transport_offset(skb));
+
+ /* TODO: does timestamping need the result in the old skb? */
+ dev_kfree_skb(skb);
+ *s = new_skb;
+
+ return 0;
+}
+#endif
+
static netdev_tx_t
dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
{
@@ -2068,6 +2164,14 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
nonlinear = skb_is_nonlinear(skb);
}
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+ if (unlikely(fman_has_errata_a050385())) {
+ if (dpaa_a050385_wa(net_dev, &skb))
+ goto enomem;
+ nonlinear = skb_is_nonlinear(skb);
+ }
+#endif
+
if (nonlinear) {
/* Just create a S/G fd based on the skb */
err = skb_to_sg_fd(priv, skb, &fd);
@@ -2741,9 +2845,7 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl)
headroom = (u16)(bl->priv_data_size + DPAA_PARSE_RESULTS_SIZE +
DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE);
- return DPAA_FD_DATA_ALIGNMENT ? ALIGN(headroom,
- DPAA_FD_DATA_ALIGNMENT) :
- headroom;
+ return ALIGN(headroom, DPAA_FD_DATA_ALIGNMENT);
}
static int dpaa_eth_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 4432a59904c7..23c5fef2f1ad 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2529,15 +2529,15 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
return -EINVAL;
}
- cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr);
+ cycle = fec_enet_us_to_itr_clock(ndev, ec->rx_coalesce_usecs);
if (cycle > 0xFFFF) {
dev_err(dev, "Rx coalesced usec exceed hardware limitation\n");
return -EINVAL;
}
- cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr);
+ cycle = fec_enet_us_to_itr_clock(ndev, ec->tx_coalesce_usecs);
if (cycle > 0xFFFF) {
- dev_err(dev, "Rx coalesced usec exceed hardware limitation\n");
+ dev_err(dev, "Tx coalesced usec exceed hardware limitation\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig
index 0139cb9042ec..34150182cc35 100644
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -8,3 +8,31 @@ config FSL_FMAN
help
Freescale Data-Path Acceleration Architecture Frame Manager
(FMan) support
+
+config DPAA_ERRATUM_A050385
+ bool
+ depends on ARM64 && FSL_DPAA
+ default y
+ help
+ DPAA FMan erratum A050385 software workaround implementation:
+ align buffers, data start, SG fragment length to avoid FMan DMA
+ splits.
+ FMAN DMA reads or writes under heavy traffic load may cause an FMAN
+ internal resource leak, thus stopping further packet processing.
+ The FMAN internal queue can overflow when FMAN splits single
+ read or write transactions into multiple smaller transactions
+ such that more than 17 AXI transactions are in flight from FMAN
+ to interconnect. When the FMAN internal queue overflows, it can
+ stall further packet processing. The issue can occur with any
+ one of the following three conditions:
+ 1. FMAN AXI transaction crosses 4K address boundary (Errata
+ A010022)
+ 2. FMAN DMA address for an AXI transaction is not 16 byte
+ aligned, i.e. the last 4 bits of an address are non-zero
+ 3. Scatter Gather (SG) frames have more than one SG buffer in
+ the SG list and any one of the buffers, except the last
+ buffer in the SG list has data size that is not a multiple
+ of 16 bytes, i.e., other than 16, 32, 48, 64, etc.
+ With any one of the above three conditions present, there is
+ likelihood of stalled FMAN packet processing, especially under
+ stress with multiple ports injecting line-rate traffic.
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 934111def0be..f151d6e111dd 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -1,5 +1,6 @@
/*
* Copyright 2008-2015 Freescale Semiconductor Inc.
+ * Copyright 2020 NXP
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -566,6 +567,10 @@ struct fman_cfg {
u32 qmi_def_tnums_thresh;
};
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+static bool fman_has_err_a050385;
+#endif
+
static irqreturn_t fman_exceptions(struct fman *fman,
enum fman_exceptions exception)
{
@@ -2518,6 +2523,14 @@ struct fman *fman_bind(struct device *fm_dev)
}
EXPORT_SYMBOL(fman_bind);
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+bool fman_has_errata_a050385(void)
+{
+ return fman_has_err_a050385;
+}
+EXPORT_SYMBOL(fman_has_errata_a050385);
+#endif
+
static irqreturn_t fman_err_irq(int irq, void *handle)
{
struct fman *fman = (struct fman *)handle;
@@ -2845,6 +2858,11 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
goto fman_free;
}
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+ fman_has_err_a050385 =
+ of_property_read_bool(fm_node, "fsl,erratum-a050385");
+#endif
+
return fman;
fman_node_put:
diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h
index 935c317fa696..f2ede1360f03 100644
--- a/drivers/net/ethernet/freescale/fman/fman.h
+++ b/drivers/net/ethernet/freescale/fman/fman.h
@@ -1,5 +1,6 @@
/*
* Copyright 2008-2015 Freescale Semiconductor Inc.
+ * Copyright 2020 NXP
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -398,6 +399,10 @@ u16 fman_get_max_frm(void);
int fman_get_rx_extra_headroom(void);
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+bool fman_has_errata_a050385(void);
+#endif
+
struct fman *fman_bind(struct device *dev);
#endif /* __FM_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index e1901874c19f..0d2b4ab01f24 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -782,7 +782,7 @@ int memac_adjust_link(struct fman_mac *memac, u16 speed)
/* Set full duplex */
tmp &= ~IF_MODE_HD;
- if (memac->phy_if == PHY_INTERFACE_MODE_RGMII) {
+ if (phy_interface_mode_is_rgmii(memac->phy_if)) {
/* Configure RGMII in manual mode */
tmp &= ~IF_MODE_RGMII_AUTO;
tmp &= ~IF_MODE_RGMII_SP_MASK;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 1b0313900f98..d87158acdf6f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -46,6 +46,7 @@ enum HCLGE_MBX_OPCODE {
HCLGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */
HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */
HCLGE_MBX_PUSH_PROMISC_INFO, /* (PF -> VF) push vf promisc info */
+ HCLGE_MBX_VF_UNINIT, /* (VF -> PF) vf is uninitializing */
HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf flr status */
HCLGE_MBX_PUSH_LINK_STATUS, /* (M7 -> PF) get port link status */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index acb796cc10d0..a7f40aa1a0ea 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1711,7 +1711,7 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data)
netif_dbg(h, drv, netdev, "setup tc: num_tc=%u\n", tc);
return (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ?
- kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP;
+ kinfo->dcb_ops->setup_tc(h, tc ? tc : 1, prio_tc) : -EOPNOTSUPP;
}
static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 492bc9446463..d3b0cd74ecd2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2446,10 +2446,12 @@ static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed,
int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex)
{
+ struct hclge_mac *mac = &hdev->hw.mac;
int ret;
duplex = hclge_check_speed_dup(duplex, speed);
- if (hdev->hw.mac.speed == speed && hdev->hw.mac.duplex == duplex)
+ if (!mac->support_autoneg && mac->speed == speed &&
+ mac->duplex == duplex)
return 0;
ret = hclge_cfg_mac_speed_dup_hw(hdev, speed, duplex);
@@ -7743,16 +7745,27 @@ static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
struct hclge_desc desc;
int ret;
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false);
-
+ /* read current vlan filter parameter */
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, true);
req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data;
req->vlan_type = vlan_type;
- req->vlan_fe = filter_en ? fe_type : 0;
req->vf_id = vf_id;
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed to get vlan filter config, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* modify and write new config parameter */
+ hclge_cmd_reuse_desc(&desc, false);
+ req->vlan_fe = filter_en ?
+ (req->vlan_fe | fe_type) : (req->vlan_fe & ~fe_type);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret)
- dev_err(&hdev->pdev->dev, "set vlan filter fail, ret =%d.\n",
+ dev_err(&hdev->pdev->dev, "failed to set vlan filter, ret = %d.\n",
ret);
return ret;
@@ -8270,6 +8283,7 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
kfree(vlan);
}
}
+ clear_bit(vport->vport_id, hdev->vf_vlan_full);
}
void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev)
@@ -8486,6 +8500,28 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
}
}
+static void hclge_clear_vf_vlan(struct hclge_dev *hdev)
+{
+ struct hclge_vlan_info *vlan_info;
+ struct hclge_vport *vport;
+ int ret;
+ int vf;
+
+ /* clear port base vlan for all vf */
+ for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) {
+ vport = &hdev->vport[vf];
+ vlan_info = &vport->port_base_vlan_cfg.vlan_info;
+
+ ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+ vport->vport_id,
+ vlan_info->vlan_tag, true);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "failed to clear vf vlan for vf%d, ret = %d\n",
+ vf - HCLGE_VF_VPORT_START_NUM, ret);
+ }
+}
+
int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
u16 vlan_id, bool is_kill)
{
@@ -9895,6 +9931,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
struct hclge_mac *mac = &hdev->hw.mac;
hclge_reset_vf_rate(hdev);
+ hclge_clear_vf_vlan(hdev);
hclge_misc_affinity_teardown(hdev);
hclge_state_uninit(hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index a3c0822191a9..3d850f6b1e37 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -799,6 +799,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
hclge_get_link_mode(vport, req);
break;
case HCLGE_MBX_GET_VF_FLR_STATUS:
+ case HCLGE_MBX_VF_UNINIT:
hclge_rm_vport_all_mac_table(vport, true,
HCLGE_MAC_ADDR_UC);
hclge_rm_vport_all_mac_table(vport, true,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index d6597206e692..0510d85a7f6a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2803,6 +2803,9 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
{
hclgevf_state_uninit(hdev);
+ hclgevf_send_mbx_msg(hdev, HCLGE_MBX_VF_UNINIT, 0, NULL, 0,
+ false, NULL, 0);
+
if (test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
hclgevf_misc_irq_uninit(hdev);
hclgevf_uninit_msi(hdev);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index eb53c15b13f3..5f2d57d1b2d3 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -389,7 +389,8 @@ static int cmdq_sync_cmd_direct_resp(struct hinic_cmdq *cmdq,
spin_unlock_bh(&cmdq->cmdq_lock);
- if (!wait_for_completion_timeout(&done, CMDQ_TIMEOUT)) {
+ if (!wait_for_completion_timeout(&done,
+ msecs_to_jiffies(CMDQ_TIMEOUT))) {
spin_lock_bh(&cmdq->cmdq_lock);
if (cmdq->errcode[curr_prod_idx] == &errcode)
@@ -623,6 +624,8 @@ static int cmdq_cmd_ceq_handler(struct hinic_cmdq *cmdq, u16 ci,
if (!CMDQ_WQE_COMPLETED(be32_to_cpu(ctrl->ctrl_info)))
return -EBUSY;
+ dma_rmb();
+
errcode = CMDQ_WQE_ERRCODE_GET(be32_to_cpu(status->status_info), VAL);
cmdq_sync_cmd_handler(cmdq, ci, errcode);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 79b3d53f2fbf..c7c75b772a86 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -360,50 +360,6 @@ static int wait_for_db_state(struct hinic_hwdev *hwdev)
return -EFAULT;
}
-static int wait_for_io_stopped(struct hinic_hwdev *hwdev)
-{
- struct hinic_cmd_io_status cmd_io_status;
- struct hinic_hwif *hwif = hwdev->hwif;
- struct pci_dev *pdev = hwif->pdev;
- struct hinic_pfhwdev *pfhwdev;
- unsigned long end;
- u16 out_size;
- int err;
-
- if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
- dev_err(&pdev->dev, "Unsupported PCI Function type\n");
- return -EINVAL;
- }
-
- pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
-
- cmd_io_status.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
-
- end = jiffies + msecs_to_jiffies(IO_STATUS_TIMEOUT);
- do {
- err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
- HINIC_COMM_CMD_IO_STATUS_GET,
- &cmd_io_status, sizeof(cmd_io_status),
- &cmd_io_status, &out_size,
- HINIC_MGMT_MSG_SYNC);
- if ((err) || (out_size != sizeof(cmd_io_status))) {
- dev_err(&pdev->dev, "Failed to get IO status, ret = %d\n",
- err);
- return err;
- }
-
- if (cmd_io_status.status == IO_STOPPED) {
- dev_info(&pdev->dev, "IO stopped\n");
- return 0;
- }
-
- msleep(20);
- } while (time_before(jiffies, end));
-
- dev_err(&pdev->dev, "Wait for IO stopped - Timeout\n");
- return -ETIMEDOUT;
-}
-
/**
* clear_io_resource - set the IO resources as not active in the NIC
* @hwdev: the NIC HW device
@@ -423,11 +379,8 @@ static int clear_io_resources(struct hinic_hwdev *hwdev)
return -EINVAL;
}
- err = wait_for_io_stopped(hwdev);
- if (err) {
- dev_err(&pdev->dev, "IO has not stopped yet\n");
- return err;
- }
+ /* sleep 100ms to wait for firmware to stop I/O */
+ msleep(100);
cmd_clear_io_res.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
index 79243b626ddb..c0b6bcb067cd 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
@@ -188,7 +188,7 @@ static u8 eq_cons_idx_checksum_set(u32 val)
* eq_update_ci - update the HW cons idx of event queue
* @eq: the event queue to update the cons idx for
**/
-static void eq_update_ci(struct hinic_eq *eq)
+static void eq_update_ci(struct hinic_eq *eq, u32 arm_state)
{
u32 val, addr = EQ_CONS_IDX_REG_ADDR(eq);
@@ -202,7 +202,7 @@ static void eq_update_ci(struct hinic_eq *eq)
val |= HINIC_EQ_CI_SET(eq->cons_idx, IDX) |
HINIC_EQ_CI_SET(eq->wrapped, WRAPPED) |
- HINIC_EQ_CI_SET(EQ_ARMED, INT_ARMED);
+ HINIC_EQ_CI_SET(arm_state, INT_ARMED);
val |= HINIC_EQ_CI_SET(eq_cons_idx_checksum_set(val), XOR_CHKSUM);
@@ -235,6 +235,8 @@ static void aeq_irq_handler(struct hinic_eq *eq)
if (HINIC_EQ_ELEM_DESC_GET(aeqe_desc, WRAPPED) == eq->wrapped)
break;
+ dma_rmb();
+
event = HINIC_EQ_ELEM_DESC_GET(aeqe_desc, TYPE);
if (event >= HINIC_MAX_AEQ_EVENTS) {
dev_err(&pdev->dev, "Unknown AEQ Event %d\n", event);
@@ -347,7 +349,7 @@ static void eq_irq_handler(void *data)
else if (eq->type == HINIC_CEQ)
ceq_irq_handler(eq);
- eq_update_ci(eq);
+ eq_update_ci(eq, EQ_ARMED);
}
/**
@@ -702,7 +704,7 @@ static int init_eq(struct hinic_eq *eq, struct hinic_hwif *hwif,
}
set_eq_ctrls(eq);
- eq_update_ci(eq);
+ eq_update_ci(eq, EQ_ARMED);
err = alloc_eq_pages(eq);
if (err) {
@@ -752,18 +754,28 @@ err_req_irq:
**/
static void remove_eq(struct hinic_eq *eq)
{
- struct msix_entry *entry = &eq->msix_entry;
-
- free_irq(entry->vector, eq);
+ hinic_set_msix_state(eq->hwif, eq->msix_entry.entry,
+ HINIC_MSIX_DISABLE);
+ free_irq(eq->msix_entry.vector, eq);
if (eq->type == HINIC_AEQ) {
struct hinic_eq_work *aeq_work = &eq->aeq_work;
cancel_work_sync(&aeq_work->work);
+ /* clear aeq_len to prevent hw from accessing host memory */
+ hinic_hwif_write_reg(eq->hwif,
+ HINIC_CSR_AEQ_CTRL_1_ADDR(eq->q_id), 0);
} else if (eq->type == HINIC_CEQ) {
tasklet_kill(&eq->ceq_tasklet);
+ /* clear ceq_len to prevent hw from accessing host memory */
+ hinic_hwif_write_reg(eq->hwif,
+ HINIC_CSR_CEQ_CTRL_1_ADDR(eq->q_id), 0);
}
+ /* update cons_idx to avoid invalid interrupt */
+ eq->cons_idx = hinic_hwif_read_reg(eq->hwif, EQ_PROD_IDX_REG_ADDR(eq));
+ eq_update_ci(eq, EQ_NOT_ARMED);
+
free_eq_pages(eq);
}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index c1a6be6bf6a8..8995e32dd1c0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -43,7 +43,7 @@
#define MSG_NOT_RESP 0xFFFF
-#define MGMT_MSG_TIMEOUT 1000
+#define MGMT_MSG_TIMEOUT 5000
#define mgmt_to_pfhwdev(pf_mgmt) \
container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt)
@@ -267,7 +267,8 @@ static int msg_to_mgmt_sync(struct hinic_pf_to_mgmt *pf_to_mgmt,
goto unlock_sync_msg;
}
- if (!wait_for_completion_timeout(recv_done, MGMT_MSG_TIMEOUT)) {
+ if (!wait_for_completion_timeout(recv_done,
+ msecs_to_jiffies(MGMT_MSG_TIMEOUT))) {
dev_err(&pdev->dev, "MGMT timeout, MSG id = %d\n", msg_id);
err = -ETIMEDOUT;
goto unlock_sync_msg;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 2695ad69fca6..815649e37cb1 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -350,6 +350,9 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget)
if (!rq_wqe)
break;
+ /* make sure we read rx_done before packet length */
+ dma_rmb();
+
cqe = rq->cqe[ci];
status = be32_to_cpu(cqe->status);
hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index 0e13d1c7e474..365016450bdb 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -45,7 +45,7 @@
#define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
-#define MIN_SKB_LEN 17
+#define MIN_SKB_LEN 32
#define MAX_PAYLOAD_OFFSET 221
#define TRANSPORT_OFFSET(l4_hdr, skb) ((u32)((l4_hdr) - (skb)->data))
@@ -622,6 +622,8 @@ static int free_tx_poll(struct napi_struct *napi, int budget)
do {
hw_ci = HW_CONS_IDX(sq) & wq->mask;
+ dma_rmb();
+
/* Reading a WQEBB to get real WQE size and consumer index. */
sq_wqe = hinic_sq_read_wqebb(sq, &skb, &wqe_size, &sw_ci);
if ((!sq_wqe) ||
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c75239d8820f..4bd33245bad6 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2142,6 +2142,8 @@ static void __ibmvnic_reset(struct work_struct *work)
{
struct ibmvnic_rwi *rwi;
struct ibmvnic_adapter *adapter;
+ bool saved_state = false;
+ unsigned long flags;
u32 reset_state;
int rc = 0;
@@ -2153,17 +2155,25 @@ static void __ibmvnic_reset(struct work_struct *work)
return;
}
- reset_state = adapter->state;
-
rwi = get_next_rwi(adapter);
while (rwi) {
+ spin_lock_irqsave(&adapter->state_lock, flags);
+
if (adapter->state == VNIC_REMOVING ||
adapter->state == VNIC_REMOVED) {
+ spin_unlock_irqrestore(&adapter->state_lock, flags);
kfree(rwi);
rc = EBUSY;
break;
}
+ if (!saved_state) {
+ reset_state = adapter->state;
+ adapter->state = VNIC_RESETTING;
+ saved_state = true;
+ }
+ spin_unlock_irqrestore(&adapter->state_lock, flags);
+
if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
/* CHANGE_PARAM requestor holds rtnl_lock */
rc = do_change_param_reset(adapter, rwi, reset_state);
@@ -5091,6 +5101,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
__ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
+ spin_lock_init(&adapter->state_lock);
mutex_init(&adapter->fw_lock);
init_completion(&adapter->init_done);
init_completion(&adapter->fw_done);
@@ -5163,8 +5174,17 @@ static int ibmvnic_remove(struct vio_dev *dev)
{
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&adapter->state_lock, flags);
+ if (adapter->state == VNIC_RESETTING) {
+ spin_unlock_irqrestore(&adapter->state_lock, flags);
+ return -EBUSY;
+ }
adapter->state = VNIC_REMOVING;
+ spin_unlock_irqrestore(&adapter->state_lock, flags);
+
rtnl_lock();
unregister_netdevice(netdev);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 60eccaf91b12..f8416e1d4cf0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -941,7 +941,8 @@ enum vnic_state {VNIC_PROBING = 1,
VNIC_CLOSING,
VNIC_CLOSED,
VNIC_REMOVING,
- VNIC_REMOVED};
+ VNIC_REMOVED,
+ VNIC_RESETTING};
enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
VNIC_RESET_MOBILITY,
@@ -1090,4 +1091,7 @@ struct ibmvnic_adapter {
struct ibmvnic_tunables desired;
struct ibmvnic_tunables fallback;
+
+ /* Used for serialization of state field */
+ spinlock_t state_lock;
};
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 0b9e851f3da4..d14762d93640 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -347,7 +347,7 @@ static int orion_mdio_probe(struct platform_device *pdev)
}
- dev->err_interrupt = platform_get_irq(pdev, 0);
+ dev->err_interrupt = platform_get_irq_optional(pdev, 0);
if (dev->err_interrupt > 0 &&
resource_size(r) < MVMDIO_ERR_INT_MASK + 4) {
dev_err(&pdev->dev,
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 98017e7d5dd0..11babc79dc6c 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3036,11 +3036,10 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
/* For the case where the last mvneta_poll did not process all
* RX packets
*/
- rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
-
cause_rx_tx |= pp->neta_armada3700 ? pp->cause_rx_tx :
port->cause_rx_tx;
+ rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
if (rx_queue) {
rx_queue = rx_queue - 1;
if (pp->bm_priv)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 9c481823b3e8..9486caecfbdc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -906,59 +906,59 @@ static void mlx4_err_rule(struct mlx4_dev *dev, char *str,
int len = 0;
mlx4_err(dev, "%s", str);
- len += snprintf(buf + len, BUF_SIZE - len,
- "port = %d prio = 0x%x qp = 0x%x ",
- rule->port, rule->priority, rule->qpn);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "port = %d prio = 0x%x qp = 0x%x ",
+ rule->port, rule->priority, rule->qpn);
list_for_each_entry(cur, &rule->list, list) {
switch (cur->id) {
case MLX4_NET_TRANS_RULE_ID_ETH:
- len += snprintf(buf + len, BUF_SIZE - len,
- "dmac = %pM ", &cur->eth.dst_mac);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dmac = %pM ", &cur->eth.dst_mac);
if (cur->eth.ether_type)
- len += snprintf(buf + len, BUF_SIZE - len,
- "ethertype = 0x%x ",
- be16_to_cpu(cur->eth.ether_type));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "ethertype = 0x%x ",
+ be16_to_cpu(cur->eth.ether_type));
if (cur->eth.vlan_id)
- len += snprintf(buf + len, BUF_SIZE - len,
- "vlan-id = %d ",
- be16_to_cpu(cur->eth.vlan_id));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "vlan-id = %d ",
+ be16_to_cpu(cur->eth.vlan_id));
break;
case MLX4_NET_TRANS_RULE_ID_IPV4:
if (cur->ipv4.src_ip)
- len += snprintf(buf + len, BUF_SIZE - len,
- "src-ip = %pI4 ",
- &cur->ipv4.src_ip);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "src-ip = %pI4 ",
+ &cur->ipv4.src_ip);
if (cur->ipv4.dst_ip)
- len += snprintf(buf + len, BUF_SIZE - len,
- "dst-ip = %pI4 ",
- &cur->ipv4.dst_ip);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dst-ip = %pI4 ",
+ &cur->ipv4.dst_ip);
break;
case MLX4_NET_TRANS_RULE_ID_TCP:
case MLX4_NET_TRANS_RULE_ID_UDP:
if (cur->tcp_udp.src_port)
- len += snprintf(buf + len, BUF_SIZE - len,
- "src-port = %d ",
- be16_to_cpu(cur->tcp_udp.src_port));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "src-port = %d ",
+ be16_to_cpu(cur->tcp_udp.src_port));
if (cur->tcp_udp.dst_port)
- len += snprintf(buf + len, BUF_SIZE - len,
- "dst-port = %d ",
- be16_to_cpu(cur->tcp_udp.dst_port));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dst-port = %d ",
+ be16_to_cpu(cur->tcp_udp.dst_port));
break;
case MLX4_NET_TRANS_RULE_ID_IB:
- len += snprintf(buf + len, BUF_SIZE - len,
- "dst-gid = %pI6\n", cur->ib.dst_gid);
- len += snprintf(buf + len, BUF_SIZE - len,
- "dst-gid-mask = %pI6\n",
- cur->ib.dst_gid_msk);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dst-gid = %pI6\n", cur->ib.dst_gid);
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "dst-gid-mask = %pI6\n",
+ cur->ib.dst_gid_msk);
break;
case MLX4_NET_TRANS_RULE_ID_VXLAN:
- len += snprintf(buf + len, BUF_SIZE - len,
- "VNID = %d ", be32_to_cpu(cur->vxlan.vni));
+ len += scnprintf(buf + len, BUF_SIZE - len,
+ "VNID = %d ", be32_to_cpu(cur->vxlan.vni));
break;
case MLX4_NET_TRANS_RULE_ID_IPV6:
break;
@@ -967,7 +967,7 @@ static void mlx4_err_rule(struct mlx4_dev *dev, char *str,
break;
}
}
- len += snprintf(buf + len, BUF_SIZE - len, "\n");
+ len += scnprintf(buf + len, BUF_SIZE - len, "\n");
mlx4_err(dev, "%s", buf);
if (len >= BUF_SIZE)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 220ef9f06f84..c9606b8ab6ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -371,6 +371,7 @@ enum {
struct mlx5e_sq_wqe_info {
u8 opcode;
+ u8 num_wqebbs;
/* Auxiliary data for different opcodes. */
union {
@@ -1059,6 +1060,7 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
void mlx5e_activate_rq(struct mlx5e_rq *rq);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
+void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index d3693fa547ac..e54f70d9af22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -10,8 +10,7 @@
static inline bool cqe_syndrome_needs_recover(u8 syndrome)
{
- return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR ||
- syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+ return syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 6c72b592315b..a01e2de2488f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -90,7 +90,7 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
goto out;
mlx5e_reset_icosq_cc_pc(icosq);
- mlx5e_free_rx_descs(rq);
+ mlx5e_free_rx_in_progress_descs(rq);
clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
mlx5e_activate_icosq(icosq);
mlx5e_activate_rq(rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index a226277b0980..f07b1399744e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -181,10 +181,12 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
{
- if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
mlx5_wq_ll_reset(&rq->mpwqe.wq);
- else
+ rq->mpwqe.actual_wq_head = 0;
+ } else {
mlx5_wq_cyc_reset(&rq->wqe.wq);
+ }
}
/* SW parser related functions */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index a3efa29a4629..63116be6b1d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -38,8 +38,8 @@ enum {
enum {
MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
- MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1,
- MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 2,
};
struct mlx5e_ktls_offload_context_tx {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index f260dd96873b..52a56622034a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -218,7 +218,7 @@ tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
* this packet was already acknowledged and its record info
* was released.
*/
- ends_before = before(tcp_seq + datalen, tls_record_start_seq(record));
+ ends_before = before(tcp_seq + datalen - 1, tls_record_start_seq(record));
if (unlikely(tls_record_is_start_marker(record))) {
ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 21de4764d4c0..4ef3dc79f73c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -813,6 +813,29 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
return -ETIMEDOUT;
}
+void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq;
+ u16 head;
+ int i;
+
+ if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ return;
+
+ wq = &rq->mpwqe.wq;
+ head = wq->head;
+
+ /* Outstanding UMR WQEs (in progress) start at wq->head */
+ for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
+ rq->dealloc_wqe(rq, head);
+ head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
+ }
+
+ rq->mpwqe.actual_wq_head = wq->head;
+ rq->mpwqe.umr_in_progress = 0;
+ rq->mpwqe.umr_completed = 0;
+}
+
void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
{
__be16 wqe_ix_be;
@@ -820,14 +843,8 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
- u16 head = wq->head;
- int i;
- /* Outstanding UMR WQEs (in progress) start at wq->head */
- for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
- rq->dealloc_wqe(rq, head);
- head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
- }
+ mlx5e_free_rx_in_progress_descs(rq);
while (!mlx5_wq_ll_is_empty(wq)) {
struct mlx5e_rx_wqe_ll *wqe;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1c3ab69cbd96..312d4692425b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -477,6 +477,7 @@ static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
/* fill sq frag edge with nops to avoid wqe wrapping two pages */
for (; wi < edge_wi; wi++) {
wi->opcode = MLX5_OPCODE_NOP;
+ wi->num_wqebbs = 1;
mlx5e_post_nop(wq, sq->sqn, &sq->pc);
}
}
@@ -525,6 +526,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+ sq->db.ico_wqe[pi].num_wqebbs = MLX5E_UMR_WQEBBS;
sq->db.ico_wqe[pi].umr.rq = rq;
sq->pc += MLX5E_UMR_WQEBBS;
@@ -621,6 +623,7 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.ico_wqe[ci];
+ sqcc += wi->num_wqebbs;
if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
netdev_WARN_ONCE(cq->channel->netdev,
@@ -631,16 +634,12 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
break;
}
- if (likely(wi->opcode == MLX5_OPCODE_UMR)) {
- sqcc += MLX5E_UMR_WQEBBS;
+ if (likely(wi->opcode == MLX5_OPCODE_UMR))
wi->umr.rq->mpwqe.umr_completed++;
- } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) {
- sqcc++;
- } else {
+ else if (unlikely(wi->opcode != MLX5_OPCODE_NOP))
netdev_WARN_ONCE(cq->channel->netdev,
"Bad OPCODE in ICOSQ WQE info: 0x%x\n",
wi->opcode);
- }
} while (!last_wqe);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 74091f72c9a8..ec5fc52bf572 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2476,10 +2476,11 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
continue;
if (f->field_bsize == 32) {
- mask_be32 = *(__be32 *)&mask;
+ mask_be32 = (__be32)mask;
mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
} else if (f->field_bsize == 16) {
- mask_be16 = *(__be16 *)&mask;
+ mask_be32 = (__be32)mask;
+ mask_be16 = *(__be16 *)&mask_be32;
mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 257a7c9f7a14..800d34ed8a96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -78,6 +78,7 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+ sq->db.ico_wqe[pi].num_wqebbs = 1;
nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 8e19f6ab8393..93052b07c76c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -615,8 +615,10 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
break;
if (i == MLX5_MAX_PORTS) {
- if (ldev->nb.notifier_call)
+ if (ldev->nb.notifier_call) {
unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+ ldev->nb.notifier_call = NULL;
+ }
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
mlx5_lag_dev_free(ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 6dec2a550a10..2d93228ff633 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -933,7 +933,6 @@ static int dr_actions_l2_rewrite(struct mlx5dr_domain *dmn,
action->rewrite.data = (void *)ops;
action->rewrite.num_of_actions = i;
- action->rewrite.chunk->byte_size = i * sizeof(*ops);
ret = mlx5dr_send_postsend_action(dmn, action);
if (ret) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index c7f10d4f8f8d..095ec7b1399d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -558,7 +558,8 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
int ret;
send_info.write.addr = (uintptr_t)action->rewrite.data;
- send_info.write.length = action->rewrite.chunk->byte_size;
+ send_info.write.length = action->rewrite.num_of_actions *
+ DR_MODIFY_ACTION_SIZE;
send_info.write.lkey = 0;
send_info.remote_addr = action->rewrite.chunk->mr_addr;
send_info.rkey = action->rewrite.chunk->rkey;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 1faac31f74d0..23f879da9104 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1071,6 +1071,9 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid);
if (req->field_select & MLX5_HCA_VPORT_SEL_NODE_GUID)
MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select,
+ req->cap_mask1_perm);
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
ex:
kfree(in);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 914c33e46fb4..e9ded1a6e131 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1322,36 +1322,64 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci,
mbox->mapaddr);
}
-static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
- const struct pci_device_id *id)
+static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
+ const struct pci_device_id *id,
+ u32 *p_sys_status)
{
unsigned long end;
- char mrsr_pl[MLXSW_REG_MRSR_LEN];
- int err;
+ u32 val;
- mlxsw_reg_mrsr_pack(mrsr_pl);
- err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
- if (err)
- return err;
if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) {
msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
return 0;
}
- /* We must wait for the HW to become responsive once again. */
+ /* We must wait for the HW to become responsive. */
msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
do {
- u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
-
+ val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC)
return 0;
cond_resched();
} while (time_before(jiffies, end));
+
+ *p_sys_status = val & MLXSW_PCI_FW_READY_MASK;
+
return -EBUSY;
}
+static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
+ const struct pci_device_id *id)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ char mrsr_pl[MLXSW_REG_MRSR_LEN];
+ u32 sys_status;
+ int err;
+
+ err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to reach system ready status before reset. Status is 0x%x\n",
+ sys_status);
+ return err;
+ }
+
+ mlxsw_reg_mrsr_pack(mrsr_pl);
+ err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
+ if (err)
+ return err;
+
+ err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n",
+ sys_status);
+ return err;
+ }
+
+ return 0;
+}
+
static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci)
{
int err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index dd6685156396..e05d1d1be2fd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3572,7 +3572,7 @@ MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1);
* When in bytes mode, value is specified in units of 1000bps.
* Access: RW
*/
-MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 28);
+MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 31);
/* reg_qeec_de
* DWRR configuration enable. Enables configuration of the dwrr and
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index 54275624718b..336e5ecc68f8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -637,12 +637,12 @@ static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table,
return 0;
err_erif_unresolve:
- list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list,
- vif_node)
+ list_for_each_entry_continue_reverse(erve, &mr_vif->route_evif_list,
+ vif_node)
mlxsw_sp_mr_route_evif_unresolve(mr_table, erve);
err_irif_unresolve:
- list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list,
- vif_node)
+ list_for_each_entry_continue_reverse(irve, &mr_vif->route_ivif_list,
+ vif_node)
mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve);
mr_vif->rif = NULL;
return err;
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
index 58579baf3f7a..45cc840d8e2e 100644
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ b/drivers/net/ethernet/micrel/ks8851_mll.c
@@ -157,6 +157,50 @@ static int msg_enable;
*/
/**
+ * ks_check_endian - Check whether endianness of the bus is correct
+ * @ks : The chip information
+ *
+ * The KS8851-16MLL EESK pin allows selecting the endianness of the 16bit
+ * bus. To maintain optimum performance, the bus endianness should be set
+ * such that it matches the endianness of the CPU.
+ */
+
+static int ks_check_endian(struct ks_net *ks)
+{
+ u16 cider;
+
+ /*
+ * Read CIDER register first, however read it the "wrong" way around.
+ * If the endian strap on the KS8851-16MLL is incorrect and the chip
+ * is operating in different endianness than the CPU, then the meaning
+ * of BE[3:0] byte-enable bits is also swapped such that:
+ * BE[3,2,1,0] becomes BE[1,0,3,2]
+ *
+ * Luckily for us, the byte-enable bits are the top four MSbits of
+ * the address register and the CIDER register is at offset 0xc0.
+ * Hence, by reading address 0xc0c0, which is not impacted by endian
+ * swapping, we assert either BE[3:2] or BE[1:0] while reading the
+ * CIDER register.
+ *
+ * If the bus configuration is correct, reading 0xc0c0 asserts
+ * BE[3:2] and this read returns 0x0000, because to read a register
+ * with bottom two LSbits of address set to 0, BE[1:0] must be
+ * asserted.
+ *
+ * If the bus configuration is NOT correct, reading 0xc0c0 asserts
+ * BE[1:0] and this read returns non-zero 0x8872 value.
+ */
+ iowrite16(BE3 | BE2 | KS_CIDER, ks->hw_addr_cmd);
+ cider = ioread16(ks->hw_addr);
+ if (!cider)
+ return 0;
+
+ netdev_err(ks->netdev, "incorrect EESK endian strap setting\n");
+
+ return -EINVAL;
+}
+
+/**
* ks_rdreg16 - read 16 bit register from device
* @ks : The chip information
* @offset: The register address
@@ -166,7 +210,7 @@ static int msg_enable;
static u16 ks_rdreg16(struct ks_net *ks, int offset)
{
- ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02));
+ ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02));
iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
return ioread16(ks->hw_addr);
}
@@ -181,7 +225,7 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset)
static void ks_wrreg16(struct ks_net *ks, int offset, u16 value)
{
- ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02));
+ ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02));
iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
iowrite16(value, ks->hw_addr);
}
@@ -197,7 +241,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len)
{
len >>= 1;
while (len--)
- *wptr++ = be16_to_cpu(ioread16(ks->hw_addr));
+ *wptr++ = (u16)ioread16(ks->hw_addr);
}
/**
@@ -211,7 +255,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len)
{
len >>= 1;
while (len--)
- iowrite16(cpu_to_be16(*wptr++), ks->hw_addr);
+ iowrite16(*wptr++, ks->hw_addr);
}
static void ks_disable_int(struct ks_net *ks)
@@ -1218,6 +1262,10 @@ static int ks8851_probe(struct platform_device *pdev)
goto err_free;
}
+ err = ks_check_endian(ks);
+ if (err)
+ goto err_free;
+
netdev->irq = platform_get_irq(pdev, 0);
if ((int)netdev->irq < 0) {
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 86d543ab1ab9..d3b7373c5961 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -2176,24 +2176,29 @@ static int ocelot_init_timestamp(struct ocelot *ocelot)
return 0;
}
-static void ocelot_port_set_mtu(struct ocelot *ocelot, int port, size_t mtu)
+/* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.
+ * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.
+ */
+static void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
+ int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN;
int atop_wm;
- ocelot_port_writel(ocelot_port, mtu, DEV_MAC_MAXLEN_CFG);
+ ocelot_port_writel(ocelot_port, maxlen, DEV_MAC_MAXLEN_CFG);
/* Set Pause WM hysteresis
- * 152 = 6 * mtu / OCELOT_BUFFER_CELL_SZ
- * 101 = 4 * mtu / OCELOT_BUFFER_CELL_SZ
+ * 152 = 6 * maxlen / OCELOT_BUFFER_CELL_SZ
+ * 101 = 4 * maxlen / OCELOT_BUFFER_CELL_SZ
*/
ocelot_write_rix(ocelot, SYS_PAUSE_CFG_PAUSE_ENA |
SYS_PAUSE_CFG_PAUSE_STOP(101) |
SYS_PAUSE_CFG_PAUSE_START(152), SYS_PAUSE_CFG, port);
/* Tail dropping watermark */
- atop_wm = (ocelot->shared_queue_sz - 9 * mtu) / OCELOT_BUFFER_CELL_SZ;
- ocelot_write_rix(ocelot, ocelot_wm_enc(9 * mtu),
+ atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) /
+ OCELOT_BUFFER_CELL_SZ;
+ ocelot_write_rix(ocelot, ocelot_wm_enc(9 * maxlen),
SYS_ATOP, port);
ocelot_write(ocelot, ocelot_wm_enc(atop_wm), SYS_ATOP_TOT_CFG);
}
@@ -2222,9 +2227,10 @@ void ocelot_init_port(struct ocelot *ocelot, int port)
DEV_MAC_HDX_CFG);
/* Set Max Length and maximum tags allowed */
- ocelot_port_set_mtu(ocelot, port, VLAN_ETH_FRAME_LEN);
+ ocelot_port_set_maxlen(ocelot, port, ETH_DATA_LEN);
ocelot_port_writel(ocelot_port, DEV_MAC_TAGS_CFG_TAG_ID(ETH_P_8021AD) |
DEV_MAC_TAGS_CFG_VLAN_AWR_ENA |
+ DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA |
DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA,
DEV_MAC_TAGS_CFG);
@@ -2310,18 +2316,18 @@ void ocelot_set_cpu_port(struct ocelot *ocelot, int cpu,
* Only one port can be an NPI at the same time.
*/
if (cpu < ocelot->num_phys_ports) {
- int mtu = VLAN_ETH_FRAME_LEN + OCELOT_TAG_LEN;
+ int sdu = ETH_DATA_LEN + OCELOT_TAG_LEN;
ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M |
QSYS_EXT_CPU_CFG_EXT_CPU_PORT(cpu),
QSYS_EXT_CPU_CFG);
if (injection == OCELOT_TAG_PREFIX_SHORT)
- mtu += OCELOT_SHORT_PREFIX_LEN;
+ sdu += OCELOT_SHORT_PREFIX_LEN;
else if (injection == OCELOT_TAG_PREFIX_LONG)
- mtu += OCELOT_LONG_PREFIX_LEN;
+ sdu += OCELOT_LONG_PREFIX_LEN;
- ocelot_port_set_mtu(ocelot, cpu, mtu);
+ ocelot_port_set_maxlen(ocelot, cpu, sdu);
}
/* CPU port Injection/Extraction configuration */
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h
index e678ba379598..628fa9b2f741 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-config.h
+++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h
@@ -2045,7 +2045,7 @@ vxge_hw_vpath_strip_fcs_check(struct __vxge_hw_device *hldev, u64 vpath_mask);
if ((level >= VXGE_ERR && VXGE_COMPONENT_LL & VXGE_DEBUG_ERR_MASK) || \
(level >= VXGE_TRACE && VXGE_COMPONENT_LL & VXGE_DEBUG_TRACE_MASK))\
if ((mask & VXGE_DEBUG_MASK) == mask) \
- printk(fmt "\n", __VA_ARGS__); \
+ printk(fmt "\n", ##__VA_ARGS__); \
} while (0)
#else
#define vxge_debug_ll(level, mask, fmt, ...)
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h
index 59a57ff5e96a..9c86f4f9cd42 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.h
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h
@@ -452,49 +452,49 @@ int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override);
#if (VXGE_DEBUG_LL_CONFIG & VXGE_DEBUG_MASK)
#define vxge_debug_ll_config(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_ll_config(level, fmt, ...)
#endif
#if (VXGE_DEBUG_INIT & VXGE_DEBUG_MASK)
#define vxge_debug_init(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_init(level, fmt, ...)
#endif
#if (VXGE_DEBUG_TX & VXGE_DEBUG_MASK)
#define vxge_debug_tx(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_tx(level, fmt, ...)
#endif
#if (VXGE_DEBUG_RX & VXGE_DEBUG_MASK)
#define vxge_debug_rx(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_rx(level, fmt, ...)
#endif
#if (VXGE_DEBUG_MEM & VXGE_DEBUG_MASK)
#define vxge_debug_mem(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_mem(level, fmt, ...)
#endif
#if (VXGE_DEBUG_ENTRYEXIT & VXGE_DEBUG_MASK)
#define vxge_debug_entryexit(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_entryexit(level, fmt, ...)
#endif
#if (VXGE_DEBUG_INTR & VXGE_DEBUG_MASK)
#define vxge_debug_intr(level, fmt, ...) \
- vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, __VA_ARGS__)
+ vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, ##__VA_ARGS__)
#else
#define vxge_debug_intr(level, fmt, ...)
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
index b454db283aef..684e4e036c55 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -616,7 +616,7 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface)
if (bar->iomem) {
int pf;
- msg += snprintf(msg, end - msg, "0.0: General/MSI-X SRAM, ");
+ msg += scnprintf(msg, end - msg, "0.0: General/MSI-X SRAM, ");
atomic_inc(&bar->refcnt);
bars_free--;
@@ -661,7 +661,7 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface)
/* Configure, and lock, BAR0.1 for PCIe XPB (MSI-X PBA) */
bar = &nfp->bar[1];
- msg += snprintf(msg, end - msg, "0.1: PCIe XPB/MSI-X PBA, ");
+ msg += scnprintf(msg, end - msg, "0.1: PCIe XPB/MSI-X PBA, ");
atomic_inc(&bar->refcnt);
bars_free--;
@@ -680,8 +680,8 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface)
bar->iomem = ioremap(nfp_bar_resource_start(bar),
nfp_bar_resource_len(bar));
if (bar->iomem) {
- msg += snprintf(msg, end - msg,
- "0.%d: Explicit%d, ", 4 + i, i);
+ msg += scnprintf(msg, end - msg,
+ "0.%d: Explicit%d, ", 4 + i, i);
atomic_inc(&bar->refcnt);
bars_free--;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 54547d53b0f2..51adf5059834 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
/* Copyright (c) 2017-2019 Pensando Systems, Inc. All rights reserved. */
#ifndef _IONIC_IF_H_
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 191271f6260d..938e19ee0bcd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -948,18 +948,18 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
int i;
#define REMAIN(__x) (sizeof(buf) - (__x))
- i = snprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
- lif->rx_mode, rx_mode);
+ i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
+ lif->rx_mode, rx_mode);
if (rx_mode & IONIC_RX_MODE_F_UNICAST)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
if (rx_mode & IONIC_RX_MODE_F_PROMISC)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
- i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf);
err = ionic_adminq_post_wait(lif, &ctx);
@@ -1688,7 +1688,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
if (!(is_zero_ether_addr(mac) || is_valid_ether_addr(mac)))
return -EINVAL;
- down_read(&ionic->vf_op_lock);
+ down_write(&ionic->vf_op_lock);
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
@@ -1698,7 +1698,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
ether_addr_copy(ionic->vfs[vf].macaddr, mac);
}
- up_read(&ionic->vf_op_lock);
+ up_write(&ionic->vf_op_lock);
return ret;
}
@@ -1719,7 +1719,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
if (proto != htons(ETH_P_8021Q))
return -EPROTONOSUPPORT;
- down_read(&ionic->vf_op_lock);
+ down_write(&ionic->vf_op_lock);
if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
ret = -EINVAL;
@@ -1730,7 +1730,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
ionic->vfs[vf].vlanid = vlan;
}
- up_read(&ionic->vf_op_lock);
+ up_write(&ionic->vf_op_lock);
return ret;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_regs.h b/drivers/net/ethernet/pensando/ionic/ionic_regs.h
index 03ee5a36472b..2e174f45c030 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_regs.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_regs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
/* Copyright (c) 2018-2019 Pensando Systems, Inc. All rights reserved. */
#ifndef IONIC_REGS_H
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index 07f9067affc6..cda5b0a9e948 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -1720,7 +1720,7 @@ static int qlcnic_83xx_get_reset_instruction_template(struct qlcnic_adapter *p_d
ahw->reset.seq_error = 0;
ahw->reset.buff = kzalloc(QLC_83XX_RESTART_TEMPLATE_SIZE, GFP_KERNEL);
- if (p_dev->ahw->reset.buff == NULL)
+ if (ahw->reset.buff == NULL)
return -ENOMEM;
p_buff = p_dev->ahw->reset.buff;
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a2168a14794c..791d99b9e1cf 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5194,7 +5194,7 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
rtl_lock_config_regs(tp);
/* fall through */
- case RTL_GIGA_MAC_VER_07 ... RTL_GIGA_MAC_VER_24:
+ case RTL_GIGA_MAC_VER_07 ... RTL_GIGA_MAC_VER_17:
flags = PCI_IRQ_LEGACY;
break;
default:
@@ -5285,6 +5285,13 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
if (!tp->phydev) {
mdiobus_unregister(new_bus);
return -ENODEV;
+ } else if (!tp->phydev->drv) {
+ /* Most chip versions fail with the genphy driver.
+ * Therefore ensure that the dedicated PHY driver is loaded.
+ */
+ dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n");
+ mdiobus_unregister(new_bus);
+ return -EUNATCH;
}
/* PHY will be woken up in rtl_open() */
@@ -5446,15 +5453,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
int chipset, region;
int jumbo_max, rc;
- /* Some tools for creating an initramfs don't consider softdeps, then
- * r8169.ko may be in initramfs, but realtek.ko not. Then the generic
- * PHY driver is used that doesn't work with most chip versions.
- */
- if (!driver_find("RTL8201CP Ethernet", &mdio_bus_type)) {
- dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n");
- return -ENOENT;
- }
-
dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
if (!dev)
return -ENOMEM;
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index c705743d69f7..2cc8184b7e6b 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -2277,7 +2277,7 @@ static int __init sxgbe_cmdline_opt(char *str)
if (!str || !*str)
return -EINVAL;
while ((opt = strsep(&str, ",")) != NULL) {
- if (!strncmp(opt, "eee_timer:", 6)) {
+ if (!strncmp(opt, "eee_timer:", 10)) {
if (kstrtoint(opt + 10, 0, &eee_timer))
goto err;
}
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 52113b7529d6..3f16bd807c6e 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2853,11 +2853,24 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
}
/* Transmit timestamps are only available for 8XXX series. They result
- * in three events per packet. These occur in order, and are:
- * - the normal completion event
+ * in up to three events per packet. These occur in order, and are:
+ * - the normal completion event (may be omitted)
* - the low part of the timestamp
* - the high part of the timestamp
*
+ * It's possible for multiple completion events to appear before the
+ * corresponding timestamps. So we can for example get:
+ * COMP N
+ * COMP N+1
+ * TS_LO N
+ * TS_HI N
+ * TS_LO N+1
+ * TS_HI N+1
+ *
+ * In addition it's also possible for the adjacent completions to be
+ * merged, so we may not see COMP N above. As such, the completion
+ * events are not very useful here.
+ *
* Each part of the timestamp is itself split across two 16 bit
* fields in the event.
*/
@@ -2865,17 +2878,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
switch (tx_ev_type) {
case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION:
- /* In case of Queue flush or FLR, we might have received
- * the previous TX completion event but not the Timestamp
- * events.
- */
- if (tx_queue->completed_desc_ptr != tx_queue->ptr_mask)
- efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr);
-
- tx_ev_desc_ptr = EFX_QWORD_FIELD(*event,
- ESF_DZ_TX_DESCR_INDX);
- tx_queue->completed_desc_ptr =
- tx_ev_desc_ptr & tx_queue->ptr_mask;
+ /* Ignore this event - see above. */
break;
case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO:
@@ -2887,8 +2890,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
ts_part = efx_ef10_extract_event_ts(event);
tx_queue->completed_timestamp_major = ts_part;
- efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr);
- tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
+ efx_xmit_done_single(tx_queue);
break;
default:
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index f1bdb04efbe4..95395d67ea2d 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -20,6 +20,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
struct net_device *net_dev);
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+void efx_xmit_done_single(struct efx_tx_queue *tx_queue);
int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
void *type_data);
extern unsigned int efx_piobuf_size;
diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
index aeb5e8aa2f2a..73d4e39b5b16 100644
--- a/drivers/net/ethernet/sfc/efx_channels.c
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -583,6 +583,7 @@ struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
if (tx_queue->channel)
tx_queue->channel = channel;
tx_queue->buffer = NULL;
+ tx_queue->cb_page = NULL;
memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
}
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 2713300343c7..15c731d04065 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -212,12 +212,14 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd,
* progress on a NIC at any one time. So no need for locking.
*/
for (i = 0; i < hdr_len / 4 && bytes < PAGE_SIZE; i++)
- bytes += snprintf(buf + bytes, PAGE_SIZE - bytes,
- " %08x", le32_to_cpu(hdr[i].u32[0]));
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x",
+ le32_to_cpu(hdr[i].u32[0]));
for (i = 0; i < inlen / 4 && bytes < PAGE_SIZE; i++)
- bytes += snprintf(buf + bytes, PAGE_SIZE - bytes,
- " %08x", le32_to_cpu(inbuf[i].u32[0]));
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x",
+ le32_to_cpu(inbuf[i].u32[0]));
netif_info(efx, hw, efx->net_dev, "MCDI RPC REQ:%s\n", buf);
}
@@ -302,15 +304,15 @@ static void efx_mcdi_read_response_header(struct efx_nic *efx)
*/
for (i = 0; i < hdr_len && bytes < PAGE_SIZE; i++) {
efx->type->mcdi_read_response(efx, &hdr, (i * 4), 4);
- bytes += snprintf(buf + bytes, PAGE_SIZE - bytes,
- " %08x", le32_to_cpu(hdr.u32[0]));
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x", le32_to_cpu(hdr.u32[0]));
}
for (i = 0; i < data_len && bytes < PAGE_SIZE; i++) {
efx->type->mcdi_read_response(efx, &hdr,
mcdi->resp_hdr_len + (i * 4), 4);
- bytes += snprintf(buf + bytes, PAGE_SIZE - bytes,
- " %08x", le32_to_cpu(hdr.u32[0]));
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x", le32_to_cpu(hdr.u32[0]));
}
netif_info(efx, hw, efx->net_dev, "MCDI RPC RESP:%s\n", buf);
@@ -1417,9 +1419,11 @@ void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len)
}
ver_words = (__le16 *)MCDI_PTR(outbuf, GET_VERSION_OUT_VERSION);
- offset = snprintf(buf, len, "%u.%u.%u.%u",
- le16_to_cpu(ver_words[0]), le16_to_cpu(ver_words[1]),
- le16_to_cpu(ver_words[2]), le16_to_cpu(ver_words[3]));
+ offset = scnprintf(buf, len, "%u.%u.%u.%u",
+ le16_to_cpu(ver_words[0]),
+ le16_to_cpu(ver_words[1]),
+ le16_to_cpu(ver_words[2]),
+ le16_to_cpu(ver_words[3]));
/* EF10 may have multiple datapath firmware variants within a
* single version. Report which variants are running.
@@ -1427,9 +1431,9 @@ void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len)
if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) {
struct efx_ef10_nic_data *nic_data = efx->nic_data;
- offset += snprintf(buf + offset, len - offset, " rx%x tx%x",
- nic_data->rx_dpcpu_fw_id,
- nic_data->tx_dpcpu_fw_id);
+ offset += scnprintf(buf + offset, len - offset, " rx%x tx%x",
+ nic_data->rx_dpcpu_fw_id,
+ nic_data->tx_dpcpu_fw_id);
/* It's theoretically possible for the string to exceed 31
* characters, though in practice the first three version
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 9f9886f222c8..8164f0edcbf0 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -208,8 +208,6 @@ struct efx_tx_buffer {
* avoid cache-line ping-pong between the xmit path and the
* completion path.
* @merge_events: Number of TX merged completion events
- * @completed_desc_ptr: Most recent completed pointer - only used with
- * timestamping.
* @completed_timestamp_major: Top part of the most recent tx timestamp.
* @completed_timestamp_minor: Low part of the most recent tx timestamp.
* @insert_count: Current insert pointer
@@ -269,7 +267,6 @@ struct efx_tx_queue {
unsigned int merge_events;
unsigned int bytes_compl;
unsigned int pkts_compl;
- unsigned int completed_desc_ptr;
u32 completed_timestamp_major;
u32 completed_timestamp_minor;
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 04d7f41d7ed9..8aafc54a4684 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -535,6 +535,44 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
return efx_enqueue_skb(tx_queue, skb);
}
+void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
+{
+ unsigned int pkts_compl = 0, bytes_compl = 0;
+ unsigned int read_ptr;
+ bool finished = false;
+
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+
+ while (!finished) {
+ struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
+
+ if (!efx_tx_buffer_in_use(buffer)) {
+ struct efx_nic *efx = tx_queue->efx;
+
+ netif_err(efx, hw, efx->net_dev,
+ "TX queue %d spurious single TX completion\n",
+ tx_queue->queue);
+ efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+ return;
+ }
+
+ /* Need to check the flag before dequeueing. */
+ if (buffer->flags & EFX_TX_BUF_SKB)
+ finished = true;
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+
+ ++tx_queue->read_count;
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+ }
+
+ tx_queue->pkts_compl += pkts_compl;
+ tx_queue->bytes_compl += bytes_compl;
+
+ EFX_WARN_ON_PARANOID(pkts_compl != 1);
+
+ efx_xmit_done_check_empty(tx_queue);
+}
+
void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
{
struct efx_nic *efx = tx_queue->efx;
diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
index b1571e9789d0..70876df1da69 100644
--- a/drivers/net/ethernet/sfc/tx_common.c
+++ b/drivers/net/ethernet/sfc/tx_common.c
@@ -80,7 +80,6 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
tx_queue->xmit_more_available = false;
tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
tx_queue->channel == efx_ptp_channel(efx));
- tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
tx_queue->completed_timestamp_major = 0;
tx_queue->completed_timestamp_minor = 0;
@@ -210,10 +209,9 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
while (read_ptr != stop_index) {
struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
- if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
- unlikely(buffer->len == 0)) {
+ if (!efx_tx_buffer_in_use(buffer)) {
netif_err(efx, tx_err, efx->net_dev,
- "TX queue %d spurious TX completion id %x\n",
+ "TX queue %d spurious TX completion id %d\n",
tx_queue->queue, read_ptr);
efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
return;
@@ -226,6 +224,19 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
}
}
+void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue)
+{
+ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
+ tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
+ if (tx_queue->read_count == tx_queue->old_write_count) {
+ /* Ensure that read_count is flushed. */
+ smp_mb();
+ tx_queue->empty_read_count =
+ tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
+ }
+ }
+}
+
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
@@ -256,15 +267,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
netif_tx_wake_queue(tx_queue->core_txq);
}
- /* Check whether the hardware queue is now empty */
- if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
- tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
- if (tx_queue->read_count == tx_queue->old_write_count) {
- smp_mb();
- tx_queue->empty_read_count =
- tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
- }
- }
+ efx_xmit_done_check_empty(tx_queue);
}
/* Remove buffers put into a tx_queue for the current packet.
diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h
index f92f1fe3a87f..99cf7ce2f36c 100644
--- a/drivers/net/ethernet/sfc/tx_common.h
+++ b/drivers/net/ethernet/sfc/tx_common.h
@@ -21,6 +21,12 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
unsigned int *pkts_compl,
unsigned int *bytes_compl);
+static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer)
+{
+ return buffer->len || (buffer->flags & EFX_TX_BUF_OPTION);
+}
+
+void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue);
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index dc50ba13a746..2d5573b3dee1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1411,7 +1411,7 @@ static int rk_gmac_probe(struct platform_device *pdev)
ret = rk_gmac_clk_init(plat_dat);
if (ret)
- return ret;
+ goto err_remove_config_dt;
ret = rk_gmac_powerup(plat_dat->bsp_priv);
if (ret)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index d0356fbd1e43..542784300620 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -24,6 +24,7 @@
static void dwmac1000_core_init(struct mac_device_info *hw,
struct net_device *dev)
{
+ struct stmmac_priv *priv = netdev_priv(dev);
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + GMAC_CONTROL);
int mtu = dev->mtu;
@@ -35,7 +36,7 @@ static void dwmac1000_core_init(struct mac_device_info *hw,
* Broadcom tags can look like invalid LLC/SNAP packets and cause the
* hardware to truncate packets on reception.
*/
- if (netdev_uses_dsa(dev))
+ if (netdev_uses_dsa(dev) || !priv->plat->enh_desc)
value &= ~GMAC_CONTROL_ACS;
if (mtu > 1500)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index d10ac54bf385..13fafd905db8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -663,16 +663,22 @@ int stmmac_get_platform_resources(struct platform_device *pdev,
* In case the wake up interrupt is not passed from the platform
* so the driver will continue to use the mac irq (ndev->irq)
*/
- stmmac_res->wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq");
+ stmmac_res->wol_irq =
+ platform_get_irq_byname_optional(pdev, "eth_wake_irq");
if (stmmac_res->wol_irq < 0) {
if (stmmac_res->wol_irq == -EPROBE_DEFER)
return -EPROBE_DEFER;
+ dev_info(&pdev->dev, "IRQ eth_wake_irq not found\n");
stmmac_res->wol_irq = stmmac_res->irq;
}
- stmmac_res->lpi_irq = platform_get_irq_byname(pdev, "eth_lpi");
- if (stmmac_res->lpi_irq == -EPROBE_DEFER)
- return -EPROBE_DEFER;
+ stmmac_res->lpi_irq =
+ platform_get_irq_byname_optional(pdev, "eth_lpi");
+ if (stmmac_res->lpi_irq < 0) {
+ if (stmmac_res->lpi_irq == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+ dev_info(&pdev->dev, "IRQ eth_lpi not found\n");
+ }
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
stmmac_res->addr = devm_ioremap_resource(&pdev->dev, res);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 75757e9954ba..09f279c0182b 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1845,8 +1845,6 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
if (!net_eq(dev_net(geneve->dev), net))
unregister_netdevice_queue(geneve->dev, head);
}
-
- WARN_ON_ONCE(!list_empty(&gn->sock_list));
}
static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
@@ -1861,6 +1859,12 @@ static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
/* unregister the devices gathered above */
unregister_netdevice_many(&list);
rtnl_unlock();
+
+ list_for_each_entry(net, net_list, exit_list) {
+ const struct geneve_net *gn = net_generic(net, geneve_net_id);
+
+ WARN_ON_ONCE(!list_empty(&gn->sock_list));
+ }
}
static struct pernet_operations geneve_net_ops = {
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 242b9b0943f8..7fe306e76281 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -75,7 +75,7 @@ static void ifb_ri_tasklet(unsigned long _txp)
}
while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
- skb->tc_redirected = 0;
+ skb->redirected = 0;
skb->tc_skip_classify = 1;
u64_stats_update_begin(&txp->tsync);
@@ -96,7 +96,7 @@ static void ifb_ri_tasklet(unsigned long _txp)
rcu_read_unlock();
skb->skb_iif = txp->dev->ifindex;
- if (!skb->tc_from_ingress) {
+ if (!skb->from_ingress) {
dev_queue_xmit(skb);
} else {
skb_pull_rcsum(skb, skb->mac_len);
@@ -243,7 +243,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
txp->rx_bytes += skb->len;
u64_stats_update_end(&txp->rsync);
- if (!skb->tc_redirected || !skb->skb_iif) {
+ if (!skb->redirected || !skb->skb_iif) {
dev_kfree_skb(skb);
dev->stats.rx_dropped++;
return NETDEV_TX_OK;
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 30cd0c4f0be0..8801d093135c 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -293,6 +293,7 @@ void ipvlan_process_multicast(struct work_struct *work)
}
if (dev)
dev_put(dev);
+ cond_resched();
}
}
@@ -498,19 +499,21 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
struct ethhdr *ethh = eth_hdr(skb);
int ret = NET_XMIT_DROP;
- /* In this mode we dont care about multicast and broadcast traffic */
- if (is_multicast_ether_addr(ethh->h_dest)) {
- pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
- ntohs(skb->protocol));
- kfree_skb(skb);
- goto out;
- }
-
/* The ipvlan is a pseudo-L2 device, so the packets that we receive
* will have L2; which need to discarded and processed further
* in the net-ns of the main-device.
*/
if (skb_mac_header_was_set(skb)) {
+ /* In this mode we dont care about
+ * multicast and broadcast traffic */
+ if (is_multicast_ether_addr(ethh->h_dest)) {
+ pr_debug_ratelimited(
+ "Dropped {multi|broad}cast of type=[%x]\n",
+ ntohs(skb->protocol));
+ kfree_skb(skb);
+ goto out;
+ }
+
skb_pull(skb, sizeof(*ethh));
skb->mac_header = (typeof(skb->mac_header))~0U;
skb_reset_network_header(skb);
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index a70662261a5a..f195f278a83a 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -164,7 +164,6 @@ static void ipvlan_uninit(struct net_device *dev)
static int ipvlan_open(struct net_device *dev)
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
- struct net_device *phy_dev = ipvlan->phy_dev;
struct ipvl_addr *addr;
if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
@@ -178,7 +177,7 @@ static int ipvlan_open(struct net_device *dev)
ipvlan_ht_addr_add(ipvlan, addr);
rcu_read_unlock();
- return dev_uc_add(phy_dev, phy_dev->dev_addr);
+ return 0;
}
static int ipvlan_stop(struct net_device *dev)
@@ -190,8 +189,6 @@ static int ipvlan_stop(struct net_device *dev)
dev_uc_unsync(phy_dev, dev);
dev_mc_unsync(phy_dev, dev);
- dev_uc_del(phy_dev, phy_dev->dev_addr);
-
rcu_read_lock();
list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_del(addr);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 45bfd99f17fa..92bc2b2df660 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -19,6 +19,7 @@
#include <net/gro_cells.h>
#include <net/macsec.h>
#include <linux/phy.h>
+#include <linux/if_arp.h>
#include <uapi/linux/if_macsec.h>
@@ -424,6 +425,11 @@ static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb)
return (struct macsec_eth_header *)skb_mac_header(skb);
}
+static sci_t dev_to_sci(struct net_device *dev, __be16 port)
+{
+ return make_sci(dev->dev_addr, port);
+}
+
static void __macsec_pn_wrapped(struct macsec_secy *secy,
struct macsec_tx_sa *tx_sa)
{
@@ -3268,6 +3274,20 @@ static int macsec_set_mac_address(struct net_device *dev, void *p)
out:
ether_addr_copy(dev->dev_addr, addr->sa_data);
+ macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES);
+
+ /* If h/w offloading is available, propagate to the device */
+ if (macsec_is_offloaded(macsec)) {
+ const struct macsec_ops *ops;
+ struct macsec_context ctx;
+
+ ops = macsec_get_ops(macsec, &ctx);
+ if (ops) {
+ ctx.secy = &macsec->secy;
+ macsec_offload(ops->mdo_upd_secy, &ctx);
+ }
+ }
+
return 0;
}
@@ -3342,6 +3362,7 @@ static const struct device_type macsec_type = {
static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = {
[IFLA_MACSEC_SCI] = { .type = NLA_U64 },
+ [IFLA_MACSEC_PORT] = { .type = NLA_U16 },
[IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 },
[IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 },
[IFLA_MACSEC_WINDOW] = { .type = NLA_U32 },
@@ -3592,11 +3613,6 @@ static bool sci_exists(struct net_device *dev, sci_t sci)
return false;
}
-static sci_t dev_to_sci(struct net_device *dev, __be16 port)
-{
- return make_sci(dev->dev_addr, port);
-}
-
static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len)
{
struct macsec_dev *macsec = macsec_priv(dev);
@@ -3650,6 +3666,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
if (!real_dev)
return -ENODEV;
+ if (real_dev->type != ARPHRD_ETHER)
+ return -EINVAL;
dev->priv_flags |= IFF_MACSEC;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 81aa7adf4801..e7289d67268f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -334,6 +334,8 @@ static void macvlan_process_broadcast(struct work_struct *w)
if (src)
dev_put(src->dev);
consume_skb(skb);
+
+ cond_resched();
}
}
diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c
index e27fc1a4516d..3811f1bde84e 100644
--- a/drivers/net/netdevsim/ipsec.c
+++ b/drivers/net/netdevsim/ipsec.c
@@ -29,9 +29,9 @@ static ssize_t nsim_dbg_netdev_ops_read(struct file *filp,
return -ENOMEM;
p = buf;
- p += snprintf(p, bufsize - (p - buf),
- "SA count=%u tx=%u\n",
- ipsec->count, ipsec->tx);
+ p += scnprintf(p, bufsize - (p - buf),
+ "SA count=%u tx=%u\n",
+ ipsec->count, ipsec->tx);
for (i = 0; i < NSIM_IPSEC_MAX_SA_COUNT; i++) {
struct nsim_sa *sap = &ipsec->sa[i];
@@ -39,18 +39,18 @@ static ssize_t nsim_dbg_netdev_ops_read(struct file *filp,
if (!sap->used)
continue;
- p += snprintf(p, bufsize - (p - buf),
- "sa[%i] %cx ipaddr=0x%08x %08x %08x %08x\n",
- i, (sap->rx ? 'r' : 't'), sap->ipaddr[0],
- sap->ipaddr[1], sap->ipaddr[2], sap->ipaddr[3]);
- p += snprintf(p, bufsize - (p - buf),
- "sa[%i] spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n",
- i, be32_to_cpu(sap->xs->id.spi),
- sap->xs->id.proto, sap->salt, sap->crypt);
- p += snprintf(p, bufsize - (p - buf),
- "sa[%i] key=0x%08x %08x %08x %08x\n",
- i, sap->key[0], sap->key[1],
- sap->key[2], sap->key[3]);
+ p += scnprintf(p, bufsize - (p - buf),
+ "sa[%i] %cx ipaddr=0x%08x %08x %08x %08x\n",
+ i, (sap->rx ? 'r' : 't'), sap->ipaddr[0],
+ sap->ipaddr[1], sap->ipaddr[2], sap->ipaddr[3]);
+ p += scnprintf(p, bufsize - (p - buf),
+ "sa[%i] spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n",
+ i, be32_to_cpu(sap->xs->id.spi),
+ sap->xs->id.proto, sap->salt, sap->crypt);
+ p += scnprintf(p, bufsize - (p - buf),
+ "sa[%i] key=0x%08x %08x %08x %08x\n",
+ i, sap->key[0], sap->key[1],
+ sap->key[2], sap->key[3]);
}
len = simple_read_from_buffer(buffer, count, ppos, buf, p - buf);
diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c
index 23f1958ba6ad..459fb2069c7e 100644
--- a/drivers/net/phy/bcm63xx.c
+++ b/drivers/net/phy/bcm63xx.c
@@ -73,6 +73,7 @@ static struct phy_driver bcm63xx_driver[] = {
/* same phy as above, with just a different OUI */
.phy_id = 0x002bdc00,
.phy_id_mask = 0xfffffc00,
+ .name = "Broadcom BCM63XX (2)",
/* PHY_BASIC_FEATURES */
.flags = PHY_IS_INTERNAL,
.config_init = bcm63xx_config_init,
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 967f57ed0b65..9a07ad137c2e 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -28,7 +28,8 @@
#define DP83867_CTRL 0x1f
/* Extended Registers */
-#define DP83867_CFG4 0x0031
+#define DP83867_FLD_THR_CFG 0x002e
+#define DP83867_CFG4 0x0031
#define DP83867_CFG4_SGMII_ANEG_MASK (BIT(5) | BIT(6))
#define DP83867_CFG4_SGMII_ANEG_TIMER_11MS (3 << 5)
#define DP83867_CFG4_SGMII_ANEG_TIMER_800US (2 << 5)
@@ -91,6 +92,7 @@
#define DP83867_STRAP_STS2_CLK_SKEW_RX_MASK GENMASK(2, 0)
#define DP83867_STRAP_STS2_CLK_SKEW_RX_SHIFT 0
#define DP83867_STRAP_STS2_CLK_SKEW_NONE BIT(2)
+#define DP83867_STRAP_STS2_STRAP_FLD BIT(10)
/* PHY CTRL bits */
#define DP83867_PHYCR_TX_FIFO_DEPTH_SHIFT 14
@@ -125,6 +127,9 @@
/* CFG4 bits */
#define DP83867_CFG4_PORT_MIRROR_EN BIT(0)
+/* FLD_THR_CFG */
+#define DP83867_FLD_THR_CFG_ENERGY_LOST_THR_MASK 0x7
+
enum {
DP83867_PORT_MIRROING_KEEP,
DP83867_PORT_MIRROING_EN,
@@ -476,6 +481,20 @@ static int dp83867_config_init(struct phy_device *phydev)
phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4,
BIT(7));
+ bs = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_STRAP_STS2);
+ if (bs & DP83867_STRAP_STS2_STRAP_FLD) {
+ /* When using strap to enable FLD, the ENERGY_LOST_FLD_THR will
+ * be set to 0x2. This may causes the PHY link to be unstable -
+ * the default value 0x1 need to be restored.
+ */
+ ret = phy_modify_mmd(phydev, DP83867_DEVADDR,
+ DP83867_FLD_THR_CFG,
+ DP83867_FLD_THR_CFG_ENERGY_LOST_THR_MASK,
+ 0x1);
+ if (ret)
+ return ret;
+ }
+
if (phy_interface_is_rgmii(phydev) ||
phydev->interface == PHY_INTERFACE_MODE_SGMII) {
val = phy_read(phydev, MII_DP83867_PHYCTRL);
diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c
index 4a28fb29adaa..fbd36891ee64 100644
--- a/drivers/net/phy/mdio-bcm-unimac.c
+++ b/drivers/net/phy/mdio-bcm-unimac.c
@@ -242,11 +242,9 @@ static int unimac_mdio_probe(struct platform_device *pdev)
return -ENOMEM;
}
- priv->clk = devm_clk_get(&pdev->dev, NULL);
- if (PTR_ERR(priv->clk) == -EPROBE_DEFER)
+ priv->clk = devm_clk_get_optional(&pdev->dev, NULL);
+ if (IS_ERR(priv->clk))
return PTR_ERR(priv->clk);
- else
- priv->clk = NULL;
ret = clk_prepare_enable(priv->clk);
if (ret)
diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c
index 88d409e48c1f..aad6809ebe39 100644
--- a/drivers/net/phy/mdio-mux-bcm-iproc.c
+++ b/drivers/net/phy/mdio-mux-bcm-iproc.c
@@ -288,8 +288,13 @@ static int mdio_mux_iproc_suspend(struct device *dev)
static int mdio_mux_iproc_resume(struct device *dev)
{
struct iproc_mdiomux_desc *md = dev_get_drvdata(dev);
+ int rc;
- clk_prepare_enable(md->core_clk);
+ rc = clk_prepare_enable(md->core_clk);
+ if (rc) {
+ dev_err(md->dev, "failed to enable core clk\n");
+ return rc;
+ }
mdio_mux_iproc_config(md);
return 0;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index d76e038cf2cb..355bfdef48d2 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -727,7 +727,8 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
phy_trigger_machine(phydev);
}
- if (phy_clear_interrupt(phydev))
+ /* did_interrupt() may have cleared the interrupt already */
+ if (!phydev->drv->did_interrupt && phy_clear_interrupt(phydev))
goto phy_err;
return IRQ_HANDLED;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index c8b0c34030d3..28e3c5c0e3c3 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -286,6 +286,8 @@ static int mdio_bus_phy_suspend(struct device *dev)
if (!mdio_bus_phy_may_suspend(phydev))
return 0;
+ phydev->suspended_by_mdio_bus = 1;
+
return phy_suspend(phydev);
}
@@ -294,9 +296,11 @@ static int mdio_bus_phy_resume(struct device *dev)
struct phy_device *phydev = to_phy_device(dev);
int ret;
- if (!mdio_bus_phy_may_suspend(phydev))
+ if (!phydev->suspended_by_mdio_bus)
goto no_resume;
+ phydev->suspended_by_mdio_bus = 0;
+
ret = phy_resume(phydev);
if (ret < 0)
return ret;
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 70b9a143db84..6e66b8e77ec7 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -761,8 +761,14 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy,
config.interface = interface;
ret = phylink_validate(pl, supported, &config);
- if (ret)
+ if (ret) {
+ phylink_warn(pl, "validation of %s with support %*pb and advertisement %*pb failed: %d\n",
+ phy_modes(config.interface),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, phy->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising,
+ ret);
return ret;
+ }
phy->phylink = pl;
phy->phy_link_change = phylink_phy_change;
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index d949ea7b4f8c..6900c68260e0 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -572,13 +572,15 @@ static void sfp_upstream_clear(struct sfp_bus *bus)
* the sfp_bus structure, incrementing its reference count. This must
* be put via sfp_bus_put() when done.
*
- * Returns: on success, a pointer to the sfp_bus structure,
- * %NULL if no SFP is specified,
- * on failure, an error pointer value:
- * corresponding to the errors detailed for
- * fwnode_property_get_reference_args().
- * %-ENOMEM if we failed to allocate the bus.
- * an error from the upstream's connect_phy() method.
+ * Returns:
+ * - on success, a pointer to the sfp_bus structure,
+ * - %NULL if no SFP is specified,
+ * - on failure, an error pointer value:
+ *
+ * - corresponding to the errors detailed for
+ * fwnode_property_get_reference_args().
+ * - %-ENOMEM if we failed to allocate the bus.
+ * - an error from the upstream's connect_phy() method.
*/
struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode)
{
@@ -612,13 +614,15 @@ EXPORT_SYMBOL_GPL(sfp_bus_find_fwnode);
* the SFP bus using sfp_register_upstream(). This takes a reference on the
* bus, so it is safe to put the bus after this call.
*
- * Returns: on success, a pointer to the sfp_bus structure,
- * %NULL if no SFP is specified,
- * on failure, an error pointer value:
- * corresponding to the errors detailed for
- * fwnode_property_get_reference_args().
- * %-ENOMEM if we failed to allocate the bus.
- * an error from the upstream's connect_phy() method.
+ * Returns:
+ * - on success, a pointer to the sfp_bus structure,
+ * - %NULL if no SFP is specified,
+ * - on failure, an error pointer value:
+ *
+ * - corresponding to the errors detailed for
+ * fwnode_property_get_reference_args().
+ * - %-ENOMEM if we failed to allocate the bus.
+ * - an error from the upstream's connect_phy() method.
*/
int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream,
const struct sfp_upstream_ops *ops)
diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
index 58a69f830d29..f78ceba42e57 100644
--- a/drivers/net/slip/slhc.c
+++ b/drivers/net/slip/slhc.c
@@ -232,7 +232,7 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize,
struct cstate *cs = lcs->next;
unsigned long deltaS, deltaA;
short changes = 0;
- int hlen;
+ int nlen, hlen;
unsigned char new_seq[16];
unsigned char *cp = new_seq;
struct iphdr *ip;
@@ -248,6 +248,8 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize,
return isize;
ip = (struct iphdr *) icp;
+ if (ip->version != 4 || ip->ihl < 5)
+ return isize;
/* Bail if this packet isn't TCP, or is an IP fragment */
if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) {
@@ -258,10 +260,14 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize,
comp->sls_o_tcp++;
return isize;
}
- /* Extract TCP header */
+ nlen = ip->ihl * 4;
+ if (isize < nlen + sizeof(*th))
+ return isize;
- th = (struct tcphdr *)(((unsigned char *)ip) + ip->ihl*4);
- hlen = ip->ihl*4 + th->doff*4;
+ th = (struct tcphdr *)(icp + nlen);
+ if (th->doff < sizeof(struct tcphdr) / 4)
+ return isize;
+ hlen = nlen + th->doff * 4;
/* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or
* some other control bit is set). Also uncompressible if
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index ca70a1d840eb..4004f98e50d9 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2240,6 +2240,8 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = {
[TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG },
[TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 },
[TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY },
+ [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 },
+ [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32 },
};
static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 5754bb6ca0ee..6c738a271257 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1210,6 +1210,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x1435, 0xd182, 5)}, /* Wistron NeWeb D18 */
{QMI_FIXED_INTF(0x1435, 0xd191, 4)}, /* Wistron NeWeb D19Q1 */
{QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */
+ {QMI_FIXED_INTF(0x1690, 0x7588, 4)}, /* ASKEY WWHC050 */
{QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */
{QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */
{QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 78ddbaf6401b..95b19ce96513 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3221,6 +3221,8 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired)
}
msleep(20);
+ if (test_bit(RTL8152_UNPLUG, &tp->flags))
+ break;
}
return data;
@@ -5402,7 +5404,10 @@ static void r8153_init(struct r8152 *tp)
if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
AUTOLOAD_DONE)
break;
+
msleep(20);
+ if (test_bit(RTL8152_UNPLUG, &tp->flags))
+ break;
}
data = r8153_phy_status(tp, 0);
@@ -5539,7 +5544,10 @@ static void r8153b_init(struct r8152 *tp)
if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
AUTOLOAD_DONE)
break;
+
msleep(20);
+ if (test_bit(RTL8152_UNPLUG, &tp->flags))
+ break;
}
data = r8153_phy_status(tp, 0);
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 8cdc4415fa70..d4cbb9e8c63f 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -328,7 +328,7 @@ static void veth_get_stats64(struct net_device *dev,
rcu_read_lock();
peer = rcu_dereference(priv->peer);
if (peer) {
- tot->rx_dropped += veth_stats_tx(peer, &packets, &bytes);
+ veth_stats_tx(peer, &packets, &bytes);
tot->rx_bytes += bytes;
tot->rx_packets += packets;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index d3b08b76b1ec..45308b3350cf 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2779,10 +2779,19 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
/* Setup stats when device is created */
static int vxlan_init(struct net_device *dev)
{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ int err;
+
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ err = gro_cells_init(&vxlan->gro_cells, dev);
+ if (err) {
+ free_percpu(dev->tstats);
+ return err;
+ }
+
return 0;
}
@@ -3043,8 +3052,6 @@ static void vxlan_setup(struct net_device *dev)
vxlan->dev = dev;
- gro_cells_init(&vxlan->gro_cells, dev);
-
for (h = 0; h < FDB_HASH_SIZE; ++h) {
spin_lock_init(&vxlan->hash_lock[h]);
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index cdc96968b0f4..3ac3f8570ca1 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -122,7 +122,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
u32 mtu;
int ret;
- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
+ if (unlikely(!wg_check_packet_protocol(skb))) {
ret = -EPROTONOSUPPORT;
net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
goto err;
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
index bda26405497c..802099c8828a 100644
--- a/drivers/net/wireguard/netlink.c
+++ b/drivers/net/wireguard/netlink.c
@@ -411,11 +411,7 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs)
peer = wg_peer_create(wg, public_key, preshared_key);
if (IS_ERR(peer)) {
- /* Similar to the above, if the key is invalid, we skip
- * it without fanfare, so that services don't need to
- * worry about doing key validation themselves.
- */
- ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer);
+ ret = PTR_ERR(peer);
peer = NULL;
goto out;
}
@@ -569,7 +565,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
private_key);
list_for_each_entry_safe(peer, temp, &wg->peer_list,
peer_list) {
- BUG_ON(!wg_noise_precompute_static_static(peer));
+ wg_noise_precompute_static_static(peer);
wg_noise_expire_current_peer_keypairs(peer);
}
wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index 919d9d866446..708dc61c974f 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -44,32 +44,23 @@ void __init wg_noise_init(void)
}
/* Must hold peer->handshake.static_identity->lock */
-bool wg_noise_precompute_static_static(struct wg_peer *peer)
+void wg_noise_precompute_static_static(struct wg_peer *peer)
{
- bool ret;
-
down_write(&peer->handshake.lock);
- if (peer->handshake.static_identity->has_identity) {
- ret = curve25519(
- peer->handshake.precomputed_static_static,
+ if (!peer->handshake.static_identity->has_identity ||
+ !curve25519(peer->handshake.precomputed_static_static,
peer->handshake.static_identity->static_private,
- peer->handshake.remote_static);
- } else {
- u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
-
- ret = curve25519(empty, empty, peer->handshake.remote_static);
+ peer->handshake.remote_static))
memset(peer->handshake.precomputed_static_static, 0,
NOISE_PUBLIC_KEY_LEN);
- }
up_write(&peer->handshake.lock);
- return ret;
}
-bool wg_noise_handshake_init(struct noise_handshake *handshake,
- struct noise_static_identity *static_identity,
- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
- struct wg_peer *peer)
+void wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer)
{
memset(handshake, 0, sizeof(*handshake));
init_rwsem(&handshake->lock);
@@ -81,7 +72,7 @@ bool wg_noise_handshake_init(struct noise_handshake *handshake,
NOISE_SYMMETRIC_KEY_LEN);
handshake->static_identity = static_identity;
handshake->state = HANDSHAKE_ZEROED;
- return wg_noise_precompute_static_static(peer);
+ wg_noise_precompute_static_static(peer);
}
static void handshake_zero(struct noise_handshake *handshake)
@@ -403,6 +394,19 @@ static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
return true;
}
+static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN],
+ u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ const u8 precomputed[NOISE_PUBLIC_KEY_LEN])
+{
+ static u8 zero_point[NOISE_PUBLIC_KEY_LEN];
+ if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN)))
+ return false;
+ kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN,
+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
+ chaining_key);
+ return true;
+}
+
static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
{
struct blake2s_state blake;
@@ -531,10 +535,9 @@ wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
/* ss */
- kdf(handshake->chaining_key, key, NULL,
- handshake->precomputed_static_static, NOISE_HASH_LEN,
- NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
- handshake->chaining_key);
+ if (!mix_precomputed_dh(handshake->chaining_key, key,
+ handshake->precomputed_static_static))
+ goto out;
/* {t} */
tai64n_now(timestamp);
@@ -595,9 +598,9 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
handshake = &peer->handshake;
/* ss */
- kdf(chaining_key, key, NULL, handshake->precomputed_static_static,
- NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
- chaining_key);
+ if (!mix_precomputed_dh(chaining_key, key,
+ handshake->precomputed_static_static))
+ goto out;
/* {t} */
if (!message_decrypt(t, src->encrypted_timestamp,
diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h
index 138a07bb817c..f532d59d3f19 100644
--- a/drivers/net/wireguard/noise.h
+++ b/drivers/net/wireguard/noise.h
@@ -94,11 +94,11 @@ struct noise_handshake {
struct wg_device;
void wg_noise_init(void);
-bool wg_noise_handshake_init(struct noise_handshake *handshake,
- struct noise_static_identity *static_identity,
- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
- struct wg_peer *peer);
+void wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer);
void wg_noise_handshake_clear(struct noise_handshake *handshake);
static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
{
@@ -116,7 +116,7 @@ void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);
void wg_noise_set_static_identity_private_key(
struct noise_static_identity *static_identity,
const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
-bool wg_noise_precompute_static_static(struct wg_peer *peer);
+void wg_noise_precompute_static_static(struct wg_peer *peer);
bool
wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
index 071eedf33f5a..1d634bd3038f 100644
--- a/drivers/net/wireguard/peer.c
+++ b/drivers/net/wireguard/peer.c
@@ -34,11 +34,8 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
return ERR_PTR(ret);
peer->device = wg;
- if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
- public_key, preshared_key, peer)) {
- ret = -EKEYREJECTED;
- goto err_1;
- }
+ wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
+ public_key, preshared_key, peer);
if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
goto err_1;
if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index fecb559cbdb6..3432232afe06 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -66,7 +66,7 @@ struct packet_cb {
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
/* Returns either the correct skb->protocol value, or 0 if invalid. */
-static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
+static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
{
if (skb_network_header(skb) >= skb->head &&
(skb_network_header(skb) + sizeof(struct iphdr)) <=
@@ -81,6 +81,12 @@ static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
return 0;
}
+static inline bool wg_check_packet_protocol(struct sk_buff *skb)
+{
+ __be16 real_protocol = wg_examine_packet_protocol(skb);
+ return real_protocol && skb->protocol == real_protocol;
+}
+
static inline void wg_reset_packet(struct sk_buff *skb)
{
skb_scrub_packet(skb, true);
@@ -94,8 +100,8 @@ static inline void wg_reset_packet(struct sk_buff *skb)
skb->dev = NULL;
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
- skb_reset_tc(skb);
#endif
+ skb_reset_redirect(skb);
skb->hdr_len = skb_headroom(skb);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 4a153894cee2..da3b782ab7d3 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -56,7 +56,7 @@ static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
size_t data_offset, data_len, header_len;
struct udphdr *udp;
- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol ||
+ if (unlikely(!wg_check_packet_protocol(skb) ||
skb_transport_header(skb) < skb->head ||
(skb_transport_header(skb) + sizeof(struct udphdr)) >
skb_tail_pointer(skb)))
@@ -388,7 +388,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
*/
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ~0; /* All levels */
- skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb);
+ skb->protocol = wg_examine_packet_protocol(skb);
if (skb->protocol == htons(ETH_P_IP)) {
len = ntohs(ip_hdr(skb)->tot_len);
if (unlikely(len < sizeof(struct iphdr)))
@@ -587,8 +587,7 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
wg_packet_consume_data(wg, skb);
break;
default:
- net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n",
- wg->dev->name, skb);
+ WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n");
goto err;
}
return;
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index a22a830019c0..355af47c5f73 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -283,6 +283,7 @@ const struct iwl_cfg iwl_ax101_cfg_qu_c0_hr_b0 = {
* HT size; mac80211 would otherwise pick the HE max (256) by default.
*/
.max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT,
+ .tx_with_siso_diversity = true,
.num_rbds = IWL_NUM_RBDS_22000_HE,
};
@@ -309,6 +310,7 @@ const struct iwl_cfg iwl_ax101_cfg_quz_hr = {
* HT size; mac80211 would otherwise pick the HE max (256) by default.
*/
.max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT,
+ .tx_with_siso_diversity = true,
.num_rbds = IWL_NUM_RBDS_22000_HE,
};
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 48d375a86d86..ba2aff3af0fe 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -6,7 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019 - 2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -27,7 +27,7 @@
* BSD LICENSE
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019 - 2020 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -491,13 +491,13 @@ int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt,
}
IWL_EXPORT_SYMBOL(iwl_validate_sar_geo_profile);
-void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
- struct iwl_per_chain_offset_group *table)
+int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
+ struct iwl_per_chain_offset_group *table)
{
int ret, i, j;
if (!iwl_sar_geo_support(fwrt))
- return;
+ return -EOPNOTSUPP;
ret = iwl_sar_get_wgds_table(fwrt);
if (ret < 0) {
@@ -505,7 +505,7 @@ void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
"Geo SAR BIOS table invalid or unavailable. (%d)\n",
ret);
/* we don't fail if the table is not available */
- return;
+ return -ENOENT;
}
BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS *
@@ -530,5 +530,7 @@ void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
i, j, value[1], value[2], value[0]);
}
}
+
+ return 0;
}
IWL_EXPORT_SYMBOL(iwl_sar_geo_init);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index 4a6e8262974b..5590e5cc8fbb 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -6,7 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -27,7 +27,7 @@
* BSD LICENSE
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -171,8 +171,9 @@ bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt);
int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt,
struct iwl_host_cmd *cmd);
-void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
- struct iwl_per_chain_offset_group *table);
+int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
+ struct iwl_per_chain_offset_group *table);
+
#else /* CONFIG_ACPI */
static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method)
@@ -243,9 +244,10 @@ static inline int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt,
return -ENOENT;
}
-static inline void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
- struct iwl_per_chain_offset_group *table)
+static inline int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
+ struct iwl_per_chain_offset_group *table)
{
+ return -ENOENT;
}
#endif /* CONFIG_ACPI */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 91df1ee25dd0..8796ab8f2a5f 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -8,7 +8,7 @@
* Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
* Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -1409,11 +1409,7 @@ static int iwl_dump_ini_rxf_iter(struct iwl_fw_runtime *fwrt,
goto out;
}
- /*
- * region register have absolute value so apply rxf offset after
- * reading the registers
- */
- offs += rxf_data.offset;
+ offs = rxf_data.offset;
/* Lock fence */
iwl_write_prph_no_grab(fwrt->trans, RXF_SET_FENCE_MODE + offs, 0x1);
@@ -2494,10 +2490,7 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx)
goto out;
}
- if (iwl_fw_dbg_stop_restart_recording(fwrt, &params, true)) {
- IWL_ERR(fwrt, "Failed to stop DBGC recording, aborting dump\n");
- goto out;
- }
+ iwl_fw_dbg_stop_restart_recording(fwrt, &params, true);
IWL_DEBUG_FW_INFO(fwrt, "WRT: Data collection start\n");
if (iwl_trans_dbg_ini_valid(fwrt->trans))
@@ -2662,14 +2655,14 @@ static int iwl_fw_dbg_restart_recording(struct iwl_trans *trans,
return 0;
}
-int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
- struct iwl_fw_dbg_params *params,
- bool stop)
+void iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_dbg_params *params,
+ bool stop)
{
int ret = 0;
if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status))
- return 0;
+ return;
if (fw_has_capa(&fwrt->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_DBG_SUSPEND_RESUME_CMD_SUPP))
@@ -2686,7 +2679,5 @@ int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
iwl_fw_set_dbg_rec_on(fwrt);
}
#endif
-
- return ret;
}
IWL_EXPORT_SYMBOL(iwl_fw_dbg_stop_restart_recording);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
index 179f2905d56b..9d3513213f5f 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
@@ -239,9 +239,9 @@ _iwl_fw_dbg_trigger_simple_stop(struct iwl_fw_runtime *fwrt,
_iwl_fw_dbg_trigger_simple_stop((fwrt), (wdev), \
iwl_fw_dbg_get_trigger((fwrt)->fw,\
(trig)))
-int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
- struct iwl_fw_dbg_params *params,
- bool stop);
+void iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_dbg_params *params,
+ bool stop);
#ifdef CONFIG_IWLWIFI_DEBUGFS
static inline void iwl_fw_set_dbg_rec_on(struct iwl_fw_runtime *fwrt)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 2d1cb4647c3b..0481796f75bc 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -1467,7 +1467,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context)
kmemdup(pieces->dbg_conf_tlv[i],
pieces->dbg_conf_tlv_len[i],
GFP_KERNEL);
- if (!pieces->dbg_conf_tlv_len[i])
+ if (!pieces->dbg_conf_tlv[i])
goto out_free_fw;
}
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 54c094e88474..98263cd37944 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -762,10 +762,17 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
u16 cmd_wide_id = WIDE_ID(PHY_OPS_GROUP, GEO_TX_POWER_LIMIT);
union geo_tx_power_profiles_cmd cmd;
u16 len;
+ int ret;
cmd.geo_cmd.ops = cpu_to_le32(IWL_PER_CHAIN_OFFSET_SET_TABLES);
- iwl_sar_geo_init(&mvm->fwrt, cmd.geo_cmd.table);
+ ret = iwl_sar_geo_init(&mvm->fwrt, cmd.geo_cmd.table);
+ /*
+ * It is a valid scenario to not support SAR, or miss wgds table,
+ * but in that case there is no need to send the command.
+ */
+ if (ret)
+ return 0;
cmd.geo_cmd.table_revision = cpu_to_le32(mvm->fwrt.geo_rev);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
index 70b29bf16bb9..60296a754af2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
@@ -308,7 +308,8 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm)
}
/* PHY_SKU section is mandatory in B0 */
- if (!mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) {
+ if (mvm->trans->cfg->nvm_type == IWL_NVM_EXT &&
+ !mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) {
IWL_ERR(mvm,
"Can't parse phy_sku in B0, empty sections\n");
return NULL;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
index e2cf9e015ef8..ca99a9c4f70e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
@@ -6,7 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -27,7 +27,7 @@
* BSD LICENSE
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -147,7 +147,11 @@ static u16 rs_fw_get_config_flags(struct iwl_mvm *mvm,
(vht_ena && (vht_cap->cap & IEEE80211_VHT_CAP_RXLDPC))))
flags |= IWL_TLC_MNG_CFG_FLAGS_LDPC_MSK;
- /* consider our LDPC support in case of HE */
+ /* consider LDPC support in case of HE */
+ if (he_cap->has_he && (he_cap->he_cap_elem.phy_cap_info[1] &
+ IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD))
+ flags |= IWL_TLC_MNG_CFG_FLAGS_LDPC_MSK;
+
if (sband->iftype_data && sband->iftype_data->he_cap.has_he &&
!(sband->iftype_data->he_cap.he_cap_elem.phy_cap_info[1] &
IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD))
@@ -191,11 +195,13 @@ rs_fw_vht_set_enabled_rates(const struct ieee80211_sta *sta,
{
u16 supp;
int i, highest_mcs;
+ u8 nss = sta->rx_nss;
- for (i = 0; i < sta->rx_nss; i++) {
- if (i == IWL_TLC_NSS_MAX)
- break;
+ /* the station support only a single receive chain */
+ if (sta->smps_mode == IEEE80211_SMPS_STATIC)
+ nss = 1;
+ for (i = 0; i < nss && i < IWL_TLC_NSS_MAX; i++) {
highest_mcs = rs_fw_vht_highest_rx_mcs_index(vht_cap, i + 1);
if (!highest_mcs)
continue;
@@ -241,8 +247,13 @@ rs_fw_he_set_enabled_rates(const struct ieee80211_sta *sta,
u16 tx_mcs_160 =
le16_to_cpu(sband->iftype_data->he_cap.he_mcs_nss_supp.tx_mcs_160);
int i;
+ u8 nss = sta->rx_nss;
+
+ /* the station support only a single receive chain */
+ if (sta->smps_mode == IEEE80211_SMPS_STATIC)
+ nss = 1;
- for (i = 0; i < sta->rx_nss && i < IWL_TLC_NSS_MAX; i++) {
+ for (i = 0; i < nss && i < IWL_TLC_NSS_MAX; i++) {
u16 _mcs_160 = (mcs_160 >> (2 * i)) & 0x3;
u16 _mcs_80 = (mcs_80 >> (2 * i)) & 0x3;
u16 _tx_mcs_160 = (tx_mcs_160 >> (2 * i)) & 0x3;
@@ -303,8 +314,14 @@ static void rs_fw_set_supp_rates(struct ieee80211_sta *sta,
cmd->mode = IWL_TLC_MNG_MODE_HT;
cmd->ht_rates[IWL_TLC_NSS_1][IWL_TLC_HT_BW_NONE_160] =
cpu_to_le16(ht_cap->mcs.rx_mask[0]);
- cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_HT_BW_NONE_160] =
- cpu_to_le16(ht_cap->mcs.rx_mask[1]);
+
+ /* the station support only a single receive chain */
+ if (sta->smps_mode == IEEE80211_SMPS_STATIC)
+ cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_HT_BW_NONE_160] =
+ 0;
+ else
+ cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_HT_BW_NONE_160] =
+ cpu_to_le16(ht_cap->mcs.rx_mask[1]);
}
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index c0b420fe5e48..1babc4bb5194 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -785,7 +785,9 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
if (!le32_to_cpu(notif->status)) {
iwl_mvm_te_check_disconnect(mvm, vif,
"Session protection failure");
+ spin_lock_bh(&mvm->time_event_lock);
iwl_mvm_te_clear_data(mvm, te_data);
+ spin_unlock_bh(&mvm->time_event_lock);
}
if (le32_to_cpu(notif->start)) {
@@ -801,7 +803,9 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
*/
iwl_mvm_te_check_disconnect(mvm, vif,
"No beacon heard and the session protection is over already...");
+ spin_lock_bh(&mvm->time_event_lock);
iwl_mvm_te_clear_data(mvm, te_data);
+ spin_unlock_bh(&mvm->time_event_lock);
}
goto out_unlock;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 97f227f3cbc3..f441b20e1642 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -981,6 +981,9 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
IWL_DEV_INFO(0x2526, 0x0014, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x0018, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x001C, iwl9260_2ac_160_cfg, iwl9260_160_name),
+ IWL_DEV_INFO(0x2526, 0x4010, iwl9260_2ac_160_cfg, iwl9260_160_name),
+ IWL_DEV_INFO(0x2526, 0x4018, iwl9260_2ac_160_cfg, iwl9260_160_name),
+ IWL_DEV_INFO(0x2526, 0x401C, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x6010, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x6014, iwl9260_2ac_160_cfg, iwl9260_160_name),
IWL_DEV_INFO(0x2526, 0x8014, iwl9260_2ac_160_cfg, iwl9260_160_name),
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 6173c80189ba..1847f55e199b 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -447,10 +447,13 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
struct page *page = virt_to_head_page(data);
int offset = data - page_address(page);
struct sk_buff *skb = q->rx_head;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
- offset += q->buf_offset;
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, len,
- q->buf_size);
+ if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) {
+ offset += q->buf_offset;
+ skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len,
+ q->buf_size);
+ }
if (more)
return;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h
index 917729807514..e17f70b4d199 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h
@@ -561,6 +561,7 @@ static inline void clear_pci_tx_desc_content(__le32 *__pdesc, int _size)
rxmcs == DESC92C_RATE11M)
struct phy_status_rpt {
+ u8 padding[2];
u8 ch_corr[2];
u8 cck_sig_qual_ofdm_pwdb_all;
u8 cck_agc_rpt_ofdm_cfosho_a;
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index ed049c9f7e29..f140f7d7f553 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -6274,7 +6274,7 @@ static int wl1271_init_ieee80211(struct wl1271 *wl)
wl->hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD |
WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL |
WIPHY_FLAG_HAS_CHANNEL_SWITCH |
-+ WIPHY_FLAG_IBSS_RSN;
+ WIPHY_FLAG_IBSS_RSN;
wl->hw->wiphy->features |= NL80211_FEATURE_AP_SCAN;
diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index 0cc9ac856fe2..ed2123129e0e 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -184,7 +184,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
const struct firmware *fw;
struct sk_buff *skb;
unsigned long len;
- u8 max_size, payload_size;
+ int max_size, payload_size;
int rc = 0;
if ((type == NCI_PATCH_TYPE_OTP && !info->otp_patch) ||
@@ -207,8 +207,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
while (len) {
- payload_size = min_t(unsigned long, (unsigned long) max_size,
- len);
+ payload_size = min_t(unsigned long, max_size, len);
skb = nci_skb_alloc(ndev, (NCI_CTRL_HDR_SIZE + payload_size),
GFP_KERNEL);
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 677d6f45b5c4..43751fab9d36 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -249,13 +249,12 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
- q = blk_alloc_queue(GFP_KERNEL);
+ q = blk_alloc_queue(nd_blk_make_request, NUMA_NO_NODE);
if (!q)
return -ENOMEM;
if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
return -ENOMEM;
- blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 0d04ea3d9fd7..3b09419218d6 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1521,7 +1521,7 @@ static int btt_blk_init(struct btt *btt)
struct nd_namespace_common *ndns = nd_btt->ndns;
/* create a new disk and request queue for btt */
- btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
+ btt->btt_queue = blk_alloc_queue(btt_make_request, NUMA_NO_NODE);
if (!btt->btt_queue)
return -ENOMEM;
@@ -1540,7 +1540,6 @@ static int btt_blk_init(struct btt *btt)
btt->btt_disk->queue->backing_dev_info->capabilities |=
BDI_CAP_SYNCHRONOUS_IO;
- blk_queue_make_request(btt->btt_queue, btt_make_request);
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4eae441f86c9..4ffc6f7ca131 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -395,7 +395,7 @@ static int pmem_attach_disk(struct device *dev,
return -EBUSY;
}
- q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
+ q = blk_alloc_queue(pmem_make_request, dev_to_node(dev));
if (!q)
return -ENOMEM;
@@ -433,7 +433,6 @@ static int pmem_attach_disk(struct device *dev,
pmem->virt_addr = addr;
blk_queue_write_cache(q, true, fua);
- blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
blk_queue_max_hw_sectors(q, UINT_MAX);
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index b9358db83e96..9c17ed32be64 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -32,8 +32,6 @@ config NVME_HWMON
a hardware monitoring device will be created for each NVMe drive
in the system.
- If unsure, say N.
-
config NVME_FABRICS
tristate
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a4d8c90ee7cc..4f907e3beda1 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -171,7 +171,6 @@ static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl)
nvme_remove_namespaces(ctrl);
ctrl->ops->delete_ctrl(ctrl);
nvme_uninit_ctrl(ctrl);
- nvme_put_ctrl(ctrl);
}
static void nvme_delete_ctrl_work(struct work_struct *work)
@@ -192,21 +191,16 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_delete_ctrl);
-static int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
+static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
{
- int ret = 0;
-
/*
* Keep a reference until nvme_do_delete_ctrl() complete,
* since ->delete_ctrl can free the controller.
*/
nvme_get_ctrl(ctrl);
- if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
- ret = -EBUSY;
- if (!ret)
+ if (nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
nvme_do_delete_ctrl(ctrl);
nvme_put_ctrl(ctrl);
- return ret;
}
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
@@ -291,11 +285,8 @@ void nvme_complete_rq(struct request *req)
nvme_req(req)->ctrl->comp_seen = true;
if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
- if ((req->cmd_flags & REQ_NVME_MPATH) &&
- blk_path_error(status)) {
- nvme_failover_req(req);
+ if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req))
return;
- }
if (!blk_queue_dying(req->q)) {
nvme_retry_req(req);
@@ -1055,6 +1046,43 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
return error;
}
+static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
+ struct nvme_ns_id_desc *cur)
+{
+ const char *warn_str = "ctrl returned bogus length:";
+ void *data = cur;
+
+ switch (cur->nidt) {
+ case NVME_NIDT_EUI64:
+ if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+ dev_warn(ctrl->device, "%s %d for NVME_NIDT_EUI64\n",
+ warn_str, cur->nidl);
+ return -1;
+ }
+ memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN);
+ return NVME_NIDT_EUI64_LEN;
+ case NVME_NIDT_NGUID:
+ if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+ dev_warn(ctrl->device, "%s %d for NVME_NIDT_NGUID\n",
+ warn_str, cur->nidl);
+ return -1;
+ }
+ memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN);
+ return NVME_NIDT_NGUID_LEN;
+ case NVME_NIDT_UUID:
+ if (cur->nidl != NVME_NIDT_UUID_LEN) {
+ dev_warn(ctrl->device, "%s %d for NVME_NIDT_UUID\n",
+ warn_str, cur->nidl);
+ return -1;
+ }
+ uuid_copy(&ids->uuid, data + sizeof(*cur));
+ return NVME_NIDT_UUID_LEN;
+ default:
+ /* Skip unknown types */
+ return cur->nidl;
+ }
+}
+
static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_ns_ids *ids)
{
@@ -1074,8 +1102,17 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data,
NVME_IDENTIFY_DATA_SIZE);
- if (status)
+ if (status) {
+ dev_warn(ctrl->device,
+ "Identify Descriptors failed (%d)\n", status);
+ /*
+ * Don't treat an error as fatal, as we potentially already
+ * have a NGUID or EUI-64.
+ */
+ if (status > 0)
+ status = 0;
goto free_data;
+ }
for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
struct nvme_ns_id_desc *cur = data + pos;
@@ -1083,42 +1120,9 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
if (cur->nidl == 0)
break;
- switch (cur->nidt) {
- case NVME_NIDT_EUI64:
- if (cur->nidl != NVME_NIDT_EUI64_LEN) {
- dev_warn(ctrl->device,
- "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
- cur->nidl);
- goto free_data;
- }
- len = NVME_NIDT_EUI64_LEN;
- memcpy(ids->eui64, data + pos + sizeof(*cur), len);
- break;
- case NVME_NIDT_NGUID:
- if (cur->nidl != NVME_NIDT_NGUID_LEN) {
- dev_warn(ctrl->device,
- "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
- cur->nidl);
- goto free_data;
- }
- len = NVME_NIDT_NGUID_LEN;
- memcpy(ids->nguid, data + pos + sizeof(*cur), len);
- break;
- case NVME_NIDT_UUID:
- if (cur->nidl != NVME_NIDT_UUID_LEN) {
- dev_warn(ctrl->device,
- "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
- cur->nidl);
- goto free_data;
- }
- len = NVME_NIDT_UUID_LEN;
- uuid_copy(&ids->uuid, data + pos + sizeof(*cur));
- break;
- default:
- /* Skip unknown types */
- len = cur->nidl;
- break;
- }
+ len = nvme_process_ns_desc(ctrl, ids, cur);
+ if (len < 0)
+ goto free_data;
len += sizeof(*cur);
}
@@ -1584,6 +1588,47 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
return ret;
}
+#ifdef CONFIG_COMPAT
+struct nvme_user_io32 {
+ __u8 opcode;
+ __u8 flags;
+ __u16 control;
+ __u16 nblocks;
+ __u16 rsvd;
+ __u64 metadata;
+ __u64 addr;
+ __u64 slba;
+ __u32 dsmgmt;
+ __u32 reftag;
+ __u16 apptag;
+ __u16 appmask;
+} __attribute__((__packed__));
+
+#define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32)
+
+static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ /*
+ * Corresponds to the difference of NVME_IOCTL_SUBMIT_IO
+ * between 32 bit programs and 64 bit kernel.
+ * The cause is that the results of sizeof(struct nvme_user_io),
+ * which is used to define NVME_IOCTL_SUBMIT_IO,
+ * are not same between 32 bit compiler and 64 bit compiler.
+ * NVME_IOCTL_SUBMIT_IO32 is for 64 bit kernel handling
+ * NVME_IOCTL_SUBMIT_IO issued from 32 bit programs.
+ * Other IOCTL numbers are same between 32 bit and 64 bit.
+ * So there is nothing to do regarding to other IOCTL numbers.
+ */
+ if (cmd == NVME_IOCTL_SUBMIT_IO32)
+ return nvme_ioctl(bdev, mode, NVME_IOCTL_SUBMIT_IO, arg);
+
+ return nvme_ioctl(bdev, mode, cmd, arg);
+}
+#else
+#define nvme_compat_ioctl NULL
+#endif /* CONFIG_COMPAT */
+
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
struct nvme_ns *ns = bdev->bd_disk->private_data;
@@ -1721,26 +1766,15 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
struct nvme_id_ns *id, struct nvme_ns_ids *ids)
{
- int ret = 0;
-
memset(ids, 0, sizeof(*ids));
if (ctrl->vs >= NVME_VS(1, 1, 0))
memcpy(ids->eui64, id->eui64, sizeof(id->eui64));
if (ctrl->vs >= NVME_VS(1, 2, 0))
memcpy(ids->nguid, id->nguid, sizeof(id->nguid));
- if (ctrl->vs >= NVME_VS(1, 3, 0)) {
- /* Don't treat error as fatal we potentially
- * already have a NGUID or EUI-64
- */
- ret = nvme_identify_ns_descs(ctrl, nsid, ids);
- if (ret)
- dev_warn(ctrl->device,
- "Identify Descriptors failed (%d)\n", ret);
- if (ret > 0)
- ret = 0;
- }
- return ret;
+ if (ctrl->vs >= NVME_VS(1, 3, 0))
+ return nvme_identify_ns_descs(ctrl, nsid, ids);
+ return 0;
}
static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
@@ -1810,7 +1844,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
ns->lba_shift > PAGE_SHIFT)
capacity = 0;
- set_capacity(disk, capacity);
+ set_capacity_revalidate_and_notify(disk, capacity, false);
nvme_config_discard(disk, ns);
nvme_config_write_zeroes(disk, ns);
@@ -2027,7 +2061,7 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit);
static const struct block_device_operations nvme_fops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
- .compat_ioctl = nvme_ioctl,
+ .compat_ioctl = nvme_compat_ioctl,
.open = nvme_open,
.release = nvme_release,
.getgeo = nvme_getgeo,
@@ -2055,7 +2089,7 @@ const struct block_device_operations nvme_ns_head_ops = {
.open = nvme_ns_head_open,
.release = nvme_ns_head_release,
.ioctl = nvme_ioctl,
- .compat_ioctl = nvme_ioctl,
+ .compat_ioctl = nvme_compat_ioctl,
.getgeo = nvme_getgeo,
.pr_ops = &nvme_pr_ops,
};
@@ -2074,13 +2108,13 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
if ((csts & NVME_CSTS_RDY) == bit)
break;
- msleep(100);
+ usleep_range(1000, 2000);
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
dev_err(ctrl->device,
- "Device not ready; aborting %s\n", enabled ?
- "initialisation" : "reset");
+ "Device not ready; aborting %s, CSTS=0x%x\n",
+ enabled ? "initialisation" : "reset", csts);
return -ENODEV;
}
}
@@ -2591,8 +2625,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
lockdep_assert_held(&nvme_subsystems_lock);
list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
- if (tmp->state == NVME_CTRL_DELETING ||
- tmp->state == NVME_CTRL_DEAD)
+ if (nvme_state_terminal(tmp))
continue;
if (tmp->cntlid == ctrl->cntlid) {
@@ -3193,6 +3226,10 @@ static ssize_t nvme_sysfs_delete(struct device *dev,
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ /* Can't delete non-created controllers */
+ if (!ctrl->created)
+ return -EBUSY;
+
if (device_remove_file_self(dev, attr))
nvme_delete_ctrl_sync(ctrl);
return count;
@@ -3242,6 +3279,26 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
}
static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
+static ssize_t nvme_sysfs_show_hostnqn(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->opts->host->nqn);
+}
+static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL);
+
+static ssize_t nvme_sysfs_show_hostid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return snprintf(buf, PAGE_SIZE, "%pU\n", &ctrl->opts->host->id);
+}
+static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL);
+
static ssize_t nvme_sysfs_show_address(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -3267,6 +3324,8 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_numa_node.attr,
&dev_attr_queue_count.attr,
&dev_attr_sqsize.attr,
+ &dev_attr_hostnqn.attr,
+ &dev_attr_hostid.attr,
NULL
};
@@ -3280,6 +3339,10 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
return 0;
if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
return 0;
+ if (a == &dev_attr_hostnqn.attr && !ctrl->opts)
+ return 0;
+ if (a == &dev_attr_hostid.attr && !ctrl->opts)
+ return 0;
return a->mode;
}
@@ -3294,7 +3357,7 @@ static const struct attribute_group *nvme_dev_attr_groups[] = {
NULL,
};
-static struct nvme_ns_head *__nvme_find_ns_head(struct nvme_subsystem *subsys,
+static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
unsigned nsid)
{
struct nvme_ns_head *h;
@@ -3327,7 +3390,8 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
- unsigned nsid, struct nvme_id_ns *id)
+ unsigned nsid, struct nvme_id_ns *id,
+ struct nvme_ns_ids *ids)
{
struct nvme_ns_head *head;
size_t size = sizeof(*head);
@@ -3350,12 +3414,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
goto out_ida_remove;
head->subsys = ctrl->subsys;
head->ns_id = nsid;
+ head->ids = *ids;
kref_init(&head->ref);
- ret = nvme_report_ns_ids(ctrl, nsid, id, &head->ids);
- if (ret)
- goto out_cleanup_srcu;
-
ret = __nvme_check_ids(ctrl->subsys, head);
if (ret) {
dev_err(ctrl->device,
@@ -3390,24 +3451,23 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
struct nvme_ctrl *ctrl = ns->ctrl;
bool is_shared = id->nmic & (1 << 0);
struct nvme_ns_head *head = NULL;
+ struct nvme_ns_ids ids;
int ret = 0;
+ ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
+ if (ret)
+ goto out;
+
mutex_lock(&ctrl->subsys->lock);
if (is_shared)
- head = __nvme_find_ns_head(ctrl->subsys, nsid);
+ head = nvme_find_ns_head(ctrl->subsys, nsid);
if (!head) {
- head = nvme_alloc_ns_head(ctrl, nsid, id);
+ head = nvme_alloc_ns_head(ctrl, nsid, id, &ids);
if (IS_ERR(head)) {
ret = PTR_ERR(head);
goto out_unlock;
}
} else {
- struct nvme_ns_ids ids;
-
- ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
- if (ret)
- goto out_unlock;
-
if (!nvme_ns_ids_equal(&head->ids, &ids)) {
dev_err(ctrl->device,
"IDs don't match for shared namespace %d\n",
@@ -3422,6 +3482,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
out_unlock:
mutex_unlock(&ctrl->subsys->lock);
+out:
if (ret > 0)
ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
@@ -3480,7 +3541,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
return 0;
}
-static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
struct gendisk *disk;
@@ -3490,13 +3551,11 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
- return -ENOMEM;
+ return;
ns->queue = blk_mq_init_queue(ctrl->tagset);
- if (IS_ERR(ns->queue)) {
- ret = PTR_ERR(ns->queue);
+ if (IS_ERR(ns->queue))
goto out_free_ns;
- }
if (ctrl->opts && ctrl->opts->data_digest)
ns->queue->backing_dev_info->capabilities
@@ -3519,10 +3578,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (ret)
goto out_free_queue;
- if (id->ncap == 0) {
- ret = -EINVAL;
+ if (id->ncap == 0) /* no namespace (legacy quirk) */
goto out_free_id;
- }
ret = nvme_init_ns_head(ns, nsid, id);
if (ret)
@@ -3531,10 +3588,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_set_disk_name(disk_name, ns, ctrl, &flags);
disk = alloc_disk_node(0, node);
- if (!disk) {
- ret = -ENOMEM;
+ if (!disk)
goto out_unlink_ns;
- }
disk->fops = &nvme_fops;
disk->private_data = ns;
@@ -3565,7 +3620,7 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
kfree(id);
- return 0;
+ return;
out_put_disk:
put_disk(ns->disk);
out_unlink_ns:
@@ -3579,9 +3634,6 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_cleanup_queue(ns->queue);
out_free_ns:
kfree(ns);
- if (ret > 0)
- ret = blk_status_to_errno(nvme_error_status(ret));
- return ret;
}
static void nvme_ns_remove(struct nvme_ns *ns)
@@ -3987,6 +4039,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
nvme_queue_scan(ctrl);
nvme_start_queues(ctrl);
}
+ ctrl->created = true;
}
EXPORT_SYMBOL_GPL(nvme_start_ctrl);
@@ -3995,6 +4048,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
nvme_fault_inject_fini(&ctrl->fault_inject);
dev_pm_qos_hide_latency_tolerance(ctrl->device);
cdev_device_del(&ctrl->cdev, ctrl->device);
+ nvme_put_ctrl(ctrl);
}
EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
@@ -4077,6 +4131,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
if (ret)
goto out_release_instance;
+ nvme_get_ctrl(ctrl);
cdev_init(&ctrl->cdev, &nvme_dev_fops);
ctrl->cdev.owner = ops->module;
ret = cdev_device_add(&ctrl->cdev, ctrl->device);
@@ -4095,6 +4150,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
return 0;
out_free_name:
+ nvme_put_ctrl(ctrl);
kfree_const(ctrl->device->kobj.name);
out_release_instance:
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
@@ -4299,6 +4355,7 @@ static void __exit nvme_core_exit(void)
destroy_workqueue(nvme_delete_wq);
destroy_workqueue(nvme_reset_wq);
destroy_workqueue(nvme_wq);
+ ida_destroy(&nvme_instance_ida);
}
MODULE_LICENSE("GPL");
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 74b8818ac9a1..2a6c8190eeb7 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -105,14 +105,14 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
int len = 0;
if (ctrl->opts->mask & NVMF_OPT_TRADDR)
- len += snprintf(buf, size, "traddr=%s", ctrl->opts->traddr);
+ len += scnprintf(buf, size, "traddr=%s", ctrl->opts->traddr);
if (ctrl->opts->mask & NVMF_OPT_TRSVCID)
- len += snprintf(buf + len, size - len, "%strsvcid=%s",
+ len += scnprintf(buf + len, size - len, "%strsvcid=%s",
(len) ? "," : "", ctrl->opts->trsvcid);
if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
- len += snprintf(buf + len, size - len, "%shost_traddr=%s",
+ len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
(len) ? "," : "", ctrl->opts->host_traddr);
- len += snprintf(buf + len, size - len, "\n");
+ len += scnprintf(buf + len, size - len, "\n");
return len;
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5a70ac395d53..a8bf2fb1287b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3181,10 +3181,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
goto fail_ctrl;
}
- nvme_get_ctrl(&ctrl->ctrl);
-
if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
- nvme_put_ctrl(&ctrl->ctrl);
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to schedule initial connect\n",
ctrl->cnum);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a11900cf3a36..61bf87592570 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -64,17 +64,12 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
}
}
-void nvme_failover_req(struct request *req)
+bool nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
u16 status = nvme_req(req)->status;
unsigned long flags;
- spin_lock_irqsave(&ns->head->requeue_lock, flags);
- blk_steal_bios(&ns->head->requeue_list, req);
- spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
- blk_mq_end_request(req, 0);
-
switch (status & 0x7ff) {
case NVME_SC_ANA_TRANSITION:
case NVME_SC_ANA_INACCESSIBLE:
@@ -103,15 +98,17 @@ void nvme_failover_req(struct request *req)
nvme_mpath_clear_current_path(ns);
break;
default:
- /*
- * Reset the controller for any non-ANA error as we don't know
- * what caused the error.
- */
- nvme_reset_ctrl(ns->ctrl);
- break;
+ /* This was a non-ANA error so follow the normal error path. */
+ return false;
}
+ spin_lock_irqsave(&ns->head->requeue_lock, flags);
+ blk_steal_bios(&ns->head->requeue_list, req);
+ spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
+ blk_mq_end_request(req, 0);
+
kblockd_schedule_work(&ns->head->requeue_work);
+ return true;
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
@@ -377,11 +374,10 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
return 0;
- q = blk_alloc_queue_node(GFP_KERNEL, ctrl->numa_node);
+ q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node);
if (!q)
goto out;
q->queuedata = head;
- blk_queue_make_request(q, nvme_ns_head_make_request);
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* set to a default value for 512 until disk is validated */
blk_queue_logical_block_size(q, 512);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1024fec7914c..2e04a36296d9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -259,6 +259,7 @@ struct nvme_ctrl {
struct nvme_command ka_cmd;
struct work_struct fw_act_work;
unsigned long events;
+ bool created;
#ifdef CONFIG_NVME_MULTIPATH
/* asymmetric namespace access: */
@@ -550,7 +551,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags);
-void nvme_failover_req(struct request *req);
+bool nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
@@ -599,8 +600,9 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
}
-static inline void nvme_failover_req(struct request *req)
+static inline bool nvme_failover_req(struct request *req)
{
+ return false;
}
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d3f23d6254e4..4e79e412b276 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -971,39 +971,25 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
nvme_end_request(req, cqe->status, cqe->result);
}
-static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
-{
- while (start != end) {
- nvme_handle_cqe(nvmeq, start);
- if (++start == nvmeq->q_depth)
- start = 0;
- }
-}
-
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{
- if (nvmeq->cq_head == nvmeq->q_depth - 1) {
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
nvmeq->cq_head = 0;
- nvmeq->cq_phase = !nvmeq->cq_phase;
- } else {
- nvmeq->cq_head++;
+ nvmeq->cq_phase ^= 1;
}
}
-static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
- u16 *end, unsigned int tag)
+static inline int nvme_process_cq(struct nvme_queue *nvmeq)
{
int found = 0;
- *start = nvmeq->cq_head;
while (nvme_cqe_pending(nvmeq)) {
- if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag)
- found++;
+ found++;
+ nvme_handle_cqe(nvmeq, nvmeq->cq_head);
nvme_update_cq_head(nvmeq);
}
- *end = nvmeq->cq_head;
- if (*start != *end)
+ if (found)
nvme_ring_cq_doorbell(nvmeq);
return found;
}
@@ -1012,21 +998,16 @@ static irqreturn_t nvme_irq(int irq, void *data)
{
struct nvme_queue *nvmeq = data;
irqreturn_t ret = IRQ_NONE;
- u16 start, end;
/*
* The rmb/wmb pair ensures we see all updates from a previous run of
* the irq handler, even if that was on another CPU.
*/
rmb();
- nvme_process_cq(nvmeq, &start, &end, -1);
+ if (nvme_process_cq(nvmeq))
+ ret = IRQ_HANDLED;
wmb();
- if (start != end) {
- nvme_complete_cqes(nvmeq, start, end);
- return IRQ_HANDLED;
- }
-
return ret;
}
@@ -1039,46 +1020,30 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
}
/*
- * Poll for completions any queue, including those not dedicated to polling.
+ * Poll for completions for any interrupt driven queue
* Can be called from any context.
*/
-static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag)
+static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
{
struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
- u16 start, end;
- int found;
- /*
- * For a poll queue we need to protect against the polling thread
- * using the CQ lock. For normal interrupt driven threads we have
- * to disable the interrupt to avoid racing with it.
- */
- if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) {
- spin_lock(&nvmeq->cq_poll_lock);
- found = nvme_process_cq(nvmeq, &start, &end, tag);
- spin_unlock(&nvmeq->cq_poll_lock);
- } else {
- disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
- found = nvme_process_cq(nvmeq, &start, &end, tag);
- enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
- }
+ WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));
- nvme_complete_cqes(nvmeq, start, end);
- return found;
+ disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
+ nvme_process_cq(nvmeq);
+ enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
}
static int nvme_poll(struct blk_mq_hw_ctx *hctx)
{
struct nvme_queue *nvmeq = hctx->driver_data;
- u16 start, end;
bool found;
if (!nvme_cqe_pending(nvmeq))
return 0;
spin_lock(&nvmeq->cq_poll_lock);
- found = nvme_process_cq(nvmeq, &start, &end, -1);
- nvme_complete_cqes(nvmeq, start, end);
+ found = nvme_process_cq(nvmeq);
spin_unlock(&nvmeq->cq_poll_lock);
return found;
@@ -1255,7 +1220,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
/*
* Did we miss an interrupt?
*/
- if (nvme_poll_irqdisable(nvmeq, req->tag)) {
+ if (test_bit(NVMEQ_POLLED, &nvmeq->flags))
+ nvme_poll(req->mq_hctx);
+ else
+ nvme_poll_irqdisable(nvmeq);
+
+ if (blk_mq_request_completed(req)) {
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, completion polled\n",
req->tag, nvmeq->qid);
@@ -1398,7 +1368,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
else
nvme_disable_ctrl(&dev->ctrl);
- nvme_poll_irqdisable(nvmeq, -1);
+ nvme_poll_irqdisable(nvmeq);
}
/*
@@ -1409,13 +1379,10 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
*/
static void nvme_reap_pending_cqes(struct nvme_dev *dev)
{
- u16 start, end;
int i;
- for (i = dev->ctrl.queue_count - 1; i > 0; i--) {
- nvme_process_cq(&dev->queues[i], &start, &end, -1);
- nvme_complete_cqes(&dev->queues[i], start, end);
- }
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--)
+ nvme_process_cq(&dev->queues[i]);
}
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
@@ -2503,13 +2470,13 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
struct nvme_dev *dev = to_nvme_dev(ctrl);
nvme_dbbuf_dma_free(dev);
- put_device(dev->dev);
nvme_free_tagset(dev);
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
- kfree(dev->queues);
free_opal_dev(dev->ctrl.opal_dev);
mempool_destroy(dev->iod_mempool);
+ put_device(dev->dev);
+ kfree(dev->queues);
kfree(dev);
}
@@ -2689,7 +2656,7 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
{
struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
- return snprintf(buf, size, "%s", dev_name(&pdev->dev));
+ return snprintf(buf, size, "%s\n", dev_name(&pdev->dev));
}
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
@@ -2835,7 +2802,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
nvme_reset_ctrl(&dev->ctrl);
- nvme_get_ctrl(&dev->ctrl);
async_schedule(nvme_async_probe, dev);
return 0;
@@ -2907,10 +2873,9 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
- nvme_uninit_ctrl(&dev->ctrl);
nvme_release_prp_pools(dev);
nvme_dev_unmap(dev);
- nvme_put_ctrl(&dev->ctrl);
+ nvme_uninit_ctrl(&dev->ctrl);
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 3e85c5cacefd..86603d9b0cef 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -850,9 +850,11 @@ out_free_tagset:
if (new)
blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
out_free_async_qe:
- nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
- sizeof(struct nvme_command), DMA_TO_DEVICE);
- ctrl->async_event_sqe.data = NULL;
+ if (ctrl->async_event_sqe.data) {
+ nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+ sizeof(struct nvme_command), DMA_TO_DEVICE);
+ ctrl->async_event_sqe.data = NULL;
+ }
out_free_queue:
nvme_rdma_free_queue(&ctrl->queues[0]);
return error;
@@ -1022,8 +1024,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
if (!changed) {
- /* state change failure is ok if we're in DELETING state */
+ /*
+ * state change failure is ok if we're in DELETING state,
+ * unless we're during creation of a new controller to
+ * avoid races with teardown flow.
+ */
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+ WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
}
@@ -2043,8 +2050,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
- nvme_get_ctrl(&ctrl->ctrl);
-
mutex_lock(&nvme_rdma_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
mutex_unlock(&nvme_rdma_ctrl_mutex);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 49d4373b84eb..0ef14f0fad86 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -20,6 +20,16 @@
struct nvme_tcp_queue;
+/* Define the socket priority to use for connections where it is desirable
+ * that the NIC consider performing optimized packet processing or filtering.
+ * A non-zero value being sufficient to indicate general consideration of any
+ * possible optimization. Making it a module param allows for alternative
+ * values that may be unique for some NIC implementations.
+ */
+static int so_priority;
+module_param(so_priority, int, 0644);
+MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
+
enum nvme_tcp_send_state {
NVME_TCP_SEND_CMD_PDU = 0,
NVME_TCP_SEND_H2C_PDU,
@@ -1017,8 +1027,15 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
if (req->state == NVME_TCP_SEND_DDGST)
ret = nvme_tcp_try_send_ddgst(req);
done:
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
ret = 0;
+ } else if (ret < 0) {
+ dev_err(queue->ctrl->ctrl.device,
+ "failed to send request %d\n", ret);
+ if (ret != -EPIPE && ret != -ECONNRESET)
+ nvme_tcp_fail_request(queue->request);
+ nvme_tcp_done_send_req(queue);
+ }
return ret;
}
@@ -1049,25 +1066,16 @@ static void nvme_tcp_io_work(struct work_struct *w)
int result;
result = nvme_tcp_try_send(queue);
- if (result > 0) {
+ if (result > 0)
pending = true;
- } else if (unlikely(result < 0)) {
- dev_err(queue->ctrl->ctrl.device,
- "failed to send request %d\n", result);
-
- /*
- * Fail the request unless peer closed the connection,
- * in which case error recovery flow will complete all.
- */
- if ((result != -EPIPE) && (result != -ECONNRESET))
- nvme_tcp_fail_request(queue->request);
- nvme_tcp_done_send_req(queue);
- return;
- }
+ else if (unlikely(result < 0))
+ break;
result = nvme_tcp_try_recv(queue);
if (result > 0)
pending = true;
+ else if (unlikely(result < 0))
+ break;
if (!pending)
return;
@@ -1248,13 +1256,67 @@ free_icreq:
return ret;
}
+/* True when @queue has queue id 0, i.e. it is the admin queue. */
+static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue)
+{
+	return !nvme_tcp_queue_id(queue);
+}
+
+/*
+ * True when @queue is an I/O queue whose id falls inside the first
+ * io_queues[HCTX_TYPE_DEFAULT] ids after the admin queue.
+ */
+static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue)
+{
+	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+	int qid = nvme_tcp_queue_id(queue);
+
+	if (nvme_tcp_admin_queue(queue))
+		return false;
+
+	return qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT];
+}
+
+/*
+ * True when @queue is neither the admin queue nor a default queue and its id
+ * is below the end of the default + read id ranges.
+ */
+static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue)
+{
+	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+	int qid = nvme_tcp_queue_id(queue);
+
+	if (nvme_tcp_admin_queue(queue) || nvme_tcp_default_queue(queue))
+		return false;
+
+	return qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+			ctrl->io_queues[HCTX_TYPE_READ];
+}
+
+/*
+ * True when @queue is not an admin, default or read queue and its id is below
+ * the end of the default + read + poll id ranges.
+ */
+static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
+{
+	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+	int qid = nvme_tcp_queue_id(queue);
+
+	if (nvme_tcp_admin_queue(queue) ||
+	    nvme_tcp_default_queue(queue) ||
+	    nvme_tcp_read_queue(queue))
+		return false;
+
+	return qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+			ctrl->io_queues[HCTX_TYPE_READ] +
+			ctrl->io_queues[HCTX_TYPE_POLL];
+}
+
+/*
+ * Choose queue->io_cpu from the queue's position inside its own queue set
+ * (default, read or poll): the zero-based index within the set, minus one, is
+ * handed to cpumask_next_wrap() over cpu_online_mask.
+ */
+static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
+{
+	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+	int qid = nvme_tcp_queue_id(queue);
+	int idx;
+
+	if (nvme_tcp_default_queue(queue))
+		idx = qid - 1;
+	else if (nvme_tcp_read_queue(queue))
+		idx = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
+	else if (nvme_tcp_poll_queue(queue))
+		idx = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
+				ctrl->io_queues[HCTX_TYPE_READ] - 1;
+	else
+		idx = 0;	/* admin queue, or a qid outside every queue set */
+
+	queue->io_cpu = cpumask_next_wrap(idx - 1, cpu_online_mask, -1, false);
+}
+
static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
int qid, size_t queue_size)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
struct linger sol = { .l_onoff = 1, .l_linger = 0 };
- int ret, opt, rcv_pdu_size, n;
+ int ret, opt, rcv_pdu_size;
queue->ctrl = ctrl;
INIT_LIST_HEAD(&queue->send_list);
@@ -1309,6 +1371,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
goto err_sock;
}
+ if (so_priority > 0) {
+ ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
+ (char *)&so_priority, sizeof(so_priority));
+ if (ret) {
+ dev_err(ctrl->ctrl.device,
+ "failed to set SO_PRIORITY sock opt, ret %d\n",
+ ret);
+ goto err_sock;
+ }
+ }
+
/* Set socket type of service */
if (nctrl->opts->tos >= 0) {
opt = nctrl->opts->tos;
@@ -1322,11 +1395,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
}
queue->sock->sk->sk_allocation = GFP_ATOMIC;
- if (!qid)
- n = 0;
- else
- n = (qid - 1) % num_online_cpus();
- queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+ nvme_tcp_set_queue_io_cpu(queue);
queue->request = NULL;
queue->data_remaining = 0;
queue->ddgst_remaining = 0;
@@ -1861,8 +1930,13 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
}
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
- /* state change failure is ok if we're in DELETING state */
+ /*
+ * state change failure is ok if we're in DELETING state,
+ * unless we're during creation of a new controller to
+ * avoid races with teardown flow.
+ */
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
+ WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
}
@@ -2359,8 +2433,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
- nvme_get_ctrl(&ctrl->ctrl);
-
mutex_lock(&nvme_tcp_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
mutex_unlock(&nvme_tcp_ctrl_mutex);
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 72a7e41f3018..9d6f75cfa77c 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/rculist.h>
+#include <linux/part_stat.h>
#include <generated/utsrelease.h>
#include <asm/unaligned.h>
@@ -322,12 +323,25 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
}
+/*
+ * Fill id->mn with the subsystem's configured model string, or with
+ * NVMET_DEFAULT_CTRL_MODEL when none is set, padded with spaces to the full
+ * size of the field.
+ */
+static void nvmet_id_set_model_number(struct nvme_id_ctrl *id,
+		struct nvmet_subsys *subsys)
+{
+	struct nvmet_subsys_model *cur;
+	const char *mn;
+
+	/* The copy is done inside the RCU read-side section that covers cur. */
+	rcu_read_lock();
+	cur = rcu_dereference(subsys->model);
+	mn = cur ? cur->number : NVMET_DEFAULT_CTRL_MODEL;
+	memcpy_and_pad(id->mn, sizeof(id->mn), mn, strlen(mn), ' ');
+	rcu_read_unlock();
+}
+
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
u16 status = 0;
- const char model[] = "Linux";
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
@@ -342,7 +356,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
memset(id->sn, ' ', sizeof(id->sn));
bin2hex(id->sn, &ctrl->subsys->serial,
min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
- memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+ nvmet_id_set_model_number(id, ctrl->subsys);
memcpy_and_pad(id->fr, sizeof(id->fr),
UTS_RELEASE, strlen(UTS_RELEASE), ' ');
@@ -356,8 +370,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
/* we support multiple ports, multiples hosts and ANA: */
id->cmic = (1 << 0) | (1 << 1) | (1 << 3);
- /* no limit on data transfer sizes for now */
- id->mdts = 0;
+ /* Limit MDTS according to transport capability */
+ if (ctrl->ops->get_mdts)
+ id->mdts = ctrl->ops->get_mdts(ctrl);
+ else
+ id->mdts = 0;
+
id->cntlid = cpu_to_le16(ctrl->cntlid);
id->ver = cpu_to_le32(ctrl->subsys->ver);
@@ -720,13 +738,22 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11);
u16 status = 0;
+ u16 nsqr;
+ u16 ncqr;
if (!nvmet_check_data_len(req, 0))
return;
switch (cdw10 & 0xff) {
case NVME_FEAT_NUM_QUEUES:
+ ncqr = (cdw11 >> 16) & 0xffff;
+ nsqr = cdw11 & 0xffff;
+ if (ncqr == 0xffff || nsqr == 0xffff) {
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
nvmet_set_result(req,
(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
break;
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 98613a45bd3b..7aa10788b7c8 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -395,14 +395,12 @@ static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
struct nvmet_subsys *subsys = ns->subsys;
int ret = 0;
-
mutex_lock(&subsys->lock);
if (ns->enabled) {
ret = -EBUSY;
goto out_unlock;
}
-
if (uuid_parse(page, &ns->uuid))
ret = -EINVAL;
@@ -815,10 +813,10 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item,
(int)NVME_MAJOR(subsys->ver),
(int)NVME_MINOR(subsys->ver),
(int)NVME_TERTIARY(subsys->ver));
- else
- return snprintf(page, PAGE_SIZE, "%d.%d\n",
- (int)NVME_MAJOR(subsys->ver),
- (int)NVME_MINOR(subsys->ver));
+
+ return snprintf(page, PAGE_SIZE, "%d.%d\n",
+ (int)NVME_MAJOR(subsys->ver),
+ (int)NVME_MINOR(subsys->ver));
}
static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
@@ -828,7 +826,6 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
int major, minor, tertiary = 0;
int ret;
-
ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
if (ret != 2 && ret != 3)
return -EINVAL;
@@ -852,20 +849,151 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item,
static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item,
const char *page, size_t count)
{
- struct nvmet_subsys *subsys = to_subsys(item);
+ u64 serial;
+
+ if (sscanf(page, "%llx\n", &serial) != 1)
+ return -EINVAL;
down_write(&nvmet_config_sem);
- sscanf(page, "%llx\n", &subsys->serial);
+ to_subsys(item)->serial = serial;
up_write(&nvmet_config_sem);
return count;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_serial);
+/* configfs "attr_cntlid_min" show: print the subsystem's cntlid_min as "%u\n". */
+static ssize_t nvmet_subsys_attr_cntlid_min_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", subsys->cntlid_min);
+}
+
+/*
+ * configfs "attr_cntlid_min" store: parse an unsigned 16-bit value and make it
+ * the subsystem's cntlid_min.
+ *
+ * Returns @cnt on success, or -EINVAL when the input does not parse, is zero,
+ * or is not strictly below the current cntlid_max.
+ */
+static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item,
+		const char *page, size_t cnt)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+	ssize_t ret = -EINVAL;
+	u16 val;
+
+	if (sscanf(page, "%hu\n", &val) != 1 || val == 0)
+		return -EINVAL;
+
+	/* The comparison and the update happen under nvmet_config_sem. */
+	down_write(&nvmet_config_sem);
+	if (val < subsys->cntlid_max) {
+		subsys->cntlid_min = val;
+		ret = cnt;
+	}
+	up_write(&nvmet_config_sem);
+
+	return ret;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_min);
+
+/* configfs "attr_cntlid_max" show: print the subsystem's cntlid_max as "%u\n". */
+static ssize_t nvmet_subsys_attr_cntlid_max_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", subsys->cntlid_max);
+}
+
+/*
+ * configfs "attr_cntlid_max" store: parse an unsigned 16-bit value and make it
+ * the subsystem's cntlid_max.
+ *
+ * Returns @cnt on success, or -EINVAL when the input does not parse, is zero,
+ * or is not strictly above the current cntlid_min.
+ */
+static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item,
+		const char *page, size_t cnt)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+	ssize_t ret = -EINVAL;
+	u16 val;
+
+	if (sscanf(page, "%hu\n", &val) != 1 || val == 0)
+		return -EINVAL;
+
+	/* The comparison and the update happen under nvmet_config_sem. */
+	down_write(&nvmet_config_sem);
+	if (val > subsys->cntlid_min) {
+		subsys->cntlid_max = val;
+		ret = cnt;
+	}
+	up_write(&nvmet_config_sem);
+
+	return ret;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_max);
+
+/*
+ * configfs "attr_model" show: print the subsystem's model string, or
+ * NVMET_DEFAULT_CTRL_MODEL when none has been set, followed by a newline.
+ */
+static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+	struct nvmet_subsys_model *cur;
+	const char *mn;
+	int len;
+
+	/* Format inside the RCU read-side section that covers cur. */
+	rcu_read_lock();
+	cur = rcu_dereference(subsys->model);
+	mn = cur ? cur->number : NVMET_DEFAULT_CTRL_MODEL;
+	len = snprintf(page, PAGE_SIZE, "%s\n", mn);
+	rcu_read_unlock();
+
+	return len;
+}
+
+/*
+ * True when @c lies in the range 0x20..0x7e inclusive.
+ * See Section 1.5 of NVMe 1.4.
+ */
+static bool nvmet_is_ascii(const char c)
+{
+	return !(c < 0x20 || c > 0x7e);
+}
+
+/*
+ * configfs "attr_model" store: replace the subsystem's model string.
+ *
+ * The input is cut at the first newline; what remains must be non-empty and
+ * consist only of characters accepted by nvmet_is_ascii().  The new string is
+ * installed with rcu_replace_pointer() and the previous one is handed to
+ * kfree_rcu().
+ *
+ * Returns @count on success, -EINVAL for an empty or non-ASCII string, or
+ * -ENOMEM when the allocation fails.
+ */
+static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+	struct nvmet_subsys_model *new_model, *old_model;
+	int pos, len;
+
+	len = strcspn(page, "\n");
+	if (!len)
+		return -EINVAL;
+
+	for (pos = 0; pos < len; pos++) {
+		if (!nvmet_is_ascii(page[pos]))
+			return -EINVAL;
+	}
+
+	/* kzalloc() zeroes the extra byte, so ->number is NUL-terminated. */
+	new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL);
+	if (!new_model)
+		return -ENOMEM;
+
+	/*
+	 * Copy straight from the configfs page: no intermediate duplicate of
+	 * the string is allocated, so there is nothing extra to free on any
+	 * path.
+	 */
+	memcpy(new_model->number, page, len);
+
+	down_write(&nvmet_config_sem);
+	mutex_lock(&subsys->lock);
+	old_model = rcu_replace_pointer(subsys->model, new_model,
+					mutex_is_locked(&subsys->lock));
+	mutex_unlock(&subsys->lock);
+	up_write(&nvmet_config_sem);
+
+	/* RCU readers may still be using the old string; defer the free. */
+	kfree_rcu(old_model, rcuhead);
+
+	return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_model);
+
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
&nvmet_subsys_attr_attr_version,
&nvmet_subsys_attr_attr_serial,
+ &nvmet_subsys_attr_attr_cntlid_min,
+ &nvmet_subsys_attr_attr_cntlid_max,
+ &nvmet_subsys_attr_attr_model,
NULL,
};
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 576de773b4db..b685f99d56a1 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1289,8 +1289,11 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
if (!ctrl->sqs)
goto out_free_cqs;
+ if (subsys->cntlid_min > subsys->cntlid_max)
+ goto out_free_cqs;
+
ret = ida_simple_get(&cntlid_ida,
- NVME_CNTLID_MIN, NVME_CNTLID_MAX,
+ subsys->cntlid_min, subsys->cntlid_max,
GFP_KERNEL);
if (ret < 0) {
status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
@@ -1438,7 +1441,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
kfree(subsys);
return ERR_PTR(-ENOMEM);
}
-
+ subsys->cntlid_min = NVME_CNTLID_MIN;
+ subsys->cntlid_max = NVME_CNTLID_MAX;
kref_init(&subsys->ref);
mutex_init(&subsys->lock);
@@ -1457,6 +1461,7 @@ static void nvmet_subsys_free(struct kref *ref)
WARN_ON_ONCE(!list_empty(&subsys->namespaces));
kfree(subsys->subsysnqn);
+ kfree_rcu(subsys->model, rcuhead);
kfree(subsys);
}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 4df4ebde208a..0d54e730cbf2 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -485,7 +485,6 @@ out_destroy_admin:
out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
}
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
@@ -618,8 +617,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device,
"new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);
- nvme_get_ctrl(&ctrl->ctrl);
-
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index eda28b22a2c8..421dff3ea143 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -23,6 +23,7 @@
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
#define NVMET_NO_ERROR_LOC ((u16)-1)
+#define NVMET_DEFAULT_CTRL_MODEL "Linux"
/*
* Supported optional AENs:
@@ -202,6 +203,11 @@ struct nvmet_ctrl {
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
};
+struct nvmet_subsys_model {
+ struct rcu_head rcuhead;
+ char number[];
+};
+
struct nvmet_subsys {
enum nvme_subsys_type type;
@@ -211,6 +217,8 @@ struct nvmet_subsys {
struct list_head namespaces;
unsigned int nr_namespaces;
unsigned int max_nsid;
+ u16 cntlid_min;
+ u16 cntlid_max;
struct list_head ctrls;
@@ -227,6 +235,8 @@ struct nvmet_subsys {
struct config_group namespaces_group;
struct config_group allowed_hosts_group;
+
+ struct nvmet_subsys_model __rcu *model;
};
static inline struct nvmet_subsys *to_subsys(struct config_item *item)
@@ -279,6 +289,7 @@ struct nvmet_fabrics_ops {
struct nvmet_port *port, char *traddr);
u16 (*install_queue)(struct nvmet_sq *nvme_sq);
void (*discovery_chg)(struct nvmet_port *port);
+ u8 (*get_mdts)(const struct nvmet_ctrl *ctrl);
};
#define NVMET_MAX_INLINE_BIOVEC 8
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 37d262a65877..9e1b8c61f54e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -31,6 +31,9 @@
#define NVMET_RDMA_MAX_INLINE_SGE 4
#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE)
+/* Assume mpsmin == device_page_size == 4KB */
+#define NVMET_RDMA_MAX_MDTS 8
+
struct nvmet_rdma_cmd {
struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
struct ib_cqe cqe;
@@ -975,7 +978,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
{
struct ib_qp_init_attr qp_attr;
struct nvmet_rdma_device *ndev = queue->dev;
- int comp_vector, nr_cqe, ret, i;
+ int comp_vector, nr_cqe, ret, i, factor;
/*
* Spread the io queues across completion vectors,
@@ -1008,7 +1011,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
qp_attr.qp_type = IB_QPT_RC;
/* +1 for drain */
qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
- qp_attr.cap.max_rdma_ctxs = queue->send_queue_size;
+ factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num,
+ 1 << NVMET_RDMA_MAX_MDTS);
+ qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor;
qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
ndev->device->attrs.max_send_sge);
@@ -1602,6 +1607,11 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
}
}
+static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
+{
+ return NVMET_RDMA_MAX_MDTS;
+}
+
static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA,
@@ -1612,6 +1622,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.queue_response = nvmet_rdma_queue_response,
.delete_ctrl = nvmet_rdma_delete_ctrl,
.disc_traddr = nvmet_rdma_disc_port_addr,
+ .get_mdts = nvmet_rdma_get_mdts,
};
static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index af674fc0bb1e..f0da04e960f4 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -19,6 +19,16 @@
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
+/* Define the socket priority to use for connections where it is desirable
+ * that the NIC consider performing optimized packet processing or filtering.
+ * A non-zero value is sufficient to indicate general consideration of any
+ * possible optimization. Making it a module param allows for alternative
+ * values that may be unique for some NIC implementations.
+ */
+static int so_priority;
+module_param(so_priority, int, 0644);
+MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
+
#define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8
#define NVMET_TCP_IO_WORK_BUDGET 64
@@ -515,7 +525,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
return 1;
}
-static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
+static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
struct nvmet_tcp_queue *queue = cmd->queue;
int ret;
@@ -523,9 +533,15 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
while (cmd->cur_sg) {
struct page *page = sg_page(cmd->cur_sg);
u32 left = cmd->cur_sg->length - cmd->offset;
+ int flags = MSG_DONTWAIT;
+
+ if ((!last_in_batch && cmd->queue->send_list_len) ||
+ cmd->wbytes_done + left < cmd->req.transfer_len ||
+ queue->data_digest || !queue->nvme_sq.sqhd_disabled)
+ flags |= MSG_MORE;
ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
- left, MSG_DONTWAIT | MSG_MORE);
+ left, flags);
if (ret <= 0)
return ret;
@@ -616,7 +632,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
return 1;
}
-static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
+static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
struct nvmet_tcp_queue *queue = cmd->queue;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
@@ -626,6 +642,9 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
};
int ret;
+ if (!last_in_batch && cmd->queue->send_list_len)
+ msg.msg_flags |= MSG_MORE;
+
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0))
return ret;
@@ -660,13 +679,13 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
}
if (cmd->state == NVMET_TCP_SEND_DATA) {
- ret = nvmet_try_send_data(cmd);
+ ret = nvmet_try_send_data(cmd, last_in_batch);
if (ret <= 0)
goto done_send;
}
if (cmd->state == NVMET_TCP_SEND_DDGST) {
- ret = nvmet_try_send_ddgst(cmd);
+ ret = nvmet_try_send_ddgst(cmd, last_in_batch);
if (ret <= 0)
goto done_send;
}
@@ -788,7 +807,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
icresp->hdr.pdo = 0;
icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
- icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */
+ icresp->maxdata = cpu_to_le32(0x400000); /* arbitrary limit; NOTE(review): 0x400000 is 4M, not 16M - confirm */
icresp->cpda = 0;
if (queue->hdr_digest)
icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
@@ -1433,6 +1452,13 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
if (ret)
return ret;
+ if (so_priority > 0) {
+ ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
+ (char *)&so_priority, sizeof(so_priority));
+ if (ret)
+ return ret;
+ }
+
/* Set socket type of service */
if (inet->rcv_tos > 0) {
int tos = inet->rcv_tos;
@@ -1622,6 +1648,15 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
goto err_sock;
}
+ if (so_priority > 0) {
+ ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
+ (char *)&so_priority, sizeof(so_priority));
+ if (ret) {
+ pr_err("failed to set SO_PRIORITY sock opt %d\n", ret);
+ goto err_sock;
+ }
+ }
+
ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
sizeof(port->addr));
if (ret) {
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 8270bbf505fb..9f982c0627a0 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -306,6 +306,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
rc = of_mdiobus_register_phy(mdio, child, addr);
if (rc && rc != -ENODEV)
goto unregister;
+ break;
}
}
}
diff --git a/drivers/pinctrl/cirrus/pinctrl-madera-core.c b/drivers/pinctrl/cirrus/pinctrl-madera-core.c
index 7b6409ef553c..dce2626384a9 100644
--- a/drivers/pinctrl/cirrus/pinctrl-madera-core.c
+++ b/drivers/pinctrl/cirrus/pinctrl-madera-core.c
@@ -1073,13 +1073,26 @@ static int madera_pin_probe(struct platform_device *pdev)
return ret;
}
+ platform_set_drvdata(pdev, priv);
+
dev_dbg(priv->dev, "pinctrl probed ok\n");
return 0;
}
+static int madera_pin_remove(struct platform_device *pdev)
+{
+ struct madera_pin_private *priv = platform_get_drvdata(pdev);
+
+ if (priv->madera->pdata.gpio_configs)
+ pinctrl_unregister_mappings(priv->madera->pdata.gpio_configs);
+
+ return 0;
+}
+
static struct platform_driver madera_pin_driver = {
.probe = madera_pin_probe,
+ .remove = madera_pin_remove,
.driver = {
.name = "madera-pinctrl",
},
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 446d84fe0e31..f23c55e22195 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -2021,7 +2021,6 @@ static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev)
return PTR_ERR(pctldev->p);
}
- kref_get(&pctldev->p->users);
pctldev->hog_default =
pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT);
if (IS_ERR(pctldev->hog_default)) {
diff --git a/drivers/pinctrl/freescale/pinctrl-scu.c b/drivers/pinctrl/freescale/pinctrl-scu.c
index 73bf1d9f9cc6..23cf04bdfc55 100644
--- a/drivers/pinctrl/freescale/pinctrl-scu.c
+++ b/drivers/pinctrl/freescale/pinctrl-scu.c
@@ -23,12 +23,12 @@ struct imx_sc_msg_req_pad_set {
struct imx_sc_rpc_msg hdr;
u32 val;
u16 pad;
-} __packed;
+} __packed __aligned(4);
struct imx_sc_msg_req_pad_get {
struct imx_sc_rpc_msg hdr;
u16 pad;
-} __packed;
+} __packed __aligned(4);
struct imx_sc_msg_resp_pad_get {
struct imx_sc_rpc_msg hdr;
diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c
index 1b6e8646700f..2ac921c83da9 100644
--- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c
@@ -147,8 +147,8 @@ static const unsigned int sdio_d0_pins[] = { GPIOX_0 };
static const unsigned int sdio_d1_pins[] = { GPIOX_1 };
static const unsigned int sdio_d2_pins[] = { GPIOX_2 };
static const unsigned int sdio_d3_pins[] = { GPIOX_3 };
-static const unsigned int sdio_cmd_pins[] = { GPIOX_4 };
-static const unsigned int sdio_clk_pins[] = { GPIOX_5 };
+static const unsigned int sdio_clk_pins[] = { GPIOX_4 };
+static const unsigned int sdio_cmd_pins[] = { GPIOX_5 };
static const unsigned int sdio_irq_pins[] = { GPIOX_7 };
static const unsigned int nand_ce0_pins[] = { BOOT_8 };
diff --git a/drivers/pinctrl/pinctrl-falcon.c b/drivers/pinctrl/pinctrl-falcon.c
index a454f57c264e..62c02b969327 100644
--- a/drivers/pinctrl/pinctrl-falcon.c
+++ b/drivers/pinctrl/pinctrl-falcon.c
@@ -451,7 +451,7 @@ static int pinctrl_falcon_probe(struct platform_device *pdev)
falcon_info.clk[*bank] = clk_get(&ppdev->dev, NULL);
if (IS_ERR(falcon_info.clk[*bank])) {
dev_err(&ppdev->dev, "failed to get clock\n");
- of_node_put(np)
+ of_node_put(np);
return PTR_ERR(falcon_info.clk[*bank]);
}
falcon_info.membase[*bank] = devm_ioremap_resource(&pdev->dev,
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 9a8daa256a32..1a948c3f54b7 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -1104,7 +1104,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
pctrl->irq_chip.irq_mask = msm_gpio_irq_mask;
pctrl->irq_chip.irq_unmask = msm_gpio_irq_unmask;
pctrl->irq_chip.irq_ack = msm_gpio_irq_ack;
- pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent;
pctrl->irq_chip.irq_set_type = msm_gpio_irq_set_type;
pctrl->irq_chip.irq_set_wake = msm_gpio_irq_set_wake;
pctrl->irq_chip.irq_request_resources = msm_gpio_irq_reqres;
@@ -1118,7 +1117,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
if (!chip->irq.parent_domain)
return -EPROBE_DEFER;
chip->irq.child_to_parent_hwirq = msm_gpio_wakeirq;
-
+ pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent;
/*
* Let's skip handling the GPIOs, if the parent irqchip
* is handling the direct connect IRQ of the GPIO.
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
index fba1d41d20ec..338a15d08629 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
@@ -794,7 +794,7 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev)
girq->fwnode = of_node_to_fwnode(pctrl->dev->of_node);
girq->parent_domain = parent_domain;
girq->child_to_parent_hwirq = pm8xxx_child_to_parent_hwirq;
- girq->populate_parent_alloc_arg = gpiochip_populate_parent_fwspec_fourcell;
+ girq->populate_parent_alloc_arg = gpiochip_populate_parent_fwspec_twocell;
girq->child_offset_to_irq = pm8xxx_child_offset_to_irq;
girq->child_irq_domain_ops.translate = pm8xxx_domain_translate;
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 34c8b6c7e095..8e503881d9d6 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -327,6 +327,7 @@ config RTC_DRV_MAX6900
config RTC_DRV_MAX8907
tristate "Maxim MAX8907"
depends on MFD_MAX8907 || COMPILE_TEST
+ select REGMAP_IRQ
help
If you say yes here you will get support for the
RTC of Maxim MAX8907 PMIC.
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6cca72782af6..cf87eb27879f 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -178,6 +178,8 @@ struct dasd_block *dasd_alloc_block(void)
(unsigned long) block);
INIT_LIST_HEAD(&block->ccw_queue);
spin_lock_init(&block->queue_lock);
+ INIT_LIST_HEAD(&block->format_list);
+ spin_lock_init(&block->format_lock);
timer_setup(&block->timer, dasd_block_timeout, 0);
spin_lock_init(&block->profile.lock);
@@ -1779,20 +1781,26 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
if (dasd_ese_needs_format(cqr->block, irb)) {
if (rq_data_dir((struct request *)cqr->callback_data) == READ) {
- device->discipline->ese_read(cqr);
+ device->discipline->ese_read(cqr, irb);
cqr->status = DASD_CQR_SUCCESS;
cqr->stopclk = now;
dasd_device_clear_timer(device);
dasd_schedule_device_bh(device);
return;
}
- fcqr = device->discipline->ese_format(device, cqr);
+ fcqr = device->discipline->ese_format(device, cqr, irb);
if (IS_ERR(fcqr)) {
+ if (PTR_ERR(fcqr) == -EINVAL) {
+ cqr->status = DASD_CQR_ERROR;
+ return;
+ }
/*
* If we can't format now, let the request go
* one extra round. Maybe we can format later.
*/
cqr->status = DASD_CQR_QUEUED;
+ dasd_schedule_device_bh(device);
+ return;
} else {
fcqr->status = DASD_CQR_QUEUED;
cqr->status = DASD_CQR_QUEUED;
@@ -2748,11 +2756,13 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
{
struct request *req;
blk_status_t error = BLK_STS_OK;
+ unsigned int proc_bytes;
int status;
req = (struct request *) cqr->callback_data;
dasd_profile_end(cqr->block, cqr, req);
+ proc_bytes = cqr->proc_bytes;
status = cqr->block->base->discipline->free_cp(cqr, req);
if (status < 0)
error = errno_to_blk_status(status);
@@ -2783,7 +2793,18 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
blk_mq_end_request(req, error);
blk_mq_run_hw_queues(req->q, true);
} else {
- blk_mq_complete_request(req);
+ /*
+ * Partially completed requests can happen with ESE devices: a
+ * read may hit an NRF error. Complete the proc_bytes that were
+ * processed and requeue the request for the remainder.
+ */
+ if (proc_bytes) {
+ /* blk_update_request() takes the byte count to complete */
+ blk_update_request(req, BLK_STS_OK, proc_bytes);
+ blk_mq_requeue_request(req, true);
+ } else {
+ blk_mq_complete_request(req);
+ }
}
}
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index a28b9ff82378..ad44d22e8859 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -207,6 +207,45 @@ static void set_ch_t(struct ch_t *geo, __u32 cyl, __u8 head)
geo->head |= head;
}
+/*
+ * Calculate the failing track from the sense data; the decoding depends on
+ * whether it is an EAV device or not.
+ */
+static int dasd_eckd_track_from_irb(struct irb *irb, struct dasd_device *device,
+ sector_t *track)
+{
+ struct dasd_eckd_private *private = device->private;
+ u8 *sense = NULL;
+ u32 cyl;
+ u8 head;
+
+ sense = dasd_get_sense(irb);
+ if (!sense) {
+ DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+ "ESE error no sense data\n");
+ return -EINVAL;
+ }
+ if (!(sense[27] & DASD_SENSE_BIT_2)) {
+ DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+ "ESE error no valid track data\n");
+ return -EINVAL;
+ }
+
+ if (sense[27] & DASD_SENSE_BIT_3) {
+ /* enhanced addressing */
+ cyl = sense[30] << 20;
+ cyl |= (sense[31] & 0xF0) << 12;
+ cyl |= sense[28] << 8;
+ cyl |= sense[29];
+ } else {
+ cyl = sense[29] << 8;
+ cyl |= sense[30];
+ }
+ head = sense[31] & 0x0F;
+ *track = cyl * private->rdc_data.trk_per_cyl + head;
+ return 0;
+}
+
static int set_timestamp(struct ccw1 *ccw, struct DE_eckd_data *data,
struct dasd_device *device)
{
@@ -2986,6 +3025,37 @@ static int dasd_eckd_format_device(struct dasd_device *base,
0, NULL);
}
+static bool test_and_set_format_track(struct dasd_format_entry *to_format,
+ struct dasd_block *block)
+{
+ struct dasd_format_entry *format;
+ unsigned long flags;
+ bool rc = false;
+
+ spin_lock_irqsave(&block->format_lock, flags);
+ list_for_each_entry(format, &block->format_list, list) {
+ if (format->track == to_format->track) {
+ rc = true;
+ goto out;
+ }
+ }
+ list_add_tail(&to_format->list, &block->format_list);
+
+out:
+ spin_unlock_irqrestore(&block->format_lock, flags);
+ return rc;
+}
+
+static void clear_format_track(struct dasd_format_entry *format,
+ struct dasd_block *block)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&block->format_lock, flags);
+ list_del_init(&format->list);
+ spin_unlock_irqrestore(&block->format_lock, flags);
+}
+
/*
* Callback function to free ESE format requests.
*/
@@ -2993,15 +3063,19 @@ static void dasd_eckd_ese_format_cb(struct dasd_ccw_req *cqr, void *data)
{
struct dasd_device *device = cqr->startdev;
struct dasd_eckd_private *private = device->private;
+ struct dasd_format_entry *format = data;
+ clear_format_track(format, cqr->basedev->block);
private->count--;
dasd_ffree_request(cqr, device);
}
static struct dasd_ccw_req *
-dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
+dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
+ struct irb *irb)
{
struct dasd_eckd_private *private;
+ struct dasd_format_entry *format;
struct format_data_t fdata;
unsigned int recs_per_trk;
struct dasd_ccw_req *fcqr;
@@ -3011,23 +3085,39 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
struct request *req;
sector_t first_trk;
sector_t last_trk;
+ sector_t curr_trk;
int rc;
req = cqr->callback_data;
- base = cqr->block->base;
+ block = cqr->block;
+ base = block->base;
private = base->private;
- block = base->block;
blksize = block->bp_block;
recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
+ format = &startdev->format_entry;
first_trk = blk_rq_pos(req) >> block->s2b_shift;
sector_div(first_trk, recs_per_trk);
last_trk =
(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
sector_div(last_trk, recs_per_trk);
+ rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
+ if (rc)
+ return ERR_PTR(rc);
- fdata.start_unit = first_trk;
- fdata.stop_unit = last_trk;
+ if (curr_trk < first_trk || curr_trk > last_trk) {
+ DBF_DEV_EVENT(DBF_WARNING, startdev,
+ "ESE error track %llu not within range %llu - %llu\n",
+ curr_trk, first_trk, last_trk);
+ return ERR_PTR(-EINVAL);
+ }
+ format->track = curr_trk;
+ /* test if the track is already being formatted by another thread */
+ if (test_and_set_format_track(format, block))
+ return ERR_PTR(-EEXIST);
+
+ fdata.start_unit = curr_trk;
+ fdata.stop_unit = curr_trk;
fdata.blksize = blksize;
fdata.intensity = private->uses_cdl ? DASD_FMT_INT_COMPAT : 0;
@@ -3044,6 +3134,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
return fcqr;
fcqr->callback = dasd_eckd_ese_format_cb;
+ fcqr->callback_data = (void *) format;
return fcqr;
}
@@ -3051,29 +3142,87 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
/*
* When data is read from an unformatted area of an ESE volume, this function
* returns zeroed data and thereby mimics a read of zero data.
+ *
+ * The first unformatted track is the one that got the NRF error; its address
+ * is encoded in the sense data. Tracks before it returned valid data and are
+ * left untouched. Whether later tracks are formatted is unknown: if the
+ * request extends past the NRF track, the bytes handled so far are stored in
+ * cqr->proc_bytes and __dasd_cleanup_cqr() returns the rest to the block layer.
+ *
+ * Returns 0 on success, -EINVAL if the sense data names no usable track.
*/
-static void dasd_eckd_ese_read(struct dasd_ccw_req *cqr)
+static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb)
{
+ struct dasd_eckd_private *private;
+ sector_t first_trk, last_trk;
+ sector_t first_blk, last_blk;
unsigned int blksize, off;
+ unsigned int recs_per_trk;
struct dasd_device *base;
struct req_iterator iter;
+ struct dasd_block *block;
+ unsigned int skip_block;
+ unsigned int blk_count;
struct request *req;
struct bio_vec bv;
+ sector_t curr_trk;
+ sector_t end_blk;
char *dst;
+ int rc;
req = (struct request *) cqr->callback_data;
base = cqr->block->base;
blksize = base->block->bp_block;
+ block = cqr->block;
+ private = base->private;
+ skip_block = 0;
+ blk_count = 0;
+
+ recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
+ first_trk = first_blk = blk_rq_pos(req) >> block->s2b_shift;
+ sector_div(first_trk, recs_per_trk);
+ last_trk = last_blk =
+ (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
+ sector_div(last_trk, recs_per_trk);
+ rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
+ if (rc)
+ return rc;
+
+ /* sanity check if the current track from sense data is valid */
+ if (curr_trk < first_trk || curr_trk > last_trk) {
+ DBF_DEV_EVENT(DBF_WARNING, base,
+ "ESE error track %llu not within range %llu - %llu\n",
+ curr_trk, first_trk, last_trk);
+ return -EINVAL;
+ }
+
+ /*
+ * if the NRF error was not on the first track, the valid blocks ahead
+ * of the failing track have to be skipped
+ */
+ if (curr_trk != first_trk)
+ skip_block = curr_trk * recs_per_trk - first_blk;
+
+ /* we have no information beyond the current track */
+ end_blk = (curr_trk + 1) * recs_per_trk;
rq_for_each_segment(bv, req, iter) {
dst = page_address(bv.bv_page) + bv.bv_offset;
for (off = 0; off < bv.bv_len; off += blksize) {
- if (dst && rq_data_dir(req) == READ) {
- dst += off;
- memset(dst, 0, blksize);
+ if (first_blk + blk_count >= end_blk) {
+ cqr->proc_bytes = blk_count * blksize;
+ return 0;
+ }
+ /* skip valid blocks, zero the rest at dst + off */
+ if (skip_block) {
+ skip_block--;
+ } else if (dst) {
+ memset(dst + off, 0, blksize);
}
+ blk_count++;
}
}
+ return 0;
}
/*
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 91c9f9586e0f..fa552f9f1666 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -187,6 +187,7 @@ struct dasd_ccw_req {
void (*callback)(struct dasd_ccw_req *, void *data);
void *callback_data;
+ unsigned int proc_bytes; /* bytes for partial completion */
};
/*
@@ -387,8 +388,9 @@ struct dasd_discipline {
int (*ext_pool_warn_thrshld)(struct dasd_device *);
int (*ext_pool_oos)(struct dasd_device *);
int (*ext_pool_exhaust)(struct dasd_device *, struct dasd_ccw_req *);
- struct dasd_ccw_req *(*ese_format)(struct dasd_device *, struct dasd_ccw_req *);
- void (*ese_read)(struct dasd_ccw_req *);
+ struct dasd_ccw_req *(*ese_format)(struct dasd_device *,
+ struct dasd_ccw_req *, struct irb *);
+ int (*ese_read)(struct dasd_ccw_req *, struct irb *);
};
extern struct dasd_discipline *dasd_diag_discipline_pointer;
@@ -474,6 +476,11 @@ struct dasd_profile {
spinlock_t lock;
};
+struct dasd_format_entry {
+ struct list_head list;
+ sector_t track;
+};
+
struct dasd_device {
/* Block device stuff. */
struct dasd_block *block;
@@ -539,6 +546,7 @@ struct dasd_device {
struct dentry *debugfs_dentry;
struct dentry *hosts_dentry;
struct dasd_profile profile;
+ struct dasd_format_entry format_entry;
};
struct dasd_block {
@@ -564,6 +572,9 @@ struct dasd_block {
struct dentry *debugfs_dentry;
struct dasd_profile profile;
+
+ struct list_head format_list;
+ spinlock_t format_lock;
};
struct dasd_attention_data {
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 63502ca537eb..80d22290f268 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -636,10 +636,10 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
}
dev_info->gd->major = dcssblk_major;
dev_info->gd->fops = &dcssblk_devops;
- dev_info->dcssblk_queue = blk_alloc_queue(GFP_KERNEL);
+ dev_info->dcssblk_queue =
+ blk_alloc_queue(dcssblk_make_request, NUMA_NO_NODE);
dev_info->gd->queue = dev_info->dcssblk_queue;
dev_info->gd->private_data = dev_info;
- blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->dcssblk_queue);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 3df5d68d09f0..45a04daec89e 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -343,14 +343,14 @@ static int __init xpram_setup_blkdev(void)
xpram_disks[i] = alloc_disk(1);
if (!xpram_disks[i])
goto out;
- xpram_queues[i] = blk_alloc_queue(GFP_KERNEL);
+ xpram_queues[i] = blk_alloc_queue(xpram_make_request,
+ NUMA_NO_NODE);
if (!xpram_queues[i]) {
put_disk(xpram_disks[i]);
goto out;
}
blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_queues[i]);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]);
- blk_queue_make_request(xpram_queues[i], xpram_make_request);
blk_queue_logical_block_size(xpram_queues[i], 4096);
}
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 9575a627a1e1..468cada49e72 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -369,7 +369,7 @@ enum qeth_qdio_info_states {
struct qeth_buffer_pool_entry {
struct list_head list;
struct list_head init_list;
- void *elements[QDIO_MAX_ELEMENTS_PER_BUFFER];
+ struct page *elements[QDIO_MAX_ELEMENTS_PER_BUFFER];
};
struct qeth_qdio_buffer_pool {
@@ -983,7 +983,7 @@ extern const struct attribute_group qeth_device_blkt_group;
extern const struct device_type qeth_generic_devtype;
const char *qeth_get_cardname_short(struct qeth_card *);
-int qeth_realloc_buffer_pool(struct qeth_card *, int);
+int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count);
int qeth_core_load_discipline(struct qeth_card *, enum qeth_discipline_id);
void qeth_core_free_discipline(struct qeth_card *);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 8ca85c8a01a1..6d3f2f14b414 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -65,7 +65,6 @@ static struct lock_class_key qdio_out_skb_queue_key;
static void qeth_issue_next_read_cb(struct qeth_card *card,
struct qeth_cmd_buffer *iob,
unsigned int data_length);
-static void qeth_free_buffer_pool(struct qeth_card *);
static int qeth_qdio_establish(struct qeth_card *);
static void qeth_free_qdio_queues(struct qeth_card *card);
static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
@@ -212,49 +211,121 @@ void qeth_clear_working_pool_list(struct qeth_card *card)
}
EXPORT_SYMBOL_GPL(qeth_clear_working_pool_list);
+static void qeth_free_pool_entry(struct qeth_buffer_pool_entry *entry)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(entry->elements); i++) {
+ if (entry->elements[i])
+ __free_page(entry->elements[i]);
+ }
+
+ kfree(entry);
+}
+
+static void qeth_free_buffer_pool(struct qeth_card *card)
+{
+ struct qeth_buffer_pool_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &card->qdio.init_pool.entry_list,
+ init_list) {
+ list_del(&entry->init_list);
+ qeth_free_pool_entry(entry);
+ }
+}
+
+static struct qeth_buffer_pool_entry *qeth_alloc_pool_entry(unsigned int pages)
+{
+ struct qeth_buffer_pool_entry *entry;
+ unsigned int i;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ for (i = 0; i < pages; i++) {
+ entry->elements[i] = alloc_page(GFP_KERNEL);
+
+ if (!entry->elements[i]) {
+ qeth_free_pool_entry(entry);
+ return NULL;
+ }
+ }
+
+ return entry;
+}
+
static int qeth_alloc_buffer_pool(struct qeth_card *card)
{
- struct qeth_buffer_pool_entry *pool_entry;
- void *ptr;
- int i, j;
+ unsigned int buf_elements = QETH_MAX_BUFFER_ELEMENTS(card);
+ unsigned int i;
QETH_CARD_TEXT(card, 5, "alocpool");
for (i = 0; i < card->qdio.init_pool.buf_count; ++i) {
- pool_entry = kzalloc(sizeof(*pool_entry), GFP_KERNEL);
- if (!pool_entry) {
+ struct qeth_buffer_pool_entry *entry;
+
+ entry = qeth_alloc_pool_entry(buf_elements);
+ if (!entry) {
qeth_free_buffer_pool(card);
return -ENOMEM;
}
- for (j = 0; j < QETH_MAX_BUFFER_ELEMENTS(card); ++j) {
- ptr = (void *) __get_free_page(GFP_KERNEL);
- if (!ptr) {
- while (j > 0)
- free_page((unsigned long)
- pool_entry->elements[--j]);
- kfree(pool_entry);
- qeth_free_buffer_pool(card);
- return -ENOMEM;
- }
- pool_entry->elements[j] = ptr;
- }
- list_add(&pool_entry->init_list,
- &card->qdio.init_pool.entry_list);
+
+ list_add(&entry->init_list, &card->qdio.init_pool.entry_list);
}
return 0;
}
-int qeth_realloc_buffer_pool(struct qeth_card *card, int bufcnt)
+int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count)
{
+ unsigned int buf_elements = QETH_MAX_BUFFER_ELEMENTS(card);
+ struct qeth_qdio_buffer_pool *pool = &card->qdio.init_pool;
+ struct qeth_buffer_pool_entry *entry, *tmp;
+ int delta = count - pool->buf_count;
+ LIST_HEAD(entries);
+
QETH_CARD_TEXT(card, 2, "realcbp");
- /* TODO: steel/add buffers from/to a running card's buffer pool (?) */
- qeth_clear_working_pool_list(card);
- qeth_free_buffer_pool(card);
- card->qdio.in_buf_pool.buf_count = bufcnt;
- card->qdio.init_pool.buf_count = bufcnt;
- return qeth_alloc_buffer_pool(card);
+ /* Defer until queue is allocated: */
+ if (!card->qdio.in_q)
+ goto out;
+
+ /* Remove entries from the pool: */
+ while (delta < 0) {
+ entry = list_first_entry(&pool->entry_list,
+ struct qeth_buffer_pool_entry,
+ init_list);
+ list_del(&entry->init_list);
+ qeth_free_pool_entry(entry);
+
+ delta++;
+ }
+
+ /* Allocate additional entries: */
+ while (delta > 0) {
+ entry = qeth_alloc_pool_entry(buf_elements);
+ if (!entry) {
+ list_for_each_entry_safe(entry, tmp, &entries,
+ init_list) {
+ list_del(&entry->init_list);
+ qeth_free_pool_entry(entry);
+ }
+
+ return -ENOMEM;
+ }
+
+ list_add(&entry->init_list, &entries);
+
+ delta--;
+ }
+
+ list_splice(&entries, &pool->entry_list);
+
+out:
+ card->qdio.in_buf_pool.buf_count = count;
+ pool->buf_count = count;
+ return 0;
}
-EXPORT_SYMBOL_GPL(qeth_realloc_buffer_pool);
+EXPORT_SYMBOL_GPL(qeth_resize_buffer_pool);
static void qeth_free_qdio_queue(struct qeth_qdio_q *q)
{
@@ -1170,19 +1241,6 @@ void qeth_drain_output_queues(struct qeth_card *card)
}
EXPORT_SYMBOL_GPL(qeth_drain_output_queues);
-static void qeth_free_buffer_pool(struct qeth_card *card)
-{
- struct qeth_buffer_pool_entry *pool_entry, *tmp;
- int i = 0;
- list_for_each_entry_safe(pool_entry, tmp,
- &card->qdio.init_pool.entry_list, init_list){
- for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i)
- free_page((unsigned long)pool_entry->elements[i]);
- list_del(&pool_entry->init_list);
- kfree(pool_entry);
- }
-}
-
static int qeth_osa_set_output_queues(struct qeth_card *card, bool single)
{
unsigned int count = single ? 1 : card->dev->num_tx_queues;
@@ -1204,7 +1262,6 @@ static int qeth_osa_set_output_queues(struct qeth_card *card, bool single)
if (count == 1)
dev_info(&card->gdev->dev, "Priority Queueing not supported\n");
- card->qdio.default_out_queue = single ? 0 : QETH_DEFAULT_QUEUE;
card->qdio.no_out_queues = count;
return 0;
}
@@ -2393,7 +2450,6 @@ static void qeth_free_qdio_queues(struct qeth_card *card)
return;
qeth_free_cq(card);
- cancel_delayed_work_sync(&card->buffer_reclaim_work);
for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
if (card->qdio.in_q->bufs[j].rx_skb)
dev_kfree_skb_any(card->qdio.in_q->bufs[j].rx_skb);
@@ -2575,7 +2631,6 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
struct list_head *plh;
struct qeth_buffer_pool_entry *entry;
int i, free;
- struct page *page;
if (list_empty(&card->qdio.in_buf_pool.entry_list))
return NULL;
@@ -2584,7 +2639,7 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
entry = list_entry(plh, struct qeth_buffer_pool_entry, list);
free = 1;
for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) {
- if (page_count(virt_to_page(entry->elements[i])) > 1) {
+ if (page_count(entry->elements[i]) > 1) {
free = 0;
break;
}
@@ -2599,15 +2654,15 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
entry = list_entry(card->qdio.in_buf_pool.entry_list.next,
struct qeth_buffer_pool_entry, list);
for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) {
- if (page_count(virt_to_page(entry->elements[i])) > 1) {
- page = alloc_page(GFP_ATOMIC);
- if (!page) {
+ if (page_count(entry->elements[i]) > 1) {
+ struct page *page = alloc_page(GFP_ATOMIC);
+
+ if (!page)
return NULL;
- } else {
- free_page((unsigned long)entry->elements[i]);
- entry->elements[i] = page_address(page);
- QETH_CARD_STAT_INC(card, rx_sg_alloc_page);
- }
+
+ __free_page(entry->elements[i]);
+ entry->elements[i] = page;
+ QETH_CARD_STAT_INC(card, rx_sg_alloc_page);
}
}
list_del_init(&entry->list);
@@ -2625,12 +2680,12 @@ static int qeth_init_input_buffer(struct qeth_card *card,
ETH_HLEN +
sizeof(struct ipv6hdr));
if (!buf->rx_skb)
- return 1;
+ return -ENOMEM;
}
pool_entry = qeth_find_free_buffer_pool_entry(card);
if (!pool_entry)
- return 1;
+ return -ENOBUFS;
/*
* since the buffer is accessed only from the input_tasklet
@@ -2643,7 +2698,7 @@ static int qeth_init_input_buffer(struct qeth_card *card,
for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) {
buf->buffer->element[i].length = PAGE_SIZE;
buf->buffer->element[i].addr =
- virt_to_phys(pool_entry->elements[i]);
+ page_to_phys(pool_entry->elements[i]);
if (i == QETH_MAX_BUFFER_ELEMENTS(card) - 1)
buf->buffer->element[i].eflags = SBAL_EFLAGS_LAST_ENTRY;
else
@@ -2675,10 +2730,15 @@ static int qeth_init_qdio_queues(struct qeth_card *card)
/* inbound queue */
qdio_reset_buffers(card->qdio.in_q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
memset(&card->rx, 0, sizeof(struct qeth_rx));
+
qeth_initialize_working_pool_list(card);
/*give only as many buffers to hardware as we have buffer pool entries*/
- for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i)
- qeth_init_input_buffer(card, &card->qdio.in_q->bufs[i]);
+ for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; i++) {
+ rc = qeth_init_input_buffer(card, &card->qdio.in_q->bufs[i]);
+ if (rc)
+ return rc;
+ }
+
card->qdio.in_q->next_buf_to_init =
card->qdio.in_buf_pool.buf_count - 1;
rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0,
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 2bd9993aa60b..78cae61bc924 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -247,8 +247,8 @@ static ssize_t qeth_dev_bufcnt_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
+ unsigned int cnt;
char *tmp;
- int cnt, old_cnt;
int rc = 0;
mutex_lock(&card->conf_mutex);
@@ -257,13 +257,12 @@ static ssize_t qeth_dev_bufcnt_store(struct device *dev,
goto out;
}
- old_cnt = card->qdio.in_buf_pool.buf_count;
cnt = simple_strtoul(buf, &tmp, 10);
cnt = (cnt < QETH_IN_BUF_COUNT_MIN) ? QETH_IN_BUF_COUNT_MIN :
((cnt > QETH_IN_BUF_COUNT_MAX) ? QETH_IN_BUF_COUNT_MAX : cnt);
- if (old_cnt != cnt) {
- rc = qeth_realloc_buffer_pool(card, cnt);
- }
+
+ rc = qeth_resize_buffer_pool(card, cnt);
+
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 9972d96820f3..8fb29371788b 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -284,6 +284,7 @@ static void qeth_l2_stop_card(struct qeth_card *card)
if (card->state == CARD_STATE_SOFTSETUP) {
qeth_clear_ipacmd_list(card);
qeth_drain_output_queues(card);
+ cancel_delayed_work_sync(&card->buffer_reclaim_work);
card->state = CARD_STATE_DOWN;
}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 317d56647a4a..82f800d1d7b3 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1178,6 +1178,7 @@ static void qeth_l3_stop_card(struct qeth_card *card)
qeth_l3_clear_ip_htable(card, 1);
qeth_clear_ipacmd_list(card);
qeth_drain_output_queues(card);
+ cancel_delayed_work_sync(&card->buffer_reclaim_work);
card->state = CARD_STATE_DOWN;
}
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index 29f2517d2a31..a3d1c3bdfadb 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -206,12 +206,11 @@ static ssize_t qeth_l3_dev_sniffer_store(struct device *dev,
qdio_get_ssqd_desc(CARD_DDEV(card), &card->ssqd);
if (card->ssqd.qdioac2 & CHSC_AC2_SNIFFER_AVAILABLE) {
card->options.sniffer = i;
- if (card->qdio.init_pool.buf_count !=
- QETH_IN_BUF_COUNT_MAX)
- qeth_realloc_buffer_pool(card,
- QETH_IN_BUF_COUNT_MAX);
- } else
+ qeth_resize_buffer_pool(card, QETH_IN_BUF_COUNT_MAX);
+ } else {
rc = -EPERM;
+ }
+
break;
default:
rc = -EINVAL;
diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 3170b295a5da..186259417449 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -36,6 +36,7 @@
#include <linux/jiffies.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
+#include <linux/msdos_partition.h>
#include <scsi/scsicam.h>
#include <asm/dma.h>
@@ -3410,9 +3411,10 @@ static int blogic_diskparam(struct scsi_device *sdev, struct block_device *dev,
a partition table entry whose end_head matches one of the
standard BusLogic geometry translations (64/32, 128/32, or 255/63).
*/
- if (*(unsigned short *) (buf + 64) == 0xAA55) {
- struct partition *part1_entry = (struct partition *) buf;
- struct partition *part_entry = part1_entry;
+ if (*(unsigned short *) (buf + 64) == MSDOS_LABEL_MAGIC) {
+ struct msdos_partition *part1_entry =
+ (struct msdos_partition *)buf;
+ struct msdos_partition *part_entry = part1_entry;
int saved_cyl = diskparam->cylinders, part_no;
unsigned char part_end_head = 0, part_end_sector = 0;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index a7881f8eb05e..1b6eaf8da5fa 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -989,6 +989,7 @@ config SCSI_SYM53C8XX_MMIO
config SCSI_IPR
tristate "IBM Power Linux RAID adapter support"
depends on PCI && SCSI && ATA
+ select SATA_HOST
select FW_LOADER
select IRQ_POLL
select SGL_ALLOC
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index ee6bc2f9b80a..0443b74390cf 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -33,6 +33,7 @@
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/kthread.h>
+#include <linux/msdos_partition.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -328,9 +329,9 @@ static int aac_biosparm(struct scsi_device *sdev, struct block_device *bdev,
buf = scsi_bios_ptable(bdev);
if (!buf)
return 0;
- if(*(__le16 *)(buf + 0x40) == cpu_to_le16(0xaa55)) {
- struct partition *first = (struct partition * )buf;
- struct partition *entry = first;
+ if (*(__le16 *)(buf + 0x40) == cpu_to_le16(MSDOS_LABEL_MAGIC)) {
+ struct msdos_partition *first = (struct msdos_partition *)buf;
+ struct msdos_partition *entry = first;
int saved_cylinders = param->cylinders;
int num;
unsigned char end_head, end_sec;
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 57992519384e..dc4fe334efd0 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -723,24 +723,17 @@ static int
ahd_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev,
sector_t capacity, int geom[])
{
- uint8_t *bh;
int heads;
int sectors;
int cylinders;
- int ret;
int extended;
struct ahd_softc *ahd;
ahd = *((struct ahd_softc **)sdev->host->hostdata);
- bh = scsi_bios_ptable(bdev);
- if (bh) {
- ret = scsi_partsize(bh, capacity,
- &geom[2], &geom[0], &geom[1]);
- kfree(bh);
- if (ret != -1)
- return (ret);
- }
+ if (scsi_partsize(bdev, capacity, geom))
+ return 0;
+
heads = 64;
sectors = 32;
cylinders = aic_sector_div(capacity, heads, sectors);
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index d5c4a0d23706..2edfa0594f18 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -695,11 +695,9 @@ static int
ahc_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev,
sector_t capacity, int geom[])
{
- uint8_t *bh;
int heads;
int sectors;
int cylinders;
- int ret;
int extended;
struct ahc_softc *ahc;
u_int channel;
@@ -707,14 +705,9 @@ ahc_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev,
ahc = *((struct ahc_softc **)sdev->host->hostdata);
channel = sdev_channel(sdev);
- bh = scsi_bios_ptable(bdev);
- if (bh) {
- ret = scsi_partsize(bh, capacity,
- &geom[2], &geom[0], &geom[1]);
- kfree(bh);
- if (ret != -1)
- return (ret);
- }
+ if (scsi_partsize(bdev, capacity, geom))
+ return 0;
+
heads = 64;
sectors = 32;
cylinders = aic_sector_div(capacity, heads, sectors);
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index 40dc8eac0e3a..c2c79a37a9ef 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -353,16 +353,11 @@ static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id)
static int arcmsr_bios_param(struct scsi_device *sdev,
struct block_device *bdev, sector_t capacity, int *geom)
{
- int ret, heads, sectors, cylinders, total_capacity;
- unsigned char *buffer;/* return copy of block device's partition table */
+ int heads, sectors, cylinders, total_capacity;
+
+ if (scsi_partsize(bdev, capacity, geom))
+ return 0;
- buffer = scsi_bios_ptable(bdev);
- if (buffer) {
- ret = scsi_partsize(buffer, capacity, &geom[2], &geom[0], &geom[1]);
- kfree(buffer);
- if (ret != -1)
- return ret;
- }
total_capacity = capacity;
heads = 64;
sectors = 32;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index ae45cbe98ae2..cd8db1349871 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -9950,6 +9950,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
ioa_cfg->max_devs_supported = ipr_max_devs;
if (ioa_cfg->sis64) {
+ host->max_channel = IPR_MAX_SIS64_BUSES;
host->max_id = IPR_MAX_SIS64_TARGETS_PER_BUS;
host->max_lun = IPR_MAX_SIS64_LUNS_PER_TARGET;
if (ipr_max_devs > IPR_MAX_SIS64_DEVS)
@@ -9958,6 +9959,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
+ ((sizeof(struct ipr_config_table_entry64)
* ioa_cfg->max_devs_supported)));
} else {
+ host->max_channel = IPR_VSET_BUS;
host->max_id = IPR_MAX_NUM_TARGETS_PER_BUS;
host->max_lun = IPR_MAX_NUM_LUNS_PER_TARGET;
if (ipr_max_devs > IPR_MAX_PHYSICAL_DEVS)
@@ -9967,7 +9969,6 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
* ioa_cfg->max_devs_supported)));
}
- host->max_channel = IPR_VSET_BUS;
host->unique_id = host->host_no;
host->max_cmd_len = IPR_MAX_CDB_LEN;
host->can_queue = ioa_cfg->max_cmds;
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index a67baeb36d1f..b97aa9ac2ffe 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -1300,6 +1300,7 @@ struct ipr_resource_entry {
#define IPR_ARRAY_VIRTUAL_BUS 0x1
#define IPR_VSET_VIRTUAL_BUS 0x2
#define IPR_IOAFP_VIRTUAL_BUS 0x3
+#define IPR_MAX_SIS64_BUSES 0x4
#define IPR_GET_RES_PHYS_LOC(res) \
(((res)->bus << 24) | ((res)->target << 8) | (res)->lun)
diff --git a/drivers/scsi/libsas/Kconfig b/drivers/scsi/libsas/Kconfig
index 5c6a5eff2f8e..052ee3a26f6e 100644
--- a/drivers/scsi/libsas/Kconfig
+++ b/drivers/scsi/libsas/Kconfig
@@ -19,6 +19,7 @@ config SCSI_SAS_ATA
bool "ATA support for libsas (requires libata)"
depends on SCSI_SAS_LIBSAS
depends on ATA = y || ATA = SCSI_SAS_LIBSAS
+ select SATA_HOST
help
Builds in ATA support into libsas. Will necessitate
the loading of libata along with libsas.
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index ff6d4aa92421..f27ffd088c8a 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -2795,11 +2795,9 @@ megaraid_biosparam(struct scsi_device *sdev, struct block_device *bdev,
sector_t capacity, int geom[])
{
adapter_t *adapter;
- unsigned char *bh;
int heads;
int sectors;
int cylinders;
- int rval;
/* Get pointer to host config structure */
adapter = (adapter_t *)sdev->host->hostdata;
@@ -2826,15 +2824,8 @@ megaraid_biosparam(struct scsi_device *sdev, struct block_device *bdev,
geom[2] = cylinders;
}
else {
- bh = scsi_bios_ptable(bdev);
-
- if( bh ) {
- rval = scsi_partsize(bh, capacity,
- &geom[2], &geom[0], &geom[1]);
- kfree(bh);
- if( rval != -1 )
- return rval;
- }
+ if (scsi_partsize(bdev, capacity, geom))
+ return 0;
dev_info(&adapter->dev->dev,
"invalid partition on this disk on channel %d\n",
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index b520a980d1dc..7a94e1171c72 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -864,7 +864,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
goto qc24_fail_command;
}
- if (atomic_read(&fcport->state) != FCS_ONLINE) {
+ if (atomic_read(&fcport->state) != FCS_ONLINE || fcport->deleted) {
if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD ||
atomic_read(&base_vha->loop_state) == LOOP_DEAD) {
ql_dbg(ql_dbg_io, vha, 0x3005,
@@ -946,7 +946,7 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
goto qc24_fail_command;
}
- if (atomic_read(&fcport->state) != FCS_ONLINE) {
+ if (atomic_read(&fcport->state) != FCS_ONLINE || fcport->deleted) {
if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD ||
atomic_read(&base_vha->loop_state) == LOOP_DEAD) {
ql_dbg(ql_dbg_io, vha, 0x3077,
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 44cb054d5e66..4c6c448dc2df 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -38,6 +38,7 @@
#include <linux/hrtimer.h>
#include <linux/uuid.h>
#include <linux/t10-pi.h>
+#include <linux/msdos_partition.h>
#include <net/checksum.h>
@@ -4146,7 +4147,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt)
static void __init sdebug_build_parts(unsigned char *ramp,
unsigned long store_size)
{
- struct partition *pp;
+ struct msdos_partition *pp;
int starts[SDEBUG_MAX_PARTS + 2];
int sectors_per_part, num_sectors, k;
int heads_by_sects, start_sec, end_sec;
@@ -4171,7 +4172,7 @@ static void __init sdebug_build_parts(unsigned char *ramp,
ramp[510] = 0x55; /* magic partition markings */
ramp[511] = 0xAA;
- pp = (struct partition *)(ramp + 0x1be);
+ pp = (struct msdos_partition *)(ramp + 0x1be);
for (k = 0; starts[k + 1]; ++k, ++pp) {
start_sec = starts[k];
end_sec = starts[k + 1] - 1;
diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index e969138051c7..682cf08ab041 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -17,14 +17,11 @@
#include <linux/genhd.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
+#include <linux/msdos_partition.h>
#include <asm/unaligned.h>
#include <scsi/scsicam.h>
-
-static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds,
- unsigned int *secs);
-
/**
* scsi_bios_ptable - Read PC partition table out of first sector of device.
* @dev: from this device
@@ -35,105 +32,48 @@ static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds
*/
unsigned char *scsi_bios_ptable(struct block_device *dev)
{
- unsigned char *res = kmalloc(66, GFP_KERNEL);
- if (res) {
- struct block_device *bdev = dev->bd_contains;
- Sector sect;
- void *data = read_dev_sector(bdev, 0, &sect);
- if (data) {
- memcpy(res, data + 0x1be, 66);
- put_dev_sector(sect);
- } else {
- kfree(res);
- res = NULL;
- }
- }
- return res;
-}
-EXPORT_SYMBOL(scsi_bios_ptable);
-
-/**
- * scsicam_bios_param - Determine geometry of a disk in cylinders/heads/sectors.
- * @bdev: which device
- * @capacity: size of the disk in sectors
- * @ip: return value: ip[0]=heads, ip[1]=sectors, ip[2]=cylinders
- *
- * Description : determine the BIOS mapping/geometry used for a drive in a
- * SCSI-CAM system, storing the results in ip as required
- * by the HDIO_GETGEO ioctl().
- *
- * Returns : -1 on failure, 0 on success.
- */
-
-int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip)
-{
- unsigned char *p;
- u64 capacity64 = capacity; /* Suppress gcc warning */
- int ret;
-
- p = scsi_bios_ptable(bdev);
- if (!p)
- return -1;
-
- /* try to infer mapping from partition table */
- ret = scsi_partsize(p, (unsigned long)capacity, (unsigned int *)ip + 2,
- (unsigned int *)ip + 0, (unsigned int *)ip + 1);
- kfree(p);
-
- if (ret == -1 && capacity64 < (1ULL << 32)) {
- /* pick some standard mapping with at most 1024 cylinders,
- and at most 62 sectors per track - this works up to
- 7905 MB */
- ret = setsize((unsigned long)capacity, (unsigned int *)ip + 2,
- (unsigned int *)ip + 0, (unsigned int *)ip + 1);
- }
-
- /* if something went wrong, then apparently we have to return
- a geometry with more than 1024 cylinders */
- if (ret || ip[0] > 255 || ip[1] > 63) {
- if ((capacity >> 11) > 65534) {
- ip[0] = 255;
- ip[1] = 63;
- } else {
- ip[0] = 64;
- ip[1] = 32;
- }
+ struct address_space *mapping = dev->bd_contains->bd_inode->i_mapping;
+ unsigned char *res = NULL;
+ struct page *page;
- if (capacity > 65535*63*255)
- ip[2] = 65535;
- else
- ip[2] = (unsigned long)capacity / (ip[0] * ip[1]);
- }
+ page = read_mapping_page(mapping, 0, NULL);
+ if (IS_ERR(page))
+ return NULL;
- return 0;
+ if (!PageError(page))
+ res = kmemdup(page_address(page) + 0x1be, 66, GFP_KERNEL);
+ put_page(page);
+ return res;
}
-EXPORT_SYMBOL(scsicam_bios_param);
+EXPORT_SYMBOL(scsi_bios_ptable);
/**
* scsi_partsize - Parse cylinders/heads/sectors from PC partition table
- * @buf: partition table, see scsi_bios_ptable()
+ * @bdev: block device to parse
* @capacity: size of the disk in sectors
- * @cyls: put cylinders here
- * @hds: put heads here
- * @secs: put sectors here
+ * @geom: output in form of [hds, sectors, cylinders]
*
* Determine the BIOS mapping/geometry used to create the partition
- * table, storing the results in @cyls, @hds, and @secs
+ * table, storing the results in @geom.
*
- * Returns: -1 on failure, 0 on success.
+ * Returns: %false on failure, %true on success.
*/
-
-int scsi_partsize(unsigned char *buf, unsigned long capacity,
- unsigned int *cyls, unsigned int *hds, unsigned int *secs)
+bool scsi_partsize(struct block_device *bdev, sector_t capacity, int geom[3])
{
- struct partition *p = (struct partition *)buf, *largest = NULL;
- int i, largest_cyl;
int cyl, ext_cyl, end_head, end_cyl, end_sector;
unsigned int logical_end, physical_end, ext_physical_end;
+ struct msdos_partition *p, *largest = NULL;
+ void *buf;
+ int ret = false;
+ buf = scsi_bios_ptable(bdev);
+ if (!buf)
+ return false;
if (*(unsigned short *) (buf + 64) == 0xAA55) {
- for (largest_cyl = -1, i = 0; i < 4; ++i, ++p) {
+ int largest_cyl = -1, i;
+
+ for (i = 0, p = buf; i < 4; i++, p++) {
if (!p->sys_ind)
continue;
#ifdef DEBUG
@@ -153,7 +93,7 @@ int scsi_partsize(unsigned char *buf, unsigned long capacity,
end_sector = largest->end_sector & 0x3f;
if (end_head + 1 == 0 || end_sector == 0)
- return -1;
+ goto out_free_buf;
#ifdef DEBUG
printk("scsicam_bios_param : end at h = %d, c = %d, s = %d\n",
@@ -178,19 +118,24 @@ int scsi_partsize(unsigned char *buf, unsigned long capacity,
,logical_end, physical_end, ext_physical_end, ext_cyl);
#endif
- if ((logical_end == physical_end) ||
- (end_cyl == 1023 && ext_physical_end == logical_end)) {
- *secs = end_sector;
- *hds = end_head + 1;
- *cyls = capacity / ((end_head + 1) * end_sector);
- return 0;
+ if (logical_end == physical_end ||
+ (end_cyl == 1023 && ext_physical_end == logical_end)) {
+ geom[0] = end_head + 1;
+ geom[1] = end_sector;
+ geom[2] = (unsigned long)capacity /
+ ((end_head + 1) * end_sector);
+ ret = true;
+ goto out_free_buf;
}
#ifdef DEBUG
printk("scsicam_bios_param : logical (%u) != physical (%u)\n",
logical_end, physical_end);
#endif
}
- return -1;
+
+out_free_buf:
+ kfree(buf);
+ return ret;
}
EXPORT_SYMBOL(scsi_partsize);
@@ -258,3 +203,56 @@ static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds
*hds = (unsigned int) heads;
return (rv);
}
+
+/**
+ * scsicam_bios_param - Determine geometry of a disk in cylinders/heads/sectors.
+ * @bdev: which device
+ * @capacity: size of the disk in sectors
+ * @ip: return value: ip[0]=heads, ip[1]=sectors, ip[2]=cylinders
+ *
+ * Description : determine the BIOS mapping/geometry used for a drive in a
+ * SCSI-CAM system, storing the results in ip as required
+ * by the HDIO_GETGEO ioctl().
+ *
+ * Returns : always 0.
+ */
+int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip)
+{
+ u64 capacity64 = capacity; /* Suppress gcc warning */
+ int ret = 0;
+
+ /* try to infer mapping from partition table */
+ if (scsi_partsize(bdev, capacity, ip))
+ return 0;
+
+ if (capacity64 < (1ULL << 32)) {
+ /*
+ * Pick some standard mapping with at most 1024 cylinders, and
+ * at most 62 sectors per track - this works up to 7905 MB.
+ */
+ ret = setsize((unsigned long)capacity, (unsigned int *)ip + 2,
+ (unsigned int *)ip + 0, (unsigned int *)ip + 1);
+ }
+
+ /*
+ * If something went wrong, then apparently we have to return a geometry
+ * with more than 1024 cylinders.
+ */
+ if (ret || ip[0] > 255 || ip[1] > 63) {
+ if ((capacity >> 11) > 65534) {
+ ip[0] = 255;
+ ip[1] = 63;
+ } else {
+ ip[0] = 64;
+ ip[1] = 32;
+ }
+
+ if (capacity > 65535*63*255)
+ ip[2] = 65535;
+ else
+ ip[2] = (unsigned long)capacity / (ip[0] * ip[1]);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(scsicam_bios_param);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 8ca9299ffd36..a793cb08d025 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3169,9 +3169,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
- } else
+ } else {
+ q->limits.io_opt = 0;
rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
(sector_t)BLK_DEF_MAX_SECTORS);
+ }
/* Do not exceed controller limit */
rw_max = min(rw_max, queue_max_hw_sectors(q));
@@ -3187,7 +3189,8 @@ static int sd_revalidate_disk(struct gendisk *disk)
sdkp->first_scan = 0;
- set_capacity(disk, logical_to_sectors(sdp, sdkp->capacity));
+ set_capacity_revalidate_and_notify(disk,
+ logical_to_sectors(sdp, sdkp->capacity), false);
sd_config_write_same(sdkp);
kfree(buffer);
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index abd0e6b05f79..2d705694636c 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3884,18 +3884,25 @@ EXPORT_SYMBOL_GPL(ufshcd_uic_hibern8_exit);
void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit)
{
unsigned long flags;
+ bool update = false;
- if (!(hba->capabilities & MASK_AUTO_HIBERN8_SUPPORT))
+ if (!ufshcd_is_auto_hibern8_supported(hba))
return;
spin_lock_irqsave(hba->host->host_lock, flags);
- if (hba->ahit == ahit)
- goto out_unlock;
- hba->ahit = ahit;
- if (!pm_runtime_suspended(hba->dev))
- ufshcd_writel(hba, hba->ahit, REG_AUTO_HIBERNATE_IDLE_TIMER);
-out_unlock:
+ if (hba->ahit != ahit) {
+ hba->ahit = ahit;
+ update = true;
+ }
spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+ if (update && !pm_runtime_suspended(hba->dev)) {
+ pm_runtime_get_sync(hba->dev);
+ ufshcd_hold(hba, false);
+ ufshcd_auto_hibern8_enable(hba);
+ ufshcd_release(hba);
+ pm_runtime_put(hba->dev);
+ }
}
EXPORT_SYMBOL_GPL(ufshcd_auto_hibern8_update);
diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index e3f5ebc0c05e..fc2575fef51b 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -1320,6 +1320,9 @@ static const struct of_device_id qcom_slim_ngd_dt_match[] = {
{
.compatible = "qcom,slim-ngd-v1.5.0",
.data = &ngd_v1_5_offset_info,
+ },{
+ .compatible = "qcom,slim-ngd-v2.1.0",
+ .data = &ngd_v1_5_offset_info,
},
{}
};
diff --git a/drivers/soc/fsl/dpio/dpio-driver.c b/drivers/soc/fsl/dpio/dpio-driver.c
index 70014ecce2a7..7b642c330977 100644
--- a/drivers/soc/fsl/dpio/dpio-driver.c
+++ b/drivers/soc/fsl/dpio/dpio-driver.c
@@ -233,10 +233,6 @@ static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
goto err_allocate_irqs;
}
- err = register_dpio_irq_handlers(dpio_dev, desc.cpu);
- if (err)
- goto err_register_dpio_irq;
-
priv->io = dpaa2_io_create(&desc, dev);
if (!priv->io) {
dev_err(dev, "dpaa2_io_create failed\n");
@@ -244,6 +240,10 @@ static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
goto err_dpaa2_io_create;
}
+ err = register_dpio_irq_handlers(dpio_dev, desc.cpu);
+ if (err)
+ goto err_register_dpio_irq;
+
dev_info(dev, "probed\n");
dev_dbg(dev, " receives_notifications = %d\n",
desc.receives_notifications);
diff --git a/drivers/soc/samsung/exynos-chipid.c b/drivers/soc/samsung/exynos-chipid.c
index 2dad4961a80b..8d4d05086906 100644
--- a/drivers/soc/samsung/exynos-chipid.c
+++ b/drivers/soc/samsung/exynos-chipid.c
@@ -59,7 +59,7 @@ static int __init exynos_chipid_early_init(void)
syscon = of_find_compatible_node(NULL, NULL,
"samsung,exynos4210-chipid");
if (!syscon)
- return ENODEV;
+ return -ENODEV;
regmap = device_node_to_regmap(syscon);
of_node_put(syscon);
diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c
index ba6f905f26fa..69c6dce9be31 100644
--- a/drivers/staging/greybus/tools/loopback_test.c
+++ b/drivers/staging/greybus/tools/loopback_test.c
@@ -19,6 +19,7 @@
#include <signal.h>
#define MAX_NUM_DEVICES 10
+#define MAX_SYSFS_PREFIX 0x80
#define MAX_SYSFS_PATH 0x200
#define CSV_MAX_LINE 0x1000
#define SYSFS_MAX_INT 0x20
@@ -67,7 +68,7 @@ struct loopback_results {
};
struct loopback_device {
- char name[MAX_SYSFS_PATH];
+ char name[MAX_STR_LEN];
char sysfs_entry[MAX_SYSFS_PATH];
char debugfs_entry[MAX_SYSFS_PATH];
struct loopback_results results;
@@ -93,8 +94,8 @@ struct loopback_test {
int stop_all;
int poll_count;
char test_name[MAX_STR_LEN];
- char sysfs_prefix[MAX_SYSFS_PATH];
- char debugfs_prefix[MAX_SYSFS_PATH];
+ char sysfs_prefix[MAX_SYSFS_PREFIX];
+ char debugfs_prefix[MAX_SYSFS_PREFIX];
struct timespec poll_timeout;
struct loopback_device devices[MAX_NUM_DEVICES];
struct loopback_results aggregate_results;
@@ -637,7 +638,7 @@ baddir:
static int open_poll_files(struct loopback_test *t)
{
struct loopback_device *dev;
- char buf[MAX_STR_LEN];
+ char buf[MAX_SYSFS_PATH + MAX_STR_LEN];
char dummy;
int fds_idx = 0;
int i;
@@ -655,7 +656,7 @@ static int open_poll_files(struct loopback_test *t)
goto err;
}
read(t->fds[fds_idx].fd, &dummy, 1);
- t->fds[fds_idx].events = EPOLLERR|EPOLLPRI;
+ t->fds[fds_idx].events = POLLERR | POLLPRI;
t->fds[fds_idx].revents = 0;
fds_idx++;
}
@@ -748,7 +749,7 @@ static int wait_for_complete(struct loopback_test *t)
}
for (i = 0; i < t->poll_count; i++) {
- if (t->fds[i].revents & EPOLLPRI) {
+ if (t->fds[i].revents & POLLPRI) {
/* Dummy read to clear the event */
read(t->fds[i].fd, &dummy, 1);
number_of_events++;
@@ -907,10 +908,10 @@ int main(int argc, char *argv[])
t.iteration_max = atoi(optarg);
break;
case 'S':
- snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", optarg);
+ snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg);
break;
case 'D':
- snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", optarg);
+ snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg);
break;
case 'm':
t.mask = atol(optarg);
@@ -961,10 +962,10 @@ int main(int argc, char *argv[])
}
if (!strcmp(t.sysfs_prefix, ""))
- snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", sysfs_prefix);
+ snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", sysfs_prefix);
if (!strcmp(t.debugfs_prefix, ""))
- snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", debugfs_prefix);
+ snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", debugfs_prefix);
ret = find_loopback_devices(&t);
if (ret)
diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
index b5d42f411dd8..845c8817281c 100644
--- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c
+++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
@@ -38,6 +38,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = {
{USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */
{USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */
{USB_DEVICE(0x2357, 0x0111)}, /* TP-Link TL-WN727N v5.21 */
+ {USB_DEVICE(0x2C4E, 0x0102)}, /* MERCUSYS MW150US v2 */
{USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */
{USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */
{} /* Terminating entry */
diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c
index 488f2539aa9a..81ecfd1a200d 100644
--- a/drivers/staging/speakup/main.c
+++ b/drivers/staging/speakup/main.c
@@ -561,7 +561,7 @@ static u_long get_word(struct vc_data *vc)
return 0;
} else if (tmpx < vc->vc_cols - 2 &&
(ch == SPACE || ch == 0 || (ch < 0x100 && IS_WDLM(ch))) &&
- get_char(vc, (u_short *)&tmp_pos + 1, &temp) > SPACE) {
+ get_char(vc, (u_short *)tmp_pos + 1, &temp) > SPACE) {
tmp_pos += 2;
tmpx++;
} else {
diff --git a/drivers/staging/wfx/hif_tx.c b/drivers/staging/wfx/hif_tx.c
index 2428363371fa..77bca43aca42 100644
--- a/drivers/staging/wfx/hif_tx.c
+++ b/drivers/staging/wfx/hif_tx.c
@@ -140,6 +140,7 @@ int hif_shutdown(struct wfx_dev *wdev)
else
control_reg_write(wdev, 0);
mutex_unlock(&wdev->hif_cmd.lock);
+ mutex_unlock(&wdev->hif_cmd.key_renew_lock);
kfree(hif);
return ret;
}
@@ -289,7 +290,7 @@ int hif_stop_scan(struct wfx_vif *wvif)
}
int hif_join(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf,
- const struct ieee80211_channel *channel, const u8 *ssidie)
+ struct ieee80211_channel *channel, const u8 *ssid, int ssidlen)
{
int ret;
struct hif_msg *hif;
@@ -307,9 +308,9 @@ int hif_join(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf,
body->basic_rate_set =
cpu_to_le32(wfx_rate_mask_to_hw(wvif->wdev, conf->basic_rates));
memcpy(body->bssid, conf->bssid, sizeof(body->bssid));
- if (!conf->ibss_joined && ssidie) {
- body->ssid_length = cpu_to_le32(ssidie[1]);
- memcpy(body->ssid, &ssidie[2], ssidie[1]);
+ if (!conf->ibss_joined && ssid) {
+ body->ssid_length = cpu_to_le32(ssidlen);
+ memcpy(body->ssid, ssid, ssidlen);
}
wfx_fill_header(hif, wvif->id, HIF_REQ_ID_JOIN, sizeof(*body));
ret = wfx_cmd_send(wvif->wdev, hif, NULL, 0, false);
@@ -427,9 +428,9 @@ int hif_start(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf,
struct hif_msg *hif;
struct hif_req_start *body = wfx_alloc_hif(sizeof(*body), &hif);
- body->dtim_period = conf->dtim_period,
- body->short_preamble = conf->use_short_preamble,
- body->channel_number = cpu_to_le16(channel->hw_value),
+ body->dtim_period = conf->dtim_period;
+ body->short_preamble = conf->use_short_preamble;
+ body->channel_number = cpu_to_le16(channel->hw_value);
body->beacon_interval = cpu_to_le32(conf->beacon_int);
body->basic_rate_set =
cpu_to_le32(wfx_rate_mask_to_hw(wvif->wdev, conf->basic_rates));
diff --git a/drivers/staging/wfx/hif_tx.h b/drivers/staging/wfx/hif_tx.h
index 20977e461718..f8520a14c14c 100644
--- a/drivers/staging/wfx/hif_tx.h
+++ b/drivers/staging/wfx/hif_tx.h
@@ -46,7 +46,7 @@ int hif_scan(struct wfx_vif *wvif, struct cfg80211_scan_request *req80211,
int chan_start, int chan_num);
int hif_stop_scan(struct wfx_vif *wvif);
int hif_join(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf,
- const struct ieee80211_channel *channel, const u8 *ssidie);
+ struct ieee80211_channel *channel, const u8 *ssid, int ssidlen);
int hif_set_pm(struct wfx_vif *wvif, bool ps, int dynamic_ps_timeout);
int hif_set_bss_params(struct wfx_vif *wvif,
const struct hif_req_set_bss_params *arg);
diff --git a/drivers/staging/wfx/hif_tx_mib.h b/drivers/staging/wfx/hif_tx_mib.h
index bf3769c2a9b6..26b1406f9f6c 100644
--- a/drivers/staging/wfx/hif_tx_mib.h
+++ b/drivers/staging/wfx/hif_tx_mib.h
@@ -191,10 +191,10 @@ static inline int hif_set_block_ack_policy(struct wfx_vif *wvif,
}
static inline int hif_set_association_mode(struct wfx_vif *wvif,
- struct ieee80211_bss_conf *info,
- struct ieee80211_sta_ht_cap *ht_cap)
+ struct ieee80211_bss_conf *info)
{
int basic_rates = wfx_rate_mask_to_hw(wvif->wdev, info->basic_rates);
+ struct ieee80211_sta *sta = NULL;
struct hif_mib_set_association_mode val = {
.preambtype_use = 1,
.mode = 1,
@@ -204,12 +204,17 @@ static inline int hif_set_association_mode(struct wfx_vif *wvif,
.basic_rate_set = cpu_to_le32(basic_rates)
};
+ rcu_read_lock(); // protect sta
+ if (info->bssid && !info->ibss_joined)
+ sta = ieee80211_find_sta(wvif->vif, info->bssid);
+
// FIXME: it is strange to not retrieve all information from bss_info
- if (ht_cap && ht_cap->ht_supported) {
- val.mpdu_start_spacing = ht_cap->ampdu_density;
+ if (sta && sta->ht_cap.ht_supported) {
+ val.mpdu_start_spacing = sta->ht_cap.ampdu_density;
if (!(info->ht_operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT))
- val.greenfield = !!(ht_cap->cap & IEEE80211_HT_CAP_GRN_FLD);
+ val.greenfield = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_GRN_FLD);
}
+ rcu_read_unlock();
return hif_write_mib(wvif->wdev, wvif->id,
HIF_MIB_ID_SET_ASSOCIATION_MODE, &val, sizeof(val));
diff --git a/drivers/staging/wfx/sta.c b/drivers/staging/wfx/sta.c
index 03d0f224ffdb..af4f4bbd0572 100644
--- a/drivers/staging/wfx/sta.c
+++ b/drivers/staging/wfx/sta.c
@@ -491,9 +491,11 @@ static void wfx_set_mfp(struct wfx_vif *wvif,
static void wfx_do_join(struct wfx_vif *wvif)
{
int ret;
- const u8 *ssidie;
struct ieee80211_bss_conf *conf = &wvif->vif->bss_conf;
struct cfg80211_bss *bss = NULL;
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ const u8 *ssidie = NULL;
+ int ssidlen = 0;
wfx_tx_lock_flush(wvif->wdev);
@@ -514,11 +516,14 @@ static void wfx_do_join(struct wfx_vif *wvif)
if (!wvif->beacon_int)
wvif->beacon_int = 1;
- rcu_read_lock();
+ rcu_read_lock(); // protect ssidie
if (!conf->ibss_joined)
ssidie = ieee80211_bss_get_ie(bss, WLAN_EID_SSID);
- else
- ssidie = NULL;
+ if (ssidie) {
+ ssidlen = ssidie[1];
+ memcpy(ssid, &ssidie[2], ssidie[1]);
+ }
+ rcu_read_unlock();
wfx_tx_flush(wvif->wdev);
@@ -527,10 +532,8 @@ static void wfx_do_join(struct wfx_vif *wvif)
wfx_set_mfp(wvif, bss);
- /* Perform actual join */
wvif->wdev->tx_burst_idx = -1;
- ret = hif_join(wvif, conf, wvif->channel, ssidie);
- rcu_read_unlock();
+ ret = hif_join(wvif, conf, wvif->channel, ssid, ssidlen);
if (ret) {
ieee80211_connection_loss(wvif->vif);
wvif->join_complete_status = -1;
@@ -605,7 +608,9 @@ int wfx_sta_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
int i;
for (i = 0; i < ARRAY_SIZE(sta_priv->buffered); i++)
- WARN(sta_priv->buffered[i], "release station while Tx is in progress");
+ if (sta_priv->buffered[i])
+ dev_warn(wvif->wdev->dev, "release station while %d pending frame on queue %d",
+ sta_priv->buffered[i], i);
// FIXME: see note in wfx_sta_add()
if (vif->type == NL80211_IFTYPE_STATION)
return 0;
@@ -689,6 +694,7 @@ static void wfx_join_finalize(struct wfx_vif *wvif,
wfx_rate_mask_to_hw(wvif->wdev, sta->supp_rates[wvif->channel->band]);
else
wvif->bss_params.operational_rate_set = -1;
+ rcu_read_unlock();
if (sta &&
info->ht_operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT)
hif_dual_cts_protection(wvif, true);
@@ -701,8 +707,7 @@ static void wfx_join_finalize(struct wfx_vif *wvif,
wvif->bss_params.beacon_lost_count = 20;
wvif->bss_params.aid = info->aid;
- hif_set_association_mode(wvif, info, sta ? &sta->ht_cap : NULL);
- rcu_read_unlock();
+ hif_set_association_mode(wvif, info);
if (!info->ibss_joined) {
hif_keep_alive_period(wvif, 30 /* sec */);
diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c
index 0026eb6f13ce..27b4cd77d0db 100644
--- a/drivers/tee/amdtee/core.c
+++ b/drivers/tee/amdtee/core.c
@@ -139,6 +139,9 @@ static struct amdtee_session *find_session(struct amdtee_context_data *ctxdata,
u32 index = get_session_index(session);
struct amdtee_session *sess;
+ if (index >= TEE_NUM_SESSIONS)
+ return NULL;
+
list_for_each_entry(sess, &ctxdata->sess_list, list_node)
if (ta_handle == sess->ta_handle &&
test_bit(index, sess->sess_mask))
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 7d6ecc342508..a2ce99051c51 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -954,7 +954,7 @@ static bool tb_port_is_width_supported(struct tb_port *port, int width)
ret = tb_port_read(port, &phy, TB_CFG_PORT,
port->cap_phy + LANE_ADP_CS_0, 1);
if (ret)
- return ret;
+ return false;
widths = (phy & LANE_ADP_CS_0_SUPPORTED_WIDTH_MASK) >>
LANE_ADP_CS_0_SUPPORTED_WIDTH_SHIFT;
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index a1453fe10862..5a6f36b391d9 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1589,9 +1589,7 @@ void tty_kclose(struct tty_struct *tty)
tty_debug_hangup(tty, "freeing structure\n");
/*
* The release_tty function takes care of the details of clearing
- * the slots and preserving the termios structure. The tty_unlock_pair
- * should be safe as we keep a kref while the tty is locked (so the
- * unlock never unlocks a freed tty).
+ * the slots and preserving the termios structure.
*/
mutex_lock(&tty_mutex);
tty_port_set_kopened(tty->port, 0);
@@ -1621,9 +1619,7 @@ void tty_release_struct(struct tty_struct *tty, int idx)
tty_debug_hangup(tty, "freeing structure\n");
/*
* The release_tty function takes care of the details of clearing
- * the slots and preserving the termios structure. The tty_unlock_pair
- * should be safe as we keep a kref while the tty is locked (so the
- * unlock never unlocks a freed tty).
+ * the slots and preserving the termios structure.
*/
mutex_lock(&tty_mutex);
release_tty(tty, idx);
@@ -2734,9 +2730,11 @@ static int compat_tty_tiocgserial(struct tty_struct *tty,
struct serial_struct32 v32;
struct serial_struct v;
int err;
- memset(&v, 0, sizeof(struct serial_struct));
- if (!tty->ops->set_serial)
+ memset(&v, 0, sizeof(v));
+ memset(&v32, 0, sizeof(v32));
+
+ if (!tty->ops->get_serial)
return -ENOTTY;
err = tty->ops->get_serial(tty, &v);
if (!err) {
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index ffaf46f5d062..4c4ac30db498 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -1530,18 +1530,19 @@ static const struct usb_ep_ops usb_ep_ops = {
static void ci_hdrc_gadget_connect(struct usb_gadget *_gadget, int is_active)
{
struct ci_hdrc *ci = container_of(_gadget, struct ci_hdrc, gadget);
- unsigned long flags;
if (is_active) {
pm_runtime_get_sync(&_gadget->dev);
hw_device_reset(ci);
- spin_lock_irqsave(&ci->lock, flags);
+ spin_lock_irq(&ci->lock);
if (ci->driver) {
hw_device_state(ci, ci->ep0out->qh.dma);
usb_gadget_set_state(_gadget, USB_STATE_POWERED);
+ spin_unlock_irq(&ci->lock);
usb_udc_vbus_handler(_gadget, true);
+ } else {
+ spin_unlock_irq(&ci->lock);
}
- spin_unlock_irqrestore(&ci->lock, flags);
} else {
usb_udc_vbus_handler(_gadget, false);
if (ci->driver)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 62f4fb9b362f..47f09a6ce7bd 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -896,10 +896,10 @@ static int get_serial_info(struct tty_struct *tty, struct serial_struct *ss)
ss->xmit_fifo_size = acm->writesize;
ss->baud_base = le32_to_cpu(acm->line.dwDTERate);
- ss->close_delay = acm->port.close_delay / 10;
+ ss->close_delay = jiffies_to_msecs(acm->port.close_delay) / 10;
ss->closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ?
ASYNC_CLOSING_WAIT_NONE :
- acm->port.closing_wait / 10;
+ jiffies_to_msecs(acm->port.closing_wait) / 10;
return 0;
}
@@ -907,24 +907,32 @@ static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss)
{
struct acm *acm = tty->driver_data;
unsigned int closing_wait, close_delay;
+ unsigned int old_closing_wait, old_close_delay;
int retval = 0;
- close_delay = ss->close_delay * 10;
+ close_delay = msecs_to_jiffies(ss->close_delay * 10);
closing_wait = ss->closing_wait == ASYNC_CLOSING_WAIT_NONE ?
- ASYNC_CLOSING_WAIT_NONE : ss->closing_wait * 10;
+ ASYNC_CLOSING_WAIT_NONE :
+ msecs_to_jiffies(ss->closing_wait * 10);
+
+ /* we must redo the rounding here, so that the values match */
+ old_close_delay = jiffies_to_msecs(acm->port.close_delay) / 10;
+ old_closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ?
+ ASYNC_CLOSING_WAIT_NONE :
+ jiffies_to_msecs(acm->port.closing_wait) / 10;
mutex_lock(&acm->port.mutex);
- if (!capable(CAP_SYS_ADMIN)) {
- if ((close_delay != acm->port.close_delay) ||
- (closing_wait != acm->port.closing_wait))
+ if ((ss->close_delay != old_close_delay) ||
+ (ss->closing_wait != old_closing_wait)) {
+ if (!capable(CAP_SYS_ADMIN))
retval = -EPERM;
- else
- retval = -EOPNOTSUPP;
- } else {
- acm->port.close_delay = close_delay;
- acm->port.closing_wait = closing_wait;
- }
+ else {
+ acm->port.close_delay = close_delay;
+ acm->port.closing_wait = closing_wait;
+ }
+ } else
+ retval = -EOPNOTSUPP;
mutex_unlock(&acm->port.mutex);
return retval;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 2dac3e7cdd97..da30b5664ff3 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -378,6 +378,12 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+ /* Realtek hub in Dell WD19 (Type-C) */
+ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM },
+
+ /* Generic RTL8153 based ethernet adapters */
+ { USB_DEVICE(0x0bda, 0x8153), .driver_info = USB_QUIRK_NO_LPM },
+
/* Action Semiconductor flash disk */
{ USB_DEVICE(0x10d6, 0x2200), .driver_info =
USB_QUIRK_STRING_FETCH_255 },
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 5e9b537df631..1fddc41fa1f3 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -136,7 +136,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
xhci->quirks |= XHCI_AMD_PLL_FIX;
if (pdev->vendor == PCI_VENDOR_ID_AMD &&
- (pdev->device == 0x15e0 ||
+ (pdev->device == 0x145c ||
+ pdev->device == 0x15e0 ||
pdev->device == 0x15e1 ||
pdev->device == 0x43bb))
xhci->quirks |= XHCI_SUSPEND_DELAY;
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index d90cd5ec09cf..315b4552693c 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -445,6 +445,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match);
static struct platform_driver usb_xhci_driver = {
.probe = xhci_plat_probe,
.remove = xhci_plat_remove,
+ .shutdown = usb_hcd_platform_shutdown,
.driver = {
.name = "xhci-hcd",
.pm = &xhci_plat_pm_ops,
diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h
index 56eb867803a6..b19582b2a72c 100644
--- a/drivers/usb/host/xhci-trace.h
+++ b/drivers/usb/host/xhci-trace.h
@@ -289,23 +289,12 @@ DECLARE_EVENT_CLASS(xhci_log_urb,
),
TP_printk("ep%d%s-%s: urb %p pipe %u slot %d length %d/%d sgs %d/%d stream %d flags %08x",
__entry->epnum, __entry->dir_in ? "in" : "out",
- ({ char *s;
- switch (__entry->type) {
- case USB_ENDPOINT_XFER_INT:
- s = "intr";
- break;
- case USB_ENDPOINT_XFER_CONTROL:
- s = "control";
- break;
- case USB_ENDPOINT_XFER_BULK:
- s = "bulk";
- break;
- case USB_ENDPOINT_XFER_ISOC:
- s = "isoc";
- break;
- default:
- s = "UNKNOWN";
- } s; }), __entry->urb, __entry->pipe, __entry->slot_id,
+ __print_symbolic(__entry->type,
+ { USB_ENDPOINT_XFER_INT, "intr" },
+ { USB_ENDPOINT_XFER_CONTROL, "control" },
+ { USB_ENDPOINT_XFER_BULK, "bulk" },
+ { USB_ENDPOINT_XFER_ISOC, "isoc" }),
+ __entry->urb, __entry->pipe, __entry->slot_id,
__entry->actual, __entry->length, __entry->num_mapped_sgs,
__entry->num_sgs, __entry->stream, __entry->flags
)
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 084cc2fff3ae..0b5dcf973d94 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1183,6 +1183,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(0) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110a, 0xff), /* Telit ME910G1 */
.driver_info = NCTRL(0) | RSVD(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110b, 0xff), /* Telit ME910G1 (ECM) */
+ .driver_info = NCTRL(0) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index aab737e1e7b6..c5a2995dfa2e 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -99,6 +99,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LD381_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) },
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index a019ea7e6e0e..52db5519aaf0 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -130,6 +130,7 @@
#define HP_LM920_PRODUCT_ID 0x026b
#define HP_TD620_PRODUCT_ID 0x0956
#define HP_LD960_PRODUCT_ID 0x0b39
+#define HP_LD381_PRODUCT_ID 0x0f7f
#define HP_LCM220_PRODUCT_ID 0x3139
#define HP_LCM960_PRODUCT_ID 0x3239
#define HP_LD220_PRODUCT_ID 0x3524
diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c
index 0f1273ae086c..048381c058a5 100644
--- a/drivers/usb/typec/ucsi/displayport.c
+++ b/drivers/usb/typec/ucsi/displayport.c
@@ -271,6 +271,9 @@ void ucsi_displayport_remove_partner(struct typec_altmode *alt)
return;
dp = typec_altmode_get_drvdata(alt);
+ if (!dp)
+ return;
+
dp->data.conf = 0;
dp->data.status = 0;
dp->initialized = false;
@@ -285,6 +288,8 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con,
struct typec_altmode *alt;
struct ucsi_dp *dp;
+ mutex_lock(&con->lock);
+
/* We can't rely on the firmware with the capabilities. */
desc->vdo |= DP_CAP_DP_SIGNALING | DP_CAP_RECEPTACLE;
@@ -293,12 +298,15 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con,
desc->vdo |= all_assignments << 16;
alt = typec_port_register_altmode(con->port, desc);
- if (IS_ERR(alt))
+ if (IS_ERR(alt)) {
+ mutex_unlock(&con->lock);
return alt;
+ }
dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL);
if (!dp) {
typec_unregister_altmode(alt);
+ mutex_unlock(&con->lock);
return ERR_PTR(-ENOMEM);
}
@@ -311,5 +319,7 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con,
alt->ops = &ucsi_displayport_ops;
typec_altmode_set_drvdata(alt, dp);
+ mutex_unlock(&con->lock);
+
return alt;
}
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7bfe365d9372..341458fd95ca 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -959,8 +959,8 @@ out_iput:
iput(vb->vb_dev_info.inode);
out_kern_unmount:
kern_unmount(balloon_mnt);
-#endif
out_del_vqs:
+#endif
vdev->config->del_vqs(vdev);
out_free_vb:
kfree(vb);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 867c7ebd3f10..58b96baa8d48 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2203,10 +2203,10 @@ void vring_del_virtqueue(struct virtqueue *_vq)
vq->split.queue_size_in_bytes,
vq->split.vring.desc,
vq->split.queue_dma_addr);
-
- kfree(vq->split.desc_state);
}
}
+ if (!vq->packed_ring)
+ kfree(vq->split.desc_state);
list_del(&_vq->list);
kfree(vq);
}
diff --git a/drivers/watchdog/iTCO_vendor.h b/drivers/watchdog/iTCO_vendor.h
index 0f7373ba10d5..69e92e692ae0 100644
--- a/drivers/watchdog/iTCO_vendor.h
+++ b/drivers/watchdog/iTCO_vendor.h
@@ -1,10 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* iTCO Vendor Specific Support hooks */
#ifdef CONFIG_ITCO_VENDOR_SUPPORT
+extern int iTCO_vendorsupport;
extern void iTCO_vendor_pre_start(struct resource *, unsigned int);
extern void iTCO_vendor_pre_stop(struct resource *);
extern int iTCO_vendor_check_noreboot_on(void);
#else
+#define iTCO_vendorsupport 0
#define iTCO_vendor_pre_start(acpibase, heartbeat) {}
#define iTCO_vendor_pre_stop(acpibase) {}
#define iTCO_vendor_check_noreboot_on() 1
diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c
index 4f1b96f59349..cf0eaa04b064 100644
--- a/drivers/watchdog/iTCO_vendor_support.c
+++ b/drivers/watchdog/iTCO_vendor_support.c
@@ -39,8 +39,10 @@
/* Broken BIOS */
#define BROKEN_BIOS 911
-static int vendorsupport;
-module_param(vendorsupport, int, 0);
+int iTCO_vendorsupport;
+EXPORT_SYMBOL(iTCO_vendorsupport);
+
+module_param_named(vendorsupport, iTCO_vendorsupport, int, 0);
MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default="
"0 (none), 1=SuperMicro Pent3, 911=Broken SMI BIOS");
@@ -152,7 +154,7 @@ static void broken_bios_stop(struct resource *smires)
void iTCO_vendor_pre_start(struct resource *smires,
unsigned int heartbeat)
{
- switch (vendorsupport) {
+ switch (iTCO_vendorsupport) {
case SUPERMICRO_OLD_BOARD:
supermicro_old_pre_start(smires);
break;
@@ -165,7 +167,7 @@ EXPORT_SYMBOL(iTCO_vendor_pre_start);
void iTCO_vendor_pre_stop(struct resource *smires)
{
- switch (vendorsupport) {
+ switch (iTCO_vendorsupport) {
case SUPERMICRO_OLD_BOARD:
supermicro_old_pre_stop(smires);
break;
@@ -178,7 +180,7 @@ EXPORT_SYMBOL(iTCO_vendor_pre_stop);
int iTCO_vendor_check_noreboot_on(void)
{
- switch (vendorsupport) {
+ switch (iTCO_vendorsupport) {
case SUPERMICRO_OLD_BOARD:
return 0;
default:
@@ -189,13 +191,13 @@ EXPORT_SYMBOL(iTCO_vendor_check_noreboot_on);
static int __init iTCO_vendor_init_module(void)
{
- if (vendorsupport == SUPERMICRO_NEW_BOARD) {
+ if (iTCO_vendorsupport == SUPERMICRO_NEW_BOARD) {
pr_warn("Option vendorsupport=%d is no longer supported, "
"please use the w83627hf_wdt driver instead\n",
SUPERMICRO_NEW_BOARD);
return -EINVAL;
}
- pr_info("vendor-support=%d\n", vendorsupport);
+ pr_info("vendor-support=%d\n", iTCO_vendorsupport);
return 0;
}
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 156360e37714..e707c4797f76 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -459,13 +459,25 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
if (!p->tco_res)
return -ENODEV;
- p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_IO_SMI);
- if (!p->smi_res)
- return -ENODEV;
-
p->iTCO_version = pdata->version;
p->pci_dev = to_pci_dev(dev->parent);
+ p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_IO_SMI);
+ if (p->smi_res) {
+ /* The TCO logic uses the TCO_EN bit in the SMI_EN register */
+ if (!devm_request_region(dev, p->smi_res->start,
+ resource_size(p->smi_res),
+ pdev->name)) {
+ pr_err("I/O address 0x%04llx already in use, device disabled\n",
+ (u64)SMI_EN(p));
+ return -EBUSY;
+ }
+ } else if (iTCO_vendorsupport ||
+ turn_SMI_watchdog_clear_off >= p->iTCO_version) {
+ pr_err("SMI I/O resource is missing\n");
+ return -ENODEV;
+ }
+
iTCO_wdt_no_reboot_bit_setup(p, pdata);
/*
@@ -492,14 +504,6 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
/* Set the NO_REBOOT bit to prevent later reboots, just for sure */
p->update_no_reboot_bit(p->no_reboot_priv, true);
- /* The TCO logic uses the TCO_EN bit in the SMI_EN register */
- if (!devm_request_region(dev, p->smi_res->start,
- resource_size(p->smi_res),
- pdev->name)) {
- pr_err("I/O address 0x%04llx already in use, device disabled\n",
- (u64)SMI_EN(p));
- return -EBUSY;
- }
if (turn_SMI_watchdog_clear_off >= p->iTCO_version) {
/*
* Bit 13: TCO_EN -> 0
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index df415c05939e..de1ae0bead3b 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -19,7 +19,7 @@
void afs_put_addrlist(struct afs_addr_list *alist)
{
if (alist && refcount_dec_and_test(&alist->usage))
- call_rcu(&alist->rcu, (rcu_callback_t)kfree);
+ kfree_rcu(alist, rcu);
}
/*
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index ff3994a6be23..6765949b3aab 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -244,6 +244,17 @@ static void afs_cm_destructor(struct afs_call *call)
}
/*
+ * Abort a service call from within an action function.
+ */
+static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error,
+ const char *why)
+{
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, error, why);
+ afs_set_call_complete(call, error, 0);
+}
+
+/*
* The server supplied a list of callbacks that it wanted to break.
*/
static void SRXAFSCB_CallBack(struct work_struct *work)
@@ -510,8 +521,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
afs_send_empty_reply(call);
else
- rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- 1, 1, "K-1");
+ afs_abort_service_call(call, 1, 1, "K-1");
afs_put_call(call);
_leave("");
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index cfe62b154f68..e1b9ed679045 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -145,6 +145,7 @@ static int afs_do_probe_fileserver(struct afs_net *net,
read_lock(&server->fs_lock);
ac.alist = rcu_dereference_protected(server->addresses,
lockdep_is_held(&server->fs_lock));
+ afs_get_addrlist(ac.alist);
read_unlock(&server->fs_lock);
atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
@@ -163,6 +164,7 @@ static int afs_do_probe_fileserver(struct afs_net *net,
if (!in_progress)
afs_fs_probe_done(server);
+ afs_put_addrlist(ac.alist);
return in_progress;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1d81fc4c3058..ef732dd4e7ef 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -81,7 +81,7 @@ enum afs_call_state {
* List of server addresses.
*/
struct afs_addr_list {
- struct rcu_head rcu; /* Must be first */
+ struct rcu_head rcu;
refcount_t usage;
u32 version; /* Version */
unsigned char max_addrs;
@@ -154,7 +154,7 @@ struct afs_call {
};
unsigned char unmarshall; /* unmarshalling phase */
unsigned char addr_ix; /* Address in ->alist */
- bool incoming; /* T if incoming call */
+ bool drop_ref; /* T if need to drop ref for incoming call */
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
@@ -1209,8 +1209,16 @@ static inline void afs_set_call_complete(struct afs_call *call,
ok = true;
}
spin_unlock_bh(&call->state_lock);
- if (ok)
+ if (ok) {
trace_afs_call_done(call);
+
+ /* Asynchronous calls have two refs to release - one from the alloc and
+ * one queued with the work item - and we can't just deallocate the
+ * call because the work item may be queued again.
+ */
+ if (call->drop_ref)
+ afs_put_call(call);
+ }
}
/*
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 58d396592250..1ecc67da6c1a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -18,7 +18,6 @@ struct workqueue_struct *afs_async_calls;
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
-static void afs_delete_async_call(struct work_struct *);
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
@@ -169,7 +168,7 @@ void afs_put_call(struct afs_call *call)
int n = atomic_dec_return(&call->usage);
int o = atomic_read(&net->nr_outstanding_calls);
- trace_afs_call(call, afs_call_trace_put, n + 1, o,
+ trace_afs_call(call, afs_call_trace_put, n, o,
__builtin_return_address(0));
ASSERTCMP(n, >=, 0);
@@ -402,8 +401,10 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
/* If the call is going to be asynchronous, we need an extra ref for
* the call to hold itself so the caller need not hang on to its ref.
*/
- if (call->async)
+ if (call->async) {
afs_get_call(call, afs_call_trace_get);
+ call->drop_ref = true;
+ }
/* create a call */
rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
@@ -413,7 +414,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
afs_wake_up_async_call :
afs_wake_up_call_waiter),
call->upgrade,
- call->intr,
+ (call->intr ? RXRPC_PREINTERRUPTIBLE :
+ RXRPC_UNINTERRUPTIBLE),
call->debug_id);
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
@@ -584,8 +586,6 @@ static void afs_deliver_to_call(struct afs_call *call)
done:
if (call->type->done)
call->type->done(call);
- if (state == AFS_CALL_COMPLETE && call->incoming)
- afs_put_call(call);
out:
_leave("");
return;
@@ -604,11 +604,7 @@ call_complete:
long afs_wait_for_call_to_complete(struct afs_call *call,
struct afs_addr_cursor *ac)
{
- signed long rtt2, timeout;
long ret;
- bool stalled = false;
- u64 rtt;
- u32 life, last_life;
bool rxrpc_complete = false;
DECLARE_WAITQUEUE(myself, current);
@@ -619,14 +615,6 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
if (ret < 0)
goto out;
- rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
- rtt2 = nsecs_to_jiffies64(rtt) * 2;
- if (rtt2 < 2)
- rtt2 = 2;
-
- timeout = rtt2;
- rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life);
-
add_wait_queue(&call->waitq, &myself);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
@@ -637,37 +625,19 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
call->need_attention = false;
__set_current_state(TASK_RUNNING);
afs_deliver_to_call(call);
- timeout = rtt2;
continue;
}
if (afs_check_call_state(call, AFS_CALL_COMPLETE))
break;
- if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) {
+ if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) {
/* rxrpc terminated the call. */
rxrpc_complete = true;
break;
}
- if (call->intr && timeout == 0 &&
- life == last_life && signal_pending(current)) {
- if (stalled)
- break;
- __set_current_state(TASK_RUNNING);
- rxrpc_kernel_probe_life(call->net->socket, call->rxcall);
- timeout = rtt2;
- stalled = true;
- continue;
- }
-
- if (life != last_life) {
- timeout = rtt2;
- last_life = life;
- stalled = false;
- }
-
- timeout = schedule_timeout(timeout);
+ schedule();
}
remove_wait_queue(&call->waitq, &myself);
@@ -735,7 +705,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
u = atomic_fetch_add_unless(&call->usage, 1, 0);
if (u != 0) {
- trace_afs_call(call, afs_call_trace_wake, u,
+ trace_afs_call(call, afs_call_trace_wake, u + 1,
atomic_read(&call->net->nr_outstanding_calls),
__builtin_return_address(0));
@@ -745,21 +715,6 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
}
/*
- * Delete an asynchronous call. The work item carries a ref to the call struct
- * that we need to release.
- */
-static void afs_delete_async_call(struct work_struct *work)
-{
- struct afs_call *call = container_of(work, struct afs_call, async_work);
-
- _enter("");
-
- afs_put_call(call);
-
- _leave("");
-}
-
-/*
* Perform I/O processing on an asynchronous call. The work item carries a ref
* to the call struct that we either need to release or to pass on.
*/
@@ -774,16 +729,6 @@ static void afs_process_async_call(struct work_struct *work)
afs_deliver_to_call(call);
}
- if (call->state == AFS_CALL_COMPLETE) {
- /* We have two refs to release - one from the alloc and one
- * queued with the work item - and we can't just deallocate the
- * call because the work item may be queued again.
- */
- call->async_work.func = afs_delete_async_call;
- if (!queue_work(afs_async_calls, &call->async_work))
- afs_put_call(call);
- }
-
afs_put_call(call);
_leave("");
}
@@ -810,6 +755,7 @@ void afs_charge_preallocation(struct work_struct *work)
if (!call)
break;
+ call->drop_ref = true;
call->async = true;
call->state = AFS_CALL_SV_AWAIT_OP_ID;
init_waitqueue_head(&call->waitq);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 69bf2fb6f7cd..9501880dff5e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1520,10 +1520,22 @@ rescan:
if (ret)
return ret;
- if (invalidate)
- set_capacity(disk, 0);
- else if (disk->fops->revalidate_disk)
- disk->fops->revalidate_disk(disk);
+ /*
+ * Historically we only set the capacity to zero for devices that
+ * support partitions (independ of actually having partitions created).
+ * Doing that is rather inconsistent, but changing it broke legacy
+ * udisks polling for legacy ide-cdrom devices. Use the crude check
+ * below to get the sane behavior for most device while not breaking
+ * userspace for this particular setup.
+ */
+ if (invalidate) {
+ if (disk_part_scan_enabled(disk) ||
+ !(disk->flags & GENHD_FL_REMOVABLE))
+ set_capacity(disk, 0);
+ } else {
+ if (disk->fops->revalidate_disk)
+ disk->fops->revalidate_disk(disk);
+ }
check_disk_size_change(disk, bdev, !invalidate);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 404e050ce8ee..7f09147872dc 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -856,9 +856,9 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
found_raid1c34 = true;
up_read(&sinfo->groups_sem);
}
- if (found_raid56)
+ if (!found_raid56)
btrfs_clear_fs_incompat(fs_info, RAID56);
- if (found_raid1c34)
+ if (!found_raid1c34)
btrfs_clear_fs_incompat(fs_info, RAID1C34);
}
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 27076ebadb36..d267eb5caa7b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9496,6 +9496,10 @@ out_fail:
ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
if (ret)
commit_transaction = true;
+ } else if (sync_log) {
+ mutex_lock(&root->log_mutex);
+ list_del(&ctx.list);
+ mutex_unlock(&root->log_mutex);
}
if (commit_transaction) {
ret = btrfs_commit_transaction(trans);
diff --git a/fs/buffer.c b/fs/buffer.c
index b8d28370cfd7..3f5758e01e40 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3019,49 +3019,6 @@ static void end_bio_bh_io_sync(struct bio *bio)
bio_put(bio);
}
-/*
- * This allows us to do IO even on the odd last sectors
- * of a device, even if the block size is some multiple
- * of the physical sector size.
- *
- * We'll just truncate the bio to the size of the device,
- * and clear the end of the buffer head manually.
- *
- * Truly out-of-range accesses will turn into actual IO
- * errors, this only handles the "we need to be able to
- * do IO at the final sector" case.
- */
-void guard_bio_eod(struct bio *bio)
-{
- sector_t maxsector;
- struct hd_struct *part;
-
- rcu_read_lock();
- part = __disk_get_part(bio->bi_disk, bio->bi_partno);
- if (part)
- maxsector = part_nr_sects_read(part);
- else
- maxsector = get_capacity(bio->bi_disk);
- rcu_read_unlock();
-
- if (!maxsector)
- return;
-
- /*
- * If the *whole* IO is past the end of the device,
- * let it through, and the IO layer will turn it into
- * an EIO.
- */
- if (unlikely(bio->bi_iter.bi_sector >= maxsector))
- return;
-
- maxsector -= bio->bi_iter.bi_sector;
- if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
- return;
-
- bio_truncate(bio, maxsector << 9);
-}
-
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
enum rw_hint write_hint, struct writeback_control *wbc)
{
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7e0190b1f821..5a478cd06e11 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1415,10 +1415,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
int err, want, got;
bool direct_lock = false;
+ u32 map_flags;
+ u64 pool_flags;
loff_t pos;
loff_t limit = max(i_size_read(inode), fsc->max_file_size);
@@ -1481,8 +1484,12 @@ retry_snap:
goto out;
}
- /* FIXME: not complete since it doesn't account for being at quota */
- if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
+ down_read(&osdc->lock);
+ map_flags = osdc->osdmap->flags;
+ pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
+ up_read(&osdc->lock);
+ if ((map_flags & CEPH_OSDMAP_FULL) ||
+ (pool_flags & CEPH_POOL_FLAG_FULL)) {
err = -ENOSPC;
goto out;
}
@@ -1575,7 +1582,8 @@ retry_snap:
}
if (written >= 0) {
- if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
+ if ((map_flags & CEPH_OSDMAP_NEARFULL) ||
+ (pool_flags & CEPH_POOL_FLAG_NEARFULL))
iocb->ki_flags |= IOCB_DSYNC;
written = generic_write_sync(iocb, written);
}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index ccfcc66aaf44..923be9399b21 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1155,5 +1155,6 @@ void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc)
pr_err("snapid map %llx -> %x still in use\n",
sm->snap, sm->dev);
}
+ kfree(sm);
}
}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 0ef099442f20..36e7b2fd2190 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -555,7 +555,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
if (server->ops->close)
server->ops->close(xid, tcon, &fid);
cifs_del_pending_open(&open);
- fput(file);
rc = -ENOMEM;
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3b942ecdd4be..8f9d849a0012 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1169,7 +1169,8 @@ try_again:
rc = posix_lock_file(file, flock, NULL);
up_write(&cinode->lock_sem);
if (rc == FILE_LOCK_DEFERRED) {
- rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
+ rc = wait_event_interruptible(flock->fl_wait,
+ list_empty(&flock->fl_blocked_member));
if (!rc)
goto try_again;
locks_delete_block(flock);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1e8a4b1579db..b16f8d23e97b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2191,7 +2191,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
stat->gid = current_fsgid();
}
- return rc;
+ return 0;
}
int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start,
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c31e84ee3c39..cfe9b800ea8c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2222,6 +2222,8 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
goto qdf_free;
}
+ atomic_inc(&tcon->num_remote_opens);
+
qd_rsp = (struct smb2_query_directory_rsp *)rsp_iov[1].iov_base;
if (qd_rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) {
trace_smb3_query_dir_done(xid, fid->persistent_fid,
@@ -3417,7 +3419,7 @@ static int smb3_fiemap(struct cifs_tcon *tcon,
if (rc)
goto out;
- if (out_data_len < sizeof(struct file_allocated_range_buffer)) {
+ if (out_data_len && out_data_len < sizeof(struct file_allocated_range_buffer)) {
rc = -EINVAL;
goto out;
}
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index 65cb09fa6ead..08c9f216a54d 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -539,6 +539,15 @@ int fscrypt_drop_inode(struct inode *inode)
mk = ci->ci_master_key->payload.data[0];
/*
+ * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes
+ * protected by the key were cleaned by sync_filesystem(). But if
+ * userspace is still using the files, inodes can be dirtied between
+ * then and now. We mustn't lose any writes, so skip dirty inodes here.
+ */
+ if (inode->i_state & I_DIRTY_ALL)
+ return 0;
+
+ /*
* Note: since we aren't holding ->mk_secret_sem, the result here can
* immediately become outdated. But there's no correctness problem with
* unnecessarily evicting. Nor is there a correctness problem with not
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 5779a15c2cd6..5d2d81940679 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -157,17 +157,27 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
}
}
- ret = LZ4_decompress_safe_partial(src + inputmargin, out,
- inlen, rq->outputsize,
- rq->outputsize);
- if (ret < 0) {
- erofs_err(rq->sb, "failed to decompress, in[%u, %u] out[%u]",
- inlen, inputmargin, rq->outputsize);
+ /* legacy format could compress extra data in a pcluster. */
+ if (rq->partial_decoding || !support_0padding)
+ ret = LZ4_decompress_safe_partial(src + inputmargin, out,
+ inlen, rq->outputsize,
+ rq->outputsize);
+ else
+ ret = LZ4_decompress_safe(src + inputmargin, out,
+ inlen, rq->outputsize);
+
+ if (ret != rq->outputsize) {
+ erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+ ret, inlen, inputmargin, rq->outputsize);
+
WARN_ON(1);
print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
16, 1, src + inputmargin, inlen, true);
print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
16, 1, out, rq->outputsize, true);
+
+ if (ret >= 0)
+ memset(out + ret, 0, rq->outputsize - ret);
ret = -EIO;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index c4c6dcdc89ad..5eead7fdc7a6 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -52,8 +52,8 @@ struct erofs_sb_info {
struct list_head list;
struct mutex umount_mutex;
- /* the dedicated workstation for compression */
- struct radix_tree_root workstn_tree;
+ /* managed XArray arranged in physical block number */
+ struct xarray managed_pslots;
/* threshold for decompression synchronously */
unsigned int max_sync_decompress_pages;
@@ -402,8 +402,8 @@ static inline void *erofs_get_pcpubuf(unsigned int pagenr)
int erofs_workgroup_put(struct erofs_workgroup *grp);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
pgoff_t index);
-int erofs_register_workgroup(struct super_block *sb,
- struct erofs_workgroup *grp);
+struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
+ struct erofs_workgroup *grp);
void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
void erofs_shrinker_register(struct super_block *sb);
void erofs_shrinker_unregister(struct super_block *sb);
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 057e6d7b5b7f..b514c67e5fc2 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -425,7 +425,7 @@ static int erofs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags &= ~SB_POSIXACL;
#ifdef CONFIG_EROFS_FS_ZIP
- INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
+ xa_init(&sbi->managed_pslots);
#endif
/* get the root inode */
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index fddc5059c930..52d0be10f1aa 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -37,9 +37,6 @@ void *erofs_get_pcpubuf(unsigned int pagenr)
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;
-#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount)
-#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount)
-
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
int o;
@@ -66,7 +63,7 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
repeat:
rcu_read_lock();
- grp = radix_tree_lookup(&sbi->workstn_tree, index);
+ grp = xa_load(&sbi->managed_pslots, index);
if (grp) {
if (erofs_workgroup_get(grp)) {
/* prefer to relax rcu read side */
@@ -80,43 +77,37 @@ repeat:
return grp;
}
-int erofs_register_workgroup(struct super_block *sb,
- struct erofs_workgroup *grp)
+struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
+ struct erofs_workgroup *grp)
{
- struct erofs_sb_info *sbi;
- int err;
-
- /* grp shouldn't be broken or used before */
- if (atomic_read(&grp->refcount) != 1) {
- DBG_BUGON(1);
- return -EINVAL;
- }
-
- err = radix_tree_preload(GFP_NOFS);
- if (err)
- return err;
-
- sbi = EROFS_SB(sb);
- xa_lock(&sbi->workstn_tree);
+ struct erofs_sb_info *const sbi = EROFS_SB(sb);
+ struct erofs_workgroup *pre;
/*
- * Bump up reference count before making this workgroup
- * visible to other users in order to avoid potential UAF
- * without serialized by workstn_lock.
+ * Bump up a reference count before making this visible
+ * to others for the XArray in order to avoid potential
+ * UAF without serialized by xa_lock.
*/
- __erofs_workgroup_get(grp);
-
- err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
- if (err)
- /*
- * it's safe to decrease since the workgroup isn't visible
- * and refcount >= 2 (cannot be freezed).
- */
- __erofs_workgroup_put(grp);
+ atomic_inc(&grp->refcount);
- xa_unlock(&sbi->workstn_tree);
- radix_tree_preload_end();
- return err;
+repeat:
+ xa_lock(&sbi->managed_pslots);
+ pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
+ NULL, grp, GFP_NOFS);
+ if (pre) {
+ if (xa_is_err(pre)) {
+ pre = ERR_PTR(xa_err(pre));
+ } else if (erofs_workgroup_get(pre)) {
+ /* try to legitimize the current in-tree one */
+ xa_unlock(&sbi->managed_pslots);
+ cond_resched();
+ goto repeat;
+ }
+ atomic_dec(&grp->refcount);
+ grp = pre;
+ }
+ xa_unlock(&sbi->managed_pslots);
+ return grp;
}
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
@@ -155,7 +146,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
/*
* Note that all cached pages should be unattached
- * before deleted from the radix tree. Otherwise some
+ * before deleted from the XArray. Otherwise some
* cached pages could be still attached to the orphan
* old workgroup when the new one is available in the tree.
*/
@@ -169,7 +160,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
* however in order to avoid some race conditions, add a
* DBG_BUGON to observe this in advance.
*/
- DBG_BUGON(radix_tree_delete(&sbi->workstn_tree, grp->index) != grp);
+ DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);
/*
* If managed cache is on, last refcount should indicate
@@ -182,22 +173,11 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
unsigned long nr_shrink)
{
- pgoff_t first_index = 0;
- void *batch[PAGEVEC_SIZE];
+ struct erofs_workgroup *grp;
unsigned int freed = 0;
+ unsigned long index;
- int i, found;
-repeat:
- xa_lock(&sbi->workstn_tree);
-
- found = radix_tree_gang_lookup(&sbi->workstn_tree,
- batch, first_index, PAGEVEC_SIZE);
-
- for (i = 0; i < found; ++i) {
- struct erofs_workgroup *grp = batch[i];
-
- first_index = grp->index + 1;
-
+ xa_for_each(&sbi->managed_pslots, index, grp) {
/* try to shrink each valid workgroup */
if (!erofs_try_to_release_workgroup(sbi, grp))
continue;
@@ -206,10 +186,6 @@ repeat:
if (!--nr_shrink)
break;
}
- xa_unlock(&sbi->workstn_tree);
-
- if (i && nr_shrink)
- goto repeat;
return freed;
}
@@ -286,7 +262,7 @@ static unsigned long erofs_shrink_scan(struct shrinker *shrink,
spin_unlock(&erofs_sb_list_lock);
sbi->shrinker_run_no = run_no;
- freed += erofs_shrink_workstation(sbi, nr);
+ freed += erofs_shrink_workstation(sbi, nr - freed);
spin_lock(&erofs_sb_list_lock);
/* Get the next list element before we move this one */
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 80e47f07d946..c4b6c9aa87ec 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -67,16 +67,6 @@ static void z_erofs_pcluster_init_once(void *ptr)
pcl->compressed_pages[i] = NULL;
}
-static void z_erofs_pcluster_init_always(struct z_erofs_pcluster *pcl)
-{
- struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
-
- atomic_set(&pcl->obj.refcount, 1);
-
- DBG_BUGON(cl->nr_pages);
- DBG_BUGON(cl->vcnt);
-}
-
int __init z_erofs_init_zip_subsystem(void)
{
pcluster_cachep = kmem_cache_create("erofs_compress",
@@ -341,26 +331,19 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
struct inode *inode,
struct erofs_map_blocks *map)
{
- struct erofs_workgroup *grp;
- struct z_erofs_pcluster *pcl;
+ struct z_erofs_pcluster *pcl = clt->pcl;
struct z_erofs_collection *cl;
unsigned int length;
- grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
- if (!grp)
- return -ENOENT;
-
- pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ /* to avoid unexpected loop formed by corrupted images */
if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
DBG_BUGON(1);
- erofs_workgroup_put(grp);
return -EFSCORRUPTED;
}
cl = z_erofs_primarycollection(pcl);
if (cl->pageofs != (map->m_la & ~PAGE_MASK)) {
DBG_BUGON(1);
- erofs_workgroup_put(grp);
return -EFSCORRUPTED;
}
@@ -368,7 +351,6 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
DBG_BUGON(1);
- erofs_workgroup_put(grp);
return -EFSCORRUPTED;
}
} else {
@@ -391,7 +373,6 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
/* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
clt->tailpcl = NULL;
- clt->pcl = pcl;
clt->cl = cl;
return 0;
}
@@ -402,6 +383,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
{
struct z_erofs_pcluster *pcl;
struct z_erofs_collection *cl;
+ struct erofs_workgroup *grp;
int err;
/* no available workgroup, let's allocate one */
@@ -409,7 +391,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
if (!pcl)
return -ENOMEM;
- z_erofs_pcluster_init_always(pcl);
+ atomic_set(&pcl->obj.refcount, 1);
pcl->obj.index = map->m_pa >> PAGE_SHIFT;
pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
@@ -429,19 +411,29 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
clt->mode = COLLECT_PRIMARY_FOLLOWED;
cl = z_erofs_primarycollection(pcl);
+
+ /* must be cleaned before freeing to slab */
+ DBG_BUGON(cl->nr_pages);
+ DBG_BUGON(cl->vcnt);
+
cl->pageofs = map->m_la & ~PAGE_MASK;
/*
* lock all primary followed works before visible to others
* and mutex_trylock *never* fails for a new pcluster.
*/
- mutex_trylock(&cl->lock);
+ DBG_BUGON(!mutex_trylock(&cl->lock));
- err = erofs_register_workgroup(inode->i_sb, &pcl->obj);
- if (err) {
- mutex_unlock(&cl->lock);
- kmem_cache_free(pcluster_cachep, pcl);
- return -EAGAIN;
+ grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
+ if (IS_ERR(grp)) {
+ err = PTR_ERR(grp);
+ goto err_out;
+ }
+
+ if (grp != &pcl->obj) {
+ clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ err = -EEXIST;
+ goto err_out;
}
/* used to check tail merging loop due to corrupted images */
if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
@@ -450,12 +442,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
clt->pcl = pcl;
clt->cl = cl;
return 0;
+
+err_out:
+ mutex_unlock(&cl->lock);
+ kmem_cache_free(pcluster_cachep, pcl);
+ return err;
}
static int z_erofs_collector_begin(struct z_erofs_collector *clt,
struct inode *inode,
struct erofs_map_blocks *map)
{
+ struct erofs_workgroup *grp;
int ret;
DBG_BUGON(clt->cl);
@@ -469,21 +467,25 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
return -EINVAL;
}
-repeat:
- ret = z_erofs_lookup_collection(clt, inode, map);
- if (ret == -ENOENT) {
+ grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
+ if (grp) {
+ clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ } else {
ret = z_erofs_register_collection(clt, inode, map);
- /* someone registered at the same time, give another try */
- if (ret == -EAGAIN) {
- cond_resched();
- goto repeat;
- }
+ if (!ret)
+ goto out;
+ if (ret != -EEXIST)
+ return ret;
}
- if (ret)
+ ret = z_erofs_lookup_collection(clt, inode, map);
+ if (ret) {
+ erofs_workgroup_put(&clt->pcl->obj);
return ret;
+ }
+out:
z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
clt->cl->pagevec, clt->cl->vcnt);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b041b66002db..eee3c92a9ebf 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1854,9 +1854,9 @@ fetch_events:
waiter = true;
init_waitqueue_entry(&wait, current);
- spin_lock_irq(&ep->wq.lock);
+ write_lock_irq(&ep->lock);
__add_wait_queue_exclusive(&ep->wq, &wait);
- spin_unlock_irq(&ep->wq.lock);
+ write_unlock_irq(&ep->lock);
}
for (;;) {
@@ -1904,9 +1904,9 @@ send_events:
goto fetch_events;
if (waiter) {
- spin_lock_irq(&ep->wq.lock);
+ write_lock_irq(&ep->lock);
__remove_wait_queue(&ep->wq, &wait);
- spin_unlock_irq(&ep->wq.lock);
+ write_unlock_irq(&ep->lock);
}
return res;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0c7c4adb664e..c8dff4c68141 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -43,7 +43,7 @@
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
-
+#include <linux/part_stat.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -927,7 +927,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
{
struct block_device *bdev;
- char b[BDEVNAME_SIZE];
bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
if (IS_ERR(bdev))
@@ -935,8 +934,9 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
return bdev;
fail:
- ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
- __bdevname(dev, b), PTR_ERR(bdev));
+ ext4_msg(sb, KERN_ERR,
+ "failed to open journal device unknown-block(%u,%u) %ld",
+ MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
return NULL;
}
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index d218ebdafa4a..04bfaf63752c 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -13,6 +13,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
+#include <linux/part_stat.h>
#include "ext4.h"
#include "ext4_jbd2.h"
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5355be6b6755..088c3e7a1080 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -22,6 +22,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/quotaops.h>
+#include <linux/part_stat.h>
#include <crypto/hash.h>
#include <linux/fscrypt.h>
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 65a7a432dfee..d398b2d90c6c 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -24,6 +24,7 @@
#include <linux/sysfs.h>
#include <linux/quota.h>
#include <linux/unicode.h>
+#include <linux/part_stat.h>
#include "f2fs.h"
#include "node.h"
diff --git a/fs/file.c b/fs/file.c
index a364e1a9b7e8..c8a4e4c86e55 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -540,9 +540,14 @@ static int alloc_fd(unsigned start, unsigned flags)
return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
}
+int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
+{
+ return __alloc_fd(current->files, 0, nofile, flags);
+}
+
int get_unused_fd_flags(unsigned flags)
{
- return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
+ return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
}
EXPORT_SYMBOL(get_unused_fd_flags);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8e02d76fe104..97eec7522bf2 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -276,12 +276,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
{
struct fuse_iqueue *fiq = &fc->iq;
- bool async;
if (test_and_set_bit(FR_FINISHED, &req->flags))
goto put_request;
- async = req->args->end;
/*
* test_and_set_bit() implies smp_mb() between bit
* changing and below intr_entry check. Pairs with
@@ -324,7 +322,7 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
wake_up(&req->waitq);
}
- if (async)
+ if (test_bit(FR_ASYNC, &req->flags))
req->args->end(fc, req->args, req->out.h.error);
put_request:
fuse_put_request(fc, req);
@@ -471,6 +469,8 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
req->in.h.opcode = args->opcode;
req->in.h.nodeid = args->nodeid;
req->args = args;
+ if (args->end)
+ __set_bit(FR_ASYNC, &req->flags);
}
ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index aa75e2305b75..ca344bf71404 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -301,6 +301,7 @@ struct fuse_io_priv {
* FR_SENT: request is in userspace, waiting for an answer
* FR_FINISHED: request is finished
* FR_PRIVATE: request is on private list
+ * FR_ASYNC: request is asynchronous
*/
enum fuse_req_flag {
FR_ISREPLY,
@@ -314,6 +315,7 @@ enum fuse_req_flag {
FR_SENT,
FR_FINISHED,
FR_PRIVATE,
+ FR_ASYNC,
};
/**
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2716d56ed0a0..8294851a9dd9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1248,7 +1248,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
if (!(file->f_mode & FMODE_OPENED))
return finish_no_open(file, d);
dput(d);
- return 0;
+ return excl && (flags & O_CREAT) ? -EEXIST : 0;
}
BUG_ON(d != NULL);
diff --git a/fs/inode.c b/fs/inode.c
index 7d57068b6b7a..93d9252a00ab 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -138,6 +138,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
+ atomic64_set(&inode->i_sequence, 0);
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
inode->i_fop = &no_open_fops;
diff --git a/fs/internal.h b/fs/internal.h
index f3f280b952a3..4d37912a5587 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -38,7 +38,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
/*
* buffer.c
*/
-extern void guard_bio_eod(struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5cef075c0b37..cc5cf2209fb0 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -69,6 +69,8 @@ struct io_worker {
#define IO_WQ_HASH_ORDER 5
#endif
+#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER)
+
struct io_wqe_acct {
unsigned nr_workers;
unsigned max_workers;
@@ -98,6 +100,7 @@ struct io_wqe {
struct list_head all_list;
struct io_wq *wq;
+ struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
};
/*
@@ -107,8 +110,7 @@ struct io_wq {
struct io_wqe **wqes;
unsigned long state;
- get_work_fn *get_work;
- put_work_fn *put_work;
+ free_work_fn *free_work;
struct task_struct *manager;
struct user_struct *user;
@@ -376,26 +378,35 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
return __io_worker_unuse(wqe, worker);
}
-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, unsigned *hash)
+static inline unsigned int io_get_work_hash(struct io_wq_work *work)
+{
+ return work->flags >> IO_WQ_HASH_SHIFT;
+}
+
+static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
__must_hold(wqe->lock)
{
struct io_wq_work_node *node, *prev;
- struct io_wq_work *work;
+ struct io_wq_work *work, *tail;
+ unsigned int hash;
wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
/* not hashed, can run anytime */
- if (!(work->flags & IO_WQ_WORK_HASHED)) {
- wq_node_del(&wqe->work_list, node, prev);
+ if (!io_wq_is_hashed(work)) {
+ wq_list_del(&wqe->work_list, node, prev);
return work;
}
/* hashed, can run if not already running */
- *hash = work->flags >> IO_WQ_HASH_SHIFT;
- if (!(wqe->hash_map & BIT_ULL(*hash))) {
- wqe->hash_map |= BIT_ULL(*hash);
- wq_node_del(&wqe->work_list, node, prev);
+ hash = io_get_work_hash(work);
+ if (!(wqe->hash_map & BIT(hash))) {
+ wqe->hash_map |= BIT(hash);
+ /* all items with this hash lie in [work, tail] */
+ tail = wqe->hash_tail[hash];
+ wqe->hash_tail[hash] = NULL;
+ wq_list_cut(&wqe->work_list, &tail->list, prev);
return work;
}
}
@@ -440,16 +451,49 @@ static void io_wq_switch_creds(struct io_worker *worker,
worker->saved_creds = old_creds;
}
+static void io_impersonate_work(struct io_worker *worker,
+ struct io_wq_work *work)
+{
+ if (work->files && current->files != work->files) {
+ task_lock(current);
+ current->files = work->files;
+ task_unlock(current);
+ }
+ if (work->fs && current->fs != work->fs)
+ current->fs = work->fs;
+ if (work->mm != worker->mm)
+ io_wq_switch_mm(worker, work);
+ if (worker->cur_creds != work->creds)
+ io_wq_switch_creds(worker, work);
+}
+
+static void io_assign_current_work(struct io_worker *worker,
+ struct io_wq_work *work)
+{
+ if (work) {
+ /* flush pending signals before assigning new work */
+ if (signal_pending(current))
+ flush_signals(current);
+ cond_resched();
+ }
+
+ spin_lock_irq(&worker->lock);
+ worker->cur_work = work;
+ spin_unlock_irq(&worker->lock);
+}
+
+static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
+
static void io_worker_handle_work(struct io_worker *worker)
__releases(wqe->lock)
{
- struct io_wq_work *work, *old_work = NULL, *put_work = NULL;
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
do {
- unsigned hash = -1U;
-
+ struct io_wq_work *work;
+ unsigned int hash;
+get_next:
/*
* If we got some work, mark us as busy. If we didn't, but
* the list isn't empty, it means we stalled on hashed work.
@@ -457,81 +501,60 @@ static void io_worker_handle_work(struct io_worker *worker)
* can't make progress, any work completion or insertion will
* clear the stalled flag.
*/
- work = io_get_next_work(wqe, &hash);
+ work = io_get_next_work(wqe);
if (work)
__io_worker_busy(wqe, worker, work);
else if (!wq_list_empty(&wqe->work_list))
wqe->flags |= IO_WQE_FLAG_STALLED;
spin_unlock_irq(&wqe->lock);
- if (put_work && wq->put_work)
- wq->put_work(old_work);
if (!work)
break;
-next:
- /* flush any pending signals before assigning new work */
- if (signal_pending(current))
- flush_signals(current);
-
- cond_resched();
-
- spin_lock_irq(&worker->lock);
- worker->cur_work = work;
- spin_unlock_irq(&worker->lock);
-
- if (work->flags & IO_WQ_WORK_CB)
- work->func(&work);
-
- if (work->files && current->files != work->files) {
- task_lock(current);
- current->files = work->files;
- task_unlock(current);
- }
- if (work->fs && current->fs != work->fs)
- current->fs = work->fs;
- if (work->mm != worker->mm)
- io_wq_switch_mm(worker, work);
- if (worker->cur_creds != work->creds)
- io_wq_switch_creds(worker, work);
- /*
- * OK to set IO_WQ_WORK_CANCEL even for uncancellable work,
- * the worker function will do the right thing.
- */
- if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
- work->flags |= IO_WQ_WORK_CANCEL;
- if (worker->mm)
- work->flags |= IO_WQ_WORK_HAS_MM;
-
- if (wq->get_work) {
- put_work = work;
- wq->get_work(work);
- }
-
- old_work = work;
- work->func(&work);
-
- spin_lock_irq(&worker->lock);
- worker->cur_work = NULL;
- spin_unlock_irq(&worker->lock);
-
- spin_lock_irq(&wqe->lock);
-
- if (hash != -1U) {
- wqe->hash_map &= ~BIT_ULL(hash);
- wqe->flags &= ~IO_WQE_FLAG_STALLED;
- }
- if (work && work != old_work) {
- spin_unlock_irq(&wqe->lock);
-
- if (put_work && wq->put_work) {
- wq->put_work(put_work);
- put_work = NULL;
+ io_assign_current_work(worker, work);
+
+ /* handle a whole dependent link */
+ do {
+ struct io_wq_work *old_work, *next_hashed, *linked;
+
+ next_hashed = wq_next_work(work);
+ io_impersonate_work(worker, work);
+ /*
+ * OK to set IO_WQ_WORK_CANCEL even for uncancellable
+ * work, the worker function will do the right thing.
+ */
+ if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
+ work->flags |= IO_WQ_WORK_CANCEL;
+
+ hash = io_get_work_hash(work);
+ linked = old_work = work;
+ linked->func(&linked);
+ linked = (old_work == linked) ? NULL : linked;
+
+ work = next_hashed;
+ if (!work && linked && !io_wq_is_hashed(linked)) {
+ work = linked;
+ linked = NULL;
}
+ io_assign_current_work(worker, work);
+ wq->free_work(old_work);
+
+ if (linked)
+ io_wqe_enqueue(wqe, linked);
+
+ if (hash != -1U && !next_hashed) {
+ spin_lock_irq(&wqe->lock);
+ wqe->hash_map &= ~BIT_ULL(hash);
+ wqe->flags &= ~IO_WQE_FLAG_STALLED;
+ /* dependent work is not hashed */
+ hash = -1U;
+ /* skip unnecessary unlock-lock wqe->lock */
+ if (!work)
+ goto get_next;
+ spin_unlock_irq(&wqe->lock);
+ }
+ } while (work);
- /* dependent work not hashed */
- hash = -1U;
- goto next;
- }
+ spin_lock_irq(&wqe->lock);
} while (1);
}
@@ -747,17 +770,40 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
return true;
}
-static void io_run_cancel(struct io_wq_work *work)
+static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
{
+ struct io_wq *wq = wqe->wq;
+
do {
struct io_wq_work *old_work = work;
work->flags |= IO_WQ_WORK_CANCEL;
work->func(&work);
work = (work == old_work) ? NULL : work;
+ wq->free_work(old_work);
} while (work);
}
+static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+{
+ unsigned int hash;
+ struct io_wq_work *tail;
+
+ if (!io_wq_is_hashed(work)) {
+append:
+ wq_list_add_tail(&work->list, &wqe->work_list);
+ return;
+ }
+
+ hash = io_get_work_hash(work);
+ tail = wqe->hash_tail[hash];
+ wqe->hash_tail[hash] = work;
+ if (!tail)
+ goto append;
+
+ wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
+}
+
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
@@ -771,13 +817,13 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
* It's close enough to not be an issue, fork() has the same delay.
*/
if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
- io_run_cancel(work);
+ io_run_cancel(work, wqe);
return;
}
work_flags = work->flags;
spin_lock_irqsave(&wqe->lock, flags);
- wq_list_add_tail(&work->list, &wqe->work_list);
+ io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
spin_unlock_irqrestore(&wqe->lock, flags);
@@ -794,19 +840,15 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
}
/*
- * Enqueue work, hashed by some key. Work items that hash to the same value
- * will not be done in parallel. Used to limit concurrent writes, generally
- * hashed by inode.
+ * Work items that hash to the same value will not be done in parallel.
+ * Used to limit concurrent writes, generally hashed by inode.
*/
-void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val)
+void io_wq_hash_work(struct io_wq_work *work, void *val)
{
- struct io_wqe *wqe = wq->wqes[numa_node_id()];
- unsigned bit;
-
+ unsigned int bit;
bit = hash_ptr(val, IO_WQ_HASH_ORDER);
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
- io_wqe_enqueue(wqe, work);
}
static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
@@ -856,14 +898,13 @@ void io_wq_cancel_all(struct io_wq *wq)
}
struct io_cb_cancel_data {
- struct io_wqe *wqe;
- work_cancel_fn *cancel;
- void *caller_data;
+ work_cancel_fn *fn;
+ void *data;
};
-static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
+static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
- struct io_cb_cancel_data *data = cancel_data;
+ struct io_cb_cancel_data *match = data;
unsigned long flags;
bool ret = false;
@@ -874,83 +915,7 @@ static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
spin_lock_irqsave(&worker->lock, flags);
if (worker->cur_work &&
!(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
- data->cancel(worker->cur_work, data->caller_data)) {
- send_sig(SIGINT, worker->task, 1);
- ret = true;
- }
- spin_unlock_irqrestore(&worker->lock, flags);
-
- return ret;
-}
-
-static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe,
- work_cancel_fn *cancel,
- void *cancel_data)
-{
- struct io_cb_cancel_data data = {
- .wqe = wqe,
- .cancel = cancel,
- .caller_data = cancel_data,
- };
- struct io_wq_work_node *node, *prev;
- struct io_wq_work *work;
- unsigned long flags;
- bool found = false;
-
- spin_lock_irqsave(&wqe->lock, flags);
- wq_list_for_each(node, prev, &wqe->work_list) {
- work = container_of(node, struct io_wq_work, list);
-
- if (cancel(work, cancel_data)) {
- wq_node_del(&wqe->work_list, node, prev);
- found = true;
- break;
- }
- }
- spin_unlock_irqrestore(&wqe->lock, flags);
-
- if (found) {
- io_run_cancel(work);
- return IO_WQ_CANCEL_OK;
- }
-
- rcu_read_lock();
- found = io_wq_for_each_worker(wqe, io_work_cancel, &data);
- rcu_read_unlock();
- return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
-}
-
-enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
- void *data)
-{
- enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
- int node;
-
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
-
- ret = io_wqe_cancel_cb_work(wqe, cancel, data);
- if (ret != IO_WQ_CANCEL_NOTFOUND)
- break;
- }
-
- return ret;
-}
-
-struct work_match {
- bool (*fn)(struct io_wq_work *, void *data);
- void *data;
-};
-
-static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
-{
- struct work_match *match = data;
- unsigned long flags;
- bool ret = false;
-
- spin_lock_irqsave(&worker->lock, flags);
- if (match->fn(worker->cur_work, match->data) &&
- !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL)) {
+ match->fn(worker->cur_work, match->data)) {
send_sig(SIGINT, worker->task, 1);
ret = true;
}
@@ -960,7 +925,7 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
}
static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
- struct work_match *match)
+ struct io_cb_cancel_data *match)
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work;
@@ -977,7 +942,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
work = container_of(node, struct io_wq_work, list);
if (match->fn(work, match->data)) {
- wq_node_del(&wqe->work_list, node, prev);
+ wq_list_del(&wqe->work_list, node, prev);
found = true;
break;
}
@@ -985,7 +950,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
spin_unlock_irqrestore(&wqe->lock, flags);
if (found) {
- io_run_cancel(work);
+ io_run_cancel(work, wqe);
return IO_WQ_CANCEL_OK;
}
@@ -1001,22 +966,16 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
}
-static bool io_wq_work_match(struct io_wq_work *work, void *data)
-{
- return work == data;
-}
-
-enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
+enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
+ void *data)
{
- struct work_match match = {
- .fn = io_wq_work_match,
- .data = cwork
+ struct io_cb_cancel_data match = {
+ .fn = cancel,
+ .data = data,
};
enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
int node;
- cwork->flags |= IO_WQ_WORK_CANCEL;
-
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
@@ -1028,33 +987,28 @@ enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
return ret;
}
+static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data)
+{
+ return work == data;
+}
+
+enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
+{
+ return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork);
+}
+
static bool io_wq_pid_match(struct io_wq_work *work, void *data)
{
pid_t pid = (pid_t) (unsigned long) data;
- if (work)
- return work->task_pid == pid;
- return false;
+ return work->task_pid == pid;
}
enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid)
{
- struct work_match match = {
- .fn = io_wq_pid_match,
- .data = (void *) (unsigned long) pid
- };
- enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
- int node;
-
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
+ void *data = (void *) (unsigned long) pid;
- ret = io_wqe_cancel_work(wqe, &match);
- if (ret != IO_WQ_CANCEL_NOTFOUND)
- break;
- }
-
- return ret;
+ return io_wq_cancel_cb(wq, io_wq_pid_match, data);
}
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
@@ -1062,6 +1016,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
int ret = -ENOMEM, node;
struct io_wq *wq;
+ if (WARN_ON_ONCE(!data->free_work))
+ return ERR_PTR(-EINVAL);
+
wq = kzalloc(sizeof(*wq), GFP_KERNEL);
if (!wq)
return ERR_PTR(-ENOMEM);
@@ -1072,8 +1029,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
return ERR_PTR(-ENOMEM);
}
- wq->get_work = data->get_work;
- wq->put_work = data->put_work;
+ wq->free_work = data->free_work;
/* caller must already hold a reference to this */
wq->user = data->user;
@@ -1130,7 +1086,7 @@ err:
bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
{
- if (data->get_work != wq->get_work || data->put_work != wq->put_work)
+ if (data->free_work != wq->free_work)
return false;
return refcount_inc_not_zero(&wq->use_refs);
diff --git a/fs/io-wq.h b/fs/io-wq.h
index e5e15f2c93ec..3ee7356d6be5 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -5,10 +5,8 @@ struct io_wq;
enum {
IO_WQ_WORK_CANCEL = 1,
- IO_WQ_WORK_HAS_MM = 2,
IO_WQ_WORK_HASHED = 4,
IO_WQ_WORK_UNBOUND = 32,
- IO_WQ_WORK_CB = 128,
IO_WQ_WORK_NO_CANCEL = 256,
IO_WQ_WORK_CONCURRENT = 512,
@@ -30,6 +28,18 @@ struct io_wq_work_list {
struct io_wq_work_node *last;
};
+static inline void wq_list_add_after(struct io_wq_work_node *node,
+ struct io_wq_work_node *pos,
+ struct io_wq_work_list *list)
+{
+ struct io_wq_work_node *next = pos->next;
+
+ pos->next = node;
+ node->next = next;
+ if (!next)
+ list->last = node;
+}
+
static inline void wq_list_add_tail(struct io_wq_work_node *node,
struct io_wq_work_list *list)
{
@@ -42,17 +52,26 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
}
}
-static inline void wq_node_del(struct io_wq_work_list *list,
- struct io_wq_work_node *node,
+static inline void wq_list_cut(struct io_wq_work_list *list,
+ struct io_wq_work_node *last,
struct io_wq_work_node *prev)
{
- if (node == list->first)
- WRITE_ONCE(list->first, node->next);
- if (node == list->last)
+ /* first in the list, if prev==NULL */
+ if (!prev)
+ WRITE_ONCE(list->first, last->next);
+ else
+ prev->next = last->next;
+
+ if (last == list->last)
list->last = prev;
- if (prev)
- prev->next = node->next;
- node->next = NULL;
+ last->next = NULL;
+}
+
+static inline void wq_list_del(struct io_wq_work_list *list,
+ struct io_wq_work_node *node,
+ struct io_wq_work_node *prev)
+{
+ wq_list_cut(list, node, prev);
}
#define wq_list_for_each(pos, prv, head) \
@@ -65,10 +84,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
} while (0)
struct io_wq_work {
- union {
- struct io_wq_work_node list;
- void *data;
- };
+ struct io_wq_work_node list;
void (*func)(struct io_wq_work **);
struct files_struct *files;
struct mm_struct *mm;
@@ -83,14 +99,20 @@ struct io_wq_work {
*(work) = (struct io_wq_work){ .func = _func }; \
} while (0) \
-typedef void (get_work_fn)(struct io_wq_work *);
-typedef void (put_work_fn)(struct io_wq_work *);
+static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
+{
+ if (!work->list.next)
+ return NULL;
+
+ return container_of(work->list.next, struct io_wq_work, list);
+}
+
+typedef void (free_work_fn)(struct io_wq_work *);
struct io_wq_data {
struct user_struct *user;
- get_work_fn *get_work;
- put_work_fn *put_work;
+ free_work_fn *free_work;
};
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
@@ -98,7 +120,12 @@ bool io_wq_get(struct io_wq *wq, struct io_wq_data *data);
void io_wq_destroy(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
-void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val);
+void io_wq_hash_work(struct io_wq_work *work, void *val);
+
+static inline bool io_wq_is_hashed(struct io_wq_work *work)
+{
+ return work->flags & IO_WQ_WORK_HASHED;
+}
void io_wq_cancel_all(struct io_wq *wq);
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c06082bb039a..358f97be9c7b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -44,6 +44,7 @@
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
+#include <net/compat.h>
#include <linux/refcount.h>
#include <linux/uio.h>
#include <linux/bits.h>
@@ -76,6 +77,8 @@
#include <linux/fadvise.h>
#include <linux/eventpoll.h>
#include <linux/fs_struct.h>
+#include <linux/splice.h>
+#include <linux/task_work.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -191,7 +194,13 @@ struct fixed_file_data {
struct llist_head put_llist;
struct work_struct ref_work;
struct completion done;
- struct rcu_head rcu;
+};
+
+struct io_buffer {
+ struct list_head list;
+ __u64 addr;
+ __s32 len;
+ __u16 bid;
};
struct io_ring_ctx {
@@ -271,6 +280,8 @@ struct io_ring_ctx {
struct socket *ring_sock;
#endif
+ struct idr io_buffer_idr;
+
struct idr personality_idr;
struct {
@@ -291,7 +302,6 @@ struct io_ring_ctx {
struct {
spinlock_t completion_lock;
- struct llist_head poll_llist;
/*
* ->poll_list is protected by the ctx->uring_lock for
@@ -344,6 +354,7 @@ struct io_accept {
struct sockaddr __user *addr;
int __user *addr_len;
int flags;
+ unsigned long nofile;
};
struct io_sync {
@@ -386,7 +397,9 @@ struct io_sr_msg {
void __user *buf;
};
int msg_flags;
+ int bgid;
size_t len;
+ struct io_buffer *kbuf;
};
struct io_open {
@@ -398,6 +411,7 @@ struct io_open {
struct filename *filename;
struct statx __user *buffer;
struct open_how how;
+ unsigned long nofile;
};
struct io_files_update {
@@ -429,6 +443,24 @@ struct io_epoll {
struct epoll_event event;
};
+struct io_splice {
+ struct file *file_out;
+ struct file *file_in;
+ loff_t off_out;
+ loff_t off_in;
+ u64 len;
+ unsigned int flags;
+};
+
+struct io_provide_buf {
+ struct file *file;
+ __u64 addr;
+ __s32 len;
+ __u32 bgid;
+ __u16 nbufs;
+ __u16 bid;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -463,6 +495,7 @@ enum {
REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT,
REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT,
REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
+ REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT,
REQ_F_LINK_NEXT_BIT,
REQ_F_FAIL_LINK_BIT,
@@ -478,6 +511,11 @@ enum {
REQ_F_COMP_LOCKED_BIT,
REQ_F_NEED_CLEANUP_BIT,
REQ_F_OVERFLOW_BIT,
+ REQ_F_POLLED_BIT,
+ REQ_F_BUFFER_SELECTED_BIT,
+
+ /* not a real bit, just to check we're not overflowing the space */
+ __REQ_F_LAST_BIT,
};
enum {
@@ -491,6 +529,8 @@ enum {
REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT),
/* IOSQE_ASYNC */
REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT),
+ /* IOSQE_BUFFER_SELECT */
+ REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT),
/* already grabbed next link */
REQ_F_LINK_NEXT = BIT(REQ_F_LINK_NEXT_BIT),
@@ -520,6 +560,15 @@ enum {
REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
/* in overflow list */
REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT),
+ /* already went through poll handler */
+ REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
+ /* buffer already selected */
+ REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
+};
+
+struct async_poll {
+ struct io_poll_iocb poll;
+ struct io_wq_work work;
};
/*
@@ -545,32 +594,45 @@ struct io_kiocb {
struct io_fadvise fadvise;
struct io_madvise madvise;
struct io_epoll epoll;
+ struct io_splice splice;
+ struct io_provide_buf pbuf;
};
struct io_async_ctx *io;
- /*
- * llist_node is only used for poll deferred completions
- */
- struct llist_node llist_node;
- bool in_async;
bool needs_fixed_file;
u8 opcode;
struct io_ring_ctx *ctx;
- union {
- struct list_head list;
- struct hlist_node hash_node;
- };
- struct list_head link_list;
+ struct list_head list;
unsigned int flags;
refcount_t refs;
+ union {
+ struct task_struct *task;
+ unsigned long fsize;
+ };
u64 user_data;
u32 result;
u32 sequence;
+ struct list_head link_list;
+
struct list_head inflight_entry;
- struct io_wq_work work;
+ union {
+ /*
+ * Only commands that never go async can use the below fields,
+ * obviously. Right now only IORING_OP_POLL_ADD uses them, and
+ * async armed poll handlers for regular commands. The latter
+ * restore the work, if needed.
+ */
+ struct {
+ struct callback_head task_work;
+ struct hlist_node hash_node;
+ struct async_poll *apoll;
+ int cflags;
+ };
+ struct io_wq_work work;
+ };
};
#define IO_PLUG_THRESHOLD 2
@@ -614,6 +676,11 @@ struct io_op_def {
unsigned file_table : 1;
/* needs ->fs */
unsigned needs_fs : 1;
+ /* set if opcode supports polled "wait" */
+ unsigned pollin : 1;
+ unsigned pollout : 1;
+ /* op supports buffer selection */
+ unsigned buffer_select : 1;
};
static const struct io_op_def io_op_defs[] = {
@@ -623,6 +690,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollin = 1,
+ .buffer_select = 1,
},
[IORING_OP_WRITEV] = {
.async_ctx = 1,
@@ -630,6 +699,7 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_FSYNC] = {
.needs_file = 1,
@@ -637,11 +707,13 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_READ_FIXED] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollin = 1,
},
[IORING_OP_WRITE_FIXED] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_POLL_ADD] = {
.needs_file = 1,
@@ -657,6 +729,7 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.needs_fs = 1,
+ .pollout = 1,
},
[IORING_OP_RECVMSG] = {
.async_ctx = 1,
@@ -664,6 +737,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.needs_fs = 1,
+ .pollin = 1,
+ .buffer_select = 1,
},
[IORING_OP_TIMEOUT] = {
.async_ctx = 1,
@@ -675,6 +750,7 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.file_table = 1,
+ .pollin = 1,
},
[IORING_OP_ASYNC_CANCEL] = {},
[IORING_OP_LINK_TIMEOUT] = {
@@ -686,6 +762,7 @@ static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_FALLOCATE] = {
.needs_file = 1,
@@ -714,11 +791,14 @@ static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollin = 1,
+ .buffer_select = 1,
},
[IORING_OP_WRITE] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_FADVISE] = {
.needs_file = 1,
@@ -730,11 +810,14 @@ static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollout = 1,
},
[IORING_OP_RECV] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .pollin = 1,
+ .buffer_select = 1,
},
[IORING_OP_OPENAT2] = {
.needs_file = 1,
@@ -746,6 +829,13 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.file_table = 1,
},
+ [IORING_OP_SPLICE] = {
+ .needs_file = 1,
+ .hash_reg_file = 1,
+ .unbound_nonreg_file = 1,
+ },
+ [IORING_OP_PROVIDE_BUFFERS] = {},
+ [IORING_OP_REMOVE_BUFFERS] = {},
};
static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -760,6 +850,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
static int io_grab_files(struct io_kiocb *req);
static void io_ring_file_ref_flush(struct fixed_file_data *data);
static void io_cleanup_req(struct io_kiocb *req);
+static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
+ int fd, struct file **out_file, bool fixed);
+static void __io_queue_sqe(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe);
static struct kmem_cache *req_cachep;
@@ -826,11 +920,11 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->cq_overflow_list);
init_completion(&ctx->completions[0]);
init_completion(&ctx->completions[1]);
+ idr_init(&ctx->io_buffer_idr);
idr_init(&ctx->personality_idr);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
spin_lock_init(&ctx->completion_lock);
- init_llist_head(&ctx->poll_llist);
INIT_LIST_HEAD(&ctx->poll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
@@ -951,15 +1045,14 @@ static inline void io_req_work_drop_env(struct io_kiocb *req)
}
}
-static inline bool io_prep_async_work(struct io_kiocb *req,
+static inline void io_prep_async_work(struct io_kiocb *req,
struct io_kiocb **link)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
- bool do_hashed = false;
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file)
- do_hashed = true;
+ io_wq_hash_work(&req->work, file_inode(req->file));
} else {
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
@@ -968,25 +1061,18 @@ static inline bool io_prep_async_work(struct io_kiocb *req,
io_req_work_grab_env(req, def);
*link = io_prep_linked_timeout(req);
- return do_hashed;
}
static inline void io_queue_async_work(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link;
- bool do_hashed;
- do_hashed = io_prep_async_work(req, &link);
+ io_prep_async_work(req, &link);
- trace_io_uring_queue_async_work(ctx, do_hashed, req, &req->work,
- req->flags);
- if (!do_hashed) {
- io_wq_enqueue(ctx->io_wq, &req->work);
- } else {
- io_wq_enqueue_hashed(ctx->io_wq, &req->work,
- file_inode(req->file));
- }
+ trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
+ &req->work, req->flags);
+ io_wq_enqueue(ctx->io_wq, &req->work);
if (link)
io_queue_linked_timeout(link);
@@ -1053,24 +1139,19 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
return false;
if (!ctx->eventfd_async)
return true;
- return io_wq_current_is_worker() || in_interrupt();
+ return io_wq_current_is_worker();
}
-static void __io_cqring_ev_posted(struct io_ring_ctx *ctx, bool trigger_ev)
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
if (waitqueue_active(&ctx->sqo_wait))
wake_up(&ctx->sqo_wait);
- if (trigger_ev)
+ if (io_should_trigger_evfd(ctx))
eventfd_signal(ctx->cq_ev_fd, 1);
}
-static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
-{
- __io_cqring_ev_posted(ctx, io_should_trigger_evfd(ctx));
-}
-
/* Returns true if there are no backlogged entries after the flush */
static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
{
@@ -1107,7 +1188,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
if (cqe) {
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, req->result);
- WRITE_ONCE(cqe->flags, 0);
+ WRITE_ONCE(cqe->flags, req->cflags);
} else {
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
@@ -1131,7 +1212,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
return cqe != NULL;
}
-static void io_cqring_fill_event(struct io_kiocb *req, long res)
+static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_uring_cqe *cqe;
@@ -1147,7 +1228,7 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
if (likely(cqe)) {
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, res);
- WRITE_ONCE(cqe->flags, 0);
+ WRITE_ONCE(cqe->flags, cflags);
} else if (ctx->cq_overflow_flushed) {
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
@@ -1159,23 +1240,34 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
req->flags |= REQ_F_OVERFLOW;
refcount_inc(&req->refs);
req->result = res;
+ req->cflags = cflags;
list_add_tail(&req->list, &ctx->cq_overflow_list);
}
}
-static void io_cqring_add_event(struct io_kiocb *req, long res)
+static void io_cqring_fill_event(struct io_kiocb *req, long res)
+{
+ __io_cqring_fill_event(req, res, 0);
+}
+
+static void __io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->completion_lock, flags);
- io_cqring_fill_event(req, res);
+ __io_cqring_fill_event(req, res, cflags);
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
}
+static void io_cqring_add_event(struct io_kiocb *req, long res)
+{
+ __io_cqring_add_event(req, res, 0);
+}
+
static inline bool io_is_fallback_req(struct io_kiocb *req)
{
return req == (struct io_kiocb *)
@@ -1245,6 +1337,15 @@ fallback:
return NULL;
}
+static inline void io_put_file(struct io_kiocb *req, struct file *file,
+ bool fixed)
+{
+ if (fixed)
+ percpu_ref_put(&req->ctx->file_data->refs);
+ else
+ fput(file);
+}
+
static void __io_req_do_free(struct io_kiocb *req)
{
if (likely(!io_is_fallback_req(req)))
@@ -1255,18 +1356,12 @@ static void __io_req_do_free(struct io_kiocb *req)
static void __io_req_aux_free(struct io_kiocb *req)
{
- struct io_ring_ctx *ctx = req->ctx;
-
if (req->flags & REQ_F_NEED_CLEANUP)
io_cleanup_req(req);
kfree(req->io);
- if (req->file) {
- if (req->flags & REQ_F_FIXED_FILE)
- percpu_ref_put(&ctx->file_data->refs);
- else
- fput(req->file);
- }
+ if (req->file)
+ io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
io_req_work_drop_env(req);
}
@@ -1473,6 +1568,30 @@ static void io_free_req(struct io_kiocb *req)
io_queue_async_work(nxt);
}
+static void io_link_work_cb(struct io_wq_work **workptr)
+{
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ struct io_kiocb *link;
+
+ link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
+ io_queue_linked_timeout(link);
+ io_wq_submit_work(workptr);
+}
+
+static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
+{
+ struct io_kiocb *link;
+ const struct io_op_def *def = &io_op_defs[nxt->opcode];
+
+ if ((nxt->flags & REQ_F_ISREG) && def->hash_reg_file)
+ io_wq_hash_work(&nxt->work, file_inode(nxt->file));
+
+ *workptr = &nxt->work;
+ link = io_prep_linked_timeout(nxt);
+ if (link)
+ nxt->work.func = io_link_work_cb;
+}
+
/*
* Drop reference to request, return next in chain (if there is one) if this
* was the last reference to this request.
@@ -1492,6 +1611,26 @@ static void io_put_req(struct io_kiocb *req)
io_free_req(req);
}
+static void io_steal_work(struct io_kiocb *req,
+ struct io_wq_work **workptr)
+{
+ /*
+ * It's in an io-wq worker, so there always should be at least
+ * one reference, which will be dropped in io_put_work() just
+ * after the current handler returns.
+ *
+ * It also means, that if the counter dropped to 1, then there is
+ * no asynchronous users left, so it's safe to steal the next work.
+ */
+ if (refcount_read(&req->refs) == 1) {
+ struct io_kiocb *nxt = NULL;
+
+ io_req_find_next(req, &nxt);
+ if (nxt)
+ io_wq_assign_next(workptr, nxt);
+ }
+}
+
/*
* Must only be used if we don't need to care about links, usually from
* within the completion handling itself.
@@ -1553,6 +1692,19 @@ static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req)
return true;
}
+static int io_put_kbuf(struct io_kiocb *req)
+{
+ struct io_buffer *kbuf;
+ int cflags;
+
+ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+ cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
+ cflags |= IORING_CQE_F_BUFFER;
+ req->rw.addr = 0;
+ kfree(kbuf);
+ return cflags;
+}
+
/*
* Find and free completed poll iocbs
*/
@@ -1564,10 +1716,15 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
rb.to_free = rb.need_iter = 0;
while (!list_empty(done)) {
+ int cflags = 0;
+
req = list_first_entry(done, struct io_kiocb, list);
list_del(&req->list);
- io_cqring_fill_event(req, req->result);
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ cflags = io_put_kbuf(req);
+
+ __io_cqring_fill_event(req, req->result, cflags);
(*nr_events)++;
if (refcount_dec_and_test(&req->refs) &&
@@ -1576,6 +1733,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
}
io_commit_cqring(ctx);
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ io_cqring_ev_posted(ctx);
io_free_req_many(ctx, &rb);
}
@@ -1742,13 +1901,16 @@ static inline void req_set_fail_links(struct io_kiocb *req)
static void io_complete_rw_common(struct kiocb *kiocb, long res)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+ int cflags = 0;
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
if (res != req->result)
req_set_fail_links(req);
- io_cqring_add_event(req, res);
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ cflags = io_put_kbuf(req);
+ __io_cqring_add_event(req, res, cflags);
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
@@ -1759,17 +1921,6 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
io_put_req(req);
}
-static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
-{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
- struct io_kiocb *nxt = NULL;
-
- io_complete_rw_common(kiocb, res);
- io_put_req_find_next(req, &nxt);
-
- return nxt;
-}
-
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
@@ -1840,7 +1991,7 @@ static void io_file_put(struct io_submit_state *state)
* assuming most submissions are for one file, or at least that each file
* has more than one submission.
*/
-static struct file *io_file_get(struct io_submit_state *state, int fd)
+static struct file *__io_file_get(struct io_submit_state *state, int fd)
{
if (!state)
return fget(fd);
@@ -1937,7 +2088,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
- /* we own ->private, reuse it for the buffer index */
+ /* we own ->private, reuse it for the buffer index / buffer ID */
req->rw.kiocb.private = (void *) (unsigned long)
READ_ONCE(sqe->buf_index);
return 0;
@@ -1964,15 +2115,14 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
}
}
-static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
- bool in_async)
+static void kiocb_done(struct kiocb *kiocb, ssize_t ret)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (req->flags & REQ_F_CUR_POS)
req->file->f_pos = kiocb->ki_pos;
- if (in_async && ret >= 0 && kiocb->ki_complete == io_complete_rw)
- *nxt = __io_complete_rw(kiocb, ret);
+ if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
+ io_complete_rw(kiocb, ret, 0);
else
io_rw_done(kiocb, ret);
}
@@ -2051,11 +2201,147 @@ static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
return len;
}
+static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
+{
+ if (needs_lock)
+ mutex_unlock(&ctx->uring_lock);
+}
+
+static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
+{
+ /*
+ * "Normal" inline submissions always hold the uring_lock, since we
+ * grab it from the system call. Same is true for the SQPOLL offload.
+ * The only exception is when we've detached the request and issue it
+ * from an async worker thread, grab the lock for that case.
+ */
+ if (needs_lock)
+ mutex_lock(&ctx->uring_lock);
+}
+
+static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
+ int bgid, struct io_buffer *kbuf,
+ bool needs_lock)
+{
+ struct io_buffer *head;
+
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ return kbuf;
+
+ io_ring_submit_lock(req->ctx, needs_lock);
+
+ lockdep_assert_held(&req->ctx->uring_lock);
+
+ head = idr_find(&req->ctx->io_buffer_idr, bgid);
+ if (head) {
+ if (!list_empty(&head->list)) {
+ kbuf = list_last_entry(&head->list, struct io_buffer,
+ list);
+ list_del(&kbuf->list);
+ } else {
+ kbuf = head;
+ idr_remove(&req->ctx->io_buffer_idr, bgid);
+ }
+ if (*len > kbuf->len)
+ *len = kbuf->len;
+ } else {
+ kbuf = ERR_PTR(-ENOBUFS);
+ }
+
+ io_ring_submit_unlock(req->ctx, needs_lock);
+
+ return kbuf;
+}
+
+static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
+ bool needs_lock)
+{
+ struct io_buffer *kbuf;
+ int bgid;
+
+ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+ bgid = (int) (unsigned long) req->rw.kiocb.private;
+ kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock);
+ if (IS_ERR(kbuf))
+ return kbuf;
+ req->rw.addr = (u64) (unsigned long) kbuf;
+ req->flags |= REQ_F_BUFFER_SELECTED;
+ return u64_to_user_ptr(kbuf->addr);
+}
+
+#ifdef CONFIG_COMPAT
+static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
+ bool needs_lock)
+{
+ struct compat_iovec __user *uiov;
+ compat_ssize_t clen;
+ void __user *buf;
+ ssize_t len;
+
+ uiov = u64_to_user_ptr(req->rw.addr);
+ if (!access_ok(uiov, sizeof(*uiov)))
+ return -EFAULT;
+ if (__get_user(clen, &uiov->iov_len))
+ return -EFAULT;
+ if (clen < 0)
+ return -EINVAL;
+
+ len = clen;
+ buf = io_rw_buffer_select(req, &len, needs_lock);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+ iov[0].iov_base = buf;
+ iov[0].iov_len = (compat_size_t) len;
+ return 0;
+}
+#endif
+
+static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
+ bool needs_lock)
+{
+ struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr);
+ void __user *buf;
+ ssize_t len;
+
+ if (copy_from_user(iov, uiov, sizeof(*uiov)))
+ return -EFAULT;
+
+ len = iov[0].iov_len;
+ if (len < 0)
+ return -EINVAL;
+ buf = io_rw_buffer_select(req, &len, needs_lock);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+ iov[0].iov_base = buf;
+ iov[0].iov_len = len;
+ return 0;
+}
+
+static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
+ bool needs_lock)
+{
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ return 0;
+ if (!req->rw.len)
+ return 0;
+ else if (req->rw.len > 1)
+ return -EINVAL;
+
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ return io_compat_import(req, iov, needs_lock);
+#endif
+
+ return __io_iov_buffer_select(req, iov, needs_lock);
+}
+
static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
- struct iovec **iovec, struct iov_iter *iter)
+ struct iovec **iovec, struct iov_iter *iter,
+ bool needs_lock)
{
void __user *buf = u64_to_user_ptr(req->rw.addr);
size_t sqe_len = req->rw.len;
+ ssize_t ret;
u8 opcode;
opcode = req->opcode;
@@ -2064,12 +2350,20 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
return io_import_fixed(req, rw, iter);
}
- /* buffer index only valid with fixed read/write */
- if (req->rw.kiocb.private)
+ /* buffer index only valid with fixed read/write, or buffer select */
+ if (req->rw.kiocb.private && !(req->flags & REQ_F_BUFFER_SELECT))
return -EINVAL;
if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
- ssize_t ret;
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
+ if (IS_ERR(buf)) {
+ *iovec = NULL;
+ return PTR_ERR(buf);
+ }
+ req->rw.len = sqe_len;
+ }
+
ret = import_single_range(rw, buf, sqe_len, *iovec, iter);
*iovec = NULL;
return ret < 0 ? ret : sqe_len;
@@ -2085,6 +2379,16 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
return iorw->size;
}
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ ret = io_iov_buffer_select(req, *iovec, needs_lock);
+ if (!ret) {
+ ret = (*iovec)->iov_len;
+ iov_iter_init(iter, rw, *iovec, 1, ret);
+ }
+ *iovec = NULL;
+ return ret;
+ }
+
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
return compat_import_iovec(rw, buf, sqe_len, UIO_FASTIOV,
@@ -2168,12 +2472,18 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
}
}
+/*
+ * Unconditionally allocate req->io with GFP_KERNEL.
+ * Returns 0 on success and 1 when the allocation failed.
+ */
+static inline int __io_alloc_async_ctx(struct io_kiocb *req)
+{
+	req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
+	if (req->io)
+		return 0;
+	return 1;
+}
+
static int io_alloc_async_ctx(struct io_kiocb *req)
{
if (!io_op_defs[req->opcode].async_ctx)
return 0;
- req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
- return req->io == NULL;
+
+ return __io_alloc_async_ctx(req);
}
static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
@@ -2183,7 +2493,7 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
if (!io_op_defs[req->opcode].async_ctx)
return 0;
if (!req->io) {
- if (io_alloc_async_ctx(req))
+ if (__io_alloc_async_ctx(req))
return -ENOMEM;
io_req_map_rw(req, io_size, iovec, fast_iov, iter);
@@ -2212,7 +2522,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
io = req->io;
io->rw.iov = io->rw.fast_iov;
req->io = NULL;
- ret = io_import_iovec(READ, req, &io->rw.iov, &iter);
+ ret = io_import_iovec(READ, req, &io->rw.iov, &iter, !force_nonblock);
req->io = io;
if (ret < 0)
return ret;
@@ -2221,8 +2531,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0;
}
-static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_read(struct io_kiocb *req, bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw.kiocb;
@@ -2230,13 +2539,13 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
size_t iov_count;
ssize_t io_size, ret;
- ret = io_import_iovec(READ, req, &iovec, &iter);
+ ret = io_import_iovec(READ, req, &iovec, &iter, !force_nonblock);
if (ret < 0)
return ret;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
- req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
+ kiocb->ki_flags &= ~IOCB_NOWAIT;
req->result = 0;
io_size = ret;
@@ -2247,10 +2556,8 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
* we know to async punt it even if it was opened O_NONBLOCK
*/
- if (force_nonblock && !io_file_supports_async(req->file)) {
- req->flags |= REQ_F_MUST_PUNT;
+ if (force_nonblock && !io_file_supports_async(req->file))
goto copy_iov;
- }
iov_count = iov_iter_count(&iter);
ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
@@ -2264,13 +2571,16 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
/* Catch -EAGAIN return for forced non-blocking submission */
if (!force_nonblock || ret2 != -EAGAIN) {
- kiocb_done(kiocb, ret2, nxt, req->in_async);
+ kiocb_done(kiocb, ret2);
} else {
copy_iov:
ret = io_setup_async_rw(req, io_size, iovec,
inline_vecs, &iter);
if (ret)
goto out_free;
+ /* any defer here is final, must blocking retry */
+ if (!(req->flags & REQ_F_NOWAIT))
+ req->flags |= REQ_F_MUST_PUNT;
return -EAGAIN;
}
}
@@ -2294,6 +2604,8 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
return -EBADF;
+ req->fsize = rlimit(RLIMIT_FSIZE);
+
/* either don't need iovec imported or already have it */
if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
return 0;
@@ -2301,7 +2613,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
io = req->io;
io->rw.iov = io->rw.fast_iov;
req->io = NULL;
- ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter);
+ ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter, !force_nonblock);
req->io = io;
if (ret < 0)
return ret;
@@ -2310,8 +2622,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0;
}
-static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_write(struct io_kiocb *req, bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw.kiocb;
@@ -2319,7 +2630,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
size_t iov_count;
ssize_t ret, io_size;
- ret = io_import_iovec(WRITE, req, &iovec, &iter);
+ ret = io_import_iovec(WRITE, req, &iovec, &iter, !force_nonblock);
if (ret < 0)
return ret;
@@ -2336,10 +2647,8 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
* we know to async punt it even if it was opened O_NONBLOCK
*/
- if (force_nonblock && !io_file_supports_async(req->file)) {
- req->flags |= REQ_F_MUST_PUNT;
+ if (force_nonblock && !io_file_supports_async(req->file))
goto copy_iov;
- }
/* file path doesn't support NOWAIT for non-direct_IO */
if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) &&
@@ -2366,24 +2675,33 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
}
kiocb->ki_flags |= IOCB_WRITE;
+ if (!force_nonblock)
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
+
if (req->file->f_op->write_iter)
ret2 = call_write_iter(req->file, kiocb, &iter);
else
ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
+
+ if (!force_nonblock)
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+
/*
- * Raw bdev writes will -EOPNOTSUPP for IOCB_NOWAIT. Just
+ * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
* retry them without IOCB_NOWAIT.
*/
if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
ret2 = -EAGAIN;
if (!force_nonblock || ret2 != -EAGAIN) {
- kiocb_done(kiocb, ret2, nxt, req->in_async);
+ kiocb_done(kiocb, ret2);
} else {
copy_iov:
ret = io_setup_async_rw(req, io_size, iovec,
inline_vecs, &iter);
if (ret)
goto out_free;
+ /* any defer here is final, must blocking retry */
+ req->flags |= REQ_F_MUST_PUNT;
return -EAGAIN;
}
}
@@ -2393,6 +2711,76 @@ out_free:
return ret;
}
+/*
+ * Read the splice parameters out of @sqe into req->splice and take a
+ * reference on the input file.
+ *
+ * Returns 0 on success (or immediately when REQ_F_NEED_CLEANUP is already
+ * set), -EINVAL when unknown splice flags are present, or the error from
+ * io_file_get().
+ */
+static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_splice *splice = &req->splice;
+	int err;
+
+	/* REQ_F_NEED_CLEANUP is set below once prep succeeded: skip a redo */
+	if (req->flags & REQ_F_NEED_CLEANUP)
+		return 0;
+
+	splice->file_in = NULL;
+	splice->off_in = READ_ONCE(sqe->splice_off_in);
+	splice->off_out = READ_ONCE(sqe->off);
+	splice->len = READ_ONCE(sqe->len);
+	splice->flags = READ_ONCE(sqe->splice_flags);
+
+	/* reject anything outside SPLICE_F_ALL plus the fixed-file marker */
+	if (unlikely(splice->flags & ~(SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL)))
+		return -EINVAL;
+
+	err = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in),
+			  &splice->file_in,
+			  (splice->flags & SPLICE_F_FD_IN_FIXED));
+	if (err)
+		return err;
+	req->flags |= REQ_F_NEED_CLEANUP;
+
+	/* input that is not a regular file goes to the unbound work class */
+	if (!S_ISREG(file_inode(splice->file_in)->i_mode))
+		req->work.flags |= IO_WQ_WORK_UNBOUND;
+
+	return 0;
+}
+
+/*
+ * Decide whether a splice on @file has to be punted instead of being tried
+ * inline in nonblocking mode.
+ *
+ * Returns false for pipes, true for files that io_file_supports_async()
+ * rejects, and otherwise true unless the file was opened with O_NONBLOCK.
+ *
+ * O_NONBLOCK is an open(2) status flag and therefore lives in file->f_flags;
+ * file->f_mode holds FMODE_* bits, so testing O_NONBLOCK against it checks
+ * an unrelated bit.
+ */
+static bool io_splice_punt(struct file *file)
+{
+	if (get_pipe_info(file))
+		return false;
+	if (!io_file_supports_async(file))
+		return true;
+	return !(file->f_flags & O_NONBLOCK);
+}
+
+/*
+ * Execute a prepared splice request.
+ *
+ * With @force_nonblock set, returns -EAGAIN when io_splice_punt() asks for
+ * either file to be punted, or when do_splice() itself returns -EAGAIN.
+ * Otherwise the input file reference is dropped, a completion carrying the
+ * do_splice() result is posted, linked requests are failed when fewer than
+ * sp->len bytes were moved, and 0 is returned.
+ */
+static int io_splice(struct io_kiocb *req, bool force_nonblock)
+{
+	struct io_splice *sp = &req->splice;
+	struct file *src = sp->file_in;
+	struct file *dst = sp->file_out;
+	unsigned int splice_flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+	loff_t *src_pos = NULL, *dst_pos = NULL;
+	long done;
+
+	if (force_nonblock) {
+		if (io_splice_punt(src) || io_splice_punt(dst))
+			return -EAGAIN;
+		splice_flags |= SPLICE_F_NONBLOCK;
+	}
+
+	/* an offset of -1 is passed on as a NULL offset pointer */
+	if (sp->off_in != -1)
+		src_pos = &sp->off_in;
+	if (sp->off_out != -1)
+		dst_pos = &sp->off_out;
+
+	done = do_splice(src, src_pos, dst, dst_pos, sp->len, splice_flags);
+	if (force_nonblock && done == -EAGAIN)
+		return -EAGAIN;
+
+	/* request is finished: release what io_splice_prep() acquired */
+	io_put_file(req, src, (sp->flags & SPLICE_F_FD_IN_FIXED));
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+
+	io_cqring_add_event(req, done);
+	if (done != sp->len)
+		req_set_fail_links(req);
+	io_put_req(req);
+	return 0;
+}
+
/*
* IORING_OP_NOP just posts a completion event, nothing else.
*/
@@ -2441,85 +2829,63 @@ static bool io_req_cancelled(struct io_kiocb *req)
return false;
}
-static void io_link_work_cb(struct io_wq_work **workptr)
-{
- struct io_wq_work *work = *workptr;
- struct io_kiocb *link = work->data;
-
- io_queue_linked_timeout(link);
- work->func = io_wq_submit_work;
-}
-
-static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
-{
- struct io_kiocb *link;
-
- io_prep_async_work(nxt, &link);
- *workptr = &nxt->work;
- if (link) {
- nxt->work.flags |= IO_WQ_WORK_CB;
- nxt->work.func = io_link_work_cb;
- nxt->work.data = link;
- }
-}
-
-static void io_fsync_finish(struct io_wq_work **workptr)
+static void __io_fsync(struct io_kiocb *req)
{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
loff_t end = req->sync.off + req->sync.len;
- struct io_kiocb *nxt = NULL;
int ret;
- if (io_req_cancelled(req))
- return;
-
ret = vfs_fsync_range(req->file, req->sync.off,
end > 0 ? end : LLONG_MAX,
req->sync.flags & IORING_FSYNC_DATASYNC);
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, &nxt);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
+ io_put_req(req);
}
-static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static void io_fsync_finish(struct io_wq_work **workptr)
{
- struct io_wq_work *work, *old_work;
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+
+ if (io_req_cancelled(req))
+ return;
+ __io_fsync(req);
+ io_steal_work(req, workptr);
+}
+static int io_fsync(struct io_kiocb *req, bool force_nonblock)
+{
/* fsync always requires a blocking context */
if (force_nonblock) {
- io_put_req(req);
req->work.func = io_fsync_finish;
return -EAGAIN;
}
-
- work = old_work = &req->work;
- io_fsync_finish(&work);
- if (work && work != old_work)
- *nxt = container_of(work, struct io_kiocb, work);
+ __io_fsync(req);
return 0;
}
-static void io_fallocate_finish(struct io_wq_work **workptr)
+static void __io_fallocate(struct io_kiocb *req)
{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
int ret;
- if (io_req_cancelled(req))
- return;
-
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
req->sync.len);
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, &nxt);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
+ io_put_req(req);
+}
+
+static void io_fallocate_finish(struct io_wq_work **workptr)
+{
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+
+ if (io_req_cancelled(req))
+ return;
+ __io_fallocate(req);
+ io_steal_work(req, workptr);
}
static int io_fallocate_prep(struct io_kiocb *req,
@@ -2531,26 +2897,19 @@ static int io_fallocate_prep(struct io_kiocb *req,
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->addr);
req->sync.mode = READ_ONCE(sqe->len);
+ req->fsize = rlimit(RLIMIT_FSIZE);
return 0;
}
-static int io_fallocate(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
{
- struct io_wq_work *work, *old_work;
-
/* fallocate always requiring blocking context */
if (force_nonblock) {
- io_put_req(req);
req->work.func = io_fallocate_finish;
return -EAGAIN;
}
- work = old_work = &req->work;
- io_fallocate_finish(&work);
- if (work && work != old_work)
- *nxt = container_of(work, struct io_kiocb, work);
-
+ __io_fallocate(req);
return 0;
}
@@ -2578,6 +2937,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
+ req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
@@ -2619,12 +2979,12 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
+ req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
-static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_openat2(struct io_kiocb *req, bool force_nonblock)
{
struct open_flags op;
struct file *file;
@@ -2637,7 +2997,7 @@ static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret)
goto err;
- ret = get_unused_fd_flags(req->open.how.flags);
+ ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
if (ret < 0)
goto err;
@@ -2655,15 +3015,171 @@ err:
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
}
-static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_openat(struct io_kiocb *req, bool force_nonblock)
{
req->open.how = build_open_how(req->open.how.flags, req->open.how.mode);
- return io_openat2(req, nxt, force_nonblock);
+ return io_openat2(req, force_nonblock);
+}
+
+/*
+ * Validate and capture the parameters of a remove-buffers request.
+ *
+ * sqe->fd carries the number of buffers to remove (1..USHRT_MAX) and
+ * sqe->buf_group the buffer group id; ioprio, rw_flags, addr, len and off
+ * must all be zero. Returns 0 on success, -EINVAL otherwise.
+ */
+static int io_remove_buffers_prep(struct io_kiocb *req,
+				  const struct io_uring_sqe *sqe)
+{
+	struct io_provide_buf *pbuf = &req->pbuf;
+	u64 count;
+
+	if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off)
+		return -EINVAL;
+
+	count = READ_ONCE(sqe->fd);
+	if (count == 0 || count > USHRT_MAX)
+		return -EINVAL;
+
+	memset(pbuf, 0, sizeof(*pbuf));
+	pbuf->nbufs = count;
+	pbuf->bgid = READ_ONCE(sqe->buf_group);
+	return 0;
+}
+
+/*
+ * Free up to @nbufs buffers from the group headed by @buf.
+ *
+ * Entries linked behind the head are freed first. Only when that list is
+ * exhausted before @nbufs entries were freed is the head itself freed and
+ * @bgid removed from ctx->io_buffer_idr. Returns the number of buffers
+ * freed (0 when @nbufs is 0).
+ */
+static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
+			       int bgid, unsigned nbufs)
+{
+	unsigned freed = 0;
+
+	/* shouldn't happen */
+	if (nbufs == 0)
+		return 0;
+
+	/* the head kbuf is the list itself */
+	while (!list_empty(&buf->list)) {
+		struct io_buffer *victim;
+
+		victim = list_first_entry(&buf->list, struct io_buffer, list);
+		list_del(&victim->list);
+		kfree(victim);
+		freed++;
+		if (freed == nbufs)
+			return freed;
+	}
+
+	/* list drained with budget left: the head goes too, and the group */
+	freed++;
+	kfree(buf);
+	idr_remove(&ctx->io_buffer_idr, bgid);
+
+	return freed;
+}
+
+/*
+ * Remove up to p->nbufs buffers from buffer group p->bgid.
+ *
+ * The lookup and removal run between io_ring_submit_lock() and
+ * io_ring_submit_unlock(), both passed !force_nonblock. The completion
+ * carries the number of buffers removed, or -ENOENT when the group does not
+ * exist. Always returns 0; errors are reported via the CQE.
+ */
+static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock)
+{
+	struct io_provide_buf *p = &req->pbuf;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer *head;
+	int ret = -ENOENT;
+
+	io_ring_submit_lock(ctx, !force_nonblock);
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	head = idr_find(&ctx->io_buffer_idr, p->bgid);
+	if (head)
+		ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
+
+	/* pair with the lock above; this used to lock a second time */
+	io_ring_submit_unlock(ctx, !force_nonblock);
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_cqring_add_event(req, ret);
+	io_put_req(req);
+	return 0;
+}
+
+/*
+ * Validate and capture the parameters of a provide-buffers request.
+ *
+ * sqe->fd is the buffer count (1..USHRT_MAX), sqe->addr/len describe the
+ * memory, sqe->buf_group is the group id and sqe->off the first buffer id
+ * (at most USHRT_MAX). Returns 0, -EINVAL (ioprio/rw_flags set), -E2BIG
+ * (count or first id out of range) or -EFAULT (access_ok() failed).
+ *
+ * NOTE(review): access_ok() is only asked about p->len bytes, while
+ * io_add_buffers() advances the address by p->len for each of p->nbufs
+ * buffers - confirm whether the full nbufs * len range should be checked.
+ */
+static int io_provide_buffers_prep(struct io_kiocb *req,
+				   const struct io_uring_sqe *sqe)
+{
+	struct io_provide_buf *pbuf = &req->pbuf;
+	u64 count, first_bid;
+
+	if (sqe->ioprio || sqe->rw_flags)
+		return -EINVAL;
+
+	count = READ_ONCE(sqe->fd);
+	if (count == 0 || count > USHRT_MAX)
+		return -E2BIG;
+	pbuf->nbufs = count;
+	pbuf->addr = READ_ONCE(sqe->addr);
+	pbuf->len = READ_ONCE(sqe->len);
+
+	if (!access_ok(u64_to_user_ptr(pbuf->addr), pbuf->len))
+		return -EFAULT;
+
+	pbuf->bgid = READ_ONCE(sqe->buf_group);
+	first_bid = READ_ONCE(sqe->off);
+	if (first_bid > USHRT_MAX)
+		return -E2BIG;
+	pbuf->bid = first_bid;
+	return 0;
+}
+
+/*
+ * Allocate up to pbuf->nbufs io_buffer entries describing consecutive
+ * pbuf->len sized chunks starting at pbuf->addr, with ids counting up from
+ * pbuf->bid.
+ *
+ * When *@head is NULL the first entry becomes the list head; every other
+ * entry is appended to (*@head)->list. Allocation failure stops the loop
+ * early. Returns the number of entries added, or -ENOMEM when none were.
+ */
+static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
+{
+	u64 next_addr = pbuf->addr;
+	int next_bid = pbuf->bid;
+	int added;
+
+	for (added = 0; added < pbuf->nbufs; added++) {
+		struct io_buffer *entry;
+
+		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+		if (!entry)
+			break;
+
+		entry->addr = next_addr;
+		entry->len = pbuf->len;
+		entry->bid = next_bid;
+		next_addr += pbuf->len;
+		next_bid++;
+
+		if (*head) {
+			list_add_tail(&entry->list, &(*head)->list);
+		} else {
+			/* first buffer of a new group doubles as list head */
+			INIT_LIST_HEAD(&entry->list);
+			*head = entry;
+		}
+	}
+
+	if (!added)
+		return -ENOMEM;
+	return added;
+}
+
+/*
+ * Add the buffers described by req->pbuf to buffer group p->bgid, creating
+ * the group in ctx->io_buffer_idr when it does not exist yet.
+ *
+ * The work runs between io_ring_submit_lock() and io_ring_submit_unlock(),
+ * both passed !force_nonblock. The completion carries the io_add_buffers()
+ * result for an existing group, or the idr_alloc() result for a new one.
+ * Always returns 0; errors are reported via the CQE.
+ */
+static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
+{
+	struct io_provide_buf *p = &req->pbuf;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer *existing, *head;
+	int ret;
+
+	io_ring_submit_lock(ctx, !force_nonblock);
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	existing = idr_find(&ctx->io_buffer_idr, p->bgid);
+	head = existing;
+
+	ret = io_add_buffers(p, &head);
+	if (ret >= 0 && !existing) {
+		/* brand new group: publish its head under exactly p->bgid */
+		ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid,
+				p->bgid + 1, GFP_KERNEL);
+		if (ret < 0)
+			__io_remove_buffers(ctx, head, p->bgid, -1U);
+	}
+
+	io_ring_submit_unlock(ctx, !force_nonblock);
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_cqring_add_event(req, ret);
+	io_put_req(req);
+	return 0;
+}
static int io_epoll_ctl_prep(struct io_kiocb *req,
@@ -2691,8 +3207,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
#endif
}
-static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock)
{
#if defined(CONFIG_EPOLL)
struct io_epoll *ie = &req->epoll;
@@ -2705,7 +3220,7 @@ static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
#else
return -EOPNOTSUPP;
@@ -2727,8 +3242,7 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
#endif
}
-static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_madvise(struct io_kiocb *req, bool force_nonblock)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
struct io_madvise *ma = &req->madvise;
@@ -2741,7 +3255,7 @@ static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
#else
return -EOPNOTSUPP;
@@ -2759,8 +3273,7 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static int io_fadvise(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
{
struct io_fadvise *fa = &req->fadvise;
int ret;
@@ -2780,7 +3293,7 @@ static int io_fadvise(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
}
@@ -2817,8 +3330,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static int io_statx(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_statx(struct io_kiocb *req, bool force_nonblock)
{
struct io_open *ctx = &req->open;
unsigned lookup_flags;
@@ -2855,7 +3367,7 @@ err:
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
}
@@ -2882,7 +3394,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
/* only called when __close_fd_get_file() is done */
-static void __io_close_finish(struct io_kiocb *req, struct io_kiocb **nxt)
+static void __io_close_finish(struct io_kiocb *req)
{
int ret;
@@ -2891,22 +3403,19 @@ static void __io_close_finish(struct io_kiocb *req, struct io_kiocb **nxt)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
fput(req->close.put_file);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
}
static void io_close_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
/* not cancellable, don't do io_req_cancelled() */
- __io_close_finish(req, &nxt);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
+ __io_close_finish(req);
+ io_steal_work(req, workptr);
}
-static int io_close(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_close(struct io_kiocb *req, bool force_nonblock)
{
int ret;
@@ -2916,23 +3425,25 @@ static int io_close(struct io_kiocb *req, struct io_kiocb **nxt,
return ret;
/* if the file has a flush method, be safe and punt to async */
- if (req->close.put_file->f_op->flush && !io_wq_current_is_worker())
- goto eagain;
+ if (req->close.put_file->f_op->flush && force_nonblock) {
+ /* submission ref will be dropped, take it for async */
+ refcount_inc(&req->refs);
+
+ req->work.func = io_close_finish;
+ /*
+ * Do manual async queue here to avoid grabbing files - we don't
+ * need the files, and it'll cause io_close_finish() to close
+ * the file again and cause a double CQE entry for this request
+ */
+ io_queue_async_work(req);
+ return 0;
+ }
/*
* No ->flush(), safely close from here and just punt the
* fput() to async context.
*/
- __io_close_finish(req, nxt);
- return 0;
-eagain:
- req->work.func = io_close_finish;
- /*
- * Do manual async queue here to avoid grabbing files - we don't
- * need the files, and it'll cause io_close_finish() to close
- * the file again and cause a double CQE entry for this request
- */
- io_queue_async_work(req);
+ __io_close_finish(req);
return 0;
}
@@ -2954,47 +3465,62 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static void io_sync_file_range_finish(struct io_wq_work **workptr)
+static void __io_sync_file_range(struct io_kiocb *req)
{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
int ret;
- if (io_req_cancelled(req))
- return;
-
ret = sync_file_range(req->file, req->sync.off, req->sync.len,
req->sync.flags);
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, &nxt);
+ io_put_req(req);
+}
+
+
+/*
+ * Work handler for a punted sync_file_range request.
+ *
+ * __io_sync_file_range() posts the completion and already drops a request
+ * reference with io_put_req(), so no further put is done here. Like
+ * io_fsync_finish() and io_fallocate_finish(), the handler ends with
+ * io_steal_work() rather than a local, never-assigned "nxt" pointer.
+ */
+static void io_sync_file_range_finish(struct io_wq_work **workptr)
+{
+	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+
+	if (io_req_cancelled(req))
+		return;
+	__io_sync_file_range(req);
-	if (nxt)
-		io_wq_assign_next(workptr, nxt);
+	io_steal_work(req, workptr);
 }
-static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
{
- struct io_wq_work *work, *old_work;
-
/* sync_file_range always requires a blocking context */
if (force_nonblock) {
- io_put_req(req);
req->work.func = io_sync_file_range_finish;
return -EAGAIN;
}
- work = old_work = &req->work;
- io_sync_file_range_finish(&work);
- if (work && work != old_work)
- *nxt = container_of(work, struct io_kiocb, work);
+ __io_sync_file_range(req);
return 0;
}
+#if defined(CONFIG_NET)
+/*
+ * Stash @kmsg in req->io so a sendmsg/recvmsg that returned -EAGAIN can be
+ * retried later.
+ *
+ * When req->io already exists nothing is copied. Returns -EAGAIN in the
+ * normal case, or -ENOMEM when the async context cannot be allocated (a
+ * separately allocated kmsg->iov is freed first in that case).
+ */
+static int io_setup_async_msg(struct io_kiocb *req,
+			       struct io_async_msghdr *kmsg)
+{
+	if (!req->io) {
+		if (io_alloc_async_ctx(req)) {
+			/* iov is only ours to free when it isn't the inline array */
+			if (kmsg->iov != kmsg->fast_iov)
+				kfree(kmsg->iov);
+			return -ENOMEM;
+		}
+		req->flags |= REQ_F_NEED_CLEANUP;
+		memcpy(&req->io->msg, kmsg, sizeof(*kmsg));
+	}
+	return -EAGAIN;
+}
+
static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
-#if defined(CONFIG_NET)
struct io_sr_msg *sr = &req->sr_msg;
struct io_async_ctx *io = req->io;
int ret;
@@ -3020,15 +3546,10 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (!ret)
req->flags |= REQ_F_NEED_CLEANUP;
return ret;
-#else
- return -EOPNOTSUPP;
-#endif
}
-static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -3068,18 +3589,8 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
flags |= MSG_DONTWAIT;
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
- if (force_nonblock && ret == -EAGAIN) {
- if (req->io)
- return -EAGAIN;
- if (io_alloc_async_ctx(req)) {
- if (kmsg->iov != kmsg->fast_iov)
- kfree(kmsg->iov);
- return -ENOMEM;
- }
- req->flags |= REQ_F_NEED_CLEANUP;
- memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
- return -EAGAIN;
- }
+ if (force_nonblock && ret == -EAGAIN)
+ return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
}
@@ -3090,17 +3601,12 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
io_cqring_add_event(req, ret);
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
-static int io_send(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_send(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
struct socket *sock;
int ret;
@@ -3141,17 +3647,120 @@ static int io_send(struct io_kiocb *req, struct io_kiocb **nxt,
io_cqring_add_event(req, ret);
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
- return -EOPNOTSUPP;
+}
+
+/*
+ * Native (non-compat) msghdr import for recvmsg.
+ *
+ * Without REQ_F_BUFFER_SELECT the user iovec array is imported with
+ * import_iovec(). With it, at most one iovec is allowed: it is copied into
+ * io->msg.iov only to learn the length, which is stored in sr->len, and
+ * io->msg.iov is then set to NULL.
+ *
+ * Returns 0 on success, -EINVAL for more than one iovec with buffer select,
+ * -EFAULT when the iovec cannot be copied, or the error from the header
+ * copy / iovec import.
+ */
+static int __io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io)
+{
+	struct io_sr_msg *sr = &req->sr_msg;
+	struct iovec __user *user_iov;
+	size_t nr_segs;
+	int ret;
+
+	ret = __copy_msghdr_from_user(&io->msg.msg, sr->msg, &io->msg.uaddr,
+					&user_iov, &nr_segs);
+	if (ret)
+		return ret;
+
+	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
+		ret = import_iovec(READ, user_iov, nr_segs, UIO_FASTIOV,
+					&io->msg.iov, &io->msg.msg.msg_iter);
+		/* a positive result from import_iovec() is reported as 0 */
+		return ret > 0 ? 0 : ret;
+	}
+
+	if (nr_segs > 1)
+		return -EINVAL;
+	if (copy_from_user(io->msg.iov, user_iov, sizeof(*user_iov)))
+		return -EFAULT;
+	sr->len = io->msg.iov[0].iov_len;
+	iov_iter_init(&io->msg.msg.msg_iter, READ, io->msg.iov, 1,
+			sr->len);
+	io->msg.iov = NULL;
+	return 0;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat msghdr import for recvmsg.
+ *
+ * Without REQ_F_BUFFER_SELECT the compat iovec array is imported with
+ * compat_import_iovec(). With it, at most one iovec is allowed and only its
+ * length is read from user space: that length is stored both in sr->len and
+ * in the first inline iovec, and io->msg.iov is then set to NULL.
+ *
+ * Returns 0 on success, -EINVAL for more than one iovec or a negative
+ * length with buffer select, -EFAULT when the length cannot be read, or the
+ * error from the header copy / iovec import.
+ */
+static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
+					struct io_async_ctx *io)
+{
+	struct compat_msghdr __user *msg_compat;
+	struct io_sr_msg *sr = &req->sr_msg;
+	struct compat_iovec __user *uiov;
+	compat_uptr_t ptr;
+	compat_size_t len;
+	int ret;
+
+	msg_compat = (struct compat_msghdr __user *) sr->msg;
+	ret = __get_compat_msghdr(&io->msg.msg, msg_compat, &io->msg.uaddr,
+					&ptr, &len);
+	if (ret)
+		return ret;
+
+	uiov = compat_ptr(ptr);
+	if (req->flags & REQ_F_BUFFER_SELECT) {
+		compat_ssize_t clen;
+
+		if (len > 1)
+			return -EINVAL;
+		if (!access_ok(uiov, sizeof(*uiov)))
+			return -EFAULT;
+		if (__get_user(clen, &uiov->iov_len))
+			return -EFAULT;
+		if (clen < 0)
+			return -EINVAL;
+		/*
+		 * Use the length just fetched from user space. Nothing on
+		 * this path has written io->msg.iov[0], so reading iov_len
+		 * back from it would pick up stale data.
+		 */
+		sr->len = clen;
+		io->msg.iov[0].iov_len = clen;
+		io->msg.iov = NULL;
+	} else {
+		ret = compat_import_iovec(READ, uiov, len, UIO_FASTIOV,
+						&io->msg.iov,
+						&io->msg.msg.msg_iter);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
#endif
+
+/*
+ * Point the async msghdr at its inline iovec array, then import the user
+ * msghdr through the compat helper (when the ring is a compat ring and
+ * CONFIG_COMPAT is set) or the native helper. Returns the helper's result.
+ */
+static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io)
+{
+	struct io_async_msghdr *kmsg = &io->msg;
+
+	/* start from the inline array; the helpers may repoint ->iov */
+	kmsg->iov = kmsg->fast_iov;
+
+#ifdef CONFIG_COMPAT
+	if (req->ctx->compat)
+		return __io_compat_recvmsg_copy_hdr(req, io);
+#endif
+
+	return __io_recvmsg_copy_hdr(req, io);
+}
+
+/*
+ * Pick a provided buffer for a recv/recvmsg request, if it asked for one.
+ *
+ * Returns NULL (leaving *@cflags untouched) when REQ_F_BUFFER_SELECT is not
+ * set, the ERR_PTR from io_buffer_select() on failure, and otherwise the
+ * selected buffer after recording it in sr->kbuf, setting
+ * REQ_F_BUFFER_SELECTED and encoding its id into *@cflags.
+ */
+static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
+					       int *cflags, bool needs_lock)
+{
+	struct io_sr_msg *sr = &req->sr_msg;
+	struct io_buffer *picked;
+
+	if (!(req->flags & REQ_F_BUFFER_SELECT))
+		return NULL;
+
+	picked = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf,
+				  needs_lock);
+	if (IS_ERR(picked))
+		return picked;
+
+	sr->kbuf = picked;
+	req->flags |= REQ_F_BUFFER_SELECTED;
+
+	/* buffer id in the upper bits, plus the "buffer attached" marker */
+	*cflags = (picked->bid << IORING_CQE_BUFFER_SHIFT) |
+		  IORING_CQE_F_BUFFER;
+	return picked;
 }
static int io_recvmsg_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
-#if defined(CONFIG_NET)
struct io_sr_msg *sr = &req->sr_msg;
struct io_async_ctx *io = req->io;
int ret;
@@ -3159,6 +3768,7 @@ static int io_recvmsg_prep(struct io_kiocb *req,
sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
+ sr->bgid = READ_ONCE(sqe->buf_group);
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
@@ -3171,30 +3781,24 @@ static int io_recvmsg_prep(struct io_kiocb *req,
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
- io->msg.iov = io->msg.fast_iov;
- ret = recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
- &io->msg.uaddr, &io->msg.iov);
+ ret = io_recvmsg_copy_hdr(req, io);
if (!ret)
req->flags |= REQ_F_NEED_CLEANUP;
return ret;
-#else
- return -EOPNOTSUPP;
-#endif
}
-static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
- int ret;
+ int ret, cflags = 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
sock = sock_from_file(req->file, &ret);
if (sock) {
+ struct io_buffer *kbuf;
struct io_async_ctx io;
unsigned flags;
@@ -3206,19 +3810,23 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
- struct io_sr_msg *sr = &req->sr_msg;
-
kmsg = &io.msg;
kmsg->msg.msg_name = &io.msg.addr;
- io.msg.iov = io.msg.fast_iov;
- ret = recvmsg_copy_msghdr(&io.msg.msg, sr->msg,
- sr->msg_flags, &io.msg.uaddr,
- &io.msg.iov);
+ ret = io_recvmsg_copy_hdr(req, &io);
if (ret)
return ret;
}
+ kbuf = io_recv_buffer_select(req, &cflags, !force_nonblock);
+ if (IS_ERR(kbuf)) {
+ return PTR_ERR(kbuf);
+ } else if (kbuf) {
+ kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
+ iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->iov,
+ 1, req->sr_msg.len);
+ }
+
flags = req->sr_msg.msg_flags;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
@@ -3227,18 +3835,8 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
kmsg->uaddr, flags);
- if (force_nonblock && ret == -EAGAIN) {
- if (req->io)
- return -EAGAIN;
- if (io_alloc_async_ctx(req)) {
- if (kmsg->iov != kmsg->fast_iov)
- kfree(kmsg->iov);
- return -ENOMEM;
- }
- memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
- req->flags |= REQ_F_NEED_CLEANUP;
- return -EAGAIN;
- }
+ if (force_nonblock && ret == -EAGAIN)
+ return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
}
@@ -3246,22 +3844,18 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
if (kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
- io_cqring_add_event(req, ret);
+ __io_cqring_add_event(req, ret, cflags);
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
-static int io_recv(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_recv(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
+ struct io_buffer *kbuf = NULL;
struct socket *sock;
- int ret;
+ int ret, cflags = 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -3269,15 +3863,25 @@ static int io_recv(struct io_kiocb *req, struct io_kiocb **nxt,
sock = sock_from_file(req->file, &ret);
if (sock) {
struct io_sr_msg *sr = &req->sr_msg;
+ void __user *buf = sr->buf;
struct msghdr msg;
struct iovec iov;
unsigned flags;
- ret = import_single_range(READ, sr->buf, sr->len, &iov,
+ kbuf = io_recv_buffer_select(req, &cflags, !force_nonblock);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+ else if (kbuf)
+ buf = u64_to_user_ptr(kbuf->addr);
+
+ ret = import_single_range(READ, buf, sr->len, &iov,
&msg.msg_iter);
- if (ret)
+ if (ret) {
+ kfree(kbuf);
return ret;
+ }
+ req->flags |= REQ_F_NEED_CLEANUP;
msg.msg_name = NULL;
msg.msg_control = NULL;
msg.msg_controllen = 0;
@@ -3298,20 +3902,17 @@ static int io_recv(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR;
}
- io_cqring_add_event(req, ret);
+ kfree(kbuf);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ __io_cqring_add_event(req, ret, cflags);
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
-
static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
-#if defined(CONFIG_NET)
struct io_accept *accept = &req->accept;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
@@ -3322,15 +3923,11 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
+ accept->nofile = rlimit(RLIMIT_NOFILE);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
-#if defined(CONFIG_NET)
-static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int __io_accept(struct io_kiocb *req, bool force_nonblock)
{
struct io_accept *accept = &req->accept;
unsigned file_flags;
@@ -3338,7 +3935,8 @@ static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
file_flags = force_nonblock ? O_NONBLOCK : 0;
ret = __sys_accept4_file(req->file, file_flags, accept->addr,
- accept->addr_len, accept->flags);
+ accept->addr_len, accept->flags,
+ accept->nofile);
if (ret == -EAGAIN && force_nonblock)
return -EAGAIN;
if (ret == -ERESTARTSYS)
@@ -3346,44 +3944,34 @@ static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
}
static void io_accept_finish(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
if (io_req_cancelled(req))
return;
- __io_accept(req, &nxt, false);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
+ __io_accept(req, false);
+ io_steal_work(req, workptr);
}
-#endif
-static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_accept(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
int ret;
- ret = __io_accept(req, nxt, force_nonblock);
+ ret = __io_accept(req, force_nonblock);
if (ret == -EAGAIN && force_nonblock) {
req->work.func = io_accept_finish;
- io_put_req(req);
return -EAGAIN;
}
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
-#if defined(CONFIG_NET)
struct io_connect *conn = &req->connect;
struct io_async_ctx *io = req->io;
@@ -3400,15 +3988,10 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return move_addr_to_kernel(conn->addr, conn->addr_len,
&io->connect.address);
-#else
- return -EOPNOTSUPP;
-#endif
}
-static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_connect(struct io_kiocb *req, bool force_nonblock)
{
-#if defined(CONFIG_NET)
struct io_async_ctx __io, *io;
unsigned file_flags;
int ret;
@@ -3444,25 +4027,301 @@ out:
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
return 0;
-#else
+}
+#else /* !CONFIG_NET */
+static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
return -EOPNOTSUPP;
-#endif
}
-static void io_poll_remove_one(struct io_kiocb *req)
+static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
{
- struct io_poll_iocb *poll = &req->poll;
+ return -EOPNOTSUPP;
+}
+
+static int io_send(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_recvmsg_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_recv(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_accept(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ return -EOPNOTSUPP;
+}
+
+static int io_connect(struct io_kiocb *req, bool force_nonblock)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_NET */
+
+struct io_poll_table {
+ struct poll_table_struct pt;
+ struct io_kiocb *req;
+ int error;
+};
+
+static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
+ struct wait_queue_head *head)
+{
+ if (unlikely(poll->head)) {
+ pt->error = -EINVAL;
+ return;
+ }
+
+ pt->error = 0;
+ poll->head = head;
+ add_wait_queue(head, &poll->wait);
+}
+
+static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
+ struct poll_table_struct *p)
+{
+ struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+
+ __io_queue_proc(&pt->req->apoll->poll, pt, head);
+}
+
+static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+ __poll_t mask, task_work_func_t func)
+{
+ struct task_struct *tsk;
+
+ /* for instances that support it check for an event match first: */
+ if (mask && !(mask & poll->events))
+ return 0;
+
+ trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
+
+ list_del_init(&poll->wait.entry);
+
+ tsk = req->task;
+ req->result = mask;
+ init_task_work(&req->task_work, func);
+ /*
+ * If this fails, then the task is exiting. If that is the case, then
+ * the exit check will ultimately cancel these work items. Hence we
+ * don't need to check here and handle it specifically.
+ */
+ task_work_add(tsk, &req->task_work, true);
+ wake_up_process(tsk);
+ return 1;
+}
+
+static void io_async_task_func(struct callback_head *cb)
+{
+ struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct async_poll *apoll = req->apoll;
+ struct io_ring_ctx *ctx = req->ctx;
+
+ trace_io_uring_task_run(req->ctx, req->opcode, req->user_data);
+
+ WARN_ON_ONCE(!list_empty(&req->apoll->poll.wait.entry));
+
+ if (hash_hashed(&req->hash_node)) {
+ spin_lock_irq(&ctx->completion_lock);
+ hash_del(&req->hash_node);
+ spin_unlock_irq(&ctx->completion_lock);
+ }
+
+ /* restore ->work in case we need to retry again */
+ memcpy(&req->work, &apoll->work, sizeof(req->work));
+
+ __set_current_state(TASK_RUNNING);
+ mutex_lock(&ctx->uring_lock);
+ __io_queue_sqe(req, NULL);
+ mutex_unlock(&ctx->uring_lock);
+
+ kfree(apoll);
+}
+
+static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct io_kiocb *req = wait->private;
+ struct io_poll_iocb *poll = &req->apoll->poll;
+
+ trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data,
+ key_to_poll(key));
+
+ return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
+}
+
+static void io_poll_req_insert(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct hlist_head *list;
+
+ list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+ hlist_add_head(&req->hash_node, list);
+}
+
+static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
+ struct io_poll_iocb *poll,
+ struct io_poll_table *ipt, __poll_t mask,
+ wait_queue_func_t wake_func)
+ __acquires(&ctx->completion_lock)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ bool cancel = false;
+
+ poll->file = req->file;
+ poll->head = NULL;
+ poll->done = poll->canceled = false;
+ poll->events = mask;
+
+ ipt->pt._key = mask;
+ ipt->req = req;
+ ipt->error = -EINVAL;
+
+ INIT_LIST_HEAD(&poll->wait.entry);
+ init_waitqueue_func_entry(&poll->wait, wake_func);
+ poll->wait.private = req;
+
+ mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+
+ spin_lock_irq(&ctx->completion_lock);
+ if (likely(poll->head)) {
+ spin_lock(&poll->head->lock);
+ if (unlikely(list_empty(&poll->wait.entry))) {
+ if (ipt->error)
+ cancel = true;
+ ipt->error = 0;
+ mask = 0;
+ }
+ if (mask || ipt->error)
+ list_del_init(&poll->wait.entry);
+ else if (cancel)
+ WRITE_ONCE(poll->canceled, true);
+ else if (!poll->done) /* actually waiting for an event */
+ io_poll_req_insert(req);
+ spin_unlock(&poll->head->lock);
+ }
+
+ return mask;
+}
+
+static bool io_arm_poll_handler(struct io_kiocb *req)
+{
+ const struct io_op_def *def = &io_op_defs[req->opcode];
+ struct io_ring_ctx *ctx = req->ctx;
+ struct async_poll *apoll;
+ struct io_poll_table ipt;
+ __poll_t mask, ret;
+
+ if (!req->file || !file_can_poll(req->file))
+ return false;
+ if (req->flags & (REQ_F_MUST_PUNT | REQ_F_POLLED))
+ return false;
+ if (!def->pollin && !def->pollout)
+ return false;
+
+ apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
+ if (unlikely(!apoll))
+ return false;
+
+ req->flags |= REQ_F_POLLED;
+ memcpy(&apoll->work, &req->work, sizeof(req->work));
+
+ /*
+ * Don't need a reference here, as we're adding it to the task
+ * task_works list. If the task exits, the list is pruned.
+ */
+ req->task = current;
+ req->apoll = apoll;
+ INIT_HLIST_NODE(&req->hash_node);
+
+ mask = 0;
+ if (def->pollin)
+ mask |= POLLIN | POLLRDNORM;
+ if (def->pollout)
+ mask |= POLLOUT | POLLWRNORM;
+ mask |= POLLERR | POLLPRI;
+
+ ipt.pt._qproc = io_async_queue_proc;
+
+ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
+ io_async_wake);
+ if (ret) {
+ ipt.error = 0;
+ apoll->poll.done = true;
+ spin_unlock_irq(&ctx->completion_lock);
+ memcpy(&req->work, &apoll->work, sizeof(req->work));
+ kfree(apoll);
+ return false;
+ }
+ spin_unlock_irq(&ctx->completion_lock);
+ trace_io_uring_poll_arm(ctx, req->opcode, req->user_data, mask,
+ apoll->poll.events);
+ return true;
+}
+
+static bool __io_poll_remove_one(struct io_kiocb *req,
+ struct io_poll_iocb *poll)
+{
+ bool do_complete = false;
spin_lock(&poll->head->lock);
WRITE_ONCE(poll->canceled, true);
if (!list_empty(&poll->wait.entry)) {
list_del_init(&poll->wait.entry);
- io_queue_async_work(req);
+ do_complete = true;
}
spin_unlock(&poll->head->lock);
+ return do_complete;
+}
+
+static bool io_poll_remove_one(struct io_kiocb *req)
+{
+ bool do_complete;
+
+ if (req->opcode == IORING_OP_POLL_ADD) {
+ do_complete = __io_poll_remove_one(req, &req->poll);
+ } else {
+ /* non-poll requests have submit ref still */
+ do_complete = __io_poll_remove_one(req, &req->apoll->poll);
+ if (do_complete)
+ io_put_req(req);
+ }
+
hash_del(&req->hash_node);
+
+ if (do_complete) {
+ io_cqring_fill_event(req, -ECANCELED);
+ io_commit_cqring(req->ctx);
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_put_req(req);
+ }
+
+ return do_complete;
}
static void io_poll_remove_all(struct io_ring_ctx *ctx)
@@ -3480,6 +4339,8 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx)
io_poll_remove_one(req);
}
spin_unlock_irq(&ctx->completion_lock);
+
+ io_cqring_ev_posted(ctx);
}
static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
@@ -3489,10 +4350,11 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
hlist_for_each_entry(req, list, hash_node) {
- if (sqe_addr == req->user_data) {
- io_poll_remove_one(req);
+ if (sqe_addr != req->user_data)
+ continue;
+ if (io_poll_remove_one(req))
return 0;
- }
+ return -EALREADY;
}
return -ENOENT;
@@ -3538,186 +4400,54 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
struct io_ring_ctx *ctx = req->ctx;
req->poll.done = true;
- if (error)
- io_cqring_fill_event(req, error);
- else
- io_cqring_fill_event(req, mangle_poll(mask));
+ io_cqring_fill_event(req, error ? error : mangle_poll(mask));
io_commit_cqring(ctx);
}
-static void io_poll_complete_work(struct io_wq_work **workptr)
+static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
{
- struct io_wq_work *work = *workptr;
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
- struct io_poll_iocb *poll = &req->poll;
- struct poll_table_struct pt = { ._key = poll->events };
struct io_ring_ctx *ctx = req->ctx;
- struct io_kiocb *nxt = NULL;
- __poll_t mask = 0;
- int ret = 0;
-
- if (work->flags & IO_WQ_WORK_CANCEL) {
- WRITE_ONCE(poll->canceled, true);
- ret = -ECANCELED;
- } else if (READ_ONCE(poll->canceled)) {
- ret = -ECANCELED;
- }
- if (ret != -ECANCELED)
- mask = vfs_poll(poll->file, &pt) & poll->events;
-
- /*
- * Note that ->ki_cancel callers also delete iocb from active_reqs after
- * calling ->ki_cancel. We need the ctx_lock roundtrip here to
- * synchronize with them. In the cancellation case the list_del_init
- * itself is not actually needed, but harmless so we keep it in to
- * avoid further branches in the fast path.
- */
spin_lock_irq(&ctx->completion_lock);
- if (!mask && ret != -ECANCELED) {
- add_wait_queue(poll->head, &poll->wait);
- spin_unlock_irq(&ctx->completion_lock);
- return;
- }
hash_del(&req->hash_node);
- io_poll_complete(req, mask, ret);
+ io_poll_complete(req, req->result, 0);
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_put_req_find_next(req, nxt);
spin_unlock_irq(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
-
- if (ret < 0)
- req_set_fail_links(req);
- io_put_req_find_next(req, &nxt);
- if (nxt)
- io_wq_assign_next(workptr, nxt);
}
-static void __io_poll_flush(struct io_ring_ctx *ctx, struct llist_node *nodes)
+static void io_poll_task_func(struct callback_head *cb)
{
- struct io_kiocb *req, *tmp;
- struct req_batch rb;
+ struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct io_kiocb *nxt = NULL;
- rb.to_free = rb.need_iter = 0;
- spin_lock_irq(&ctx->completion_lock);
- llist_for_each_entry_safe(req, tmp, nodes, llist_node) {
- hash_del(&req->hash_node);
- io_poll_complete(req, req->result, 0);
+ io_poll_task_handler(req, &nxt);
+ if (nxt) {
+ struct io_ring_ctx *ctx = nxt->ctx;
- if (refcount_dec_and_test(&req->refs) &&
- !io_req_multi_free(&rb, req)) {
- req->flags |= REQ_F_COMP_LOCKED;
- io_free_req(req);
- }
+ mutex_lock(&ctx->uring_lock);
+ __io_queue_sqe(nxt, NULL);
+ mutex_unlock(&ctx->uring_lock);
}
- spin_unlock_irq(&ctx->completion_lock);
-
- io_cqring_ev_posted(ctx);
- io_free_req_many(ctx, &rb);
-}
-
-static void io_poll_flush(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct llist_node *nodes;
-
- nodes = llist_del_all(&req->ctx->poll_llist);
- if (nodes)
- __io_poll_flush(req->ctx, nodes);
-}
-
-static void io_poll_trigger_evfd(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
- eventfd_signal(req->ctx->cq_ev_fd, 1);
- io_put_req(req);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
void *key)
{
- struct io_poll_iocb *poll = wait->private;
- struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
- struct io_ring_ctx *ctx = req->ctx;
- __poll_t mask = key_to_poll(key);
-
- /* for instances that support it check for an event match first: */
- if (mask && !(mask & poll->events))
- return 0;
-
- list_del_init(&poll->wait.entry);
-
- /*
- * Run completion inline if we can. We're using trylock here because
- * we are violating the completion_lock -> poll wq lock ordering.
- * If we have a link timeout we're going to need the completion_lock
- * for finalizing the request, mark us as having grabbed that already.
- */
- if (mask) {
- unsigned long flags;
-
- if (llist_empty(&ctx->poll_llist) &&
- spin_trylock_irqsave(&ctx->completion_lock, flags)) {
- bool trigger_ev;
-
- hash_del(&req->hash_node);
- io_poll_complete(req, mask, 0);
-
- trigger_ev = io_should_trigger_evfd(ctx);
- if (trigger_ev && eventfd_signal_count()) {
- trigger_ev = false;
- req->work.func = io_poll_trigger_evfd;
- } else {
- req->flags |= REQ_F_COMP_LOCKED;
- io_put_req(req);
- req = NULL;
- }
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
- __io_cqring_ev_posted(ctx, trigger_ev);
- } else {
- req->result = mask;
- req->llist_node.next = NULL;
- /* if the list wasn't empty, we're done */
- if (!llist_add(&req->llist_node, &ctx->poll_llist))
- req = NULL;
- else
- req->work.func = io_poll_flush;
- }
- }
- if (req)
- io_queue_async_work(req);
+ struct io_kiocb *req = wait->private;
+ struct io_poll_iocb *poll = &req->poll;
- return 1;
+ return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func);
}
-struct io_poll_table {
- struct poll_table_struct pt;
- struct io_kiocb *req;
- int error;
-};
-
static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
struct poll_table_struct *p)
{
struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
- if (unlikely(pt->req->poll.head)) {
- pt->error = -EINVAL;
- return;
- }
-
- pt->error = 0;
- pt->req->poll.head = head;
- add_wait_queue(head, &pt->req->poll.wait);
-}
-
-static void io_poll_req_insert(struct io_kiocb *req)
-{
- struct io_ring_ctx *ctx = req->ctx;
- struct hlist_head *list;
-
- list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
- hlist_add_head(&req->hash_node, list);
+ __io_queue_proc(&pt->req->poll, pt, head);
}
static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -3734,55 +4464,29 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+
+ /*
+ * Don't need a reference here, as we're adding it to the task
+ * task_works list. If the task exits, the list is pruned.
+ */
+ req->task = current;
return 0;
}
-static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
+static int io_poll_add(struct io_kiocb *req)
{
struct io_poll_iocb *poll = &req->poll;
struct io_ring_ctx *ctx = req->ctx;
struct io_poll_table ipt;
- bool cancel = false;
__poll_t mask;
- INIT_IO_WORK(&req->work, io_poll_complete_work);
INIT_HLIST_NODE(&req->hash_node);
-
- poll->head = NULL;
- poll->done = false;
- poll->canceled = false;
-
- ipt.pt._qproc = io_poll_queue_proc;
- ipt.pt._key = poll->events;
- ipt.req = req;
- ipt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
-
- /* initialized the list so that we can do list_empty checks */
- INIT_LIST_HEAD(&poll->wait.entry);
- init_waitqueue_func_entry(&poll->wait, io_poll_wake);
- poll->wait.private = poll;
-
INIT_LIST_HEAD(&req->list);
+ ipt.pt._qproc = io_poll_queue_proc;
- mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
+ mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
+ io_poll_wake);
- spin_lock_irq(&ctx->completion_lock);
- if (likely(poll->head)) {
- spin_lock(&poll->head->lock);
- if (unlikely(list_empty(&poll->wait.entry))) {
- if (ipt.error)
- cancel = true;
- ipt.error = 0;
- mask = 0;
- }
- if (mask || ipt.error)
- list_del_init(&poll->wait.entry);
- else if (cancel)
- WRITE_ONCE(poll->canceled, true);
- else if (!poll->done) /* actually waiting for an event */
- io_poll_req_insert(req);
- spin_unlock(&poll->head->lock);
- }
if (mask) { /* no async, we'd stolen it */
ipt.error = 0;
io_poll_complete(req, mask, 0);
@@ -3791,7 +4495,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
if (mask) {
io_cqring_ev_posted(ctx);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
}
return ipt.error;
}
@@ -4040,7 +4744,7 @@ static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr)
static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
struct io_kiocb *req, __u64 sqe_addr,
- struct io_kiocb **nxt, int success_ret)
+ int success_ret)
{
unsigned long flags;
int ret;
@@ -4066,7 +4770,7 @@ done:
if (ret < 0)
req_set_fail_links(req);
- io_put_req_find_next(req, nxt);
+ io_put_req(req);
}
static int io_async_cancel_prep(struct io_kiocb *req,
@@ -4082,11 +4786,11 @@ static int io_async_cancel_prep(struct io_kiocb *req,
return 0;
}
-static int io_async_cancel(struct io_kiocb *req, struct io_kiocb **nxt)
+static int io_async_cancel(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- io_async_find_and_cancel(ctx, req, req->cancel.addr, nxt, 0);
+ io_async_find_and_cancel(ctx, req, req->cancel.addr, 0);
return 0;
}
@@ -4132,6 +4836,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
{
ssize_t ret = 0;
+ if (!sqe)
+ return 0;
+
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (unlikely(ret))
@@ -4218,6 +4925,15 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_EPOLL_CTL:
ret = io_epoll_ctl_prep(req, sqe);
break;
+ case IORING_OP_SPLICE:
+ ret = io_splice_prep(req, sqe);
+ break;
+ case IORING_OP_PROVIDE_BUFFERS:
+ ret = io_provide_buffers_prep(req, sqe);
+ break;
+ case IORING_OP_REMOVE_BUFFERS:
+ ret = io_remove_buffers_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
@@ -4264,29 +4980,43 @@ static void io_cleanup_req(struct io_kiocb *req)
case IORING_OP_READV:
case IORING_OP_READ_FIXED:
case IORING_OP_READ:
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ kfree((void *)(unsigned long)req->rw.addr);
+ /* fallthrough */
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
case IORING_OP_WRITE:
if (io->rw.iov != io->rw.fast_iov)
kfree(io->rw.iov);
break;
- case IORING_OP_SENDMSG:
case IORING_OP_RECVMSG:
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ kfree(req->sr_msg.kbuf);
+ /* fallthrough */
+ case IORING_OP_SENDMSG:
if (io->msg.iov != io->msg.fast_iov)
kfree(io->msg.iov);
break;
+ case IORING_OP_RECV:
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ kfree(req->sr_msg.kbuf);
+ break;
case IORING_OP_OPENAT:
case IORING_OP_OPENAT2:
case IORING_OP_STATX:
putname(req->open.filename);
break;
+ case IORING_OP_SPLICE:
+ io_put_file(req, req->splice.file_in,
+ (req->splice.flags & SPLICE_F_FD_IN_FIXED));
+ break;
}
req->flags &= ~REQ_F_NEED_CLEANUP;
}
static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt, bool force_nonblock)
+ bool force_nonblock)
{
struct io_ring_ctx *ctx = req->ctx;
int ret;
@@ -4303,7 +5033,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0)
break;
}
- ret = io_read(req, nxt, force_nonblock);
+ ret = io_read(req, force_nonblock);
break;
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
@@ -4313,7 +5043,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0)
break;
}
- ret = io_write(req, nxt, force_nonblock);
+ ret = io_write(req, force_nonblock);
break;
case IORING_OP_FSYNC:
if (sqe) {
@@ -4321,7 +5051,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0)
break;
}
- ret = io_fsync(req, nxt, force_nonblock);
+ ret = io_fsync(req, force_nonblock);
break;
case IORING_OP_POLL_ADD:
if (sqe) {
@@ -4329,7 +5059,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_poll_add(req, nxt);
+ ret = io_poll_add(req);
break;
case IORING_OP_POLL_REMOVE:
if (sqe) {
@@ -4345,7 +5075,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0)
break;
}
- ret = io_sync_file_range(req, nxt, force_nonblock);
+ ret = io_sync_file_range(req, force_nonblock);
break;
case IORING_OP_SENDMSG:
case IORING_OP_SEND:
@@ -4355,9 +5085,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
break;
}
if (req->opcode == IORING_OP_SENDMSG)
- ret = io_sendmsg(req, nxt, force_nonblock);
+ ret = io_sendmsg(req, force_nonblock);
else
- ret = io_send(req, nxt, force_nonblock);
+ ret = io_send(req, force_nonblock);
break;
case IORING_OP_RECVMSG:
case IORING_OP_RECV:
@@ -4367,9 +5097,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
break;
}
if (req->opcode == IORING_OP_RECVMSG)
- ret = io_recvmsg(req, nxt, force_nonblock);
+ ret = io_recvmsg(req, force_nonblock);
else
- ret = io_recv(req, nxt, force_nonblock);
+ ret = io_recv(req, force_nonblock);
break;
case IORING_OP_TIMEOUT:
if (sqe) {
@@ -4393,7 +5123,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_accept(req, nxt, force_nonblock);
+ ret = io_accept(req, force_nonblock);
break;
case IORING_OP_CONNECT:
if (sqe) {
@@ -4401,7 +5131,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_connect(req, nxt, force_nonblock);
+ ret = io_connect(req, force_nonblock);
break;
case IORING_OP_ASYNC_CANCEL:
if (sqe) {
@@ -4409,7 +5139,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_async_cancel(req, nxt);
+ ret = io_async_cancel(req);
break;
case IORING_OP_FALLOCATE:
if (sqe) {
@@ -4417,7 +5147,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_fallocate(req, nxt, force_nonblock);
+ ret = io_fallocate(req, force_nonblock);
break;
case IORING_OP_OPENAT:
if (sqe) {
@@ -4425,7 +5155,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_openat(req, nxt, force_nonblock);
+ ret = io_openat(req, force_nonblock);
break;
case IORING_OP_CLOSE:
if (sqe) {
@@ -4433,7 +5163,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_close(req, nxt, force_nonblock);
+ ret = io_close(req, force_nonblock);
break;
case IORING_OP_FILES_UPDATE:
if (sqe) {
@@ -4449,7 +5179,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_statx(req, nxt, force_nonblock);
+ ret = io_statx(req, force_nonblock);
break;
case IORING_OP_FADVISE:
if (sqe) {
@@ -4457,7 +5187,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_fadvise(req, nxt, force_nonblock);
+ ret = io_fadvise(req, force_nonblock);
break;
case IORING_OP_MADVISE:
if (sqe) {
@@ -4465,7 +5195,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_madvise(req, nxt, force_nonblock);
+ ret = io_madvise(req, force_nonblock);
break;
case IORING_OP_OPENAT2:
if (sqe) {
@@ -4473,7 +5203,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_openat2(req, nxt, force_nonblock);
+ ret = io_openat2(req, force_nonblock);
break;
case IORING_OP_EPOLL_CTL:
if (sqe) {
@@ -4481,7 +5211,31 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_epoll_ctl(req, nxt, force_nonblock);
+ ret = io_epoll_ctl(req, force_nonblock);
+ break;
+ case IORING_OP_SPLICE:
+ if (sqe) {
+ ret = io_splice_prep(req, sqe);
+ if (ret < 0)
+ break;
+ }
+ ret = io_splice(req, force_nonblock);
+ break;
+ case IORING_OP_PROVIDE_BUFFERS:
+ if (sqe) {
+ ret = io_provide_buffers_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_provide_buffers(req, force_nonblock);
+ break;
+ case IORING_OP_REMOVE_BUFFERS:
+ if (sqe) {
+ ret = io_remove_buffers_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_remove_buffers(req, force_nonblock);
break;
default:
ret = -EINVAL;
@@ -4514,7 +5268,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
{
struct io_wq_work *work = *workptr;
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
- struct io_kiocb *nxt = NULL;
int ret = 0;
/* if NO_CANCEL is set, we must still run the work */
@@ -4524,9 +5277,8 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
}
if (!ret) {
- req->in_async = true;
do {
- ret = io_issue_sqe(req, NULL, &nxt, false);
+ ret = io_issue_sqe(req, NULL, false);
/*
* We can get EAGAIN for polled IO even though we're
* forcing a sync submission from here, since we can't
@@ -4538,18 +5290,13 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
} while (1);
}
- /* drop submission reference */
- io_put_req(req);
-
if (ret) {
req_set_fail_links(req);
io_cqring_add_event(req, ret);
io_put_req(req);
}
- /* if a dependent link is ready, pass it back */
- if (!ret && nxt)
- io_wq_assign_next(workptr, nxt);
+ io_steal_work(req, workptr);
}
static int io_req_needs_file(struct io_kiocb *req, int fd)
@@ -4570,41 +5317,52 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
return table->files[index & IORING_FILE_TABLE_MASK];;
}
-static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
+ int fd, struct file **out_file, bool fixed)
{
struct io_ring_ctx *ctx = req->ctx;
- unsigned flags;
- int fd;
-
- flags = READ_ONCE(sqe->flags);
- fd = READ_ONCE(sqe->fd);
-
- if (!io_req_needs_file(req, fd))
- return 0;
+ struct file *file;
- if (flags & IOSQE_FIXED_FILE) {
+ if (fixed) {
if (unlikely(!ctx->file_data ||
(unsigned) fd >= ctx->nr_user_files))
return -EBADF;
fd = array_index_nospec(fd, ctx->nr_user_files);
- req->file = io_file_from_index(ctx, fd);
- if (!req->file)
+ file = io_file_from_index(ctx, fd);
+ if (!file)
return -EBADF;
- req->flags |= REQ_F_FIXED_FILE;
percpu_ref_get(&ctx->file_data->refs);
} else {
- if (req->needs_fixed_file)
- return -EBADF;
trace_io_uring_file_get(ctx, fd);
- req->file = io_file_get(state, fd);
- if (unlikely(!req->file))
+ file = __io_file_get(state, fd);
+ if (unlikely(!file))
return -EBADF;
}
+ *out_file = file;
return 0;
}
+static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ unsigned flags;
+ int fd;
+ bool fixed;
+
+ flags = READ_ONCE(sqe->flags);
+ fd = READ_ONCE(sqe->fd);
+
+ if (!io_req_needs_file(req, fd))
+ return 0;
+
+ fixed = (flags & IOSQE_FIXED_FILE);
+ if (unlikely(!fixed && req->needs_fixed_file))
+ return -EBADF;
+
+ return io_file_get(state, req, fd, &req->file, fixed);
+}
+
static int io_grab_files(struct io_kiocb *req)
{
int ret = -EBADF;
@@ -4664,8 +5422,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
if (prev) {
req_set_fail_links(prev);
- io_async_find_and_cancel(ctx, req, prev->user_data, NULL,
- -ETIME);
+ io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
io_put_req(prev);
} else {
io_cqring_add_event(req, -ETIME);
@@ -4702,6 +5459,9 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
if (!(req->flags & REQ_F_LINK))
return NULL;
+ /* for polled retry, if flag is set, we already went through here */
+ if (req->flags & REQ_F_POLLED)
+ return NULL;
nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb,
link_list);
@@ -4715,7 +5475,7 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_kiocb *linked_timeout;
- struct io_kiocb *nxt = NULL;
+ struct io_kiocb *nxt;
const struct cred *old_creds = NULL;
int ret;
@@ -4731,7 +5491,7 @@ again:
old_creds = override_creds(req->work.creds);
}
- ret = io_issue_sqe(req, sqe, &nxt, true);
+ ret = io_issue_sqe(req, sqe, true);
/*
* We async punt it if the file wasn't marked NOWAIT, or if the file
@@ -4739,6 +5499,11 @@ again:
*/
if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
(req->flags & REQ_F_MUST_PUNT))) {
+ if (io_arm_poll_handler(req)) {
+ if (linked_timeout)
+ io_queue_linked_timeout(linked_timeout);
+ goto exit;
+ }
punt:
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
@@ -4751,10 +5516,11 @@ punt:
* submit reference when the iocb is actually submitted.
*/
io_queue_async_work(req);
- goto done_req;
+ goto exit;
}
err:
+ nxt = NULL;
/* drop submission reference */
io_put_req_find_next(req, &nxt);
@@ -4771,15 +5537,14 @@ err:
req_set_fail_links(req);
io_put_req(req);
}
-done_req:
if (nxt) {
req = nxt;
- nxt = NULL;
if (req->flags & REQ_F_FORCE_ASYNC)
goto punt;
goto again;
}
+exit:
if (old_creds)
revert_creds(old_creds);
}
@@ -4821,7 +5586,8 @@ static inline void io_queue_link_head(struct io_kiocb *req)
}
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
- IOSQE_IO_HARDLINK | IOSQE_ASYNC)
+ IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
+ IOSQE_BUFFER_SELECT)
static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct io_submit_state *state, struct io_kiocb **link)
@@ -4838,6 +5604,12 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
goto err_req;
}
+ if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
+ !io_op_defs[req->opcode].buffer_select) {
+ ret = -EOPNOTSUPP;
+ goto err_req;
+ }
+
id = READ_ONCE(sqe->personality);
if (id) {
req->work.creds = idr_find(&ctx->personality_idr, id);
@@ -4849,8 +5621,9 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
/* same numerical values with corresponding REQ_F_*, safe to copy */
- req->flags |= sqe_flags & (IOSQE_IO_DRAIN|IOSQE_IO_HARDLINK|
- IOSQE_ASYNC);
+ req->flags |= sqe_flags & (IOSQE_IO_DRAIN | IOSQE_IO_HARDLINK |
+ IOSQE_ASYNC | IOSQE_FIXED_FILE |
+ IOSQE_BUFFER_SELECT);
ret = io_req_set_file(state, req, sqe);
if (unlikely(ret)) {
@@ -4908,6 +5681,11 @@ err_req:
if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
req->flags |= REQ_F_LINK;
INIT_LIST_HEAD(&req->link_list);
+
+ if (io_alloc_async_ctx(req)) {
+ ret = -EAGAIN;
+ goto err_req;
+ }
ret = io_req_defer_prep(req, sqe);
if (ret)
req->flags |= REQ_F_FAIL_LINK;
@@ -5066,7 +5844,6 @@ fail_req:
*mm = ctx->sqo_mm;
}
- req->in_async = async;
req->needs_fixed_file = async;
trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
true, async);
@@ -5150,6 +5927,8 @@ static int io_sq_thread(void *data)
if (!list_empty(&ctx->poll_list) ||
(!time_after(jiffies, timeout) && ret != -EBUSY &&
!percpu_ref_is_dying(&ctx->refs))) {
+ if (current->task_works)
+ task_work_run();
cond_resched();
continue;
}
@@ -5181,6 +5960,10 @@ static int io_sq_thread(void *data)
finish_wait(&ctx->sqo_wait, &wait);
break;
}
+ if (current->task_works) {
+ task_work_run();
+ continue;
+ }
if (signal_pending(current))
flush_signals(current);
schedule();
@@ -5200,6 +5983,9 @@ static int io_sq_thread(void *data)
timeout = jiffies + ctx->sq_thread_idle;
}
+ if (current->task_works)
+ task_work_run();
+
set_fs(old_fs);
if (cur_mm) {
unuse_mm(cur_mm);
@@ -5264,8 +6050,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
struct io_rings *rings = ctx->rings;
int ret = 0;
- if (io_cqring_events(ctx, false) >= min_events)
- return 0;
+ do {
+ if (io_cqring_events(ctx, false) >= min_events)
+ return 0;
+ if (!current->task_works)
+ break;
+ task_work_run();
+ } while (1);
if (sig) {
#ifdef CONFIG_COMPAT
@@ -5285,6 +6076,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
do {
prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
TASK_INTERRUPTIBLE);
+ if (current->task_works)
+ task_work_run();
if (io_should_wake(&iowq, false))
break;
schedule();
@@ -5331,24 +6124,21 @@ static void io_file_ref_kill(struct percpu_ref *ref)
complete(&data->done);
}
-static void __io_file_ref_exit_and_free(struct rcu_head *rcu)
+static void io_file_ref_exit_and_free(struct work_struct *work)
{
- struct fixed_file_data *data = container_of(rcu, struct fixed_file_data,
- rcu);
- percpu_ref_exit(&data->refs);
- kfree(data);
-}
+ struct fixed_file_data *data;
+
+ data = container_of(work, struct fixed_file_data, ref_work);
-static void io_file_ref_exit_and_free(struct rcu_head *rcu)
-{
/*
- * We need to order our exit+free call against the potentially
- * existing call_rcu() for switching to atomic. One way to do that
- * is to have this rcu callback queue the final put and free, as we
- * could otherwise have a pre-existing atomic switch complete _after_
- * the free callback we queued.
+ * Ensure any percpu-ref atomic switch callback has run, it could have
+ * been in progress when the files were being unregistered. Once
+ * that's done, we can safely exit and free the ref and containing
+ * data structure.
*/
- call_rcu(rcu, __io_file_ref_exit_and_free);
+ rcu_barrier();
+ percpu_ref_exit(&data->refs);
+ kfree(data);
}
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
@@ -5369,7 +6159,8 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
for (i = 0; i < nr_tables; i++)
kfree(data->table[i].files);
kfree(data->table);
- call_rcu(&data->rcu, io_file_ref_exit_and_free);
+ INIT_WORK(&data->ref_work, io_file_ref_exit_and_free);
+ queue_work(system_wq, &data->ref_work);
ctx->file_data = NULL;
ctx->nr_user_files = 0;
return 0;
@@ -5596,7 +6387,6 @@ static void io_ring_file_put(struct io_ring_ctx *ctx, struct file *file)
struct io_file_put {
struct llist_node llist;
struct file *file;
- struct completion *done;
};
static void io_ring_file_ref_flush(struct fixed_file_data *data)
@@ -5607,10 +6397,7 @@ static void io_ring_file_ref_flush(struct fixed_file_data *data)
while ((node = llist_del_all(&data->put_llist)) != NULL) {
llist_for_each_entry_safe(pfile, tmp, node, llist) {
io_ring_file_put(data->ctx, pfile->file);
- if (pfile->done)
- complete(pfile->done);
- else
- kfree(pfile);
+ kfree(pfile);
}
}
}
@@ -5805,33 +6592,18 @@ static void io_atomic_switch(struct percpu_ref *ref)
percpu_ref_get(&data->refs);
}
-static bool io_queue_file_removal(struct fixed_file_data *data,
+static int io_queue_file_removal(struct fixed_file_data *data,
struct file *file)
{
- struct io_file_put *pfile, pfile_stack;
- DECLARE_COMPLETION_ONSTACK(done);
+ struct io_file_put *pfile;
- /*
- * If we fail allocating the struct we need for doing async reomval
- * of this file, just punt to sync and wait for it.
- */
pfile = kzalloc(sizeof(*pfile), GFP_KERNEL);
- if (!pfile) {
- pfile = &pfile_stack;
- pfile->done = &done;
- }
+ if (!pfile)
+ return -ENOMEM;
pfile->file = file;
llist_add(&pfile->llist, &data->put_llist);
-
- if (pfile == &pfile_stack) {
- percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);
- wait_for_completion(&done);
- flush_work(&data->ref_work);
- return false;
- }
-
- return true;
+ return 0;
}
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
@@ -5866,9 +6638,11 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
index = i & IORING_FILE_TABLE_MASK;
if (table->files[index]) {
file = io_file_from_index(ctx, index);
+ err = io_queue_file_removal(data, file);
+ if (err)
+ break;
table->files[index] = NULL;
- if (io_queue_file_removal(data, file))
- ref_switch = true;
+ ref_switch = true;
}
if (fd != -1) {
file = fget(fd);
@@ -5921,20 +6695,14 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
return __io_sqe_files_update(ctx, &up, nr_args);
}
-static void io_put_work(struct io_wq_work *work)
+static void io_free_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ /* Consider that io_steal_work() relies on this ref */
io_put_req(req);
}
-static void io_get_work(struct io_wq_work *work)
-{
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
-
- refcount_inc(&req->refs);
-}
-
static int io_init_wq_offload(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
@@ -5945,8 +6713,7 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx,
int ret = 0;
data.user = ctx->user;
- data.get_work = io_get_work;
- data.put_work = io_put_work;
+ data.free_work = io_free_work;
if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
/* Do QD, or 4 * CPUS, whatever is smallest */
@@ -6348,6 +7115,21 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
return -ENXIO;
}
+static int __io_destroy_buffers(int id, void *p, void *data)
+{
+ struct io_ring_ctx *ctx = data;
+ struct io_buffer *buf = p;
+
+ __io_remove_buffers(ctx, buf, id, -1U);
+ return 0;
+}
+
+static void io_destroy_buffers(struct io_ring_ctx *ctx)
+{
+ idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
+ idr_destroy(&ctx->io_buffer_idr);
+}
+
static void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
io_finish_async(ctx);
@@ -6358,6 +7140,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_sqe_buffer_unregister(ctx);
io_sqe_files_unregister(ctx);
io_eventfd_unregister(ctx);
+ io_destroy_buffers(ctx);
idr_destroy(&ctx->personality_idr);
#if defined(CONFIG_UNIX)
@@ -6612,6 +7395,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
int submitted = 0;
struct fd f;
+ if (current->task_works)
+ task_work_run();
+
if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
return -EINVAL;
@@ -6658,7 +7444,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
min_complete = min(min_complete, ctx->cq_entries);
- if (ctx->flags & IORING_SETUP_IOPOLL) {
+ /*
+ * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
+ * space applications don't need to do io completion events
+ * polling again, they can rely on io_sq_thread to do polling
+ * work, which can reduce cpu usage and uring_lock contention.
+ */
+ if (ctx->flags & IORING_SETUP_IOPOLL &&
+ !(ctx->flags & IORING_SETUP_SQPOLL)) {
ret = io_iopoll_check(ctx, &nr_events, min_complete);
} else {
ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
@@ -6734,6 +7527,17 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, "Personalities:\n");
idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
}
+ seq_printf(m, "PollList:\n");
+ spin_lock_irq(&ctx->completion_lock);
+ for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
+ struct hlist_head *list = &ctx->cancel_hash[i];
+ struct io_kiocb *req;
+
+ hlist_for_each_entry(req, list, hash_node)
+ seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
+ req->task->task_works != NULL);
+ }
+ spin_unlock_irq(&ctx->completion_lock);
mutex_unlock(&ctx->uring_lock);
}
@@ -6950,7 +7754,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
- IORING_FEAT_CUR_PERSONALITY;
+ IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;
err:
@@ -7228,6 +8032,7 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(8, __u64, off);
BUILD_BUG_SQE_ELEM(8, __u64, addr2);
BUILD_BUG_SQE_ELEM(16, __u64, addr);
+ BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in);
BUILD_BUG_SQE_ELEM(24, __u32, len);
BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags);
BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
@@ -7242,11 +8047,14 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(28, __u32, open_flags);
BUILD_BUG_SQE_ELEM(28, __u32, statx_flags);
BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice);
+ BUILD_BUG_SQE_ELEM(28, __u32, splice_flags);
BUILD_BUG_SQE_ELEM(32, __u64, user_data);
BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
BUILD_BUG_SQE_ELEM(42, __u16, personality);
+ BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
+ BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
return 0;
};
diff --git a/fs/locks.c b/fs/locks.c
index 426b55d333d5..b8a31c1c4fff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -725,7 +725,6 @@ static void __locks_delete_block(struct file_lock *waiter)
{
locks_delete_global_blocked(waiter);
list_del_init(&waiter->fl_blocked_member);
- waiter->fl_blocker = NULL;
}
static void __locks_wake_up_blocks(struct file_lock *blocker)
@@ -740,6 +739,13 @@ static void __locks_wake_up_blocks(struct file_lock *blocker)
waiter->fl_lmops->lm_notify(waiter);
else
wake_up(&waiter->fl_wait);
+
+ /*
+ * The setting of fl_blocker to NULL marks the "done"
+ * point in deleting a block. Paired with acquire at the top
+ * of locks_delete_block().
+ */
+ smp_store_release(&waiter->fl_blocker, NULL);
}
}
@@ -753,11 +759,42 @@ int locks_delete_block(struct file_lock *waiter)
{
int status = -ENOENT;
+ /*
+ * If fl_blocker is NULL, it won't be set again as this thread "owns"
+ * the lock and is the only one that might try to claim the lock.
+ *
+ * We use acquire/release to manage fl_blocker so that we can
+ * optimize away taking the blocked_lock_lock in many cases.
+ *
+ * The smp_load_acquire guarantees two things:
+ *
+ * 1/ that fl_blocked_requests can be tested locklessly. If something
+ * was recently added to that list it must have been in a locked region
+ * *before* the locked region when fl_blocker was set to NULL.
+ *
+ * 2/ that no other thread is accessing 'waiter', so it is safe to free
+ * it. __locks_wake_up_blocks is careful not to touch waiter after
+ * fl_blocker is released.
+ *
+ * If a lockless check of fl_blocker shows it to be NULL, we know that
+ * no new locks can be inserted into its fl_blocked_requests list, and
+ * can avoid doing anything further if the list is empty.
+ */
+ if (!smp_load_acquire(&waiter->fl_blocker) &&
+ list_empty(&waiter->fl_blocked_requests))
+ return status;
+
spin_lock(&blocked_lock_lock);
if (waiter->fl_blocker)
status = 0;
__locks_wake_up_blocks(waiter);
__locks_delete_block(waiter);
+
+ /*
+ * The setting of fl_blocker to NULL marks the "done" point in deleting
+ * a block. Paired with acquire at the top of this function.
+ */
+ smp_store_release(&waiter->fl_blocker, NULL);
spin_unlock(&blocked_lock_lock);
return status;
}
@@ -1350,7 +1387,8 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ error = wait_event_interruptible(fl->fl_wait,
+ list_empty(&fl->fl_blocked_member));
if (error)
break;
}
@@ -1435,7 +1473,8 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker);
+ error = wait_event_interruptible(fl.fl_wait,
+ list_empty(&fl.fl_blocked_member));
if (!error) {
/*
* If we've been sleeping someone might have
@@ -1638,7 +1677,8 @@ restart:
locks_dispose_list(&dispose);
error = wait_event_interruptible_timeout(new_fl->fl_wait,
- !new_fl->fl_blocker, break_time);
+ list_empty(&new_fl->fl_blocked_member),
+ break_time);
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
@@ -2122,7 +2162,8 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = flock_lock_inode(inode, fl);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ error = wait_event_interruptible(fl->fl_wait,
+ list_empty(&fl->fl_blocked_member));
if (error)
break;
}
@@ -2399,7 +2440,8 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
error = vfs_lock_file(filp, cmd, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ error = wait_event_interruptible(fl->fl_wait,
+ list_empty(&fl->fl_blocked_member));
if (error)
break;
}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 40b6c5ac46c0..88e1763e02f3 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -164,7 +164,7 @@ config ROOT_NFS
If you want your system to mount its root file system via NFS,
choose Y here. This is common practice for managing systems
without local permanent storage. For details, read
- <file:Documentation/filesystems/nfs/nfsroot.txt>.
+ <file:Documentation/admin-guide/nfs/nfsroot.rst>.
Most people say N here.
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 989c30c98511..f1ff3076e4a4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -153,6 +153,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
goto error_0;
+ clp->cl_minorversion = cl_init->minorversion;
clp->cl_nfs_mod = cl_init->nfs_mod;
if (!try_module_get(clp->cl_nfs_mod->owner))
goto error_dealloc;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index e1b938457ab9..e113fcb4bb4c 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -832,6 +832,8 @@ static int nfs_parse_source(struct fs_context *fc,
if (len > maxnamlen)
goto out_hostname;
+ kfree(ctx->nfs_server.hostname);
+
/* N.B. caller will free nfs_server.hostname in all cases */
ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL);
if (!ctx->nfs_server.hostname)
@@ -1240,6 +1242,13 @@ static int nfs_fs_context_validate(struct fs_context *fc)
}
ctx->nfs_mod = nfs_mod;
}
+
+ /* Ensure the filesystem context has the correct fs_type */
+ if (fc->fs_type != ctx->nfs_mod->nfs_fs) {
+ module_put(fc->fs_type->owner);
+ __module_get(ctx->nfs_mod->nfs_fs->owner);
+ fc->fs_type = ctx->nfs_mod->nfs_fs;
+ }
return 0;
out_no_device_name:
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 52270bfac120..1abf126c2df4 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
struct nfs_server_key {
struct {
uint16_t nfsversion; /* NFS protocol version */
+ uint32_t minorversion; /* NFSv4 minor version */
uint16_t family; /* address family */
__be16 port; /* IP port */
} hdr;
@@ -55,6 +56,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp)
memset(&key, 0, sizeof(key));
key.hdr.nfsversion = clp->rpc_ops->version;
+ key.hdr.minorversion = clp->cl_minorversion;
key.hdr.family = clp->cl_addr.ss_family;
switch (clp->cl_addr.ss_family) {
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index ad6077404947..f3ece8ed3203 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -153,7 +153,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
/* Open a new filesystem context, transferring parameters from the
* parent superblock, including the network namespace.
*/
- fc = fs_context_for_submount(&nfs_fs_type, path->dentry);
+ fc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
if (IS_ERR(fc))
return ERR_CAST(fc);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 0cd767e5c977..0bd77cc1f639 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -216,7 +216,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
INIT_LIST_HEAD(&clp->cl_ds_clients);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
- clp->cl_minorversion = cl_init->minorversion;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
clp->cl_mig_gen = 1;
#if IS_ENABLED(CONFIG_NFS_V4_1)
diff --git a/fs/open.c b/fs/open.c
index 0788b3715731..b69d6eed67e6 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -860,9 +860,6 @@ cleanup_file:
* the return value of d_splice_alias(), then the caller needs to perform dput()
* on it after finish_open().
*
- * On successful return @file is a fully instantiated open file. After this, if
- * an error occurs in ->atomic_open(), it needs to clean up with fput().
- *
* Returns zero on success or -errno if the open failed.
*/
int finish_open(struct file *file, struct dentry *dentry,
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 444e2da4f60e..714c14c47ca5 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -93,6 +93,7 @@ config OVERLAY_FS_XINO_AUTO
bool "Overlayfs: auto enable inode number mapping"
default n
depends on OVERLAY_FS
+ depends on 64BIT
help
If this config option is enabled then overlay filesystems will use
unused high bits in undelying filesystem inode numbers to map all
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index a5317216de73..87c362f65448 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -244,6 +244,9 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
if (iocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(orig_iocb->ki_filp);
+ /* Actually acquired in ovl_write_iter() */
+ __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+ SB_FREEZE_WRITE);
file_end_write(iocb->ki_filp);
ovl_copyattr(ovl_inode_real(inode), inode);
}
@@ -346,6 +349,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out;
file_start_write(real.file);
+ /* Pacify lockdep, same trick as done in aio_write() */
+ __sb_writers_release(file_inode(real.file)->i_sb,
+ SB_FREEZE_WRITE);
aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 3623d28aa4fa..3d3f2b8bdae5 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -318,7 +318,12 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0;
}
-static inline int ovl_inode_lock(struct inode *inode)
+static inline void ovl_inode_lock(struct inode *inode)
+{
+ mutex_lock(&OVL_I(inode)->lock);
+}
+
+static inline int ovl_inode_lock_interruptible(struct inode *inode)
{
return mutex_lock_interruptible(&OVL_I(inode)->lock);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 319fe0d355b0..ac967f1cb6e5 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1411,6 +1411,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
if (ofs->config.xino == OVL_XINO_ON)
pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
ofs->xino_mode = 0;
+ } else if (ofs->config.xino == OVL_XINO_OFF) {
+ ofs->xino_mode = -1;
} else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
/*
* This is a roundup of number of bits needed for encoding
@@ -1623,8 +1625,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_stack_depth = 0;
sb->s_maxbytes = MAX_LFS_FILESIZE;
/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
- if (ofs->config.xino != OVL_XINO_OFF)
+ if (ofs->config.xino != OVL_XINO_OFF) {
ofs->xino_mode = BITS_PER_LONG - 32;
+ if (!ofs->xino_mode) {
+ pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
+ ofs->config.xino = OVL_XINO_OFF;
+ }
+ }
/* alloc/destroy_inode needed for setting up traps in inode cache */
sb->s_op = &ovl_super_operations;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index ea005085803f..042f7eb4f7f4 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -509,7 +509,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags)
struct inode *inode = d_inode(dentry);
int err;
- err = ovl_inode_lock(inode);
+ err = ovl_inode_lock_interruptible(inode);
if (!err && ovl_already_copied_up_locked(dentry, flags)) {
err = 1; /* Already copied up */
ovl_inode_unlock(inode);
@@ -764,7 +764,7 @@ int ovl_nlink_start(struct dentry *dentry)
return err;
}
- err = ovl_inode_lock(inode);
+ err = ovl_inode_lock_interruptible(inode);
if (err)
return err;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 7fbe8f058220..d99b5d39aa90 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -87,11 +87,11 @@ static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos)
struct pstore_private *ps = s->private;
struct pstore_ftrace_seq_data *data = v;
+ (*pos)++;
data->off += REC_SIZE;
if (data->off + REC_SIZE > ps->total_size)
return NULL;
- (*pos)++;
return data;
}
@@ -101,6 +101,9 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
struct pstore_ftrace_seq_data *data = v;
struct pstore_ftrace_record *rec;
+ if (!data)
+ return 0;
+
rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off);
seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %ps <- %pS\n",
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index d896457e7c11..408277ee3cdb 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -823,9 +823,9 @@ static int __init pstore_init(void)
ret = pstore_init_fs();
if (ret)
- return ret;
+ free_buf_for_compression();
- return 0;
+ return ret;
}
late_initcall(pstore_init);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 013486b5125e..795622190c01 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -963,7 +963,6 @@ static void __init ramoops_register_dummy(void)
pr_info("could not create platform device: %ld\n",
PTR_ERR(dummy));
dummy = NULL;
- ramoops_unregister_dummy();
}
}
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 1f4d8c06f9be..c917c191e78c 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -34,7 +34,7 @@ struct persistent_ram_buffer {
uint32_t sig;
atomic_t start;
atomic_t size;
- uint8_t data[0];
+ uint8_t data[];
};
#define PERSISTENT_RAM_SIG (0x43474244) /* DBGC */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 072156c4f895..5c766330e493 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2599,7 +2599,6 @@ static int journal_init_dev(struct super_block *super,
int result;
dev_t jdev;
fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
- char b[BDEVNAME_SIZE];
result = 0;
@@ -2621,8 +2620,8 @@ static int journal_init_dev(struct super_block *super,
result = PTR_ERR(journal->j_dev_bd);
journal->j_dev_bd = NULL;
reiserfs_warning(super, "sh-458",
- "cannot init journal device '%s': %i",
- __bdevname(jdev, b), result);
+ "cannot init journal device unknown-block(%u,%u): %i",
+ MAJOR(jdev), MINOR(jdev), result);
return result;
} else if (jdev != super->s_dev)
set_blocksize(journal->j_dev_bd, super->s_blocksize);
diff --git a/fs/splice.c b/fs/splice.c
index d671936d0aad..4735defc46ee 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1109,9 +1109,9 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
/*
* Determine where to splice to/from.
*/
-static long do_splice(struct file *in, loff_t __user *off_in,
- struct file *out, loff_t __user *off_out,
- size_t len, unsigned int flags)
+long do_splice(struct file *in, loff_t __user *off_in,
+ struct file *out, loff_t __user *off_out,
+ size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 69aee3dfb660..3ce9829a6936 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -178,7 +178,8 @@ static void zonefs_update_stats(struct inode *inode, loff_t new_isize)
* amount of readable data in the zone.
*/
static loff_t zonefs_check_zone_condition(struct inode *inode,
- struct blk_zone *zone, bool warn)
+ struct blk_zone *zone, bool warn,
+ bool mount)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
@@ -196,13 +197,26 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
zone->wp = zone->start;
return 0;
case BLK_ZONE_COND_READONLY:
- /* Do not allow writes in read-only zones */
+ /*
+ * The write pointer of read-only zones is invalid. If such a
+ * zone is found during mount, the file size cannot be retrieved
+ * so we treat the zone as offline (mount == true case).
+ * Otherwise, keep the file size as it was when last updated
+ * so that the user can recover data. In both cases, writes are
+ * always disabled for the zone.
+ */
if (warn)
zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n",
inode->i_ino);
inode->i_flags |= S_IMMUTABLE;
+ if (mount) {
+ zone->cond = BLK_ZONE_COND_OFFLINE;
+ inode->i_mode &= ~0777;
+ zone->wp = zone->start;
+ return 0;
+ }
inode->i_mode &= ~0222;
- /* fallthrough */
+ return i_size_read(inode);
default:
if (zi->i_ztype == ZONEFS_ZTYPE_CNV)
return zi->i_max_size;
@@ -231,7 +245,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* as there is no inconsistency between the inode size and the amount of
* data writen in the zone (data_size).
*/
- data_size = zonefs_check_zone_condition(inode, zone, true);
+ data_size = zonefs_check_zone_condition(inode, zone, true, false);
isize = i_size_read(inode);
if (zone->cond != BLK_ZONE_COND_OFFLINE &&
zone->cond != BLK_ZONE_COND_READONLY &&
@@ -274,7 +288,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
if (zone->cond != BLK_ZONE_COND_OFFLINE) {
zone->cond = BLK_ZONE_COND_OFFLINE;
data_size = zonefs_check_zone_condition(inode, zone,
- false);
+ false, false);
}
} else if (zone->cond == BLK_ZONE_COND_READONLY ||
sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) {
@@ -283,7 +297,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
if (zone->cond != BLK_ZONE_COND_READONLY) {
zone->cond = BLK_ZONE_COND_READONLY;
data_size = zonefs_check_zone_condition(inode, zone,
- false);
+ false, false);
}
}
@@ -975,7 +989,7 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
zi->i_zsector = zone->start;
zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
zone->len << SECTOR_SHIFT);
- zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true);
+ zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true);
inode->i_uid = sbi->s_uid;
inode->i_gid = sbi->s_gid;
diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h
index 4e6dc840b159..9ecb3c1f0f15 100644
--- a/include/crypto/curve25519.h
+++ b/include/crypto/curve25519.h
@@ -33,7 +33,8 @@ bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
curve25519_arch(mypublic, secret, basepoint);
else
curve25519_generic(mypublic, secret, basepoint);
@@ -49,7 +50,8 @@ __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
CURVE25519_KEY_SIZE)))
return false;
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
curve25519_base_arch(pub, secret);
else
curve25519_generic(pub, secret, curve25519_base_point);
diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index bcb39da9adb4..41725d88d27e 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -81,7 +81,7 @@ struct drm_dp_vcpi {
* &drm_dp_mst_topology_mgr.base.lock.
* @num_sdp_stream_sinks: Number of stream sinks. Protected by
* &drm_dp_mst_topology_mgr.base.lock.
- * @available_pbn: Available bandwidth for this port. Protected by
+ * @full_pbn: Max possible bandwidth for this port. Protected by
* &drm_dp_mst_topology_mgr.base.lock.
* @next: link to next port on this branch device
* @aux: i2c aux transport to talk to device connected to this port, protected
@@ -126,7 +126,7 @@ struct drm_dp_mst_port {
u8 dpcd_rev;
u8 num_sdp_streams;
u8 num_sdp_stream_sinks;
- uint16_t available_pbn;
+ uint16_t full_pbn;
struct list_head next;
/**
* @mstb: the branch device connected to this port, if there is one.
diff --git a/include/dt-bindings/clock/imx8mn-clock.h b/include/dt-bindings/clock/imx8mn-clock.h
index 0f2b8423ce1d..65ac6eb6c733 100644
--- a/include/dt-bindings/clock/imx8mn-clock.h
+++ b/include/dt-bindings/clock/imx8mn-clock.h
@@ -122,8 +122,8 @@
#define IMX8MN_CLK_I2C1 105
#define IMX8MN_CLK_I2C2 106
#define IMX8MN_CLK_I2C3 107
-#define IMX8MN_CLK_I2C4 118
-#define IMX8MN_CLK_UART1 119
+#define IMX8MN_CLK_I2C4 108
+#define IMX8MN_CLK_UART1 109
#define IMX8MN_CLK_UART2 110
#define IMX8MN_CLK_UART3 111
#define IMX8MN_CLK_UART4 112
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 853d92ceee64..c1c0f9ea4e63 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -441,14 +441,6 @@ void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
void bio_release_pages(struct bio *bio, bool mark_dirty);
-struct rq_map_data;
-extern struct bio *bio_map_user_iov(struct request_queue *,
- struct iov_iter *, gfp_t);
-extern void bio_unmap_user(struct bio *);
-extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
- gfp_t);
-extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
- gfp_t, int);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
@@ -463,14 +455,9 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
extern void bio_copy_data(struct bio *dst, struct bio *src);
extern void bio_list_copy_data(struct bio *dst, struct bio *src);
extern void bio_free_pages(struct bio *bio);
-
-extern struct bio *bio_copy_user_iov(struct request_queue *,
- struct rq_map_data *,
- struct iov_iter *,
- gfp_t);
-extern int bio_uncopy_user(struct bio *);
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
void bio_truncate(struct bio *bio, unsigned new_size);
+void guard_bio_eod(struct bio *bio);
static inline void zero_fill_bio(struct bio *bio)
{
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 11cfd6470b1a..f389d7c724bd 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -162,7 +162,10 @@ struct blk_mq_hw_ctx {
struct dentry *sched_debugfs_dir;
#endif
- /** @hctx_list: List of all hardware queues. */
+ /**
+ * @hctx_list: if this hctx is not in use, this is an entry in
+ * q->unused_hctx_list.
+ */
struct list_head hctx_list;
/**
@@ -409,6 +412,8 @@ enum {
<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
+struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
+ void *queuedata);
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q,
bool elevator_init);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f629d40c645c..32868fbedc9e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -952,6 +952,10 @@ static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
}
#ifdef CONFIG_BLK_DEV_ZONED
+
+/* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */
+const char *blk_zone_cond_str(enum blk_zone_cond zone_cond);
+
static inline unsigned int blk_rq_zone_no(struct request *rq)
{
return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
@@ -1063,7 +1067,6 @@ extern void blk_abort_request(struct request *);
* Access functions for manipulating queue properties
*/
extern void blk_cleanup_queue(struct request_queue *);
-extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
@@ -1140,8 +1143,7 @@ extern void blk_dump_rq_flags(struct request *, char *);
extern long nr_blockdev_pages(void);
bool __must_check blk_get_queue(struct request_queue *);
-struct request_queue *blk_alloc_queue(gfp_t);
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id);
+struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id);
extern void blk_put_queue(struct request_queue *);
extern void blk_set_queue_dying(struct request_queue *);
@@ -1484,15 +1486,6 @@ static inline unsigned int block_size(struct block_device *bdev)
return bdev->bd_block_size;
}
-typedef struct {struct page *v;} Sector;
-
-unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
-
-static inline void put_dev_sector(Sector p)
-{
- put_page(p.v);
-}
-
int kblockd_schedule_work(struct work_struct *work);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
@@ -1706,6 +1699,7 @@ struct block_device_operations {
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+ char *(*devnode)(struct gendisk *disk, umode_t *mode);
struct module *owner;
const struct pr_ops *pr_ops;
};
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 49b1a70e12c8..212991f6f2a5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -160,6 +160,7 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
+int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
struct bpf_offloaded_map;
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c4458dc6a757..76371aaae2d1 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -175,9 +175,10 @@ struct ceph_msg_data {
#endif /* CONFIG_BLOCK */
struct ceph_bvec_iter bvec_pos;
struct {
- struct page **pages; /* NOT OWNER. */
+ struct page **pages;
size_t length; /* total # bytes */
unsigned int alignment; /* first page */
+ bool own_pages;
};
struct ceph_pagelist *pagelist;
};
@@ -356,8 +357,8 @@ extern void ceph_con_keepalive(struct ceph_connection *con);
extern bool ceph_con_keepalive_expired(struct ceph_connection *con,
unsigned long interval);
-extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
- size_t length, size_t alignment);
+void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
+ size_t length, size_t alignment, bool own_pages);
extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
struct ceph_pagelist *pagelist);
#ifdef CONFIG_BLOCK
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e081b56f1c1d..5e601975745f 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -37,6 +37,9 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
#define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id
together */
#define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_FULL_QUOTA (1ULL << 10) /* pool ran out of quota,
+ will set FULL too */
+#define CEPH_POOL_FLAG_NEARFULL (1ULL << 11) /* pool is nearfull */
struct ceph_pg_pool_info {
struct rb_node node;
@@ -304,5 +307,6 @@ extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
#endif
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 59bdfd470100..88ed3c5c04c5 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -143,8 +143,10 @@ extern const char *ceph_osd_state_name(int s);
/*
* osd map flag bits
*/
-#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
-#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
+#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC),
+ not set since ~luminous */
+#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC),
+ not set since ~luminous */
#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d7ddebd0cdec..e75d2191226b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -62,6 +62,7 @@ struct css_task_iter {
struct list_head *mg_tasks_head;
struct list_head *dying_tasks_head;
+ struct list_head *cur_tasks_head;
struct css_set *cur_cset;
struct css_set *cur_dcset;
struct task_struct *cur_task;
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 952ac035bab9..bd1ee9039558 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -522,9 +522,9 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
* @clk_gate_flags: gate-specific flags for this clock
* @lock: shared register lock for this clock
*/
-#define clk_hw_register_gate_parent_hw(dev, name, parent_name, flags, reg, \
+#define clk_hw_register_gate_parent_hw(dev, name, parent_hw, flags, reg, \
bit_idx, clk_gate_flags, lock) \
- __clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \
+ __clk_hw_register_gate((dev), NULL, (name), NULL, (parent_hw), \
NULL, (flags), (reg), (bit_idx), \
(clk_gate_flags), (lock))
/**
@@ -539,10 +539,10 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
* @clk_gate_flags: gate-specific flags for this clock
* @lock: shared register lock for this clock
*/
-#define clk_hw_register_gate_parent_data(dev, name, parent_name, flags, reg, \
+#define clk_hw_register_gate_parent_data(dev, name, parent_data, flags, reg, \
bit_idx, clk_gate_flags, lock) \
- __clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \
- NULL, (flags), (reg), (bit_idx), \
+ __clk_hw_register_gate((dev), NULL, (name), NULL, NULL, (parent_data), \
+ (flags), (reg), (bit_idx), \
(clk_gate_flags), (lock))
void clk_unregister_gate(struct clk *clk);
void clk_hw_unregister_gate(struct clk_hw *hw);
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index f64ca27dc210..d7bf029df737 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -69,19 +69,23 @@ struct dmar_pci_notify_info {
extern struct rw_semaphore dmar_global_lock;
extern struct list_head dmar_drhd_units;
-#define for_each_drhd_unit(drhd) \
- list_for_each_entry_rcu(drhd, &dmar_drhd_units, list)
+#define for_each_drhd_unit(drhd) \
+ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \
+ dmar_rcu_check())
#define for_each_active_drhd_unit(drhd) \
- list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \
+ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \
+ dmar_rcu_check()) \
if (drhd->ignored) {} else
#define for_each_active_iommu(i, drhd) \
- list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \
+ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \
+ dmar_rcu_check()) \
if (i=drhd->iommu, drhd->ignored) {} else
#define for_each_iommu(i, drhd) \
- list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \
+ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \
+ dmar_rcu_check()) \
if (i=drhd->iommu, 0) {} else
static inline bool dmar_rcu_check(void)
diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 0aa803c451a3..c620d9139c28 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -28,8 +28,6 @@ int dsa_8021q_rx_switch_id(u16 vid);
int dsa_8021q_rx_source_port(u16 vid);
-struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb);
-
#else
int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
@@ -64,11 +62,6 @@ int dsa_8021q_rx_source_port(u16 vid)
return 0;
}
-struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb)
-{
- return NULL;
-}
-
#endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */
#endif /* _NET_DSA_8021Q_H */
diff --git a/include/linux/edac.h b/include/linux/edac.h
index cc31b9742684..0f20b986b0ab 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -383,6 +383,9 @@ struct dimm_info {
unsigned int csrow, cschannel; /* Points to the old API data */
u16 smbios_handle; /* Handle for SMBIOS type 17 */
+
+ u32 ce_count;
+ u32 ue_count;
};
/**
@@ -442,6 +445,7 @@ struct errcount_attribute_data {
* struct edac_raw_error_desc - Raw error report structure
* @grain: minimum granularity for an error report, in bytes
* @error_count: number of errors of the same type
+ * @type: severity of the error (CE/UE/Fatal)
* @top_layer: top layer of the error (layer[0])
* @mid_layer: middle layer of the error (layer[1])
* @low_layer: low layer of the error (layer[2])
@@ -453,8 +457,6 @@ struct errcount_attribute_data {
* @location: location of the error
* @label: label of the affected DIMM(s)
* @other_detail: other driver-specific detail about the error
- * @enable_per_layer_report: if false, the error affects all layers
- * (typically, a memory controller error)
*/
struct edac_raw_error_desc {
char location[LOCATION_SIZE];
@@ -462,6 +464,7 @@ struct edac_raw_error_desc {
long grain;
u16 error_count;
+ enum hw_event_mc_err_type type;
int top_layer;
int mid_layer;
int low_layer;
@@ -470,7 +473,6 @@ struct edac_raw_error_desc {
unsigned long syndrome;
const char *msg;
const char *other_detail;
- bool enable_per_layer_report;
};
/* MEMORY controller information structure
@@ -560,7 +562,6 @@ struct mem_ctl_info {
*/
u32 ce_noinfo_count, ue_noinfo_count;
u32 ue_mc, ce_mc;
- u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
struct completion complete;
diff --git a/include/linux/file.h b/include/linux/file.h
index c6c7b24ea9f7..142d102f285e 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -85,6 +85,7 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
extern void set_close_on_exec(unsigned int fd, int flag);
extern bool get_close_on_exec(unsigned int fd);
+extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile);
extern int get_unused_fd_flags(unsigned flags);
extern void put_unused_fd(unsigned int fd);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3cd4fe6b845e..593e911d1ca0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -698,6 +698,7 @@ struct inode {
struct rcu_head i_rcu;
};
atomic64_t i_version;
+ atomic64_t i_sequence; /* see futex */
atomic_t i_count;
atomic_t i_dio_count;
atomic_t i_writecount;
@@ -2699,7 +2700,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name)
#ifdef CONFIG_BLOCK
#define BLKDEV_MAJOR_MAX 512
-extern const char *__bdevname(dev_t, char *buffer);
extern const char *bdevname(struct block_device *bdev, char *buffer);
extern struct block_device *lookup_bdev(const char *);
extern void blkdev_show(struct seq_file *,off_t);
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 5cc3fed27d4c..b70df27d7e85 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -31,23 +31,26 @@ struct task_struct;
union futex_key {
struct {
+ u64 i_seq;
unsigned long pgoff;
- struct inode *inode;
- int offset;
+ unsigned int offset;
} shared;
struct {
+ union {
+ struct mm_struct *mm;
+ u64 __tmp;
+ };
unsigned long address;
- struct mm_struct *mm;
- int offset;
+ unsigned int offset;
} private;
struct {
+ u64 ptr;
unsigned long word;
- void *ptr;
- int offset;
+ unsigned int offset;
} both;
};
-#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
+#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
#ifdef CONFIG_FUTEX
enum {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6fbe58538ad6..9b3fffdf4011 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -27,39 +27,8 @@
#define part_to_dev(part) (&((part)->__dev))
extern struct device_type part_type;
-extern struct kobject *block_depr;
extern struct class block_class;
-enum {
-/* These three have identical behaviour; use the second one if DOS FDISK gets
- confused about extended/logical partitions starting past cylinder 1023. */
- DOS_EXTENDED_PARTITION = 5,
- LINUX_EXTENDED_PARTITION = 0x85,
- WIN98_EXTENDED_PARTITION = 0x0f,
-
- SUN_WHOLE_DISK = DOS_EXTENDED_PARTITION,
-
- LINUX_SWAP_PARTITION = 0x82,
- LINUX_DATA_PARTITION = 0x83,
- LINUX_LVM_PARTITION = 0x8e,
- LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
-
- SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION,
- NEW_SOLARIS_X86_PARTITION = 0xbf,
-
- DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */
- DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */
- DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */
- EZD_PARTITION = 0x55, /* EZ-DRIVE */
-
- FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */
- OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */
- NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */
- BSDI_PARTITION = 0xb7, /* BSDI Partition ID */
- MINIX_PARTITION = 0x81, /* Minix Partition ID */
- UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */
-};
-
#define DISK_MAX_PARTS 256
#define DISK_NAME_LEN 32
@@ -70,26 +39,12 @@ enum {
#include <linux/fs.h>
#include <linux/workqueue.h>
-struct partition {
- unsigned char boot_ind; /* 0x80 - active */
- unsigned char head; /* starting head */
- unsigned char sector; /* starting sector */
- unsigned char cyl; /* starting cylinder */
- unsigned char sys_ind; /* What partition type */
- unsigned char end_head; /* end head */
- unsigned char end_sector; /* end sector */
- unsigned char end_cyl; /* end cylinder */
- __le32 start_sect; /* starting sector counting from 0 */
- __le32 nr_sects; /* nr of sectors in partition */
-} __attribute__((packed));
-
struct disk_stats {
u64 nsecs[NR_STAT_GROUPS];
unsigned long sectors[NR_STAT_GROUPS];
unsigned long ios[NR_STAT_GROUPS];
unsigned long merges[NR_STAT_GROUPS];
unsigned long io_ticks;
- unsigned long time_in_queue;
local_t in_flight[2];
};
@@ -133,17 +88,64 @@ struct hd_struct {
struct rcu_work rcu_work;
};
-#define GENHD_FL_REMOVABLE 1
-/* 2 is unused */
-#define GENHD_FL_MEDIA_CHANGE_NOTIFY 4
-#define GENHD_FL_CD 8
-#define GENHD_FL_UP 16
-#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
-#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
-#define GENHD_FL_NATIVE_CAPACITY 128
-#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256
-#define GENHD_FL_NO_PART_SCAN 512
-#define GENHD_FL_HIDDEN 1024
+/**
+ * DOC: genhd capability flags
+ *
+ * ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device
+ * gives access to removable media.
+ * When set, the device remains present even when media is not
+ * inserted.
+ * Must not be set for devices which are removed entirely when the
+ * media is removed.
+ *
+ * ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style
+ * device.
+ * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl.
+ *
+ * ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up",
+ * with a similar meaning to network interfaces.
+ *
+ * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include
+ * partition information in ``/proc/partitions`` or in the output of
+ * printk_all_partitions().
+ * Used for the null block device and some MMC devices.
+ *
+ * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended
+ * dynamic ``dev_t``, i.e. it wants extended device numbers
+ * (``BLOCK_EXT_MAJOR``).
+ * This affects the maximum number of partitions.
+ *
+ * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the
+ * partition table, the device's capacity has been extended to its
+ * native capacity; i.e. the device has hidden capacity used by one
+ * of the partitions (this is a flag used so that native capacity is
+ * only ever unlocked once).
+ *
+ * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is
+ * blocked whenever a writer holds an exclusive lock.
+ *
+ * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled.
+ * Used for loop devices in their default settings and some MMC
+ * devices.
+ *
+ * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it
+ * doesn't produce events, doesn't appear in sysfs, and doesn't have
+ * an associated ``bdev``.
+ * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and
+ * ``GENHD_FL_NO_PART_SCAN``.
+ * Used for multipath devices.
+ */
+#define GENHD_FL_REMOVABLE 0x0001
+/* 2 is unused (used to be GENHD_FL_DRIVERFS) */
+/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
+#define GENHD_FL_CD 0x0008
+#define GENHD_FL_UP 0x0010
+#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020
+#define GENHD_FL_EXT_DEVT 0x0040
+#define GENHD_FL_NATIVE_CAPACITY 0x0080
+#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100
+#define GENHD_FL_NO_PART_SCAN 0x0200
+#define GENHD_FL_HIDDEN 0x0400
enum {
DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
@@ -189,7 +191,6 @@ struct gendisk {
* disks that can't be partitioned. */
char disk_name[DISK_NAME_LEN]; /* name of major driver */
- char *(*devnode)(struct gendisk *gd, umode_t *mode);
unsigned short events; /* supported events */
unsigned short event_flags; /* flags related to event processing */
@@ -245,18 +246,6 @@ static inline bool disk_part_scan_enabled(struct gendisk *disk)
!(disk->flags & GENHD_FL_NO_PART_SCAN);
}
-static inline bool disk_has_partitions(struct gendisk *disk)
-{
- bool ret = false;
-
- rcu_read_lock();
- if (rcu_dereference(disk->part_tbl)->len > 1)
- ret = true;
- rcu_read_unlock();
-
- return ret;
-}
-
static inline dev_t disk_devt(struct gendisk *disk)
{
return MKDEV(disk->major, disk->first_minor);
@@ -295,143 +284,7 @@ extern void disk_part_iter_init(struct disk_part_iter *piter,
struct gendisk *disk, unsigned int flags);
extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter);
extern void disk_part_iter_exit(struct disk_part_iter *piter);
-
-extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
- sector_t sector);
-
-/*
- * Macros to operate on percpu disk statistics:
- *
- * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters
- * and should be called between disk_stat_lock() and
- * disk_stat_unlock().
- *
- * part_stat_read() can be called at any time.
- *
- * part_stat_{add|set_all}() and {init|free}_part_stats are for
- * internal use only.
- */
-#ifdef CONFIG_SMP
-#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); })
-#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0)
-
-#define part_stat_get_cpu(part, field, cpu) \
- (per_cpu_ptr((part)->dkstats, (cpu))->field)
-
-#define part_stat_get(part, field) \
- part_stat_get_cpu(part, field, smp_processor_id())
-
-#define part_stat_read(part, field) \
-({ \
- typeof((part)->dkstats->field) res = 0; \
- unsigned int _cpu; \
- for_each_possible_cpu(_cpu) \
- res += per_cpu_ptr((part)->dkstats, _cpu)->field; \
- res; \
-})
-
-static inline void part_stat_set_all(struct hd_struct *part, int value)
-{
- int i;
-
- for_each_possible_cpu(i)
- memset(per_cpu_ptr(part->dkstats, i), value,
- sizeof(struct disk_stats));
-}
-
-static inline int init_part_stats(struct hd_struct *part)
-{
- part->dkstats = alloc_percpu(struct disk_stats);
- if (!part->dkstats)
- return 0;
- return 1;
-}
-
-static inline void free_part_stats(struct hd_struct *part)
-{
- free_percpu(part->dkstats);
-}
-
-#else /* !CONFIG_SMP */
-#define part_stat_lock() ({ rcu_read_lock(); 0; })
-#define part_stat_unlock() rcu_read_unlock()
-
-#define part_stat_get(part, field) ((part)->dkstats.field)
-#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field)
-#define part_stat_read(part, field) part_stat_get(part, field)
-
-static inline void part_stat_set_all(struct hd_struct *part, int value)
-{
- memset(&part->dkstats, value, sizeof(struct disk_stats));
-}
-
-static inline int init_part_stats(struct hd_struct *part)
-{
- return 1;
-}
-
-static inline void free_part_stats(struct hd_struct *part)
-{
-}
-
-#endif /* CONFIG_SMP */
-
-#define part_stat_read_msecs(part, which) \
- div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
-
-#define part_stat_read_accum(part, field) \
- (part_stat_read(part, field[STAT_READ]) + \
- part_stat_read(part, field[STAT_WRITE]) + \
- part_stat_read(part, field[STAT_DISCARD]))
-
-#define __part_stat_add(part, field, addnd) \
- (part_stat_get(part, field) += (addnd))
-
-#define part_stat_add(part, field, addnd) do { \
- __part_stat_add((part), field, addnd); \
- if ((part)->partno) \
- __part_stat_add(&part_to_disk((part))->part0, \
- field, addnd); \
-} while (0)
-
-#define part_stat_dec(gendiskp, field) \
- part_stat_add(gendiskp, field, -1)
-#define part_stat_inc(gendiskp, field) \
- part_stat_add(gendiskp, field, 1)
-#define part_stat_sub(gendiskp, field, subnd) \
- part_stat_add(gendiskp, field, -subnd)
-
-#define part_stat_local_dec(gendiskp, field) \
- local_dec(&(part_stat_get(gendiskp, field)))
-#define part_stat_local_inc(gendiskp, field) \
- local_inc(&(part_stat_get(gendiskp, field)))
-#define part_stat_local_read(gendiskp, field) \
- local_read(&(part_stat_get(gendiskp, field)))
-#define part_stat_local_read_cpu(gendiskp, field, cpu) \
- local_read(&(part_stat_get_cpu(gendiskp, field, cpu)))
-
-unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part);
-void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2]);
-void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
- int rw);
-void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
- int rw);
-
-static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
-{
- if (disk)
- return kzalloc_node(sizeof(struct partition_meta_info),
- GFP_KERNEL, disk->node_id);
- return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL);
-}
-
-static inline void free_part_info(struct hd_struct *part)
-{
- kfree(part->info);
-}
-
-void update_io_ticks(struct hd_struct *part, unsigned long now);
+extern bool disk_has_partitions(struct gendisk *disk);
/* block/genhd.c */
extern void device_add_disk(struct device *parent, struct gendisk *disk,
@@ -461,6 +314,8 @@ static inline int get_disk_ro(struct gendisk *disk)
extern void disk_block_events(struct gendisk *disk);
extern void disk_unblock_events(struct gendisk *disk);
extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
+extern void set_capacity_revalidate_and_notify(struct gendisk *disk,
+ sector_t size, bool revalidate);
extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
/* drivers/char/random.c */
@@ -480,170 +335,11 @@ static inline void set_capacity(struct gendisk *disk, sector_t size)
disk->part0.nr_sects = size;
}
-#ifdef CONFIG_SOLARIS_X86_PARTITION
-
-#define SOLARIS_X86_NUMSLICE 16
-#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL)
-
-struct solaris_x86_slice {
- __le16 s_tag; /* ID tag of partition */
- __le16 s_flag; /* permission flags */
- __le32 s_start; /* start sector no of partition */
- __le32 s_size; /* # of blocks in partition */
-};
-
-struct solaris_x86_vtoc {
- unsigned int v_bootinfo[3]; /* info needed by mboot (unsupported) */
- __le32 v_sanity; /* to verify vtoc sanity */
- __le32 v_version; /* layout version */
- char v_volume[8]; /* volume name */
- __le16 v_sectorsz; /* sector size in bytes */
- __le16 v_nparts; /* number of partitions */
- unsigned int v_reserved[10]; /* free space */
- struct solaris_x86_slice
- v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */
- unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */
- char v_asciilabel[128]; /* for compatibility */
-};
-
-#endif /* CONFIG_SOLARIS_X86_PARTITION */
-
-#ifdef CONFIG_BSD_DISKLABEL
-/*
- * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il>
- * updated by Marc Espie <Marc.Espie@openbsd.org>
- */
-
-/* check against BSD src/sys/sys/disklabel.h for consistency */
-
-#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */
-#define BSD_MAXPARTITIONS 16
-#define OPENBSD_MAXPARTITIONS 16
-#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */
-struct bsd_disklabel {
- __le32 d_magic; /* the magic number */
- __s16 d_type; /* drive type */
- __s16 d_subtype; /* controller/d_type specific */
- char d_typename[16]; /* type name, e.g. "eagle" */
- char d_packname[16]; /* pack identifier */
- __u32 d_secsize; /* # of bytes per sector */
- __u32 d_nsectors; /* # of data sectors per track */
- __u32 d_ntracks; /* # of tracks per cylinder */
- __u32 d_ncylinders; /* # of data cylinders per unit */
- __u32 d_secpercyl; /* # of data sectors per cylinder */
- __u32 d_secperunit; /* # of data sectors per unit */
- __u16 d_sparespertrack; /* # of spare sectors per track */
- __u16 d_sparespercyl; /* # of spare sectors per cylinder */
- __u32 d_acylinders; /* # of alt. cylinders per unit */
- __u16 d_rpm; /* rotational speed */
- __u16 d_interleave; /* hardware sector interleave */
- __u16 d_trackskew; /* sector 0 skew, per track */
- __u16 d_cylskew; /* sector 0 skew, per cylinder */
- __u32 d_headswitch; /* head switch time, usec */
- __u32 d_trkseek; /* track-to-track seek, usec */
- __u32 d_flags; /* generic flags */
-#define NDDATA 5
- __u32 d_drivedata[NDDATA]; /* drive-type specific information */
-#define NSPARE 5
- __u32 d_spare[NSPARE]; /* reserved for future use */
- __le32 d_magic2; /* the magic number (again) */
- __le16 d_checksum; /* xor of data incl. partitions */
-
- /* filesystem and partition information: */
- __le16 d_npartitions; /* number of partitions in following */
- __le32 d_bbsize; /* size of boot area at sn0, bytes */
- __le32 d_sbsize; /* max size of fs superblock, bytes */
- struct bsd_partition { /* the partition table */
- __le32 p_size; /* number of sectors in partition */
- __le32 p_offset; /* starting sector */
- __le32 p_fsize; /* filesystem basic fragment size */
- __u8 p_fstype; /* filesystem type, see below */
- __u8 p_frag; /* filesystem fragments per block */
- __le16 p_cpg; /* filesystem cylinders per group */
- } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */
-};
-
-#endif /* CONFIG_BSD_DISKLABEL */
-
-#ifdef CONFIG_UNIXWARE_DISKLABEL
-/*
- * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
- * and Krzysztof G. Baranowski <kgb@knm.org.pl>
- */
-
-#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */
-#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */
-#define UNIXWARE_NUMSLICE 16
-#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */
-
-struct unixware_slice {
- __le16 s_label; /* label */
- __le16 s_flags; /* permission flags */
- __le32 start_sect; /* starting sector */
- __le32 nr_sects; /* number of sectors in slice */
-};
-
-struct unixware_disklabel {
- __le32 d_type; /* drive type */
- __le32 d_magic; /* the magic number */
- __le32 d_version; /* version number */
- char d_serial[12]; /* serial number of the device */
- __le32 d_ncylinders; /* # of data cylinders per device */
- __le32 d_ntracks; /* # of tracks per cylinder */
- __le32 d_nsectors; /* # of data sectors per track */
- __le32 d_secsize; /* # of bytes per sector */
- __le32 d_part_start; /* # of first sector of this partition */
- __le32 d_unknown1[12]; /* ? */
- __le32 d_alt_tbl; /* byte offset of alternate table */
- __le32 d_alt_len; /* byte length of alternate table */
- __le32 d_phys_cyl; /* # of physical cylinders per device */
- __le32 d_phys_trk; /* # of physical tracks per cylinder */
- __le32 d_phys_sec; /* # of physical sectors per track */
- __le32 d_phys_bytes; /* # of physical bytes per sector */
- __le32 d_unknown2; /* ? */
- __le32 d_unknown3; /* ? */
- __le32 d_pad[8]; /* pad */
-
- struct unixware_vtoc {
- __le32 v_magic; /* the magic number */
- __le32 v_version; /* version number */
- char v_name[8]; /* volume name */
- __le16 v_nslices; /* # of slices */
- __le16 v_unknown1; /* ? */
- __le32 v_reserved[10]; /* reserved */
- struct unixware_slice
- v_slice[UNIXWARE_NUMSLICE]; /* slice headers */
- } vtoc;
-
-}; /* 408 */
-
-#endif /* CONFIG_UNIXWARE_DISKLABEL */
-
-#ifdef CONFIG_MINIX_SUBPARTITION
-# define MINIX_NR_SUBPARTITIONS 4
-#endif /* CONFIG_MINIX_SUBPARTITION */
-
-#define ADDPART_FLAG_NONE 0
-#define ADDPART_FLAG_RAID 1
-#define ADDPART_FLAG_WHOLEDISK 2
-
-extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
-extern void blk_free_devt(dev_t devt);
-extern void blk_invalidate_devt(dev_t devt);
extern dev_t blk_lookup_devt(const char *name, int partno);
-extern char *disk_name (struct gendisk *hd, int partno, char *buf);
int bdev_disk_changed(struct block_device *bdev, bool invalidate);
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev);
int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev);
-extern int disk_expand_part_tbl(struct gendisk *disk, int target);
-extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
- int partno, sector_t start,
- sector_t len, int flags,
- struct partition_meta_info
- *info);
-extern void __delete_partition(struct percpu_ref *);
-extern void delete_partition(struct gendisk *, int);
extern void printk_all_partitions(void);
extern struct gendisk *__alloc_disk_node(int minors, int node_id);
@@ -657,20 +353,6 @@ extern void blk_register_region(dev_t devt, unsigned long range,
void *data);
extern void blk_unregister_region(dev_t devt, unsigned long range);
-extern ssize_t part_size_show(struct device *dev,
- struct device_attribute *attr, char *buf);
-extern ssize_t part_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf);
-extern ssize_t part_inflight_show(struct device *dev,
- struct device_attribute *attr, char *buf);
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-extern ssize_t part_fail_show(struct device *dev,
- struct device_attribute *attr, char *buf);
-extern ssize_t part_fail_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count);
-#endif /* CONFIG_FAIL_MAKE_REQUEST */
-
#define alloc_disk_node(minors, node_id) \
({ \
static struct lock_class_key __key; \
@@ -689,100 +371,6 @@ extern ssize_t part_fail_store(struct device *dev,
#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
-static inline int hd_ref_init(struct hd_struct *part)
-{
- if (percpu_ref_init(&part->ref, __delete_partition, 0,
- GFP_KERNEL))
- return -ENOMEM;
- return 0;
-}
-
-static inline void hd_struct_get(struct hd_struct *part)
-{
- percpu_ref_get(&part->ref);
-}
-
-static inline int hd_struct_try_get(struct hd_struct *part)
-{
- return percpu_ref_tryget_live(&part->ref);
-}
-
-static inline void hd_struct_put(struct hd_struct *part)
-{
- percpu_ref_put(&part->ref);
-}
-
-static inline void hd_struct_kill(struct hd_struct *part)
-{
- percpu_ref_kill(&part->ref);
-}
-
-static inline void hd_free_part(struct hd_struct *part)
-{
- free_part_stats(part);
- free_part_info(part);
- percpu_ref_exit(&part->ref);
-}
-
-/*
- * Any access of part->nr_sects which is not protected by partition
- * bd_mutex or gendisk bdev bd_mutex, should be done using this
- * accessor function.
- *
- * Code written along the lines of i_size_read() and i_size_write().
- * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption
- * on.
- */
-static inline sector_t part_nr_sects_read(struct hd_struct *part)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- sector_t nr_sects;
- unsigned seq;
- do {
- seq = read_seqcount_begin(&part->nr_sects_seq);
- nr_sects = part->nr_sects;
- } while (read_seqcount_retry(&part->nr_sects_seq, seq));
- return nr_sects;
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
- sector_t nr_sects;
-
- preempt_disable();
- nr_sects = part->nr_sects;
- preempt_enable();
- return nr_sects;
-#else
- return part->nr_sects;
-#endif
-}
-
-/*
- * Should be called with mutex lock held (typically bd_mutex) of partition
- * to provide mutual exlusion among writers otherwise seqcount might be
- * left in wrong state leaving the readers spinning infinitely.
- */
-static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- write_seqcount_begin(&part->nr_sects_seq);
- part->nr_sects = size;
- write_seqcount_end(&part->nr_sects_seq);
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
- preempt_disable();
- part->nr_sects = size;
- preempt_enable();
-#else
- part->nr_sects = size;
-#endif
-}
-
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-extern void blk_integrity_add(struct gendisk *);
-extern void blk_integrity_del(struct gendisk *);
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-static inline void blk_integrity_add(struct gendisk *disk) { }
-static inline void blk_integrity_del(struct gendisk *disk) { }
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
#else /* CONFIG_BLOCK */
static inline void printk_all_partitions(void) { }
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index f834687989f7..f6b942150631 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -506,7 +506,7 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
* @smbus_xfer_atomic: same as @smbus_xfer. Yet, only using atomic context
* so e.g. PMICs can be accessed very late before shutdown. Optional.
* @functionality: Return the flags that this algorithm/adapter pair supports
- * from the I2C_FUNC_* flags.
+ * from the ``I2C_FUNC_*`` flags.
* @reg_slave: Register given client to I2C slave mode of this adapter
* @unreg_slave: Unregister given client from I2C slave mode of this adapter
*
@@ -515,7 +515,7 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
* be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584
* to name two of the most common.
*
- * The return codes from the @master_xfer{_atomic} fields should indicate the
+ * The return codes from the ``master_xfer{_atomic}`` fields should indicate the
* type of error code that occurred during the transfer, as documented in the
* Kernel Documentation file Documentation/i2c/fault-codes.rst.
*/
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7d3f2ced92d1..73c66a3a33ae 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2102,14 +2102,14 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
{
struct ieee80211_he_spr *he_spr = (void *)he_spr_ie;
u8 spr_len = sizeof(struct ieee80211_he_spr);
- u32 he_spr_params;
+ u8 he_spr_params;
/* Make sure the input is not NULL */
if (!he_spr_ie)
return 0;
/* Calc required length */
- he_spr_params = le32_to_cpu(he_spr->he_sr_control);
+ he_spr_params = he_spr->he_sr_control;
if (he_spr_params & IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
spr_len++;
if (he_spr_params & IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT)
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 39faaaf843e1..c91cf2dee12a 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -2,15 +2,10 @@
#ifndef _INET_DIAG_H_
#define _INET_DIAG_H_ 1
+#include <net/netlink.h>
#include <uapi/linux/inet_diag.h>
-struct net;
-struct sock;
struct inet_hashinfo;
-struct nlattr;
-struct nlmsghdr;
-struct sk_buff;
-struct netlink_callback;
struct inet_diag_handler {
void (*dump)(struct sk_buff *skb,
@@ -62,6 +57,17 @@ int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
+static inline size_t inet_diag_msg_attrs_size(void)
+{
+ return nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+ + nla_total_size(1) /* INET_DIAG_TOS */
+#if IS_ENABLED(CONFIG_IPV6)
+ + nla_total_size(1) /* INET_DIAG_TCLASS */
+ + nla_total_size(1) /* INET_DIAG_SKV6ONLY */
+#endif
+ + nla_total_size(4) /* INET_DIAG_MARK */
+ + nla_total_size(4); /* INET_DIAG_CLASS_ID */
+}
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
struct inet_diag_msg *r, int ext,
struct user_namespace *user_ns, bool net_admin);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4a16b39ae353..980234ae0312 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -123,6 +123,8 @@
#define dmar_readq(a) readq(a)
#define dmar_writeq(a,v) writeq(v,a)
+#define dmar_readl(a) readl(a)
+#define dmar_writel(a, v) writel(v, a)
#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4)
#define DMAR_VER_MINOR(v) ((v) & 0x0f)
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 837058bc1c9f..b336622612f3 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -16,7 +16,7 @@
* The io_mapping mechanism provides an abstraction for mapping
* individual pages from an io device to the CPU in an efficient fashion.
*
- * See Documentation/io-mapping.txt
+ * See Documentation/driver-api/io-mapping.rst
*/
struct io_mapping {
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index dba15ca8e60b..1dcd9198beb7 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -8,6 +8,7 @@
enum {
ICQ_EXITED = 1 << 2,
+ ICQ_DESTROYED = 1 << 3,
};
/*
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2ca9b7056a82..cffa4714bfa8 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -57,8 +57,6 @@
#define VPRINTK(fmt, args...)
#endif /* ATA_DEBUG */
-#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-
#define ata_print_version_once(dev, version) \
({ \
static bool __print_once; \
@@ -176,6 +174,7 @@ enum {
ATA_DEV_NONE = 11, /* no device */
/* struct ata_link flags */
+ /* NOTE: struct ata_force_param currently stores lflags in u16 */
ATA_LFLAG_NO_HRST = (1 << 1), /* avoid hardreset */
ATA_LFLAG_NO_SRST = (1 << 2), /* avoid softreset */
ATA_LFLAG_ASSUME_ATA = (1 << 3), /* assume ATA class */
@@ -531,12 +530,14 @@ typedef int (*ata_reset_fn_t)(struct ata_link *link, unsigned int *classes,
unsigned long deadline);
typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes);
-extern struct device_attribute dev_attr_link_power_management_policy;
extern struct device_attribute dev_attr_unload_heads;
+#ifdef CONFIG_SATA_HOST
+extern struct device_attribute dev_attr_link_power_management_policy;
extern struct device_attribute dev_attr_ncq_prio_enable;
extern struct device_attribute dev_attr_em_message_type;
extern struct device_attribute dev_attr_em_message;
extern struct device_attribute dev_attr_sw_activity;
+#endif
enum sw_activity {
OFF,
@@ -1020,10 +1021,6 @@ struct ata_timing {
/*
* Core layer - drivers/ata/libata-core.c
*/
-extern const unsigned long sata_deb_timing_normal[];
-extern const unsigned long sata_deb_timing_hotplug[];
-extern const unsigned long sata_deb_timing_long[];
-
extern struct ata_port_operations ata_dummy_port_ops;
extern const struct ata_port_info ata_dummy_port_info;
@@ -1061,33 +1058,14 @@ static inline int is_multi_taskfile(struct ata_taskfile *tf)
(tf->command == ATA_CMD_WRITE_MULTI_FUA_EXT);
}
-static inline const unsigned long *
-sata_ehc_deb_timing(struct ata_eh_context *ehc)
-{
- if (ehc->i.flags & ATA_EHI_HOTPLUGGED)
- return sata_deb_timing_hotplug;
- else
- return sata_deb_timing_normal;
-}
-
static inline int ata_port_is_dummy(struct ata_port *ap)
{
return ap->ops == &ata_dummy_port_ops;
}
-extern int sata_set_spd(struct ata_link *link);
extern int ata_std_prereset(struct ata_link *link, unsigned long deadline);
extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
int (*check_ready)(struct ata_link *link));
-extern int sata_link_debounce(struct ata_link *link,
- const unsigned long *params, unsigned long deadline);
-extern int sata_link_resume(struct ata_link *link, const unsigned long *params,
- unsigned long deadline);
-extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
- bool spm_wakeup);
-extern int sata_link_hardreset(struct ata_link *link,
- const unsigned long *timing, unsigned long deadline,
- bool *online, int (*check_ready)(struct ata_link *));
extern int sata_std_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline);
extern void ata_std_postreset(struct ata_link *link, unsigned int *classes);
@@ -1095,7 +1073,6 @@ extern void ata_std_postreset(struct ata_link *link, unsigned int *classes);
extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports);
extern struct ata_host *ata_host_alloc_pinfo(struct device *dev,
const struct ata_port_info * const * ppi, int n_ports);
-extern int ata_slave_link_init(struct ata_port *ap);
extern void ata_host_get(struct ata_host *host);
extern void ata_host_put(struct ata_host *host);
extern int ata_host_start(struct ata_host *host);
@@ -1117,22 +1094,6 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd,
extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd);
extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev,
unsigned int cmd, void __user *arg);
-extern void ata_sas_port_destroy(struct ata_port *);
-extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
- struct ata_port_info *, struct Scsi_Host *);
-extern void ata_sas_async_probe(struct ata_port *ap);
-extern int ata_sas_sync_probe(struct ata_port *ap);
-extern int ata_sas_port_init(struct ata_port *);
-extern int ata_sas_port_start(struct ata_port *ap);
-extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
-extern void ata_sas_tport_delete(struct ata_port *ap);
-extern void ata_sas_port_stop(struct ata_port *ap);
-extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
-extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
-extern int sata_scr_valid(struct ata_link *link);
-extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
-extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
-extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
extern bool ata_link_online(struct ata_link *link);
extern bool ata_link_offline(struct ata_link *link);
#ifdef CONFIG_PM
@@ -1153,9 +1114,6 @@ extern void ata_msleep(struct ata_port *ap, unsigned int msecs);
extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask,
u32 val, unsigned long interval, unsigned long timeout);
extern int atapi_cmd_type(u8 opcode);
-extern void ata_tf_to_fis(const struct ata_taskfile *tf,
- u8 pmp, int is_cmd, u8 *fis);
-extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf);
extern unsigned long ata_pack_xfermask(unsigned long pio_mask,
unsigned long mwdma_mask, unsigned long udma_mask);
extern void ata_unpack_xfermask(unsigned long xfer_mask,
@@ -1179,7 +1137,6 @@ extern void ata_id_c_string(const u16 *id, unsigned char *s,
extern unsigned int ata_do_dev_read_id(struct ata_device *dev,
struct ata_taskfile *tf, u16 *id);
extern void ata_qc_complete(struct ata_queued_cmd *qc);
-extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active);
extern u64 ata_qc_get_active(struct ata_port *ap);
extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
extern int ata_std_bios_param(struct scsi_device *sdev,
@@ -1196,7 +1153,96 @@ extern struct ata_device *ata_dev_pair(struct ata_device *adev);
extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev);
extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap);
extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q);
+
+/*
+ * SATA specific code - drivers/ata/libata-sata.c
+ */
+#ifdef CONFIG_SATA_HOST
+extern const unsigned long sata_deb_timing_normal[];
+extern const unsigned long sata_deb_timing_hotplug[];
+extern const unsigned long sata_deb_timing_long[];
+
+static inline const unsigned long *
+sata_ehc_deb_timing(struct ata_eh_context *ehc)
+{
+ if (ehc->i.flags & ATA_EHI_HOTPLUGGED)
+ return sata_deb_timing_hotplug;
+ else
+ return sata_deb_timing_normal;
+}
+
+extern int sata_scr_valid(struct ata_link *link);
+extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
+extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
+extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
+extern int sata_set_spd(struct ata_link *link);
+extern int sata_link_hardreset(struct ata_link *link,
+ const unsigned long *timing, unsigned long deadline,
+ bool *online, int (*check_ready)(struct ata_link *));
+extern int sata_link_resume(struct ata_link *link, const unsigned long *params,
+ unsigned long deadline);
+extern void ata_eh_analyze_ncq_error(struct ata_link *link);
+#else
+static inline const unsigned long *
+sata_ehc_deb_timing(struct ata_eh_context *ehc)
+{
+ return NULL;
+}
+static inline int sata_scr_valid(struct ata_link *link) { return 0; }
+static inline int sata_scr_read(struct ata_link *link, int reg, u32 *val)
+{
+ return -EOPNOTSUPP;
+}
+static inline int sata_scr_write(struct ata_link *link, int reg, u32 val)
+{
+ return -EOPNOTSUPP;
+}
+static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
+{
+ return -EOPNOTSUPP;
+}
+static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; }
+static inline int sata_link_hardreset(struct ata_link *link,
+ const unsigned long *timing,
+ unsigned long deadline,
+ bool *online,
+ int (*check_ready)(struct ata_link *))
+{
+ if (online)
+ *online = false;
+ return -EOPNOTSUPP;
+}
+static inline int sata_link_resume(struct ata_link *link,
+ const unsigned long *params,
+ unsigned long deadline)
+{
+ return -EOPNOTSUPP;
+}
+static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { }
+#endif
+extern int sata_link_debounce(struct ata_link *link,
+ const unsigned long *params, unsigned long deadline);
+extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
+ bool spm_wakeup);
+extern int ata_slave_link_init(struct ata_port *ap);
+extern void ata_sas_port_destroy(struct ata_port *);
+extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
+ struct ata_port_info *, struct Scsi_Host *);
+extern void ata_sas_async_probe(struct ata_port *ap);
+extern int ata_sas_sync_probe(struct ata_port *ap);
+extern int ata_sas_port_init(struct ata_port *);
+extern int ata_sas_port_start(struct ata_port *ap);
+extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
+extern void ata_sas_tport_delete(struct ata_port *ap);
+extern void ata_sas_port_stop(struct ata_port *ap);
+extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
+extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
+extern void ata_tf_to_fis(const struct ata_taskfile *tf,
+ u8 pmp, int is_cmd, u8 *fis);
+extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf);
+extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active);
extern bool sata_lpm_ignore_phy_events(struct ata_link *link);
+extern int sata_async_notification(struct ata_port *ap);
extern int ata_cable_40wire(struct ata_port *ap);
extern int ata_cable_80wire(struct ata_port *ap);
@@ -1206,12 +1252,6 @@ extern int ata_cable_unknown(struct ata_port *ap);
/* Timing helpers */
extern unsigned int ata_pio_need_iordy(const struct ata_device *);
-extern const struct ata_timing *ata_timing_find_mode(u8 xfer_mode);
-extern int ata_timing_compute(struct ata_device *, unsigned short,
- struct ata_timing *, int, int);
-extern void ata_timing_merge(const struct ata_timing *,
- const struct ata_timing *, struct ata_timing *,
- unsigned int);
extern u8 ata_timing_cycle2mode(unsigned int xfer_shift, int cycle);
/* PCI */
@@ -1295,14 +1335,12 @@ extern void ata_port_wait_eh(struct ata_port *ap);
extern int ata_link_abort(struct ata_link *link);
extern int ata_port_abort(struct ata_port *ap);
extern int ata_port_freeze(struct ata_port *ap);
-extern int sata_async_notification(struct ata_port *ap);
extern void ata_eh_freeze_port(struct ata_port *ap);
extern void ata_eh_thaw_port(struct ata_port *ap);
extern void ata_eh_qc_complete(struct ata_queued_cmd *qc);
extern void ata_eh_qc_retry(struct ata_queued_cmd *qc);
-extern void ata_eh_analyze_ncq_error(struct ata_link *link);
extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
@@ -1343,7 +1381,7 @@ extern struct device_attribute *ata_common_sdev_attrs[];
* edge driver's module reference, otherwise the driver can be unloaded
* even if the scsi_device is being accessed.
*/
-#define ATA_BASE_SHT(drv_name) \
+#define __ATA_BASE_SHT(drv_name) \
.module = THIS_MODULE, \
.name = drv_name, \
.ioctl = ata_scsi_ioctl, \
@@ -1357,12 +1395,20 @@ extern struct device_attribute *ata_common_sdev_attrs[];
.slave_configure = ata_scsi_slave_config, \
.slave_destroy = ata_scsi_slave_destroy, \
.bios_param = ata_std_bios_param, \
- .unlock_native_capacity = ata_scsi_unlock_native_capacity, \
+ .unlock_native_capacity = ata_scsi_unlock_native_capacity
+
+#define ATA_BASE_SHT(drv_name) \
+ __ATA_BASE_SHT(drv_name), \
.sdev_attrs = ata_common_sdev_attrs
+#ifdef CONFIG_SATA_HOST
+extern struct device_attribute *ata_ncq_sdev_attrs[];
+
#define ATA_NCQ_SHT(drv_name) \
- ATA_BASE_SHT(drv_name), \
+ __ATA_BASE_SHT(drv_name), \
+ .sdev_attrs = ata_ncq_sdev_attrs, \
.change_queue_depth = ata_scsi_change_queue_depth
+#endif
/*
* PMP helpers
@@ -1635,6 +1681,8 @@ extern struct ata_device *ata_dev_next(struct ata_device *dev,
*/
static inline int ata_ncq_enabled(struct ata_device *dev)
{
+ if (!IS_ENABLED(CONFIG_SATA_HOST))
+ return 0;
return (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ_OFF |
ATA_DFLAG_NCQ)) == ATA_DFLAG_NCQ;
}
@@ -1804,6 +1852,16 @@ static inline int ata_dma_enabled(struct ata_device *adev)
}
/**************************************************************************
+ * PATA timings - drivers/ata/libata-pata-timings.c
+ */
+extern const struct ata_timing *ata_timing_find_mode(u8 xfer_mode);
+extern int ata_timing_compute(struct ata_device *, unsigned short,
+ struct ata_timing *, int, int);
+extern void ata_timing_merge(const struct ata_timing *,
+ const struct ata_timing *, struct ata_timing *,
+ unsigned int);
+
+/**************************************************************************
* PMP - drivers/ata/libata-pmp.c
*/
#ifdef CONFIG_SATA_PMP
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a7a0a1a5c8d5..e9ba01336d4e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -695,6 +695,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
+void mod_memcg_obj_state(void *p, int idx, int val);
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
@@ -1123,6 +1124,10 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
__mod_node_page_state(page_pgdat(page), idx, val);
}
+static inline void mod_memcg_obj_state(void *p, int idx, int val)
+{
+}
+
static inline
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
@@ -1427,6 +1432,8 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
return memcg ? memcg->kmemcg_id : -1;
}
+struct mem_cgroup *mem_cgroup_from_obj(void *p);
+
#else
static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
@@ -1468,6 +1475,11 @@ static inline void memcg_put_cache_ids(void)
{
}
+static inline struct mem_cgroup *mem_cgroup_from_obj(void *p)
+{
+ return NULL;
+}
+
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index ba703384bea0..4c5eb3aa8e72 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -333,6 +333,7 @@ struct mmc_host {
MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | \
MMC_CAP_UHS_DDR50)
#define MMC_CAP_SYNC_RUNTIME_PM (1 << 21) /* Synced runtime PM suspends. */
+#define MMC_CAP_NEED_RSP_BUSY (1 << 22) /* Commands with R1B can't use R1. */
#define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */
#define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */
#define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */
diff --git a/include/linux/msdos_partition.h b/include/linux/msdos_partition.h
new file mode 100644
index 000000000000..2cb82db2a43c
--- /dev/null
+++ b/include/linux/msdos_partition.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MSDOS_PARTITION_H
+#define _LINUX_MSDOS_PARTITION_H
+
+#define MSDOS_LABEL_MAGIC 0xAA55
+
+struct msdos_partition {
+ u8 boot_ind; /* 0x80 - active */
+ u8 head; /* starting head */
+ u8 sector; /* starting sector */
+ u8 cyl; /* starting cylinder */
+ u8 sys_ind; /* What partition type */
+ u8 end_head; /* end head */
+ u8 end_sector; /* end sector */
+ u8 end_cyl; /* end cylinder */
+ __le32 start_sect; /* starting sector counting from 0 */
+ __le32 nr_sects; /* nr of sectors in partition */
+} __packed;
+
+enum msdos_sys_ind {
+ /*
+ * These three have identical behaviour; use the second one if DOS FDISK
+ * gets confused about extended/logical partitions starting past
+ * cylinder 1023.
+ */
+ DOS_EXTENDED_PARTITION = 5,
+ LINUX_EXTENDED_PARTITION = 0x85,
+ WIN98_EXTENDED_PARTITION = 0x0f,
+
+ LINUX_DATA_PARTITION = 0x83,
+ LINUX_LVM_PARTITION = 0x8e,
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+
+ SOLARIS_X86_PARTITION = 0x82, /* also Linux swap partitions */
+ NEW_SOLARIS_X86_PARTITION = 0xbf,
+
+ DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */
+ DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */
+ DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */
+ EZD_PARTITION = 0x55, /* EZ-DRIVE */
+
+ FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */
+ OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */
+ NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */
+ BSDI_PARTITION = 0xb7, /* BSDI Partition ID */
+ MINIX_PARTITION = 0x81, /* Minix Partition ID */
+ UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */
+};
+
+#endif /* _LINUX_MSDOS_PARTITION_H */
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 205fa7b1f07a..60739d0cbf93 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -115,6 +115,19 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
{
u64 __cookie = cookie;
+ if (!extack)
+ return;
+ memcpy(extack->cookie, &__cookie, sizeof(__cookie));
+ extack->cookie_len = sizeof(__cookie);
+}
+
+static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack,
+ u32 cookie)
+{
+ u32 __cookie = cookie;
+
+ if (!extack)
+ return;
memcpy(extack->cookie, &__cookie, sizeof(__cookie));
extack->cookie_len = sizeof(__cookie);
}
diff --git a/include/linux/of_clk.h b/include/linux/of_clk.h
index c86fcad23fc2..31b73a0da9db 100644
--- a/include/linux/of_clk.h
+++ b/include/linux/of_clk.h
@@ -11,17 +11,17 @@ struct of_device_id;
#if defined(CONFIG_COMMON_CLK) && defined(CONFIG_OF)
-unsigned int of_clk_get_parent_count(struct device_node *np);
-const char *of_clk_get_parent_name(struct device_node *np, int index);
+unsigned int of_clk_get_parent_count(const struct device_node *np);
+const char *of_clk_get_parent_name(const struct device_node *np, int index);
void of_clk_init(const struct of_device_id *matches);
#else /* !CONFIG_COMMON_CLK || !CONFIG_OF */
-static inline unsigned int of_clk_get_parent_count(struct device_node *np)
+static inline unsigned int of_clk_get_parent_count(const struct device_node *np)
{
return 0;
}
-static inline const char *of_clk_get_parent_name(struct device_node *np,
+static inline const char *of_clk_get_parent_name(const struct device_node *np,
int index)
{
return NULL;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1bf83c8fcaa7..77de28bfefb0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -311,7 +311,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
-PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
+PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
PAGEFLAG(Referenced, referenced, PF_HEAD)
TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
__SETPAGEFLAG(Referenced, referenced, PF_HEAD)
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
new file mode 100644
index 000000000000..ece607607a86
--- /dev/null
+++ b/include/linux/part_stat.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PART_STAT_H
+#define _LINUX_PART_STAT_H
+
+#include <linux/genhd.h>
+
+/*
+ * Macros to operate on percpu disk statistics:
+ *
+ * part_stat_{add|sub|inc|dec}() modify the stat counters
+ * and should be called between part_stat_lock() and
+ * part_stat_unlock().
+ *
+ * part_stat_read() can be called at any time.
+ *
+ * part_stat_{add|set_all}() and {init|free}_part_stats are for
+ * internal use only.
+ */
+#ifdef CONFIG_SMP
+#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); })
+#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0)
+
+#define part_stat_get_cpu(part, field, cpu) \
+ (per_cpu_ptr((part)->dkstats, (cpu))->field)
+
+#define part_stat_get(part, field) \
+ part_stat_get_cpu(part, field, smp_processor_id())
+
+#define part_stat_read(part, field) \
+({ \
+ typeof((part)->dkstats->field) res = 0; \
+ unsigned int _cpu; \
+ for_each_possible_cpu(_cpu) \
+ res += per_cpu_ptr((part)->dkstats, _cpu)->field; \
+ res; \
+})
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ memset(per_cpu_ptr(part->dkstats, i), value,
+ sizeof(struct disk_stats));
+}
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+ part->dkstats = alloc_percpu(struct disk_stats);
+ if (!part->dkstats)
+ return 0;
+ return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+ free_percpu(part->dkstats);
+}
+
+#else /* !CONFIG_SMP */
+#define part_stat_lock() ({ rcu_read_lock(); 0; })
+#define part_stat_unlock() rcu_read_unlock()
+
+#define part_stat_get(part, field) ((part)->dkstats.field)
+#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field)
+#define part_stat_read(part, field) part_stat_get(part, field)
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
+ memset(&part->dkstats, value, sizeof(struct disk_stats));
+}
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+ return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+}
+
+#endif /* CONFIG_SMP */
+
+#define part_stat_read_accum(part, field) \
+ (part_stat_read(part, field[STAT_READ]) + \
+ part_stat_read(part, field[STAT_WRITE]) + \
+ part_stat_read(part, field[STAT_DISCARD]))
+
+#define __part_stat_add(part, field, addnd) \
+ (part_stat_get(part, field) += (addnd))
+
+#define part_stat_add(part, field, addnd) do { \
+ __part_stat_add((part), field, addnd); \
+ if ((part)->partno) \
+ __part_stat_add(&part_to_disk((part))->part0, \
+ field, addnd); \
+} while (0)
+
+#define part_stat_dec(gendiskp, field) \
+ part_stat_add(gendiskp, field, -1)
+#define part_stat_inc(gendiskp, field) \
+ part_stat_add(gendiskp, field, 1)
+#define part_stat_sub(gendiskp, field, subnd) \
+ part_stat_add(gendiskp, field, -(subnd)) /* parenthesized: negate the whole operand, e.g. (a - b) */
+
+#define part_stat_local_dec(gendiskp, field) \
+ local_dec(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_inc(gendiskp, field) \
+ local_inc(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_read(gendiskp, field) \
+ local_read(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_read_cpu(gendiskp, field, cpu) \
+ local_read(&(part_stat_get_cpu(gendiskp, field, cpu)))
+
+#endif /* _LINUX_PART_STAT_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 352c0d708720..977e66875a96 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -148,6 +148,8 @@
/* Vendors and devices. Sort key: vendor first, device next. */
+#define PCI_VENDOR_ID_LOONGSON 0x0014
+
#define PCI_VENDOR_ID_TTTECH 0x0357
#define PCI_DEVICE_ID_TTTECH_MC322 0x000a
diff --git a/include/linux/phy.h b/include/linux/phy.h
index c570e162e05e..452e8ba8665f 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -357,6 +357,7 @@ struct macsec_ops;
* is_gigabit_capable: Set to true if PHY supports 1000Mbps
* has_fixups: Set to true if this phy has fixups/quirks.
* suspended: Set to true if this phy has been suspended successfully.
+ * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus.
* sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
* loopback_enabled: Set true if this phy has been loopbacked successfully.
* state: state of the PHY for management purposes
@@ -396,6 +397,7 @@ struct phy_device {
unsigned is_gigabit_capable:1;
unsigned has_fixups:1;
unsigned suspended:1;
+ unsigned suspended_by_mdio_bus:1;
unsigned sysfs_links:1;
unsigned loopback_enabled:1;
@@ -557,6 +559,7 @@ struct phy_driver {
/*
* Checks if the PHY generated an interrupt.
* For multi-PHY devices with shared PHY interrupt pin
+ * Set interrupt bits have to be cleared.
*/
int (*did_interrupt)(struct phy_device *phydev);
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 276a03c24691..041bfa412aa0 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -24,7 +24,7 @@ struct platform_device {
int id;
bool id_auto;
struct device dev;
- u64 dma_mask;
+ u64 platform_dma_mask;
u32 num_resources;
struct resource *resource;
diff --git a/include/linux/raid/detect.h b/include/linux/raid/detect.h
new file mode 100644
index 000000000000..37dd3f40cd31
--- /dev/null
+++ b/include/linux/raid/detect.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+void md_autodetect_dev(dev_t dev);
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index beb9a9da1699..70ebef866cc8 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -972,9 +972,9 @@ static inline int rhashtable_lookup_insert_key(
/**
* rhashtable_lookup_get_insert_key - lookup and insert object into hash table
* @ht: hash table
+ * @key: key
* @obj: pointer to hash head inside object
* @params: hash table parameters
- * @data: pointer to element data already in hashes
*
* Just like rhashtable_lookup_insert_key(), but this function returns the
* object if it exists, NULL if it does not and the insertion was successful,
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 03583b6d1416..4192369b8418 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -7,7 +7,8 @@
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
SECCOMP_FILTER_FLAG_LOG | \
SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
- SECCOMP_FILTER_FLAG_NEW_LISTENER)
+ SECCOMP_FILTER_FLAG_NEW_LISTENER | \
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
#ifdef CONFIG_SECCOMP
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5b50278c4bc8..e59620234415 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -645,8 +645,8 @@ typedef unsigned char *sk_buff_data_t;
* @offload_l3_fwd_mark: Packet was L3-forwarded in hardware
* @tc_skip_classify: do not classify packet. set by IFB device
* @tc_at_ingress: used within tc_classify to distinguish in/egress
- * @tc_redirected: packet was redirected by a tc action
- * @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect
+ * @redirected: packet was redirected by packet classifier
+ * @from_ingress: packet was redirected from the ingress path
* @peeked: this packet has been seen already, so stats have been
* done for it, don't do them again
* @nf_trace: netfilter packet trace flag
@@ -848,8 +848,10 @@ struct sk_buff {
#ifdef CONFIG_NET_CLS_ACT
__u8 tc_skip_classify:1;
__u8 tc_at_ingress:1;
- __u8 tc_redirected:1;
- __u8 tc_from_ingress:1;
+#endif
+#ifdef CONFIG_NET_REDIRECT
+ __u8 redirected:1;
+ __u8 from_ingress:1;
#endif
#ifdef CONFIG_TLS_DEVICE
__u8 decrypted:1;
@@ -4579,5 +4581,31 @@ static inline __wsum lco_csum(struct sk_buff *skb)
return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
}
+static inline bool skb_is_redirected(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_REDIRECT
+ return skb->redirected; /* bit set by skb_set_redirected(), cleared by skb_reset_redirect() */
+#else
+ return false; /* sk_buff has no redirected bit without CONFIG_NET_REDIRECT */
+#endif /* CONFIG_NET_REDIRECT */
+}
+
+static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
+{
+#ifdef CONFIG_NET_REDIRECT
+ skb->redirected = 1; /* what skb_is_redirected() reports */
+ skb->from_ingress = from_ingress; /* record which path the redirect came from */
+ if (skb->from_ingress) /* only for redirects coming from ingress */
+ skb->tstamp = 0; /* NOTE(review): presumably discards a stale timestamp - confirm */
+#endif /* CONFIG_NET_REDIRECT - the helper is a no-op otherwise */
+}
+
+static inline void skb_reset_redirect(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_REDIRECT
+ skb->redirected = 0; /* only this bit is cleared; from_ingress is left untouched */
+#endif /* CONFIG_NET_REDIRECT - the helper is a no-op otherwise */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 2d2313403101..54338fac45cb 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -391,6 +391,10 @@ extern int recvmsg_copy_msghdr(struct msghdr *msg,
struct user_msghdr __user *umsg, unsigned flags,
struct sockaddr __user **uaddr,
struct iovec **iov);
+extern int __copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec __user **uiov, size_t *nsegs);
/* helpers which do the actual work for syscalls */
extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
@@ -401,7 +405,8 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len,
int addr_len);
extern int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags);
+ int __user *upeer_addrlen, int flags,
+ unsigned long nofile);
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 74b4911ac16d..ebbbfea48aa0 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -78,6 +78,9 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *,
struct pipe_buffer *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
+extern long do_splice(struct file *in, loff_t __user *off_in,
+ struct file *out, loff_t __user *off_out,
+ size_t len, unsigned int flags);
/*
* for dynamic pipe sizing
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index ec3813236699..0507a162ccd0 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -141,8 +141,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff);
-void vmalloc_sync_all(void);
-
+void vmalloc_sync_mappings(void);
+void vmalloc_sync_unmappings(void);
+
/*
* Lowlevel-APIs (not for driver use!)
*/
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 4261d1c6e87b..e48554e6526c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -487,6 +487,19 @@ extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);
*
* We queue the work to the CPU on which it was submitted, but if the CPU dies
* it can be processed by another CPU.
+ *
+ * Memory-ordering properties: If it returns %true, guarantees that all stores
+ * preceding the call to queue_work() in the program order will be visible from
+ * the CPU which will execute @work by the time such work executes, e.g.,
+ *
+ * { x is initially 0 }
+ *
+ * CPU0 CPU1
+ *
+ * WRITE_ONCE(x, 1); [ @work is being executed ]
+ * r0 = queue_work(wq, work); r1 = READ_ONCE(x);
+ *
+ * Forbids: r0 == true && r1 == 0
*/
static inline bool queue_work(struct workqueue_struct *wq,
struct work_struct *work)
@@ -546,6 +559,9 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work)
* This puts a job in the kernel-global workqueue if it was not already
* queued and leaves it in the same position on the kernel-global
* workqueue otherwise.
+ *
+ * Shares the same memory-ordering properties as queue_work(), cf. the
+ * kernel-doc comment of queue_work().
*/
static inline bool schedule_work(struct work_struct *work)
{
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 1abae3c340a5..04e97bab6f28 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -16,6 +16,12 @@ struct sock;
struct socket;
struct rxrpc_call;
+enum rxrpc_interruptibility {
+ RXRPC_INTERRUPTIBLE, /* Call is interruptible */
+ RXRPC_PREINTERRUPTIBLE, /* Call can be cancelled whilst waiting for a slot */
+ RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */
+};
+
/*
* Debug ID counter for tracing.
*/
@@ -41,7 +47,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
gfp_t,
rxrpc_notify_rx_t,
bool,
- bool,
+ enum rxrpc_interruptibility,
unsigned int);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
@@ -58,9 +64,7 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
-bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *,
- u32 *);
-void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
+bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
ktime_t *);
diff --git a/include/net/compat.h b/include/net/compat.h
index f277653c7e17..e341260642fe 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -38,6 +38,9 @@ struct compat_cmsghdr {
#define compat_mmsghdr mmsghdr
#endif /* defined(CONFIG_COMPAT) */
+int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg,
+ struct sockaddr __user **save_addr, compat_uptr_t *ptr,
+ compat_size_t *len);
int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *,
struct sockaddr __user **, struct iovec **);
struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval);
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 54e227e6b06a..a259050f84af 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -108,6 +108,7 @@ struct fib_rule_notifier_info {
[FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_PRIORITY] = { .type = NLA_U32 }, \
[FRA_FWMARK] = { .type = NLA_U32 }, \
+ [FRA_TUN_ID] = { .type = NLA_U64 }, \
[FRA_FWMASK] = { .type = NLA_U32 }, \
[FRA_TABLE] = { .type = NLA_U32 }, \
[FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 151208704ed2..c30f914867e6 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -675,22 +675,6 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);
-static inline void skb_reset_tc(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_redirected = 0;
-#endif
-}
-
-static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- return skb->tc_redirected;
-#else
- return false;
-#endif
-}
-
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
diff --git a/include/scsi/scsicam.h b/include/scsi/scsicam.h
index 57c729254569..08edd603e521 100644
--- a/include/scsi/scsicam.h
+++ b/include/scsi/scsicam.h
@@ -13,8 +13,7 @@
#ifndef SCSICAM_H
#define SCSICAM_H
-extern int scsicam_bios_param (struct block_device *bdev, sector_t capacity, int *ip);
-extern int scsi_partsize(unsigned char *buf, unsigned long capacity,
- unsigned int *cyls, unsigned int *hds, unsigned int *secs);
-extern unsigned char *scsi_bios_ptable(struct block_device *bdev);
+int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip);
+bool scsi_partsize(struct block_device *bdev, sector_t capacity, int geom[3]);
+unsigned char *scsi_bios_ptable(struct block_device *bdev);
#endif /* def SCSICAM_H */
diff --git a/include/soc/mscc/ocelot_dev.h b/include/soc/mscc/ocelot_dev.h
index 0a50d53bbd3f..7c08437061fc 100644
--- a/include/soc/mscc/ocelot_dev.h
+++ b/include/soc/mscc/ocelot_dev.h
@@ -74,7 +74,7 @@
#define DEV_MAC_TAGS_CFG_TAG_ID_M GENMASK(31, 16)
#define DEV_MAC_TAGS_CFG_TAG_ID_X(x) (((x) & GENMASK(31, 16)) >> 16)
#define DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA BIT(2)
-#define DEV_MAC_TAGS_CFG_PB_ENA BIT(1)
+#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA BIT(1)
#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0)
#define DEV_MAC_ADV_CHK_CFG 0x2c
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 564ba1b5cf57..c612cabbc378 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -233,7 +233,7 @@ enum afs_cb_break_reason {
EM(afs_call_trace_get, "GET ") \
EM(afs_call_trace_put, "PUT ") \
EM(afs_call_trace_wake, "WAKE ") \
- E_(afs_call_trace_work, "WORK ")
+ E_(afs_call_trace_work, "QUEUE")
#define afs_server_traces \
EM(afs_server_trace_alloc, "ALLOC ") \
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 27bd9e4f927b..9f0d3b7d56b0 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -357,6 +357,109 @@ TRACE_EVENT(io_uring_submit_sqe,
__entry->force_nonblock, __entry->sq_thread)
);
+TRACE_EVENT(io_uring_poll_arm, /* records ctx, opcode, user_data, mask and events */
+
+ TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
+
+ TP_ARGS(ctx, opcode, user_data, mask, events),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u8, opcode )
+ __field( u64, user_data )
+ __field( int, mask )
+ __field( int, events )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->opcode = opcode;
+ __entry->user_data = user_data;
+ __entry->mask = mask;
+ __entry->events = events;
+ ),
+
+ TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
+ __entry->ctx, __entry->opcode,
+ (unsigned long long) __entry->user_data, /* explicit cast so the argument type is exactly what %llx expects */
+ __entry->mask, __entry->events)
+);
+
+TRACE_EVENT(io_uring_poll_wake, /* records ctx, opcode, user_data and mask */
+
+ TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
+
+ TP_ARGS(ctx, opcode, user_data, mask),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u8, opcode )
+ __field( u64, user_data )
+ __field( int, mask )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->opcode = opcode;
+ __entry->user_data = user_data;
+ __entry->mask = mask;
+ ),
+
+ TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x",
+ __entry->ctx, __entry->opcode,
+ (unsigned long long) __entry->user_data, /* explicit cast so the argument type is exactly what %llx expects */
+ __entry->mask)
+);
+
+TRACE_EVENT(io_uring_task_add, /* records ctx, opcode, user_data and mask */
+
+ TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
+
+ TP_ARGS(ctx, opcode, user_data, mask),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u8, opcode )
+ __field( u64, user_data )
+ __field( int, mask )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->opcode = opcode;
+ __entry->user_data = user_data;
+ __entry->mask = mask;
+ ),
+
+ TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x", /* mask carries the 0x prefix, as in io_uring_poll_arm/io_uring_poll_wake */
+ __entry->ctx, __entry->opcode,
+ (unsigned long long) __entry->user_data, /* explicit cast so the argument type is exactly what %llx expects */
+ __entry->mask)
+);
+
+TRACE_EVENT(io_uring_task_run, /* records ctx, opcode and user_data */
+
+ TP_PROTO(void *ctx, u8 opcode, u64 user_data),
+
+ TP_ARGS(ctx, opcode, user_data),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u8, opcode )
+ __field( u64, user_data )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->opcode = opcode;
+ __entry->user_data = user_data;
+ ),
+
+ TP_printk("ring %p, op %d, data 0x%llx",
+ __entry->ctx, __entry->opcode,
+ (unsigned long long) __entry->user_data) /* explicit cast so the argument type is exactly what %llx expects */
+);
#endif /* _TRACE_IO_URING_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/fdreg.h b/include/uapi/linux/fdreg.h
index 5e2981d5c523..1318881954e1 100644
--- a/include/uapi/linux/fdreg.h
+++ b/include/uapi/linux/fdreg.h
@@ -7,26 +7,18 @@
* Handbook", Sanches and Canton.
*/
-#ifdef FDPATCHES
-#define FD_IOPORT fdc_state[fdc].address
-#else
-/* It would be a lot saner just to force fdc_state[fdc].address to always
- be set ! FIXME */
-#define FD_IOPORT 0x3f0
-#endif
-
/* Fd controller regs. S&C, about page 340 */
-#define FD_STATUS (4 + FD_IOPORT )
-#define FD_DATA (5 + FD_IOPORT )
+#define FD_STATUS 4
+#define FD_DATA 5
/* Digital Output Register */
-#define FD_DOR (2 + FD_IOPORT )
+#define FD_DOR 2
/* Digital Input Register (read) */
-#define FD_DIR (7 + FD_IOPORT )
+#define FD_DIR 7
/* Diskette Control Register (write)*/
-#define FD_DCR (7 + FD_IOPORT )
+#define FD_DCR 7
/* Bits of main status register */
#define STATUS_BUSYMASK 0x0F /* drive busy mask */
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 1521073b6348..8533bf07450f 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -74,6 +74,8 @@ enum {
#define IPPROTO_UDPLITE IPPROTO_UDPLITE
IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
#define IPPROTO_MPLS IPPROTO_MPLS
+ IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
+#define IPPROTO_ETHERNET IPPROTO_ETHERNET
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 0f1db1cccc3f..6923dc7e0298 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -652,6 +652,9 @@
/* Electronic privacy screen control */
#define KEY_PRIVACY_SCREEN_TOGGLE 0x279
+/* Select an area of screen to be copied */
+#define KEY_SELECTIVE_SCREENSHOT 0x27a
+
/*
* Some keyboards have keys which do not have a defined meaning, these keys
* are intended to be programmed / bound to macros by the user. For most
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 3f7961c1c243..e48d746b8e2a 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
* Header file for the io_uring interface.
*
@@ -23,7 +23,10 @@ struct io_uring_sqe {
__u64 off; /* offset into file */
__u64 addr2;
};
- __u64 addr; /* pointer to buffer or iovecs */
+ union {
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u64 splice_off_in;
+ };
__u32 len; /* buffer size or number of iovecs */
union {
__kernel_rwf_t rw_flags;
@@ -37,14 +40,21 @@ struct io_uring_sqe {
__u32 open_flags;
__u32 statx_flags;
__u32 fadvise_advice;
+ __u32 splice_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
struct {
- /* index into fixed buffers, if used */
- __u16 buf_index;
+ /* pack this to avoid bogus arm OABI complaints */
+ union {
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
/* personality to use, if used */
__u16 personality;
+ __s32 splice_fd_in;
};
__u64 __pad2[3];
};
@@ -56,6 +66,7 @@ enum {
IOSQE_IO_LINK_BIT,
IOSQE_IO_HARDLINK_BIT,
IOSQE_ASYNC_BIT,
+ IOSQE_BUFFER_SELECT_BIT,
};
/*
@@ -71,6 +82,8 @@ enum {
#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
/* always go async */
#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
+/* select buffer from sqe->buf_group */
+#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
/*
* io_uring_setup() flags
@@ -113,6 +126,9 @@ enum {
IORING_OP_RECV,
IORING_OP_OPENAT2,
IORING_OP_EPOLL_CTL,
+ IORING_OP_SPLICE,
+ IORING_OP_PROVIDE_BUFFERS,
+ IORING_OP_REMOVE_BUFFERS,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -129,6 +145,12 @@ enum {
#define IORING_TIMEOUT_ABS (1U << 0)
/*
+ * sqe->splice_flags
+ * extends splice(2) flags
+ */
+#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
+
+/*
* IO completion data structure (Completion Queue Entry)
*/
struct io_uring_cqe {
@@ -138,6 +160,17 @@ struct io_uring_cqe {
};
/*
+ * cqe->flags
+ *
+ * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
+ */
+#define IORING_CQE_F_BUFFER (1U << 0)
+
+enum {
+ IORING_CQE_BUFFER_SHIFT = 16,
+};
+
+/*
* Magic offsets for the application to mmap the data it needs
*/
#define IORING_OFF_SQ_RING 0ULL
@@ -204,6 +237,7 @@ struct io_uring_params {
#define IORING_FEAT_SUBMIT_STABLE (1U << 2)
#define IORING_FEAT_RW_CUR_POS (1U << 3)
#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
+#define IORING_FEAT_FAST_POLL (1U << 5)
/*
* io_uring_register(2) opcodes and arguments
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index be84d87f1f46..c1735455bc53 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -22,6 +22,7 @@
#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
/*
* All BPF programs must return a 32-bit value.
diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h
index 50e991952c97..ed2a96f43ce4 100644
--- a/include/uapi/linux/serio.h
+++ b/include/uapi/linux/serio.h
@@ -9,7 +9,7 @@
#ifndef _UAPI_SERIO_H
#define _UAPI_SERIO_H
-
+#include <linux/const.h>
#include <linux/ioctl.h>
#define SPIOCSTYPE _IOW('q', 0x01, unsigned long)
@@ -18,10 +18,10 @@
/*
* bit masks for use in "interrupt" flags (3rd argument)
*/
-#define SERIO_TIMEOUT BIT(0)
-#define SERIO_PARITY BIT(1)
-#define SERIO_FRAME BIT(2)
-#define SERIO_OOB_DATA BIT(3)
+#define SERIO_TIMEOUT _BITUL(0)
+#define SERIO_PARITY _BITUL(1)
+#define SERIO_FRAME _BITUL(2)
+#define SERIO_OOB_DATA _BITUL(3)
/*
* Serio types
diff --git a/init/Kconfig b/init/Kconfig
index 20a6ac33761c..4f717bfdbfe2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -767,8 +767,7 @@ config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
bool
config CC_HAS_INT128
- def_bool y
- depends on !$(cc-option,-D__SIZEOF_INT128__=0)
+ def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
#
# For architectures that know their GCC __int128 support is sound
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 0ae9cc22f2ae..29d326b6c29d 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -429,12 +429,10 @@ void __init mount_block_root(char *name, int flags)
struct page *page = alloc_page(GFP_KERNEL);
char *fs_names = page_address(page);
char *p;
-#ifdef CONFIG_BLOCK
char b[BDEVNAME_SIZE];
-#else
- const char *b = name;
-#endif
+ scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
+ MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
get_fs_names(fs_names);
retry:
for (p = fs_names; *p; p += strlen(p)+1) {
@@ -451,9 +449,6 @@ retry:
* and bad superblock on root device.
* and give them a list of the available devices
*/
-#ifdef CONFIG_BLOCK
- __bdevname(ROOT_DEV, b);
-#endif
printk("VFS: Cannot open root device \"%s\" or %s: error %d\n",
root_device_name, b, err);
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
@@ -476,9 +471,6 @@ retry:
for (p = fs_names; *p; p += strlen(p)+1)
printk(" %s", p);
printk("\n");
-#ifdef CONFIG_BLOCK
- __bdevname(ROOT_DEV, b);
-#endif
panic("VFS: Unable to mount root fs on %s", b);
out:
put_page(page);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 042f95534f86..68a89a9f7ccd 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -482,13 +482,21 @@ static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
prev_state = cmpxchg(&st_map->kvalue.state,
BPF_STRUCT_OPS_STATE_INUSE,
BPF_STRUCT_OPS_STATE_TOBEFREE);
- if (prev_state == BPF_STRUCT_OPS_STATE_INUSE) {
+ switch (prev_state) {
+ case BPF_STRUCT_OPS_STATE_INUSE:
st_map->st_ops->unreg(&st_map->kvalue.data);
if (refcount_dec_and_test(&st_map->kvalue.refcnt))
bpf_map_put(map);
+ return 0;
+ case BPF_STRUCT_OPS_STATE_TOBEFREE:
+ return -EINPROGRESS;
+ case BPF_STRUCT_OPS_STATE_INIT:
+ return -ENOENT;
+ default:
+ WARN_ON_ONCE(1);
+ /* Should never happen. Treat it as not found. */
+ return -ENOENT;
}
-
- return 0;
}
static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 787140095e58..7787bdcb5d68 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2418,7 +2418,7 @@ static int btf_enum_check_member(struct btf_verifier_env *env,
struct_size = struct_type->size;
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
- if (struct_size - bytes_offset < sizeof(int)) {
+ if (struct_size - bytes_offset < member_type->size) {
btf_verifier_log_member(env, struct_type, member,
"Member exceeds struct_size");
return -EINVAL;
@@ -4564,7 +4564,7 @@ int btf_get_info_by_fd(const struct btf *btf,
union bpf_attr __user *uattr)
{
struct bpf_btf_info __user *uinfo;
- struct bpf_btf_info info = {};
+ struct bpf_btf_info info;
u32 info_copy, btf_copy;
void __user *ubtf;
u32 uinfo_len;
@@ -4573,6 +4573,7 @@ int btf_get_info_by_fd(const struct btf *btf,
uinfo_len = attr->info.info_len;
info_copy = min_t(u32, uinfo_len, sizeof(info));
+ memset(&info, 0, sizeof(info));
if (copy_from_user(&info, uinfo, info_copy))
return -EFAULT;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9a500fadbef5..4f1472409ef8 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -227,6 +227,9 @@ cleanup:
for (i = 0; i < NR; i++)
bpf_prog_array_free(arrays[i]);
+ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
+ cgroup_bpf_put(p);
+
percpu_ref_exit(&cgrp->bpf.refcnt);
return -ENOMEM;
@@ -302,8 +305,8 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
- *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+ struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
struct bpf_prog_list *pl, *replace_pl = NULL;
enum bpf_cgroup_storage_type stype;
int err;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a91ad518c050..966b7b34cde0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -696,14 +696,15 @@ int bpf_get_file_flag(int flags)
offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
sizeof(attr->CMD##_LAST_FIELD)) != NULL
-/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
- * Return 0 on success and < 0 on error.
+/* dst and src must have at least "size" number of bytes.
+ * Return strlen on success and < 0 on error.
*/
-static int bpf_obj_name_cpy(char *dst, const char *src)
+int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
- const char *end = src + BPF_OBJ_NAME_LEN;
+ const char *end = src + size;
+ const char *orig_src = src;
- memset(dst, 0, BPF_OBJ_NAME_LEN);
+ memset(dst, 0, size);
/* Copy all isalnum(), '_' and '.' chars. */
while (src < end && *src) {
if (!isalnum(*src) &&
@@ -712,11 +713,11 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
*dst++ = *src++;
}
- /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
+ /* No '\0' found in "size" number of bytes */
if (src == end)
return -EINVAL;
- return 0;
+ return src - orig_src;
}
int map_check_no_btf(const struct bpf_map *map,
@@ -810,8 +811,9 @@ static int map_create(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
- err = bpf_obj_name_cpy(map->name, attr->map_name);
- if (err)
+ err = bpf_obj_name_cpy(map->name, attr->map_name,
+ sizeof(attr->map_name));
+ if (err < 0)
goto free_map;
atomic64_set(&map->refcnt, 1);
@@ -1510,6 +1512,11 @@ static int map_freeze(const union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
+ if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+ fdput(f);
+ return -ENOTSUPP;
+ }
+
mutex_lock(&map->freeze_mutex);
if (map->writecnt) {
@@ -2093,8 +2100,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
goto free_prog;
prog->aux->load_time = ktime_get_boottime_ns();
- err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
- if (err)
+ err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
+ sizeof(attr->prog_name));
+ if (err < 0)
goto free_prog;
/* run eBPF verifier */
@@ -2787,7 +2795,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
union bpf_attr __user *uattr)
{
struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
- struct bpf_prog_info info = {};
+ struct bpf_prog_info info;
u32 info_len = attr->info.info_len;
struct bpf_prog_stats stats;
char __user *uinsns;
@@ -2799,6 +2807,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
return err;
info_len = min_t(u32, sizeof(info), info_len);
+ memset(&info, 0, sizeof(info));
if (copy_from_user(&info, uinfo, info_len))
return -EFAULT;
@@ -3062,7 +3071,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
union bpf_attr __user *uattr)
{
struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
- struct bpf_map_info info = {};
+ struct bpf_map_info info;
u32 info_len = attr->info.info_len;
int err;
@@ -3071,6 +3080,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
return err;
info_len = min_t(u32, sizeof(info), info_len);
+ memset(&info, 0, sizeof(info));
info.type = map->map_type;
info.id = map->id;
info.key_size = map->key_size;
@@ -3354,7 +3364,7 @@ err_put:
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
- union bpf_attr attr = {};
+ union bpf_attr attr;
int err;
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
@@ -3366,6 +3376,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
size = min_t(u32, size, sizeof(attr));
/* copy attributes from user space, may be less than sizeof(bpf_attr) */
+ memset(&attr, 0, sizeof(attr));
if (copy_from_user(&attr, uattr, size) != 0)
return -EFAULT;
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index be1a1c83cdd1..f2d7cea86ffe 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -471,6 +471,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
*/
p++;
if (p >= end) {
+ (*pos)++;
return NULL;
} else {
*pos = *p;
@@ -782,7 +783,7 @@ void cgroup1_release_agent(struct work_struct *work)
pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
- if (!pathbuf || !agentbuf)
+ if (!pathbuf || !agentbuf || !strlen(agentbuf))
goto out;
spin_lock_irq(&css_set_lock);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 75f687301bbf..3dead0416b91 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3542,21 +3542,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
- struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+ struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
return psi_show(seq, psi, PSI_IO);
}
static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
- struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+ struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
return psi_show(seq, psi, PSI_MEM);
}
static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
- struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+ struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
return psi_show(seq, psi, PSI_CPU);
}
@@ -4400,12 +4400,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
}
} while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
- if (!list_empty(&cset->tasks))
+ if (!list_empty(&cset->tasks)) {
it->task_pos = cset->tasks.next;
- else if (!list_empty(&cset->mg_tasks))
+ it->cur_tasks_head = &cset->tasks;
+ } else if (!list_empty(&cset->mg_tasks)) {
it->task_pos = cset->mg_tasks.next;
- else
+ it->cur_tasks_head = &cset->mg_tasks;
+ } else {
it->task_pos = cset->dying_tasks.next;
+ it->cur_tasks_head = &cset->dying_tasks;
+ }
it->tasks_head = &cset->tasks;
it->mg_tasks_head = &cset->mg_tasks;
@@ -4463,10 +4467,14 @@ repeat:
else
it->task_pos = it->task_pos->next;
- if (it->task_pos == it->tasks_head)
+ if (it->task_pos == it->tasks_head) {
it->task_pos = it->mg_tasks_head->next;
- if (it->task_pos == it->mg_tasks_head)
+ it->cur_tasks_head = it->mg_tasks_head;
+ }
+ if (it->task_pos == it->mg_tasks_head) {
it->task_pos = it->dying_tasks_head->next;
+ it->cur_tasks_head = it->dying_tasks_head;
+ }
if (it->task_pos == it->dying_tasks_head)
css_task_iter_advance_css_set(it);
} else {
@@ -4485,11 +4493,12 @@ repeat:
goto repeat;
/* and dying leaders w/o live member threads */
- if (!atomic_read(&task->signal->live))
+ if (it->cur_tasks_head == it->dying_tasks_head &&
+ !atomic_read(&task->signal->live))
goto repeat;
} else {
/* skip all dying ones */
- if (task->flags & PF_EXITING)
+ if (it->cur_tasks_head == it->dying_tasks_head)
goto repeat;
}
}
@@ -4595,6 +4604,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
struct kernfs_open_file *of = s->private;
struct css_task_iter *it = of->priv;
+ if (pos)
+ (*pos)++;
+
return css_task_iter_next(it);
}
@@ -4610,7 +4622,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
* from position 0, so we can simply keep iterating on !0 *pos.
*/
if (!it) {
- if (WARN_ON_ONCE((*pos)++))
+ if (WARN_ON_ONCE((*pos)))
return ERR_PTR(-EINVAL);
it = kzalloc(sizeof(*it), GFP_KERNEL);
@@ -4618,10 +4630,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
return ERR_PTR(-ENOMEM);
of->priv = it;
css_task_iter_start(&cgrp->self, iter_flags, it);
- } else if (!(*pos)++) {
+ } else if (!(*pos)) {
css_task_iter_end(it);
css_task_iter_start(&cgrp->self, iter_flags, it);
- }
+ } else
+ return it->cur_task;
return cgroup_procs_next(s, NULL, NULL);
}
@@ -6258,6 +6271,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
return;
}
+ /* Don't associate the sock with unrelated interrupted task's cgroup. */
+ if (in_interrupt())
+ return;
+
rcu_read_lock();
while (true) {
diff --git a/kernel/fork.c b/kernel/fork.c
index 86425305cd4a..d90af13431c7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -397,8 +397,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
THREAD_SIZE / 1024 * account);
- mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
+ mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
+ account * (THREAD_SIZE / 1024));
}
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 0cf84c8664f2..82dfacb3250e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -385,9 +385,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
- u32 hash = jhash2((u32*)&key->both.word,
- (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
+ u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
key->both.offset);
+
return &futex_queues[hash & (futex_hashsize - 1)];
}
@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- ihold(key->shared.inode); /* implies smp_mb(); (B) */
+ smp_mb(); /* explicit smp_mb(); (B) */
break;
case FUT_OFF_MMSHARED:
futex_get_mm(key); /* implies smp_mb(); (B) */
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- iput(key->shared.inode);
break;
case FUT_OFF_MMSHARED:
mmdrop(key->private.mm);
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
return timeout;
}
+/*
+ * Generate a machine wide unique identifier for this inode.
+ *
+ * This relies on u64 not wrapping in the life-time of the machine; which with
+ * 1ns resolution means almost 585 years.
+ *
+ * This further relies on the fact that a well formed program will not unmap
+ * the file while it has a (shared) futex waiting on it. This mapping will have
+ * a file reference which pins the mount and inode.
+ *
+ * If for some reason an inode gets evicted and read back in again, it will get
+ * a new sequence number and will _NOT_ match, even though it is the exact same
+ * file.
+ *
+ * It is important that match_futex() will never have a false-positive, esp.
+ * for PI futexes that can mess up the state. The above argues that false-negatives
+ * are only possible for malformed programs.
+ */
+static u64 get_inode_sequence_number(struct inode *inode)
+{
+ static atomic64_t i_seq;
+ u64 old;
+
+ /* Does the inode already have a sequence number? */
+ old = atomic64_read(&inode->i_sequence);
+ if (likely(old))
+ return old;
+
+ for (;;) {
+ u64 new = atomic64_add_return(1, &i_seq);
+ if (WARN_ON_ONCE(!new))
+ continue;
+
+ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
+ if (old)
+ return old;
+ return new;
+ }
+}
+
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
*
* The key words are stored in @key on success.
*
- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
- * offset_within_page). For private mappings, it's (uaddr, current->mm).
- * We can usually work out the index without swapping in the page.
+ * For shared mappings (when @fshared), the key is:
+ * ( inode->i_sequence, page->index, offset_within_page )
+ * [ also see get_inode_sequence_number() ]
+ *
+ * For private mappings (or when !@fshared), the key is:
+ * ( current->mm, address, 0 )
+ *
+ * This allows (cross process, where applicable) identification of the futex
+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
@@ -659,8 +704,6 @@ again:
key->private.mm = mm;
key->private.address = address;
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
-
} else {
struct inode *inode;
@@ -692,40 +735,14 @@ again:
goto again;
}
- /*
- * Take a reference unless it is about to be freed. Previously
- * this reference was taken by ihold under the page lock
- * pinning the inode in place so i_lock was unnecessary. The
- * only way for this check to fail is if the inode was
- * truncated in parallel which is almost certainly an
- * application bug. In such a case, just retry.
- *
- * We are not calling into get_futex_key_refs() in file-backed
- * cases, therefore a successful atomic_inc return below will
- * guarantee that get_futex_key() will still imply smp_mb(); (B).
- */
- if (!atomic_inc_not_zero(&inode->i_count)) {
- rcu_read_unlock();
- put_page(page);
-
- goto again;
- }
-
- /* Should be impossible but lets be paranoid for now */
- if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
- err = -EFAULT;
- rcu_read_unlock();
- iput(inode);
-
- goto out;
- }
-
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
- key->shared.inode = inode;
+ key->shared.i_seq = get_inode_sequence_number(inode);
key->shared.pgoff = basepage_index(tail);
rcu_read_unlock();
}
+ get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
out:
put_page(page);
return err;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7eee98c38f25..fe40c658f86f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -323,7 +323,11 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
if (desc->affinity_notify) {
kref_get(&desc->affinity_notify->kref);
- schedule_work(&desc->affinity_notify->work);
+ if (!schedule_work(&desc->affinity_notify->work)) {
+ /* Work was already scheduled, drop our extra ref */
+ kref_put(&desc->affinity_notify->kref,
+ desc->affinity_notify->release);
+ }
}
irqd_set(data, IRQD_AFFINITY_SET);
@@ -423,7 +427,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
raw_spin_unlock_irqrestore(&desc->lock, flags);
if (old_notify) {
- cancel_work_sync(&old_notify->work);
+ if (cancel_work_sync(&old_notify->work)) {
+ /* Pending work had a ref, put that one too */
+ kref_put(&old_notify->kref, old_notify->release);
+ }
kref_put(&old_notify->kref, old_notify->release);
}
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 63d7501ac638..5989bbb93039 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die);
int register_die_notifier(struct notifier_block *nb)
{
- vmalloc_sync_all();
+ vmalloc_sync_mappings();
return atomic_notifier_chain_register(&die_chain, nb);
}
EXPORT_SYMBOL_GPL(register_die_notifier);
diff --git a/kernel/pid.c b/kernel/pid.c
index 0f4ecb57214c..647b4bb457b5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -247,6 +247,16 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
tmp = tmp->parent;
}
+ /*
+ * ENOMEM is not the most obvious choice especially for the case
+ * where the child subreaper has already exited and the pid
+ * namespace denies the creation of any new processes. But ENOMEM
+ * is what we have exposed to userspace for a long time and it is
+ * documented behavior for pid namespaces. So we can't easily
+ * change it even if there were an error code better suited.
+ */
+ retval = -ENOMEM;
+
if (unlikely(is_child_reaper(pid))) {
if (pid_ns_prepare_proc(ns))
goto out_free;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b6ea3dcb57bf..ec5c606bc3a1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -528,8 +528,12 @@ static long seccomp_attach_filter(unsigned int flags,
int ret;
ret = seccomp_can_sync_threads();
- if (ret)
- return ret;
+ if (ret) {
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
+ return -ESRCH;
+ else
+ return ret;
+ }
}
/* Set log flag, if present. */
@@ -1221,6 +1225,7 @@ static const struct file_operations seccomp_notify_ops = {
.poll = seccomp_notify_poll,
.release = seccomp_notify_release,
.unlocked_ioctl = seccomp_notify_ioctl,
+ .compat_ioctl = seccomp_notify_ioctl,
};
static struct file *init_listener(struct seccomp_filter *filter)
@@ -1288,10 +1293,12 @@ static long seccomp_set_mode_filter(unsigned int flags,
* In the successful case, NEW_LISTENER returns the new listener fd.
* But in the failure case, TSYNC returns the thread that died. If you
* combine these two flags, there's no way to tell whether something
- * succeeded or failed. So, let's disallow this combination.
+ * succeeded or failed. So, let's disallow this combination if the user
+ * has not explicitly requested no errors from TSYNC.
*/
if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
- (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER))
+ (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
+ ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
return -EINVAL;
/* Prepare the new filter before holding any locks. */
diff --git a/kernel/sys.c b/kernel/sys.c
index f9bc5c303e3f..d325f3ab624a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -47,6 +47,7 @@
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
+#include <linux/time_namespace.h>
#include <linux/binfmts.h>
#include <linux/sched.h>
@@ -2546,6 +2547,7 @@ static int do_sysinfo(struct sysinfo *info)
memset(info, 0, sizeof(struct sysinfo));
ktime_get_boottime_ts64(&tp);
+ timens_add_boottime(&tp);
info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 0fef395662a6..825f28259a19 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -97,16 +97,26 @@ void task_work_run(void)
* work->func() can do task_work_add(), do not set
* work_exited unless the list is empty.
*/
- raw_spin_lock_irq(&task->pi_lock);
do {
+ head = NULL;
work = READ_ONCE(task->task_works);
- head = !work && (task->flags & PF_EXITING) ?
- &work_exited : NULL;
+ if (!work) {
+ if (task->flags & PF_EXITING)
+ head = &work_exited;
+ else
+ break;
+ }
} while (cmpxchg(&task->task_works, work, head) != work);
- raw_spin_unlock_irq(&task->pi_lock);
if (!work)
break;
+ /*
+ * Synchronize with task_work_cancel(). It can not remove
+ * the first entry == work, cmpxchg(task_works) must fail.
+ * But it can remove another entry from the ->next list.
+ */
+ raw_spin_lock_irq(&task->pi_lock);
+ raw_spin_unlock_irq(&task->pi_lock);
do {
next = work->next;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 19e793aa441a..68250d433bd7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -732,7 +732,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
- if (in_nmi()) {
+ if (irqs_disabled()) {
/* Do an early check on signal validity. Otherwise,
* the error is lost in deferred irq_work.
*/
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3f7ee102868a..fd81c7de77a7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1547,6 +1547,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
rec = bsearch(&key, pg->records, pg->index,
sizeof(struct dyn_ftrace),
ftrace_cmp_recs);
+ if (rec)
+ break;
}
return rec;
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 301db4406bc3..4e01c448b4b4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1411,14 +1411,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
return;
rcu_read_lock();
retry:
- if (req_cpu == WORK_CPU_UNBOUND)
- cpu = wq_select_unbound_cpu(raw_smp_processor_id());
-
/* pwq which will be used unless @work is executing elsewhere */
- if (!(wq->flags & WQ_UNBOUND))
- pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
- else
+ if (wq->flags & WQ_UNBOUND) {
+ if (req_cpu == WORK_CPU_UNBOUND)
+ cpu = wq_select_unbound_cpu(raw_smp_processor_id());
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
+ } else {
+ if (req_cpu == WORK_CPU_UNBOUND)
+ cpu = raw_smp_processor_id();
+ pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
+ }
/*
* If @work was previously on a different pool, it might still be
diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
index c391a91364e9..fa43deda2660 100644
--- a/lib/crypto/chacha20poly1305-selftest.c
+++ b/lib/crypto/chacha20poly1305-selftest.c
@@ -9028,10 +9028,15 @@ bool __init chacha20poly1305_selftest(void)
&& total_len <= 1 << 10; ++total_len) {
for (i = 0; i <= total_len; ++i) {
for (j = i; j <= total_len; ++j) {
+ k = 0;
sg_init_table(sg_src, 3);
- sg_set_buf(&sg_src[0], input, i);
- sg_set_buf(&sg_src[1], input + i, j - i);
- sg_set_buf(&sg_src[2], input + j, total_len - j);
+ if (i)
+ sg_set_buf(&sg_src[k++], input, i);
+ if (j - i)
+ sg_set_buf(&sg_src[k++], input + i, j - i);
+ if (total_len - j)
+ sg_set_buf(&sg_src[k++], input + j, total_len - j);
+ sg_init_marker(sg_src, k);
memset(computed_output, 0, total_len);
memset(input, 0, total_len);
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index e434b05416c6..5280bcf459af 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -240,8 +240,7 @@ again:
if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
&counter)) {
ret = -ENOMEM;
- hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx,
- HUGETLB_MAX);
+ hugetlb_event(h_cg, idx, HUGETLB_MAX);
}
css_put(&h_cg->css);
done:
diff --git a/mm/madvise.c b/mm/madvise.c
index 43b47d3fae02..4bb30ed6c8d2 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -335,12 +335,14 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
}
page = pmd_page(orig_pmd);
+
+ /* Do not interfere with other mappings of this page */
+ if (page_mapcount(page) != 1)
+ goto huge_unlock;
+
if (next - addr != HPAGE_PMD_SIZE) {
int err;
- if (page_mapcount(page) != 1)
- goto huge_unlock;
-
get_page(page);
spin_unlock(ptl);
lock_page(page);
@@ -426,6 +428,10 @@ regular_page:
continue;
}
+ /* Do not interfere with other mappings of this page */
+ if (page_mapcount(page) != 1)
+ continue;
+
VM_BUG_ON_PAGE(PageTransCompound(page), page);
if (pte_young(ptent)) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d09776cd6e10..7ddf91c4295f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -777,6 +777,17 @@ void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val)
rcu_read_unlock();
}
+void mod_memcg_obj_state(void *p, int idx, int val)
+{
+ struct mem_cgroup *memcg;
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_obj(p);
+ if (memcg)
+ mod_memcg_state(memcg, idx, val);
+ rcu_read_unlock();
+}
+
/**
* __count_memcg_events - account VM events in a cgroup
* @memcg: the memory cgroup
@@ -2297,28 +2308,41 @@ static void high_work_func(struct work_struct *work)
#define MEMCG_DELAY_SCALING_SHIFT 14
/*
- * Scheduled by try_charge() to be executed from the userland return path
- * and reclaims memory over the high limit.
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
*/
-void mem_cgroup_handle_over_high(void)
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+ unsigned int nr_pages)
{
- unsigned long usage, high, clamped_high;
- unsigned long pflags;
- unsigned long penalty_jiffies, overage;
- unsigned int nr_pages = current->memcg_nr_pages_over_high;
- struct mem_cgroup *memcg;
+ unsigned long penalty_jiffies;
+ u64 max_overage = 0;
- if (likely(!nr_pages))
- return;
+ do {
+ unsigned long usage, high;
+ u64 overage;
- memcg = get_mem_cgroup_from_mm(current->mm);
- reclaim_high(memcg, nr_pages, GFP_KERNEL);
- current->memcg_nr_pages_over_high = 0;
+ usage = page_counter_read(&memcg->memory);
+ high = READ_ONCE(memcg->high);
+
+ /*
+ * Prevent division by 0 in overage calculation by acting as if
+ * it was a threshold of 1 page
+ */
+ high = max(high, 1UL);
+
+ overage = usage - high;
+ overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+ overage = div64_u64(overage, high);
+
+ if (overage > max_overage)
+ max_overage = overage;
+ } while ((memcg = parent_mem_cgroup(memcg)) &&
+ !mem_cgroup_is_root(memcg));
+
+ if (!max_overage)
+ return 0;
/*
- * memory.high is breached and reclaim is unable to keep up. Throttle
- * allocators proactively to slow down excessive growth.
- *
* We use overage compared to memory.high to calculate the number of
* jiffies to sleep (penalty_jiffies). Ideally this value should be
* fairly lenient on small overages, and increasingly harsh when the
@@ -2326,24 +2350,9 @@ void mem_cgroup_handle_over_high(void)
* its crazy behaviour, so we exponentially increase the delay based on
* overage amount.
*/
-
- usage = page_counter_read(&memcg->memory);
- high = READ_ONCE(memcg->high);
-
- if (usage <= high)
- goto out;
-
- /*
- * Prevent division by 0 in overage calculation by acting as if it was a
- * threshold of 1 page
- */
- clamped_high = max(high, 1UL);
-
- overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
- clamped_high);
-
- penalty_jiffies = ((u64)overage * overage * HZ)
- >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+ penalty_jiffies = max_overage * max_overage * HZ;
+ penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
+ penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;
/*
* Factor in the task's own contribution to the overage, such that four
@@ -2360,7 +2369,32 @@ void mem_cgroup_handle_over_high(void)
* application moving forwards and also permit diagnostics, albeit
* extremely slowly.
*/
- penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+ return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+ unsigned long penalty_jiffies;
+ unsigned long pflags;
+ unsigned int nr_pages = current->memcg_nr_pages_over_high;
+ struct mem_cgroup *memcg;
+
+ if (likely(!nr_pages))
+ return;
+
+ memcg = get_mem_cgroup_from_mm(current->mm);
+ reclaim_high(memcg, nr_pages, GFP_KERNEL);
+ current->memcg_nr_pages_over_high = 0;
+
+ /*
+ * memory.high is breached and reclaim is unable to keep up. Throttle
+ * allocators proactively to slow down excessive growth.
+ */
+ penalty_jiffies = calculate_high_delay(memcg, nr_pages);
/*
* Don't sleep if the amount of jiffies this memcg owes us is so low
@@ -2638,6 +2672,33 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
}
#ifdef CONFIG_MEMCG_KMEM
+/*
+ * Returns a pointer to the memory cgroup to which the kernel object is charged.
+ *
+ * The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(),
+ * cgroup_mutex, etc.
+ */
+struct mem_cgroup *mem_cgroup_from_obj(void *p)
+{
+ struct page *page;
+
+ if (mem_cgroup_disabled())
+ return NULL;
+
+ page = virt_to_head_page(p);
+
+ /*
+ * Slab pages don't have page->mem_cgroup set because corresponding
+ * kmem caches can be reparented during the lifetime. That's why
+ * memcg_from_slab_page() should be used instead.
+ */
+ if (PageSlab(page))
+ return memcg_from_slab_page(page);
+
+ /* All other pages use page->mem_cgroup */
+ return page->mem_cgroup;
+}
+
static int memcg_alloc_cache_id(void)
{
int id, size;
@@ -4027,7 +4088,7 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
struct mem_cgroup_thresholds *thresholds;
struct mem_cgroup_threshold_ary *new;
unsigned long usage;
- int i, j, size;
+ int i, j, size, entries;
mutex_lock(&memcg->thresholds_lock);
@@ -4047,14 +4108,20 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
/* Calculate new number of threshold */
- size = 0;
+ size = entries = 0;
for (i = 0; i < thresholds->primary->size; i++) {
if (thresholds->primary->entries[i].eventfd != eventfd)
size++;
+ else
+ entries++;
}
new = thresholds->spare;
+ /* If no items related to eventfd have been cleared, nothing to do */
+ if (!entries)
+ goto unlock;
+
/* Set thresholds array to NULL if we don't have thresholds */
if (!size) {
kfree(new);
@@ -6682,19 +6749,9 @@ void mem_cgroup_sk_alloc(struct sock *sk)
if (!mem_cgroup_sockets_enabled)
return;
- /*
- * Socket cloning can throw us here with sk_memcg already
- * filled. It won't however, necessarily happen from
- * process context. So the test for root memcg given
- * the current task's memcg won't help us in this case.
- *
- * Respecting the original socket's memcg is a better
- * decision in this case.
- */
- if (sk->sk_memcg) {
- css_get(&sk->sk_memcg->css);
+ /* Do not associate the sock with unrelated interrupted task's memcg. */
+ if (in_interrupt())
return;
- }
rcu_read_lock();
memcg = mem_cgroup_from_task(current);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index ef3973a5d34a..06852b896fa6 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -307,7 +307,8 @@ static void mn_hlist_release(struct mmu_notifier_subscriptions *subscriptions,
* ->release returns.
*/
id = srcu_read_lock(&srcu);
- hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist)
+ hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu))
/*
* If ->release runs before mmu_notifier_unregister it must be
* handled, as it's the only way for the driver to flush all
@@ -370,7 +371,8 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->clear_flush_young)
young |= subscription->ops->clear_flush_young(
subscription, mm, start, end);
@@ -389,7 +391,8 @@ int __mmu_notifier_clear_young(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->clear_young)
young |= subscription->ops->clear_young(subscription,
mm, start, end);
@@ -407,7 +410,8 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->test_young) {
young = subscription->ops->test_young(subscription, mm,
address);
@@ -428,7 +432,8 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->change_pte)
subscription->ops->change_pte(subscription, mm, address,
pte);
@@ -476,7 +481,8 @@ static int mn_hlist_invalidate_range_start(
int id;
id = srcu_read_lock(&srcu);
- hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist) {
+ hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
const struct mmu_notifier_ops *ops = subscription->ops;
if (ops->invalidate_range_start) {
@@ -528,7 +534,8 @@ mn_hlist_invalidate_end(struct mmu_notifier_subscriptions *subscriptions,
int id;
id = srcu_read_lock(&srcu);
- hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist) {
+ hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
/*
* Call invalidate_range here too to avoid the need for the
* subsystem of having to register an invalidate_range_end
@@ -582,7 +589,8 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
if (subscription->ops->invalidate_range)
subscription->ops->invalidate_range(subscription, mm,
start, end);
@@ -714,7 +722,8 @@ find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
spin_lock(&mm->notifier_subscriptions->lock);
hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist) {
+ &mm->notifier_subscriptions->list, hlist,
+ lockdep_is_held(&mm->notifier_subscriptions->lock)) {
if (subscription->ops != ops)
continue;
diff --git a/mm/nommu.c b/mm/nommu.c
index bd2b4e5ef144..318df4e236c9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -370,10 +370,14 @@ void vm_unmap_aliases(void)
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
/*
- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
- * have one.
+ * Implement a stub for vmalloc_sync_[un]mapping() if the architecture
+ * chose not to have one.
*/
-void __weak vmalloc_sync_all(void)
+void __weak vmalloc_sync_mappings(void)
+{
+}
+
+void __weak vmalloc_sync_unmappings(void)
{
}
diff --git a/mm/slub.c b/mm/slub.c
index 17dc00e33115..6589b41d5a60 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1973,8 +1973,6 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
if (node == NUMA_NO_NODE)
searchnode = numa_mem_id();
- else if (!node_present_pages(node))
- searchnode = node_to_mem_node(node);
object = get_partial_node(s, get_node(s, searchnode), c, flags);
if (object || node != NUMA_NO_NODE)
@@ -2563,17 +2561,27 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
struct page *page;
page = c->page;
- if (!page)
+ if (!page) {
+ /*
+ * if the node is not online or has no normal memory, just
+ * ignore the node constraint
+ */
+ if (unlikely(node != NUMA_NO_NODE &&
+ !node_state(node, N_NORMAL_MEMORY)))
+ node = NUMA_NO_NODE;
goto new_slab;
+ }
redo:
if (unlikely(!node_match(page, node))) {
- int searchnode = node;
-
- if (node != NUMA_NO_NODE && !node_present_pages(node))
- searchnode = node_to_mem_node(node);
-
- if (unlikely(!node_match(page, searchnode))) {
+ /*
+ * same as above but node_match() being false already
+ * implies node != NUMA_NO_NODE
+ */
+ if (!node_state(node, N_NORMAL_MEMORY)) {
+ node = NUMA_NO_NODE;
+ goto redo;
+ } else {
stat(s, ALLOC_NODE_MISMATCH);
deactivate_slab(s, page, c->freelist, c);
goto new_slab;
@@ -2997,11 +3005,13 @@ redo:
barrier();
if (likely(page == c->page)) {
- set_freepointer(s, tail_obj, c->freelist);
+ void **freelist = READ_ONCE(c->freelist);
+
+ set_freepointer(s, tail_obj, freelist);
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
- c->freelist, tid,
+ freelist, tid,
head, next_tid(tid)))) {
note_cmpxchg_failure("slab_free", s, tid);
@@ -3175,6 +3185,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
if (unlikely(!object)) {
/*
+ * We may have removed an object from c->freelist using
+ * the fastpath in the previous iteration; in that case,
+ * c->tid has not been bumped yet.
+ * Since ___slab_alloc() may reenable interrupts while
+ * allocating memory, we should bump c->tid now.
+ */
+ c->tid = next_tid(c->tid);
+
+ /*
* Invoking slow path likely have side-effect
* of re-populating per CPU c->freelist
*/
diff --git a/mm/sparse.c b/mm/sparse.c
index 596b2a45b100..65599e8bd636 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -734,6 +734,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
struct mem_section *ms = __pfn_to_section(pfn);
bool section_is_early = early_section(ms);
struct page *memmap = NULL;
+ bool empty;
unsigned long *subsection_map = ms->usage
? &ms->usage->subsection_map[0] : NULL;
@@ -764,7 +765,8 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
* For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
*/
bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
+ empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
+ if (empty) {
unsigned long section_nr = pfn_to_section_nr(pfn);
/*
@@ -779,13 +781,21 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
ms->usage = NULL;
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- ms->section_mem_map = (unsigned long)NULL;
+ /*
+ * Mark the section invalid so that valid_section()
+ * return false. This prevents code from dereferencing
+ * ms->usage array.
+ */
+ ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
}
if (section_is_early && memmap)
free_map_bootmem(memmap);
else
depopulate_section_memmap(pfn, nr_pages, altmap);
+
+ if (empty)
+ ms->section_mem_map = (unsigned long)NULL;
}
static struct page * __meminit section_activate(int nid, unsigned long pfn,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b2a2e45c9a36..be33e6176cd9 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2899,10 +2899,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
p->bdev = inode->i_sb->s_bdev;
}
- inode_lock(inode);
- if (IS_SWAPFILE(inode))
- return -EBUSY;
-
return 0;
}
@@ -3157,36 +3153,41 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
mapping = swap_file->f_mapping;
inode = mapping->host;
- /* will take i_rwsem; */
error = claim_swapfile(p, inode);
if (unlikely(error))
goto bad_swap;
+ inode_lock(inode);
+ if (IS_SWAPFILE(inode)) {
+ error = -EBUSY;
+ goto bad_swap_unlock_inode;
+ }
+
/*
* Read the swap header.
*/
if (!mapping->a_ops->readpage) {
error = -EINVAL;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
page = read_mapping_page(mapping, 0, swap_file);
if (IS_ERR(page)) {
error = PTR_ERR(page);
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
swap_header = kmap(page);
maxpages = read_swap_header(p, swap_header, inode);
if (unlikely(!maxpages)) {
error = -EINVAL;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
/* OK, set up the swap map and apply the bad block list */
swap_map = vzalloc(maxpages);
if (!swap_map) {
error = -ENOMEM;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
@@ -3211,7 +3212,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
GFP_KERNEL);
if (!cluster_info) {
error = -ENOMEM;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
for (ci = 0; ci < nr_cluster; ci++)
@@ -3220,7 +3221,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->percpu_cluster = alloc_percpu(struct percpu_cluster);
if (!p->percpu_cluster) {
error = -ENOMEM;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
for_each_possible_cpu(cpu) {
struct percpu_cluster *cluster;
@@ -3234,13 +3235,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = swap_cgroup_swapon(p->type, maxpages);
if (error)
- goto bad_swap;
+ goto bad_swap_unlock_inode;
nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
cluster_info, maxpages, &span);
if (unlikely(nr_extents < 0)) {
error = nr_extents;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
/* frontswap enabled? set up bit-per-page map for frontswap */
if (IS_ENABLED(CONFIG_FRONTSWAP))
@@ -3280,7 +3281,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = init_swap_address_space(p->type, maxpages);
if (error)
- goto bad_swap;
+ goto bad_swap_unlock_inode;
/*
* Flush any pending IO and dirty mappings before we start using this
@@ -3290,7 +3291,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = inode_drain_writes(inode);
if (error) {
inode->i_flags &= ~S_SWAPFILE;
- goto bad_swap;
+ goto bad_swap_unlock_inode;
}
mutex_lock(&swapon_mutex);
@@ -3315,6 +3316,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = 0;
goto out;
+bad_swap_unlock_inode:
+ inode_unlock(inode);
bad_swap:
free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL;
@@ -3322,6 +3325,7 @@ bad_swap:
set_blocksize(p->bdev, p->old_block_size);
blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}
+ inode = NULL;
destroy_swap_extents(p);
swap_cgroup_swapoff(p->type);
spin_lock(&swap_lock);
@@ -3333,13 +3337,8 @@ bad_swap:
kvfree(frontswap_map);
if (inced_nr_rotate_swap)
atomic_dec(&nr_rotate_swap);
- if (swap_file) {
- if (inode) {
- inode_unlock(inode);
- inode = NULL;
- }
+ if (swap_file)
filp_close(swap_file, NULL);
- }
out:
if (page && !IS_ERR(page)) {
kunmap(page);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1f46c3b86f9f..6b8eeb0ecee5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1295,7 +1295,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
* First make sure the mappings are removed from all page-tables
* before they are freed.
*/
- vmalloc_sync_all();
+ vmalloc_sync_unmappings();
/*
* TODO: to calculate a flush range without looping.
@@ -3128,16 +3128,19 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
EXPORT_SYMBOL(remap_vmalloc_range);
/*
- * Implement a stub for vmalloc_sync_all() if the architecture chose not to
- * have one.
+ * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose
+ * not to have one.
*
* The purpose of this function is to make sure the vmalloc area
* mappings are identical in all page-tables in the system.
*/
-void __weak vmalloc_sync_all(void)
+void __weak vmalloc_sync_mappings(void)
{
}
+void __weak vmalloc_sync_unmappings(void)
+{
+}
static int f(pte_t *pte, unsigned long addr, void *data)
{
diff --git a/net/Kconfig b/net/Kconfig
index 2eeb0e55f7c9..df8d8c9bd021 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -52,6 +52,9 @@ config NET_INGRESS
config NET_EGRESS
bool
+config NET_REDIRECT
+ bool
+
config SKB_EXTENSIONS
bool
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f0209505e41a..a7c8dd7ae513 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -789,6 +789,10 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex);
+ /* interface already disabled by batadv_iv_ogm_iface_disable */
+ if (!*ogm_buff)
+ return;
+
/* the interface gets activated here to avoid race conditions between
* the moment of activating the interface in
* hardif_activate_interface() where the originator mac is set and
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
index 77396a098fbe..efea4874743e 100644
--- a/net/bpfilter/main.c
+++ b/net/bpfilter/main.c
@@ -10,7 +10,7 @@
#include <asm/unistd.h>
#include "msgfmt.h"
-int debug_fd;
+FILE *debug_f;
static int handle_get_cmd(struct mbox_request *cmd)
{
@@ -35,9 +35,10 @@ static void loop(void)
struct mbox_reply reply;
int n;
+ fprintf(debug_f, "testing the buffer\n");
n = read(0, &req, sizeof(req));
if (n != sizeof(req)) {
- dprintf(debug_fd, "invalid request %d\n", n);
+ fprintf(debug_f, "invalid request %d\n", n);
return;
}
@@ -47,7 +48,7 @@ static void loop(void)
n = write(1, &reply, sizeof(reply));
if (n != sizeof(reply)) {
- dprintf(debug_fd, "reply failed %d\n", n);
+ fprintf(debug_f, "reply failed %d\n", n);
return;
}
}
@@ -55,9 +56,10 @@ static void loop(void)
int main(void)
{
- debug_fd = open("/dev/kmsg", 00000002);
- dprintf(debug_fd, "Started bpfilter\n");
+ debug_f = fopen("/dev/kmsg", "w");
+ setvbuf(debug_f, 0, _IOLBF, 0);
+ fprintf(debug_f, "Started bpfilter\n");
loop();
- close(debug_fd);
+ fclose(debug_f);
return 0;
}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 03c7cdd8e4cb..195d2d67be8a 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -112,7 +112,8 @@ static struct caif_device_entry *caif_get(struct net_device *dev)
caif_device_list(dev_net(dev));
struct caif_device_entry *caifd;
- list_for_each_entry_rcu(caifd, &caifdevs->list, list) {
+ list_for_each_entry_rcu(caifd, &caifdevs->list, list,
+ lockdep_rtnl_is_held()) {
if (caifd->netdev == dev)
return caifd;
}
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5b4bd8261002..f8ca5edc5f2c 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -3248,12 +3248,16 @@ static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg)
static void ceph_msg_data_destroy(struct ceph_msg_data *data)
{
- if (data->type == CEPH_MSG_DATA_PAGELIST)
+ if (data->type == CEPH_MSG_DATA_PAGES && data->own_pages) {
+ int num_pages = calc_pages_for(data->alignment, data->length);
+ ceph_release_page_vector(data->pages, num_pages);
+ } else if (data->type == CEPH_MSG_DATA_PAGELIST) {
ceph_pagelist_release(data->pagelist);
+ }
}
void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
- size_t length, size_t alignment)
+ size_t length, size_t alignment, bool own_pages)
{
struct ceph_msg_data *data;
@@ -3265,6 +3269,7 @@ void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
data->pages = pages;
data->length = length;
data->alignment = alignment & ~PAGE_MASK;
+ data->own_pages = own_pages;
msg->data_length += length;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b68b376d8c2f..af868d3923b9 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -962,7 +962,7 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
BUG_ON(length > (u64) SIZE_MAX);
if (length)
ceph_msg_data_add_pages(msg, osd_data->pages,
- length, osd_data->alignment);
+ length, osd_data->alignment, false);
} else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
BUG_ON(!length);
ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
@@ -4436,9 +4436,7 @@ static void handle_watch_notify(struct ceph_osd_client *osdc,
CEPH_MSG_DATA_PAGES);
*lreq->preply_pages = data->pages;
*lreq->preply_len = data->length;
- } else {
- ceph_release_page_vector(data->pages,
- calc_pages_for(0, data->length));
+ data->own_pages = false;
}
}
lreq->notify_finish_error = return_code;
@@ -5506,9 +5504,6 @@ out_unlock_osdc:
return m;
}
-/*
- * TODO: switch to a msg-owned pagelist
- */
static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
{
struct ceph_msg *m;
@@ -5522,7 +5517,6 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
if (data_len) {
struct page **pages;
- struct ceph_osd_data osd_data;
pages = ceph_alloc_page_vector(calc_pages_for(0, data_len),
GFP_NOIO);
@@ -5531,9 +5525,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
return NULL;
}
- ceph_osd_data_pages_init(&osd_data, pages, data_len, 0, false,
- false);
- ceph_osdc_msg_data_add(m, &osd_data);
+ ceph_msg_data_add_pages(m, pages, data_len, 0, true);
}
return m;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4e0de14f80bb..2a6e63a8edbe 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -710,6 +710,15 @@ int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
}
EXPORT_SYMBOL(ceph_pg_poolid_by_name);
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
+{
+ struct ceph_pg_pool_info *pi;
+
+ pi = __lookup_pg_pool(&map->pg_pools, id);
+ return pi ? pi->flags : 0;
+}
+EXPORT_SYMBOL(ceph_pg_pool_flags);
+
static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{
rb_erase(&pi->node, root);
diff --git a/net/compat.c b/net/compat.c
index 47d99c784947..4bed96e84d9a 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -33,10 +33,10 @@
#include <linux/uaccess.h>
#include <net/compat.h>
-int get_compat_msghdr(struct msghdr *kmsg,
- struct compat_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- struct iovec **iov)
+int __get_compat_msghdr(struct msghdr *kmsg,
+ struct compat_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ compat_uptr_t *ptr, compat_size_t *len)
{
struct compat_msghdr msg;
ssize_t err;
@@ -79,10 +79,26 @@ int get_compat_msghdr(struct msghdr *kmsg,
return -EMSGSIZE;
kmsg->msg_iocb = NULL;
+ *ptr = msg.msg_iov;
+ *len = msg.msg_iovlen;
+ return 0;
+}
+
+int get_compat_msghdr(struct msghdr *kmsg,
+ struct compat_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec **iov)
+{
+ compat_uptr_t ptr;
+ compat_size_t len;
+ ssize_t err;
+
+ err = __get_compat_msghdr(kmsg, umsg, save_addr, &ptr, &len);
+ if (err)
+ return err;
- err = compat_import_iovec(save_addr ? READ : WRITE,
- compat_ptr(msg.msg_iov), msg.msg_iovlen,
- UIO_FASTIOV, iov, &kmsg->msg_iter);
+ err = compat_import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr),
+ len, UIO_FASTIOV, iov, &kmsg->msg_iter);
return err < 0 ? err : 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index c6c985fe7b1b..500bba8874b0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4516,7 +4516,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
*/
- if (skb_is_tc_redirected(skb))
+ if (skb_is_redirected(skb))
return XDP_PASS;
/* XDP packets must be linear and must have sufficient headroom
@@ -5063,7 +5063,7 @@ skip_taps:
goto out;
}
#endif
- skb_reset_tc(skb);
+ skb_reset_redirect(skb);
skip_classify:
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
@@ -5195,7 +5195,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
*
* More direct receive version of netif_receive_skb(). It should
* only be used by callers that have a need to skip RPS and Generic XDP.
- * Caller must also take care of handling if (page_is_)pfmemalloc.
+ * Caller must also take care of handling if ``(page_is_)pfmemalloc``.
*
* This function may only be called from softirq context and interrupts
* should be enabled.
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 5e220809844c..b831c5545d6a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3352,34 +3352,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
struct genl_info *info,
union devlink_param_value *value)
{
+ struct nlattr *param_data;
int len;
- if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
- !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
+ param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
+
+ if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
return -EINVAL;
switch (param->type) {
case DEVLINK_PARAM_TYPE_U8:
- value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u8))
+ return -EINVAL;
+ value->vu8 = nla_get_u8(param_data);
break;
case DEVLINK_PARAM_TYPE_U16:
- value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u16))
+ return -EINVAL;
+ value->vu16 = nla_get_u16(param_data);
break;
case DEVLINK_PARAM_TYPE_U32:
- value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u32))
+ return -EINVAL;
+ value->vu32 = nla_get_u32(param_data);
break;
case DEVLINK_PARAM_TYPE_STRING:
- len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]),
- nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
- if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) ||
+ len = strnlen(nla_data(param_data), nla_len(param_data));
+ if (len == nla_len(param_data) ||
len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
return -EINVAL;
- strcpy(value->vstr,
- nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
+ strcpy(value->vstr, nla_data(param_data));
break;
case DEVLINK_PARAM_TYPE_BOOL:
- value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
- true : false;
+ if (param_data && nla_len(param_data))
+ return -EINVAL;
+ value->vbool = nla_get_flag(param_data);
break;
}
return 0;
@@ -5951,6 +5958,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0642f91c4038..b4c87fe31be2 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
kfree(css_cls_state(css));
}
+/*
+ * To avoid freezing of sockets creation for tasks with big number of threads
+ * and opened sockets lets release file_lock every 1000 iterated descriptors.
+ * New sockets will already have been created with new classid.
+ */
+
+struct update_classid_context {
+ u32 classid;
+ unsigned int batch;
+};
+
+#define UPDATE_CLASSID_BATCH 1000
+
static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
int err;
+ struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file, &err);
if (sock) {
spin_lock(&cgroup_sk_update_lock);
- sock_cgroup_set_classid(&sock->sk->sk_cgrp_data,
- (unsigned long)v);
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
spin_unlock(&cgroup_sk_update_lock);
}
+ if (--ctx->batch == 0) {
+ ctx->batch = UPDATE_CLASSID_BATCH;
+ return n + 1;
+ }
return 0;
}
+static void update_classid_task(struct task_struct *p, u32 classid)
+{
+ struct update_classid_context ctx = {
+ .classid = classid,
+ .batch = UPDATE_CLASSID_BATCH
+ };
+ unsigned int fd = 0;
+
+ do {
+ task_lock(p);
+ fd = iterate_fd(p->files, fd, update_classid_sock, &ctx);
+ task_unlock(p);
+ cond_resched();
+ } while (fd);
+}
+
static void cgrp_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
struct task_struct *p;
cgroup_taskset_for_each(p, css, tset) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)css_cls_state(css)->classid);
- task_unlock(p);
+ update_classid_task(p, css_cls_state(css)->classid);
}
}
@@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
css_task_iter_start(css, 0, &it);
while ((p = css_task_iter_next(&it))) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)cs->classid);
- task_unlock(p);
+ update_classid_task(p, cs->classid);
cond_resched();
}
css_task_iter_end(&it);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index acc849df60b5..d0641bba6b81 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3362,7 +3362,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
/* skb was 'freed' by stack, so clean few
* bits and reuse it
*/
- skb_reset_tc(skb);
+ skb_reset_redirect(skb);
} while (--burst > 0);
goto out; /* Skips xmit_mode M_START_XMIT */
} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
diff --git a/net/core/sock.c b/net/core/sock.c
index a4c8fac781ff..8f71684305c3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1830,7 +1830,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
- mem_cgroup_sk_alloc(newsk);
+
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
+
cgroup_sk_alloc(&newsk->sk_cgrp_data);
rcu_read_lock();
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 085cef5857bb..b70c844a88ec 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -233,8 +233,11 @@ static void sock_map_free(struct bpf_map *map)
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
int i;
+ /* After the sync no updates or deletes will be in-flight so it
+ * is safe to walk map and remove entries without risking a race
+ * in EEXIST update case.
+ */
synchronize_rcu();
- raw_spin_lock_bh(&stab->lock);
for (i = 0; i < stab->map.max_entries; i++) {
struct sock **psk = &stab->sks[i];
struct sock *sk;
@@ -248,7 +251,6 @@ static void sock_map_free(struct bpf_map *map)
release_sock(sk);
}
}
- raw_spin_unlock_bh(&stab->lock);
/* wait for psock readers accessing its map link */
synchronize_rcu();
@@ -863,10 +865,13 @@ static void sock_hash_free(struct bpf_map *map)
struct hlist_node *node;
int i;
+ /* After the sync no updates or deletes will be in-flight so it
+ * is safe to walk map and remove entries without risking a race
+ * in EEXIST update case.
+ */
synchronize_rcu();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
- raw_spin_lock_bh(&bucket->lock);
hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
hlist_del_rcu(&elem->node);
lock_sock(elem->sk);
@@ -875,7 +880,6 @@ static void sock_hash_free(struct bpf_map *map)
rcu_read_unlock();
release_sock(elem->sk);
}
- raw_spin_unlock_bh(&bucket->lock);
}
/* wait for psock readers accessing its map link */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index a7662e7a691d..760e6ea3178a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -117,7 +117,9 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
/* port.c */
int dsa_port_set_state(struct dsa_port *dp, u8 state,
struct switchdev_trans *trans);
+int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 774facb8d547..ec13dc666788 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -63,7 +63,7 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
-int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
@@ -78,14 +78,31 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
if (!dp->bridge_dev)
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ if (dp->pl)
+ phylink_start(dp->pl);
+
return 0;
}
-void dsa_port_disable(struct dsa_port *dp)
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+{
+ int err;
+
+ rtnl_lock();
+ err = dsa_port_enable_rt(dp, phy);
+ rtnl_unlock();
+
+ return err;
+}
+
+void dsa_port_disable_rt(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
+ if (dp->pl)
+ phylink_stop(dp->pl);
+
if (!dp->bridge_dev)
dsa_port_set_state_now(dp, BR_STATE_DISABLED);
@@ -93,6 +110,13 @@ void dsa_port_disable(struct dsa_port *dp)
ds->ops->port_disable(ds, port);
}
+void dsa_port_disable(struct dsa_port *dp)
+{
+ rtnl_lock();
+ dsa_port_disable_rt(dp);
+ rtnl_unlock();
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -614,10 +638,6 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
goto err_phy_connect;
}
- rtnl_lock();
- phylink_start(dp->pl);
- rtnl_unlock();
-
return 0;
err_phy_connect:
@@ -628,9 +648,14 @@ err_phy_connect:
int dsa_port_link_register_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
+ struct device_node *phy_np;
- if (!ds->ops->adjust_link)
- return dsa_port_phylink_register(dp);
+ if (!ds->ops->adjust_link) {
+ phy_np = of_parse_phandle(dp->dn, "phy-handle", 0);
+ if (of_phy_is_fixed_link(dp->dn) || phy_np)
+ return dsa_port_phylink_register(dp);
+ return 0;
+ }
dev_warn(ds->dev,
"Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
@@ -645,11 +670,12 @@ void dsa_port_link_unregister_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
- if (!ds->ops->adjust_link) {
+ if (!ds->ops->adjust_link && dp->pl) {
rtnl_lock();
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
phylink_destroy(dp->pl);
+ dp->pl = NULL;
return;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 088c886e609e..ddc0f9236928 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -88,12 +88,10 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_allmulti;
}
- err = dsa_port_enable(dp, dev->phydev);
+ err = dsa_port_enable_rt(dp, dev->phydev);
if (err)
goto clear_promisc;
- phylink_start(dp->pl);
-
return 0;
clear_promisc:
@@ -114,9 +112,7 @@ static int dsa_slave_close(struct net_device *dev)
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- phylink_stop(dp->pl);
-
- dsa_port_disable(dp);
+ dsa_port_disable_rt(dp);
dev_mc_unsync(master, dev);
dev_uc_unsync(master, dev);
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 2fb6c26294b5..b97ad93d1c1a 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -298,47 +298,4 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-/* In the DSA packet_type handler, skb->data points in the middle of the VLAN
- * tag, after tpid and before tci. This is because so far, ETH_HLEN
- * (DMAC, SMAC, EtherType) bytes were pulled.
- * There are 2 bytes of VLAN tag left in skb->data, and upper
- * layers expect the 'real' EtherType to be consumed as well.
- * Coincidentally, a VLAN header is also of the same size as
- * the number of bytes that need to be pulled.
- *
- * skb_mac_header skb->data
- * | |
- * v v
- * | | | | | | | | | | | | | | | | | | |
- * +-----------------------+-----------------------+-------+-------+-------+
- * | Destination MAC | Source MAC | TPID | TCI | EType |
- * +-----------------------+-----------------------+-------+-------+-------+
- * ^ | |
- * |<--VLAN_HLEN-->to <---VLAN_HLEN--->
- * from |
- * >>>>>>> v
- * >>>>>>> | | | | | | | | | | | | | | |
- * >>>>>>> +-----------------------+-----------------------+-------+
- * >>>>>>> | Destination MAC | Source MAC | EType |
- * +-----------------------+-----------------------+-------+
- * ^ ^
- * (now part of | |
- * skb->head) skb_mac_header skb->data
- */
-struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb)
-{
- u8 *from = skb_mac_header(skb);
- u8 *dest = from + VLAN_HLEN;
-
- memmove(dest, from, ETH_HLEN - VLAN_HLEN);
- skb_pull(skb, VLAN_HLEN);
- skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
- skb_pull_rcsum(skb, ETH_HLEN);
-
- return skb;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_remove_header);
-
MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 9c3114179690..9169b63a89e3 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -140,6 +140,8 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
+ skb->offload_fwd_mark = 1;
+
return skb;
}
#endif
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 5366ea430349..d553bf36bd41 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -250,14 +250,14 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
{
struct sja1105_meta meta = {0};
int source_port, switch_id;
- struct vlan_ethhdr *hdr;
+ struct ethhdr *hdr;
u16 tpid, vid, tci;
bool is_link_local;
bool is_tagged;
bool is_meta;
- hdr = vlan_eth_hdr(skb);
- tpid = ntohs(hdr->h_vlan_proto);
+ hdr = eth_hdr(skb);
+ tpid = ntohs(hdr->h_proto);
is_tagged = (tpid == ETH_P_SJA1105);
is_link_local = sja1105_is_link_local(skb);
is_meta = sja1105_is_meta_frame(skb);
@@ -266,7 +266,12 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
if (is_tagged) {
/* Normal traffic path. */
- tci = ntohs(hdr->h_vlan_TCI);
+ skb_push_rcsum(skb, ETH_HLEN);
+ __skb_vlan_pop(skb, &tci);
+ skb_pull_rcsum(skb, ETH_HLEN);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+
vid = tci & VLAN_VID_MASK;
source_port = dsa_8021q_rx_source_port(vid);
switch_id = dsa_8021q_rx_switch_id(vid);
@@ -295,12 +300,6 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
return NULL;
}
- /* Delete/overwrite fake VLAN header, DSA expects to not find
- * it there, see dsa_switch_rcv: skb_push(skb, ETH_HLEN).
- */
- if (is_tagged)
- skb = dsa_8021q_remove_header(skb);
-
return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
is_meta);
}
diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c
index aaef4843e6ba..92599ad7b3c2 100644
--- a/net/ethtool/debug.c
+++ b/net/ethtool/debug.c
@@ -107,8 +107,9 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_msglevel || !dev->ethtool_ops->set_msglevel)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -129,6 +130,7 @@ out_ops:
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 5d16cb4e8693..6e9e0b590bb5 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -126,9 +126,10 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_link_ksettings ||
!dev->ethtool_ops->set_link_ksettings)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -162,6 +163,7 @@ out_ops:
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 96f20be64553..18cc37be2d9c 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -338,9 +338,10 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_link_ksettings ||
!dev->ethtool_ops->set_link_ksettings)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -370,6 +371,7 @@ out_ops:
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 180c194fab07..fc9e0b806889 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -40,6 +40,7 @@ int ethnl_parse_header(struct ethnl_req_info *req_info,
struct nlattr *tb[ETHTOOL_A_HEADER_MAX + 1];
const struct nlattr *devname_attr;
struct net_device *dev = NULL;
+ u32 flags = 0;
int ret;
if (!header) {
@@ -50,8 +51,17 @@ int ethnl_parse_header(struct ethnl_req_info *req_info,
ethnl_header_policy, extack);
if (ret < 0)
return ret;
- devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME];
+ if (tb[ETHTOOL_A_HEADER_FLAGS]) {
+ flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]);
+ if (flags & ~ETHTOOL_FLAG_ALL) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_HEADER_FLAGS],
+ "unrecognized request flags");
+ nl_set_extack_cookie_u32(extack, ETHTOOL_FLAG_ALL);
+ return -EOPNOTSUPP;
+ }
+ }
+ devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME];
if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) {
u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]);
@@ -90,9 +100,7 @@ int ethnl_parse_header(struct ethnl_req_info *req_info,
}
req_info->dev = dev;
- if (tb[ETHTOOL_A_HEADER_FLAGS])
- req_info->flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]);
-
+ req_info->flags = flags;
return 0;
}
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index e1b8a65b64c4..55e1ecaaf739 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -128,8 +128,9 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_wol || !dev->ethtool_ops->set_wol)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -172,6 +173,7 @@ out_ops:
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 3ba7f61be107..a64bb64935a6 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -482,12 +482,9 @@ int hsr_get_node_data(struct hsr_priv *hsr,
struct hsr_port *port;
unsigned long tdiff;
- rcu_read_lock();
node = find_node_by_addr_A(&hsr->node_db, addr);
- if (!node) {
- rcu_read_unlock();
- return -ENOENT; /* No such entry */
- }
+ if (!node)
+ return -ENOENT;
ether_addr_copy(addr_b, node->macaddress_B);
@@ -522,7 +519,5 @@ int hsr_get_node_data(struct hsr_priv *hsr,
*addr_b_ifindex = -1;
}
- rcu_read_unlock();
-
return 0;
}
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 8dc0547f01d0..fae21c863b1f 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -251,15 +251,16 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
if (!na)
goto invalid;
- hsr_dev = __dev_get_by_index(genl_info_net(info),
- nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+ rcu_read_lock();
+ hsr_dev = dev_get_by_index_rcu(genl_info_net(info),
+ nla_get_u32(info->attrs[HSR_A_IFINDEX]));
if (!hsr_dev)
- goto invalid;
+ goto rcu_unlock;
if (!is_hsr_master(hsr_dev))
- goto invalid;
+ goto rcu_unlock;
/* Send reply */
- skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb_out) {
res = -ENOMEM;
goto fail;
@@ -313,12 +314,10 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq);
if (res < 0)
goto nla_put_failure;
- rcu_read_lock();
port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
if (port)
res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX,
port->dev->ifindex);
- rcu_read_unlock();
if (res < 0)
goto nla_put_failure;
@@ -328,20 +327,22 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq);
if (res < 0)
goto nla_put_failure;
- rcu_read_lock();
port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
if (port)
res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX,
port->dev->ifindex);
- rcu_read_unlock();
if (res < 0)
goto nla_put_failure;
+ rcu_read_unlock();
+
genlmsg_end(skb_out, msg_head);
genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid);
return 0;
+rcu_unlock:
+ rcu_read_unlock();
invalid:
netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
return 0;
@@ -351,6 +352,7 @@ nla_put_failure:
/* Fall through */
fail:
+ rcu_read_unlock();
return res;
}
@@ -358,16 +360,14 @@ fail:
*/
static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
{
- /* For receiving */
- struct nlattr *na;
+ unsigned char addr[ETH_ALEN];
struct net_device *hsr_dev;
-
- /* For sending */
struct sk_buff *skb_out;
- void *msg_head;
struct hsr_priv *hsr;
- void *pos;
- unsigned char addr[ETH_ALEN];
+ bool restart = false;
+ struct nlattr *na;
+ void *pos = NULL;
+ void *msg_head;
int res;
if (!info)
@@ -377,15 +377,17 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
if (!na)
goto invalid;
- hsr_dev = __dev_get_by_index(genl_info_net(info),
- nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+ rcu_read_lock();
+ hsr_dev = dev_get_by_index_rcu(genl_info_net(info),
+ nla_get_u32(info->attrs[HSR_A_IFINDEX]));
if (!hsr_dev)
- goto invalid;
+ goto rcu_unlock;
if (!is_hsr_master(hsr_dev))
- goto invalid;
+ goto rcu_unlock;
+restart:
/* Send reply */
- skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb_out = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb_out) {
res = -ENOMEM;
goto fail;
@@ -399,18 +401,26 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
goto nla_put_failure;
}
- res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
- if (res < 0)
- goto nla_put_failure;
+ if (!restart) {
+ res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
+ if (res < 0)
+ goto nla_put_failure;
+ }
hsr = netdev_priv(hsr_dev);
- rcu_read_lock();
- pos = hsr_get_next_node(hsr, NULL, addr);
+ if (!pos)
+ pos = hsr_get_next_node(hsr, NULL, addr);
while (pos) {
res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr);
if (res < 0) {
- rcu_read_unlock();
+ if (res == -EMSGSIZE) {
+ genlmsg_end(skb_out, msg_head);
+ genlmsg_unicast(genl_info_net(info), skb_out,
+ info->snd_portid);
+ restart = true;
+ goto restart;
+ }
goto nla_put_failure;
}
pos = hsr_get_next_node(hsr, pos, addr);
@@ -422,15 +432,18 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
return 0;
+rcu_unlock:
+ rcu_read_unlock();
invalid:
netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
return 0;
nla_put_failure:
- kfree_skb(skb_out);
+ nlmsg_free(skb_out);
/* Fall through */
fail:
+ rcu_read_unlock();
return res;
}
@@ -457,6 +470,7 @@ static struct genl_family hsr_genl_family __ro_after_init = {
.version = 1,
.maxattr = HSR_A_MAX,
.policy = hsr_genl_policy,
+ .netnsok = true,
.module = THIS_MODULE,
.ops = hsr_ops,
.n_ops = ARRAY_SIZE(hsr_ops),
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index fbfd0db182b7..a9104d42aafb 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -145,16 +145,16 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev,
if (!port)
return -ENOMEM;
+ port->hsr = hsr;
+ port->dev = dev;
+ port->type = type;
+
if (type != HSR_PT_MASTER) {
res = hsr_portdev_setup(dev, port);
if (res)
goto fail_dev_setup;
}
- port->hsr = hsr;
- port->dev = dev;
- port->type = type;
-
list_add_tail_rcu(&port->port_list, &hsr->ports);
synchronize_rcu();
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index 2c7a38d76a3a..0672b2f01586 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -21,7 +21,13 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
[IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, },
[IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, },
[IEEE802154_ATTR_PAGE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, },
[IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, },
[IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, },
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f96bd489b362..25a8888826b8 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -129,7 +129,7 @@ config IP_PNP_DHCP
If unsure, say Y. Note that if you want to use DHCP, a DHCP server
must be operating on your network. Read
- <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
+ <file:Documentation/admin-guide/nfs/nfsroot.rst> for details.
config IP_PNP_BOOTP
bool "IP: BOOTP support"
@@ -144,7 +144,7 @@ config IP_PNP_BOOTP
does BOOTP itself, providing all necessary information on the kernel
command line, you can say N here. If unsure, say Y. Note that if you
want to use BOOTP, a BOOTP server must be operating on your network.
- Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
+ Read <file:Documentation/admin-guide/nfs/nfsroot.rst> for details.
config IP_PNP_RARP
bool "IP: RARP support"
@@ -157,7 +157,7 @@ config IP_PNP_RARP
older protocol which is being obsoleted by BOOTP and DHCP), say Y
here. Note that if you want to use RARP, a RARP server must be
operating on your network. Read
- <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
+ <file:Documentation/admin-guide/nfs/nfsroot.rst> for details.
config NET_IPIP
tristate "IP: tunneling"
@@ -303,6 +303,7 @@ config SYN_COOKIES
config NET_IPVTI
tristate "Virtual (secure) IP: tunneling"
+ depends on IPV6 || IPV6=n
select INET_TUNNEL
select NET_IP_TUNNEL
select XFRM
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 574972bc7299..2bf3abeb1456 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -184,7 +184,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
{
const struct tcp_congestion_ops *utcp_ca;
struct tcp_congestion_ops *tcp_ca;
- size_t tcp_ca_name_len;
int prog_fd;
u32 moff;
@@ -199,13 +198,11 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
tcp_ca->flags = utcp_ca->flags;
return 1;
case offsetof(struct tcp_congestion_ops, name):
- tcp_ca_name_len = strnlen(utcp_ca->name, sizeof(utcp_ca->name));
- if (!tcp_ca_name_len ||
- tcp_ca_name_len == sizeof(utcp_ca->name))
+ if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name,
+ sizeof(tcp_ca->name)) <= 0)
return -EINVAL;
if (tcp_ca_find(utcp_ca->name))
return -EEXIST;
- memcpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name));
return 1;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 577db1d50a24..213be9c050ad 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -997,7 +997,9 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
return -ENOENT;
}
+ rcu_read_lock();
err = fib_table_dump(tb, skb, cb, &filter);
+ rcu_read_unlock();
return skb->len ? : err;
}
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 5fd6e8ed02b5..66fdbfe5447c 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -56,7 +56,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
}
EXPORT_SYMBOL_GPL(gre_del_protocol);
-/* Fills in tpi and returns header length to be pulled. */
+/* Fills in tpi and returns header length to be pulled.
+ * Note that caller must use pskb_may_pull() before pulling GRE header.
+ */
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, __be16 proto, int nhs)
{
@@ -110,8 +112,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
* - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
*/
if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ u8 _val, *val;
+
+ val = skb_header_pointer(skb, nhs + hdr_len,
+ sizeof(_val), &_val);
+ if (!val)
+ return -EINVAL;
tpi->proto = proto;
- if ((*(u8 *)options & 0xF0) != 0x40)
+ if ((*val & 0xF0) != 0x40)
hdr_len += 4;
}
tpi->hdr_len = hdr_len;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a4db79b1b643..d545fb99a8a1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -482,8 +482,28 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
}
spin_unlock_bh(&queue->fastopenq.lock);
}
+
out:
release_sock(sk);
+ if (newsk && mem_cgroup_sockets_enabled) {
+ int amt;
+
+ /* atomically get the memory usage, set and charge the
+ * newsk->sk_memcg.
+ */
+ lock_sock(newsk);
+
+ /* The socket has not been accepted yet, no need to look at
+ * newsk->sk_wmem_queued.
+ */
+ amt = sk_mem_pages(newsk->sk_forward_alloc +
+ atomic_read(&newsk->sk_rmem_alloc));
+ mem_cgroup_sk_alloc(newsk);
+ if (newsk->sk_memcg && amt)
+ mem_cgroup_charge_skmem(newsk->sk_memcg, amt);
+
+ release_sock(newsk);
+ }
if (req)
reqsk_put(req);
return newsk;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f11e997e517b..8c8377568a78 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -100,13 +100,9 @@ static size_t inet_sk_attr_size(struct sock *sk,
aux = handler->idiag_get_aux_size(sk, net_admin);
return nla_total_size(sizeof(struct tcp_info))
- + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
- + nla_total_size(1) /* INET_DIAG_TOS */
- + nla_total_size(1) /* INET_DIAG_TCLASS */
- + nla_total_size(4) /* INET_DIAG_MARK */
- + nla_total_size(4) /* INET_DIAG_CLASS_ID */
- + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ + inet_diag_msg_attrs_size()
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+ nla_total_size(TCP_CA_NAME_MAX)
+ nla_total_size(sizeof(struct tcpvegas_info))
@@ -147,6 +143,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
goto errout;
+ if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
+ ext & (1 << (INET_DIAG_TCLASS - 1))) {
+ u32 classid = 0;
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ classid = sock_cgroup_classid(&sk->sk_cgrp_data);
+#endif
+ /* Fallback to socket priority if class id isn't set.
+ * Classful qdiscs use it as direct reference to class.
+ * For cgroup2 classid is always zero.
+ */
+ if (!classid)
+ classid = sk->sk_priority;
+
+ if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
+ goto errout;
+ }
+
r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->idiag_inode = sock_i_ino(sk);
@@ -284,24 +298,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
- if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
- ext & (1 << (INET_DIAG_TCLASS - 1))) {
- u32 classid = 0;
-
-#ifdef CONFIG_SOCK_CGROUP_DATA
- classid = sock_cgroup_classid(&sk->sk_cgrp_data);
-#endif
- /* Fallback to socket priority if class id isn't set.
- * Classful qdiscs use it as direct reference to class.
- * For cgroup2 classid is always zero.
- */
- if (!classid)
- classid = sk->sk_priority;
-
- if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
- goto errout;
- }
-
out:
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8274f98c511c..029b24eeafba 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1153,6 +1153,24 @@ static int ipgre_netlink_parms(struct net_device *dev,
if (data[IFLA_GRE_FWMARK])
*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+ return 0;
+}
+
+static int erspan_netlink_parms(struct net_device *dev,
+ struct nlattr *data[],
+ struct nlattr *tb[],
+ struct ip_tunnel_parm *parms,
+ __u32 *fwmark)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err;
+
+ err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
+ if (err)
+ return err;
+ if (!data)
+ return 0;
+
if (data[IFLA_GRE_ERSPAN_VER]) {
t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
@@ -1276,45 +1294,70 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
-static int ipgre_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
+static int
+ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
{
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
- __u32 fwmark = 0;
- int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
- err = ip_tunnel_encap_setup(t, &ipencap);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
+ return 0;
+}
+
+static int ipgre_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_tunnel_parm p;
+ __u32 fwmark = 0;
+ int err;
+
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
return ip_tunnel_newlink(dev, tb, &p, fwmark);
}
+static int erspan_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_tunnel_parm p;
+ __u32 fwmark = 0;
+ int err;
+
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
+ err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
+ if (err)
+ return err;
+ return ip_tunnel_newlink(dev, tb, &p, fwmark);
+}
+
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_encap ipencap;
__u32 fwmark = t->fwmark;
struct ip_tunnel_parm p;
int err;
- if (ipgre_netlink_encap_parms(data, &ipencap)) {
- err = ip_tunnel_encap_setup(t, &ipencap);
-
- if (err < 0)
- return err;
- }
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
@@ -1327,8 +1370,34 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
t->parms.i_flags = p.i_flags;
t->parms.o_flags = p.o_flags;
- if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
- ipgre_link_update(dev, !tb[IFLA_MTU]);
+ ipgre_link_update(dev, !tb[IFLA_MTU]);
+
+ return 0;
+}
+
+static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ __u32 fwmark = t->fwmark;
+ struct ip_tunnel_parm p;
+ int err;
+
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
+ err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
+ if (err < 0)
+ return err;
+
+ err = ip_tunnel_changelink(dev, tb, &p, fwmark);
+ if (err < 0)
+ return err;
+
+ t->parms.i_flags = p.i_flags;
+ t->parms.o_flags = p.o_flags;
return 0;
}
@@ -1519,8 +1588,8 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = {
.priv_size = sizeof(struct ip_tunnel),
.setup = erspan_setup,
.validate = erspan_validate,
- .newlink = ipgre_newlink,
- .changelink = ipgre_changelink,
+ .newlink = erspan_newlink,
+ .changelink = erspan_changelink,
.dellink = ip_tunnel_dellink,
.get_size = ipgre_get_size,
.fill_info = ipgre_fill_info,
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 37cddd18f282..1b4e6f298648 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -187,17 +187,39 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
int mtu;
if (!dst) {
- struct rtable *rt;
-
- fl->u.ip4.flowi4_oif = dev->ifindex;
- fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
- rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
- if (IS_ERR(rt)) {
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ struct rtable *rt;
+
+ fl->u.ip4.flowi4_oif = dev->ifindex;
+ fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+ rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ dst = &rt->dst;
+ skb_dst_set(skb, dst);
+ break;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ fl->u.ip6.flowi6_oif = dev->ifindex;
+ fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
+ dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ skb_dst_set(skb, dst);
+ break;
+#endif
+ default:
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
- dst = &rt->dst;
- skb_dst_set(skb, dst);
}
dst_hold(dst);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4438f6b12335..561f15b5a944 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1621,7 +1621,7 @@ late_initcall(ip_auto_config);
/*
* Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
- * command line parameter. See Documentation/filesystems/nfs/nfsroot.txt.
+ * command line parameter. See Documentation/admin-guide/nfs/nfsroot.rst.
*/
static int __init ic_proto_name(char *name)
{
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index e35736b99300..a93e7d1e1251 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -100,8 +100,9 @@ static int raw_diag_dump_one(struct sk_buff *in_skb,
if (IS_ERR(sk))
return PTR_ERR(sk);
- rep = nlmsg_new(sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) + 64,
+ rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
+ inet_diag_msg_attrs_size() +
+ nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
GFP_KERNEL);
if (!rep) {
sock_put(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eb2d80519f8e..dc77c303e6f7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2948,8 +2948,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
err = -EPERM;
else if (tp->repair_queue == TCP_SEND_QUEUE)
WRITE_ONCE(tp->write_seq, val);
- else if (tp->repair_queue == TCP_RECV_QUEUE)
+ else if (tp->repair_queue == TCP_RECV_QUEUE) {
WRITE_ONCE(tp->rcv_nxt, val);
+ WRITE_ONCE(tp->copied_seq, val);
+ }
else
err = -EINVAL;
break;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 306e25d743e8..2f45cde168c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1109,6 +1109,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(!skb))
return -ENOBUFS;
+ /* retransmit skbs might have a non zero value in skb->dev
+ * because skb->dev is aliased with skb->rbnode.rb_left
+ */
+ skb->dev = NULL;
}
inet = inet_sk(sk);
@@ -3037,8 +3041,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
tcp_skb_tsorted_save(skb) {
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
- err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ if (nskb) {
+ nskb->dev = NULL;
+ err = tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC);
+ } else {
+ err = -ENOBUFS;
+ }
} tcp_skb_tsorted_restore(skb);
if (!err) {
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 910555a4d9fe..dccd2286bc28 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -64,8 +64,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
goto out;
err = -ENOMEM;
- rep = nlmsg_new(sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) + 64,
+ rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
+ inet_diag_msg_attrs_size() +
+ nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
GFP_KERNEL);
if (!rep)
goto out;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cb493e15959c..46d614b611db 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1226,11 +1226,13 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
}
static void
-cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
+cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
+ bool del_rt, bool del_peer)
{
struct fib6_info *f6i;
- f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->idev->dev, 0, RTF_DEFAULT, true);
if (f6i) {
if (del_rt)
@@ -1293,7 +1295,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (action != CLEANUP_PREFIX_RT_NOP) {
cleanup_prefix_route(ifp, expires,
- action == CLEANUP_PREFIX_RT_DEL);
+ action == CLEANUP_PREFIX_RT_DEL, false);
}
/* clean up prefsrc entries */
@@ -3345,6 +3347,10 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_NONE) &&
(dev->type != ARPHRD_RAWIP)) {
/* Alas, we support only Ethernet autoconfiguration. */
+ idev = __in6_dev_get(dev);
+ if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP &&
+ dev->flags & IFF_MULTICAST)
+ ipv6_mc_up(idev);
return;
}
@@ -4586,12 +4592,14 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int modify_prefix_route(struct inet6_ifaddr *ifp,
- unsigned long expires, u32 flags)
+ unsigned long expires, u32 flags,
+ bool modify_peer)
{
struct fib6_info *f6i;
u32 prio;
- f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ f6i = addrconf_get_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->idev->dev, 0, RTF_DEFAULT, true);
if (!f6i)
return -ENOENT;
@@ -4602,7 +4610,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ip6_del_rt(dev_net(ifp->idev->dev), f6i);
/* add new one */
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+ addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
} else {
@@ -4624,6 +4633,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
unsigned long timeout;
bool was_managetempaddr;
bool had_prefixroute;
+ bool new_peer = false;
ASSERT_RTNL();
@@ -4655,6 +4665,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
cfg->preferred_lft = timeout;
}
+ if (cfg->peer_pfx &&
+ memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) {
+ if (!ipv6_addr_any(&ifp->peer_addr))
+ cleanup_prefix_route(ifp, expires, true, true);
+ new_peer = true;
+ }
+
spin_lock_bh(&ifp->lock);
was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR;
had_prefixroute = ifp->flags & IFA_F_PERMANENT &&
@@ -4670,6 +4687,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
ifp->rt_priority = cfg->rt_priority;
+ if (new_peer)
+ ifp->peer_addr = *cfg->peer_pfx;
+
spin_unlock_bh(&ifp->lock);
if (!(ifp->flags&IFA_F_TENTATIVE))
ipv6_ifa_notify(0, ifp);
@@ -4678,7 +4698,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
int rc = -ENOENT;
if (had_prefixroute)
- rc = modify_prefix_route(ifp, expires, flags);
+ rc = modify_prefix_route(ifp, expires, flags, false);
/* prefix route could have been deleted; if so restore it */
if (rc == -ENOENT) {
@@ -4686,6 +4706,15 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
}
+
+ if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr))
+ rc = modify_prefix_route(ifp, expires, flags, true);
+
+ if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) {
+ addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len,
+ ifp->rt_priority, ifp->idev->dev,
+ expires, flags, GFP_KERNEL);
+ }
} else if (had_prefixroute) {
enum cleanup_prefix_rt_t action;
unsigned long rt_expires;
@@ -4696,7 +4725,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
if (action != CLEANUP_PREFIX_RT_NOP) {
cleanup_prefix_route(ifp, rt_expires,
- action == CLEANUP_PREFIX_RT_DEL);
+ action == CLEANUP_PREFIX_RT_DEL, false);
}
}
@@ -5983,9 +6012,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
if (!ipv6_addr_any(&ifp->peer_addr))
- addrconf_prefix_route(&ifp->peer_addr, 128, 0,
- ifp->idev->dev, 0, 0,
- GFP_ATOMIC);
+ addrconf_prefix_route(&ifp->peer_addr, 128,
+ ifp->rt_priority, ifp->idev->dev,
+ 0, 0, GFP_ATOMIC);
break;
case RTM_DELADDR:
if (ifp->idev->cnf.forwarding)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 524006aa0d78..cc6180e08a4f 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -311,7 +311,7 @@ static int vti6_rcv(struct sk_buff *skb)
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
rcu_read_unlock();
- return 0;
+ goto discard;
}
ipv6h = ipv6_hdr(skb);
@@ -450,15 +450,33 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
int mtu;
if (!dst) {
- fl->u.ip6.flowi6_oif = dev->ifindex;
- fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
- dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
- if (dst->error) {
- dst_release(dst);
- dst = NULL;
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ struct rtable *rt;
+
+ fl->u.ip4.flowi4_oif = dev->ifindex;
+ fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+ rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
+ if (IS_ERR(rt))
+ goto tx_err_link_failure;
+ dst = &rt->dst;
+ skb_dst_set(skb, dst);
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ fl->u.ip6.flowi6_oif = dev->ifindex;
+ fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
+ dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
+ }
+ skb_dst_set(skb, dst);
+ break;
+ default:
goto tx_err_link_failure;
}
- skb_dst_set(skb, dst);
}
dst_hold(dst);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index ab7f124ff5d7..8c52efe299cc 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -268,7 +268,7 @@ static int seg6_do_srh(struct sk_buff *skb)
skb_mac_header_rebuild(skb);
skb_push(skb, skb->mac_len);
- err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
+ err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET);
if (err)
return err;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 7cbc19731997..8165802d8e05 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -282,7 +282,7 @@ static int input_action_end_dx2(struct sk_buff *skb,
struct net_device *odev;
struct ethhdr *eth;
- if (!decap_and_validate(skb, NEXTHDR_NONE))
+ if (!decap_and_validate(skb, IPPROTO_ETHERNET))
goto drop;
if (!pskb_may_pull(skb, ETH_HLEN))
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index e11bdb0aaa15..25b7ebda2fab 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -78,7 +78,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const
hlist_for_each_entry_rcu(x6spi,
&xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
- list_byaddr) {
+ list_byaddr, lockdep_is_held(&xfrm6_tunnel_spi_lock)) {
if (xfrm6_addr_equal(&x6spi->addr, saddr))
return x6spi;
}
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index c80b1e163ea4..3419ed66c7b0 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -5,7 +5,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -78,6 +78,7 @@ static const char * const sta_flag_names[] = {
FLAG(MPSP_OWNER),
FLAG(MPSP_RECIPIENT),
FLAG(PS_DELIVER),
+ FLAG(USES_ENCRYPTION),
#undef FLAG
};
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 0f889b919b06..efc1acc6543c 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -6,7 +6,7 @@
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright 2018-2019 Intel Corporation
+ * Copyright 2018-2020 Intel Corporation
*/
#include <linux/if_ether.h>
@@ -262,22 +262,29 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
sta ? sta->sta.addr : bcast_addr, ret);
}
-int ieee80211_set_tx_key(struct ieee80211_key *key)
+static int _ieee80211_set_tx_key(struct ieee80211_key *key, bool force)
{
struct sta_info *sta = key->sta;
struct ieee80211_local *local = key->local;
assert_key_lock(local);
+ set_sta_flag(sta, WLAN_STA_USES_ENCRYPTION);
+
sta->ptk_idx = key->conf.keyidx;
- if (!ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT))
+ if (force || !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT))
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
ieee80211_check_fast_xmit(sta);
return 0;
}
+int ieee80211_set_tx_key(struct ieee80211_key *key)
+{
+ return _ieee80211_set_tx_key(key, false);
+}
+
static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
struct ieee80211_key *new)
{
@@ -441,11 +448,8 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
if (pairwise) {
rcu_assign_pointer(sta->ptk[idx], new);
if (new &&
- !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)) {
- sta->ptk_idx = idx;
- clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
- ieee80211_check_fast_xmit(sta);
- }
+ !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX))
+ _ieee80211_set_tx_key(new, true);
} else {
rcu_assign_pointer(sta->gtk[idx], new);
}
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d69983370381..38a0383dfbcf 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1152,7 +1152,8 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
}
}
- if (!(mpath->flags & MESH_PATH_RESOLVING))
+ if (!(mpath->flags & MESH_PATH_RESOLVING) &&
+ mesh_path_sel_is_hwmp(sdata))
mesh_queue_preq(mpath, PREQ_Q_F_START);
if (skb_queue_len(&mpath->frame_queue) >= MESH_FRAME_QUEUE_LEN)
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 0f5f40678885..e3572be307d6 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
*/
#include <linux/module.h>
@@ -1049,6 +1049,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
might_sleep();
lockdep_assert_held(&local->sta_mtx);
+ while (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
+ ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
+ WARN_ON_ONCE(ret);
+ }
+
/* now keys can no longer be reached */
ieee80211_free_sta_keys(local, sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index c00e28585f9d..552eed36faca 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -98,6 +98,7 @@ enum ieee80211_sta_info_flags {
WLAN_STA_MPSP_OWNER,
WLAN_STA_MPSP_RECIPIENT,
WLAN_STA_PS_DELIVER,
+ WLAN_STA_USES_ENCRYPTION,
NUM_WLAN_STA_FLAGS,
};
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 87def9cb91ff..d9cca6dbd870 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018, 2020 Intel Corporation
*
* Transmit and frame generation functions.
*/
@@ -590,10 +590,13 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
- if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT))
+ if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) {
tx->key = NULL;
- else if (tx->sta &&
- (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx])))
+ return TX_CONTINUE;
+ }
+
+ if (tx->sta &&
+ (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx])))
tx->key = key;
else if (ieee80211_is_group_privacy_action(tx->skb) &&
(key = rcu_dereference(tx->sdata->default_multicast_key)))
@@ -654,6 +657,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
if (!skip_hw && tx->key &&
tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
info->control.hw_key = &tx->key->conf;
+ } else if (!ieee80211_is_mgmt(hdr->frame_control) && tx->sta &&
+ test_sta_flag(tx->sta, WLAN_STA_USES_ENCRYPTION)) {
+ return TX_DROP;
}
return TX_CONTINUE;
@@ -3598,8 +3604,25 @@ begin:
tx.skb = skb;
tx.sdata = vif_to_sdata(info->control.vif);
- if (txq->sta)
+ if (txq->sta) {
tx.sta = container_of(txq->sta, struct sta_info, sta);
+ /*
+ * Drop unicast frames to unauthorised stations unless they are
+ * EAPOL frames from the local station.
+ */
+ if (unlikely(!ieee80211_vif_is_mesh(&tx.sdata->vif) &&
+ tx.sdata->vif.type != NL80211_IFTYPE_OCB &&
+ !is_multicast_ether_addr(hdr->addr1) &&
+ !test_sta_flag(tx.sta, WLAN_STA_AUTHORIZED) &&
+ (!(info->control.flags &
+ IEEE80211_TX_CTRL_PORT_CTRL_PROTO) ||
+ !ether_addr_equal(tx.sdata->vif.addr,
+ hdr->addr2)))) {
+ I802_DEBUG_INC(local->tx_handlers_drop_unauth_port);
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ }
+ }
/*
* The key can be removed while the packet was queued, so need to call
@@ -5126,6 +5149,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ethhdr *ehdr;
+ u32 ctrl_flags = 0;
u32 flags;
/* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE
@@ -5135,6 +5159,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
proto != cpu_to_be16(ETH_P_PREAUTH))
return -EINVAL;
+ if (proto == sdata->control_port_protocol)
+ ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
+
if (unencrypted)
flags = IEEE80211_TX_INTFL_DONT_ENCRYPT;
else
@@ -5160,7 +5187,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
skb_reset_mac_header(skb);
local_bh_disable();
- __ieee80211_subif_start_xmit(skb, skb->dev, flags, 0);
+ __ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags);
local_bh_enable();
return 0;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 45acd877bef3..fd2c3150e591 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -334,6 +334,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
struct mptcp_sock *msk;
unsigned int ack_size;
bool ret = false;
+ bool can_ack;
+ u64 ack_seq;
u8 tcp_fin;
if (skb) {
@@ -360,9 +362,22 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
ret = true;
}
+ /* passive sockets msk will set the 'can_ack' after accept(), even
+ * if the first subflow may already have the remote key handy
+ */
+ can_ack = true;
opts->ext_copy.use_ack = 0;
msk = mptcp_sk(subflow->conn);
- if (!msk || !READ_ONCE(msk->can_ack)) {
+ if (likely(msk && READ_ONCE(msk->can_ack))) {
+ ack_seq = msk->ack_seq;
+ } else if (subflow->can_ack) {
+ mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
+ ack_seq++;
+ } else {
+ can_ack = false;
+ }
+
+ if (unlikely(!can_ack)) {
*size = ALIGN(dss_size, 4);
return ret;
}
@@ -375,7 +390,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
dss_size += ack_size;
- opts->ext_copy.data_ack = msk->ack_seq;
+ opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
opts->ext_copy.use_ack = 1;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 410809c669e1..4912069627b6 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -411,7 +411,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu + 1;
return per_cpu_ptr(net->ct.stat, cpu);
}
-
+ (*pos)++;
return NULL;
}
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 8af28e10b4e6..70ebebaf5bc1 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -554,6 +554,9 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
nf_flow_table_offload_flush(flow_table);
+ if (nf_flowtable_hw_offload(flow_table))
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
+ flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 9e563fd3da0f..ba775aecd89a 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -146,11 +146,13 @@ static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+ nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
return -1;
+
+ iph = ip_hdr(skb);
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
(nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+ nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
return -1;
return 0;
@@ -189,6 +191,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
+ iph = ip_hdr(skb);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
@@ -426,11 +429,13 @@ static int nf_flow_nat_ipv6(const struct flow_offload *flow,
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
(nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
return -1;
+
+ ip6h = ipv6_hdr(skb);
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
(nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
return -1;
return 0;
@@ -459,6 +464,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
+ ip6h = ipv6_hdr(skb);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 06f00cdc3891..f2c22c682851 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -87,6 +87,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
default:
return -EOPNOTSUPP;
}
+ mask->control.addr_type = 0xffff;
match->dissector.used_keys |= BIT(key->control.addr_type);
mask->basic.n_proto = 0xffff;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b0930d4aba22..b9cbe1e2453e 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -267,7 +267,7 @@ static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu + 1;
return per_cpu_ptr(snet->stats, cpu);
}
-
+ (*pos)++;
return NULL;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d1318bdf49ca..d11f1a74d43c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1405,6 +1405,11 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
lockdep_commit_lock_is_held(net));
if (nft_dump_stats(skb, stats))
goto nla_put_failure;
+
+ if ((chain->flags & NFT_CHAIN_HW_OFFLOAD) &&
+ nla_put_be32(skb, NFTA_CHAIN_FLAGS,
+ htonl(NFT_CHAIN_HW_OFFLOAD)))
+ goto nla_put_failure;
}
if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
@@ -5077,6 +5082,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = -EBUSY;
else if (!(nlmsg_flags & NLM_F_EXCL))
err = 0;
+ } else if (err == -ENOTEMPTY) {
+ /* ENOTEMPTY reports overlapping between this element
+ * and an existing one.
+ */
+ err = -EEXIST;
}
goto err_element_clash;
}
@@ -6300,8 +6310,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err4;
err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
- if (err < 0)
+ if (err < 0) {
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
goto err4;
+ }
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
@@ -7378,13 +7393,8 @@ static void nf_tables_module_autoload(struct net *net)
list_splice_init(&net->nft.module_list, &module_list);
mutex_unlock(&net->nft.commit_mutex);
list_for_each_entry_safe(req, next, &module_list, list) {
- if (req->done) {
- list_del(&req->list);
- kfree(req);
- } else {
- request_module("%s", req->module);
- req->done = true;
- }
+ request_module("%s", req->module);
+ req->done = true;
}
mutex_lock(&net->nft.commit_mutex);
list_splice(&module_list, &net->nft.module_list);
@@ -8167,6 +8177,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
__nft_release_tables(net);
mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables));
+ WARN_ON_ONCE(!list_empty(&net->nft.module_list));
}
static struct pernet_operations nf_tables_net_ops = {
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index de3a9596b7f1..a5f294aa8e4c 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -742,6 +742,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
[NFCTH_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN-1 },
[NFCTH_QUEUE_NUM] = { .type = NLA_U32, },
+ [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, },
+ [NFCTH_STATUS] = { .type = NLA_U32, },
};
static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = {
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index ff9ac8ae0031..eac4a901233f 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -89,6 +89,7 @@ static const struct nft_chain_type nft_chain_nat_inet = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_INET,
+ .owner = THIS_MODULE,
.hook_mask = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_LOCAL_OUT) |
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index aba11c2333f3..3087e23297db 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -28,6 +28,9 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
struct nft_fwd_netdev *priv = nft_expr_priv(expr);
int oif = regs->data[priv->sreg_dev];
+ /* This is used by ifb only. */
+ skb_set_redirected(pkt->skb, true);
+
nf_fwd_netdev_egress(pkt, oif);
regs->verdict.code = NF_STOLEN;
}
@@ -190,6 +193,13 @@ nla_put_failure:
return -1;
}
+static int nft_fwd_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS));
+}
+
static struct nft_expr_type nft_fwd_netdev_type;
static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.type = &nft_fwd_netdev_type,
@@ -197,6 +207,7 @@ static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.eval = nft_fwd_neigh_eval,
.init = nft_fwd_neigh_init,
.dump = nft_fwd_neigh_dump,
+ .validate = nft_fwd_validate,
};
static const struct nft_expr_ops nft_fwd_netdev_ops = {
@@ -205,6 +216,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.eval = nft_fwd_netdev_eval,
.init = nft_fwd_netdev_init,
.dump = nft_fwd_netdev_dump,
+ .validate = nft_fwd_validate,
.offload = nft_fwd_netdev_offload,
};
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 1993af3a2979..a7de3a58f553 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -129,6 +129,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
[NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
[NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
[NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 },
};
static int nft_payload_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 4fc0c924ed5d..ef7e8ad2e344 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1098,21 +1098,41 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_field *f;
int i, bsize_max, err = 0;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
+ end = (const u8 *)nft_set_ext_key_end(ext)->data;
+ else
+ end = start;
+
dup = pipapo_get(net, set, start, genmask);
- if (PTR_ERR(dup) == -ENOENT) {
- if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) {
- end = (const u8 *)nft_set_ext_key_end(ext)->data;
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
- } else {
- end = start;
+ if (!IS_ERR(dup)) {
+ /* Check if we already have the same exact entry */
+ const struct nft_data *dup_key, *dup_end;
+
+ dup_key = nft_set_ext_key(&dup->ext);
+ if (nft_set_ext_exists(&dup->ext, NFT_SET_EXT_KEY_END))
+ dup_end = nft_set_ext_key_end(&dup->ext);
+ else
+ dup_end = dup_key;
+
+ if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
+ !memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
+ *ext2 = &dup->ext;
+ return -EEXIST;
}
+
+ return -ENOTEMPTY;
+ }
+
+ if (PTR_ERR(dup) == -ENOENT) {
+ /* Look for partially overlapping entries */
+ dup = pipapo_get(net, set, end, nft_genmask_next(net));
}
if (PTR_ERR(dup) != -ENOENT) {
if (IS_ERR(dup))
return PTR_ERR(dup);
*ext2 = &dup->ext;
- return -EEXIST;
+ return -ENOTEMPTY;
}
/* Validate */
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 5000b938ab1e..8617fc16a1ed 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -33,6 +33,11 @@ static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
}
+static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
+{
+ return !nft_rbtree_interval_end(rbe);
+}
+
static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
const struct nft_rbtree_elem *interval)
{
@@ -64,7 +69,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(interval))
+ nft_rbtree_interval_start(interval))
continue;
interval = rbe;
} else if (d > 0)
@@ -89,7 +94,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_rbtree_interval_end(interval)) {
+ nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
}
@@ -208,8 +213,43 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_next(net);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
+ bool overlap = false;
int d;
+ /* Detect overlaps as we descend the tree. Set the flag in these cases:
+ *
+ * a1. |__ _ _? >|__ _ _ (insert start after existing start)
+ * a2. _ _ __>| ?_ _ __| (insert end before existing end)
+ * a3. _ _ ___| ?_ _ _>| (insert end after existing end)
+ * a4. >|__ _ _ _ _ __| (insert start before existing end)
+ *
+ * and clear it later on, as we eventually reach the points indicated by
+ * '?' above, in the cases described below. We'll always meet these
+ * later, locally, due to tree ordering, and overlaps for the intervals
+ * that are the closest together are always evaluated last.
+ *
+ * b1. |__ _ _! >|__ _ _ (insert start after existing end)
+ * b2. _ _ __>| !_ _ __| (insert end before existing start)
+ * b3. !_____>| (insert end after existing start)
+ *
+ * Case a4. resolves to b1.:
+ * - if the inserted start element is the leftmost, because the '0'
+ * element in the tree serves as end element
+ * - otherwise, if an existing end is found. Note that end elements are
+ * always inserted after corresponding start elements.
+ *
+ * For a new, rightmost pair of elements, we'll hit cases b1. and b3.,
+ * in that order.
+ *
+ * The flag is also cleared in two special cases:
+ *
+ * b4. |__ _ _!|<_ _ _ (insert start right before existing end)
+ * b5. |__ _ >|!__ _ _ (insert end right after existing start)
+ *
+ * which always happen as last step and imply that no further
+ * overlapping is possible.
+ */
+
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
@@ -218,17 +258,42 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
d = memcmp(nft_set_ext_key(&rbe->ext),
nft_set_ext_key(&new->ext),
set->klen);
- if (d < 0)
+ if (d < 0) {
p = &parent->rb_left;
- else if (d > 0)
+
+ if (nft_rbtree_interval_start(new)) {
+ overlap = nft_rbtree_interval_start(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask);
+ } else {
+ overlap = nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask);
+ }
+ } else if (d > 0) {
p = &parent->rb_right;
- else {
+
+ if (nft_rbtree_interval_end(new)) {
+ overlap = nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask);
+ } else if (nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext, genmask)) {
+ overlap = true;
+ }
+ } else {
if (nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(new)) {
+ nft_rbtree_interval_start(new)) {
p = &parent->rb_left;
- } else if (!nft_rbtree_interval_end(rbe) &&
+
+ if (nft_set_elem_active(&rbe->ext, genmask))
+ overlap = false;
+ } else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(new)) {
p = &parent->rb_right;
+
+ if (nft_set_elem_active(&rbe->ext, genmask))
+ overlap = false;
} else if (nft_set_elem_active(&rbe->ext, genmask)) {
*ext = &rbe->ext;
return -EEXIST;
@@ -237,6 +302,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
}
+
+ if (overlap)
+ return -ENOTEMPTY;
+
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
@@ -317,10 +386,10 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
else {
if (nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(this)) {
+ nft_rbtree_interval_start(this)) {
parent = parent->rb_left;
continue;
- } else if (!nft_rbtree_interval_end(rbe) &&
+ } else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 4c3f2e24c7cb..764e88682a81 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -339,6 +339,8 @@ static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] =
[NFTA_TUNNEL_KEY_FLAGS] = { .type = NLA_U32, },
[NFTA_TUNNEL_KEY_TOS] = { .type = NLA_U8, },
[NFTA_TUNNEL_KEY_TTL] = { .type = NLA_U8, },
+ [NFTA_TUNNEL_KEY_SPORT] = { .type = NLA_U16, },
+ [NFTA_TUNNEL_KEY_DPORT] = { .type = NLA_U16, },
[NFTA_TUNNEL_KEY_OPTS] = { .type = NLA_NESTED, },
};
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e27c6c5ba9df..cd2b034eef59 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1551,6 +1551,9 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
+ if (ppos != NULL)
+ ++(*ppos);
+
switch (trav->class) {
case MTTG_TRAV_INIT:
trav->class = MTTG_TRAV_NFP_UNSPEC;
@@ -1576,9 +1579,6 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
default:
return NULL;
}
-
- if (ppos != NULL)
- ++*ppos;
return trav;
}
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 0a9708004e20..225a7ab6d79a 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -492,12 +492,12 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
const struct recent_entry *e = v;
const struct list_head *head = e->list.next;
+ (*pos)++;
while (head == &t->iphash[st->bucket]) {
if (++st->bucket >= ip_list_hash_size)
return NULL;
head = t->iphash[st->bucket].next;
}
- (*pos)++;
return list_entry(head, struct recent_entry, list);
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index edf3e285e242..2f234791b879 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2392,19 +2392,14 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
if (nlk_has_extack && extack && extack->_msg)
tlvlen += nla_total_size(strlen(extack->_msg) + 1);
- if (err) {
- if (!(nlk->flags & NETLINK_F_CAP_ACK))
- payload += nlmsg_len(nlh);
- else
- flags |= NLM_F_CAPPED;
- if (nlk_has_extack && extack && extack->bad_attr)
- tlvlen += nla_total_size(sizeof(u32));
- } else {
+ if (err && !(nlk->flags & NETLINK_F_CAP_ACK))
+ payload += nlmsg_len(nlh);
+ else
flags |= NLM_F_CAPPED;
-
- if (nlk_has_extack && extack && extack->cookie_len)
- tlvlen += nla_total_size(extack->cookie_len);
- }
+ if (err && nlk_has_extack && extack && extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (nlk_has_extack && extack && extack->cookie_len)
+ tlvlen += nla_total_size(extack->cookie_len);
if (tlvlen)
flags |= NLM_F_ACK_TLVS;
@@ -2427,20 +2422,16 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
extack->_msg));
}
- if (err) {
- if (extack->bad_attr &&
- !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
- (u8 *)extack->bad_attr >= in_skb->data +
- in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
- (u8 *)extack->bad_attr -
- in_skb->data));
- } else {
- if (extack->cookie_len)
- WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
- extack->cookie_len,
- extack->cookie));
- }
+ if (err && extack->bad_attr &&
+ !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
+ (u8 *)extack->bad_attr >= in_skb->data +
+ in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+ (u8 *)extack->bad_attr -
+ (u8 *)nlh));
+ if (extack->cookie_len)
+ WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
+ extack->cookie_len, extack->cookie));
}
nlmsg_end(skb, rep);
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 6f1b096e601c..43811b5219b5 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -181,13 +181,20 @@ exit:
void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
struct sk_buff *skb)
{
- u8 gate = hdev->pipes[pipe].gate;
u8 status = NFC_HCI_ANY_OK;
struct hci_create_pipe_resp *create_info;
struct hci_delete_pipe_noti *delete_info;
struct hci_all_pipe_cleared_noti *cleared_info;
+ u8 gate;
- pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd);
+ pr_debug("from pipe %x cmd %x\n", pipe, cmd);
+
+ if (pipe >= NFC_HCI_MAX_PIPES) {
+ status = NFC_HCI_ANY_E_NOK;
+ goto exit;
+ }
+
+ gate = hdev->pipes[pipe].gate;
switch (cmd) {
case NFC_HCI_ADM_NOTIFY_PIPE_CREATED:
@@ -375,8 +382,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event,
struct sk_buff *skb)
{
int r = 0;
- u8 gate = hdev->pipes[pipe].gate;
+ u8 gate;
+
+ if (pipe >= NFC_HCI_MAX_PIPES) {
+ pr_err("Discarded event %x to invalid pipe %x\n", event, pipe);
+ goto exit;
+ }
+ gate = hdev->pipes[pipe].gate;
if (gate == NFC_HCI_INVALID_GATE) {
pr_err("Discarded event %x to unopened pipe %x\n", event, pipe);
goto exit;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index eee0dddb7749..e894254c17d4 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -32,6 +32,7 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
.len = NFC_DEVICE_NAME_MAXSIZE },
[NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 },
+ [NFC_ATTR_TARGET_INDEX] = { .type = NLA_U32 },
[NFC_ATTR_COMM_MODE] = { .type = NLA_U8 },
[NFC_ATTR_RF_MODE] = { .type = NLA_U8 },
[NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 },
@@ -43,7 +44,10 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
[NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING,
.len = NFC_FIRMWARE_NAME_MAXSIZE },
+ [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 },
[NFC_ATTR_SE_APDU] = { .type = NLA_BINARY },
+ [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 },
+ [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 },
[NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
};
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index c047afd12116..07a7dd185995 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -645,6 +645,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
+ [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
static const struct genl_ops dp_packet_genl_ops[] = {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 30c6879d6774..29bd405adbbd 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2173,6 +2173,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct timespec64 ts;
__u32 ts_status;
bool is_drop_n_account = false;
+ unsigned int slot_id = 0;
bool do_vnet = false;
/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
@@ -2274,6 +2275,20 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
TP_STATUS_KERNEL, (macoff+snaplen));
if (!h.raw)
goto drop_n_account;
+
+ if (po->tp_version <= TPACKET_V2) {
+ slot_id = po->rx_ring.head;
+ if (test_bit(slot_id, po->rx_ring.rx_owner_map))
+ goto drop_n_account;
+ __set_bit(slot_id, po->rx_ring.rx_owner_map);
+ }
+
+ if (do_vnet &&
+ virtio_net_hdr_from_skb(skb, h.raw + macoff -
+ sizeof(struct virtio_net_hdr),
+ vio_le(), true, 0))
+ goto drop_n_account;
+
if (po->tp_version <= TPACKET_V2) {
packet_increment_rx_head(po, &po->rx_ring);
/*
@@ -2286,12 +2301,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
status |= TP_STATUS_LOSING;
}
- if (do_vnet &&
- virtio_net_hdr_from_skb(skb, h.raw + macoff -
- sizeof(struct virtio_net_hdr),
- vio_le(), true, 0))
- goto drop_n_account;
-
po->stats.stats1.tp_packets++;
if (copy_skb) {
status |= TP_STATUS_COPY;
@@ -2379,7 +2388,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
#endif
if (po->tp_version <= TPACKET_V2) {
+ spin_lock(&sk->sk_receive_queue.lock);
__packet_set_status(po, h.raw, status);
+ __clear_bit(slot_id, po->rx_ring.rx_owner_map);
+ spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
} else {
prb_clear_blk_fill_status(&po->rx_ring);
@@ -4276,6 +4288,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
{
struct pgv *pg_vec = NULL;
struct packet_sock *po = pkt_sk(sk);
+ unsigned long *rx_owner_map = NULL;
int was_running, order = 0;
struct packet_ring_buffer *rb;
struct sk_buff_head *rb_queue;
@@ -4361,6 +4374,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
}
break;
default:
+ if (!tx_ring) {
+ rx_owner_map = bitmap_alloc(req->tp_frame_nr,
+ GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
+ if (!rx_owner_map)
+ goto out_free_pg_vec;
+ }
break;
}
}
@@ -4390,6 +4409,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
err = 0;
spin_lock_bh(&rb_queue->lock);
swap(rb->pg_vec, pg_vec);
+ if (po->tp_version <= TPACKET_V2)
+ swap(rb->rx_owner_map, rx_owner_map);
rb->frame_max = (req->tp_frame_nr - 1);
rb->head = 0;
rb->frame_size = req->tp_frame_size;
@@ -4421,6 +4442,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
}
out_free_pg_vec:
+ bitmap_free(rx_owner_map);
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 82fb2b10f790..907f4cd2a718 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -70,7 +70,10 @@ struct packet_ring_buffer {
unsigned int __percpu *pending_refcnt;
- struct tpacket_kbdq_core prb_bdqc;
+ union {
+ unsigned long *rx_owner_map;
+ struct tpacket_kbdq_core prb_bdqc;
+ };
};
extern struct mutex fanout_mutex;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index fe42f986cd94..15ee92d79581 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -285,7 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
bool upgrade,
- bool intr,
+ enum rxrpc_interruptibility interruptibility,
unsigned int debug_id)
{
struct rxrpc_conn_parameters cp;
@@ -310,7 +310,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
memset(&p, 0, sizeof(p));
p.user_call_ID = user_call_ID;
p.tx_total_len = tx_total_len;
- p.intr = intr;
+ p.interruptibility = interruptibility;
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
@@ -371,45 +371,18 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* rxrpc_kernel_check_life - Check to see whether a call is still alive
* @sock: The socket the call is on
* @call: The call to check
- * @_life: Where to store the life value
*
- * Allow a kernel service to find out whether a call is still alive - ie. we're
- * getting ACKs from the server. Passes back in *_life a number representing
- * the life state which can be compared to that returned by a previous call and
- * return true if the call is still alive.
- *
- * If the life state stalls, rxrpc_kernel_probe_life() should be called and
- * then 2RTT waited.
+ * Allow a kernel service to find out whether a call is still alive -
+ * ie. whether it has not yet completed.
*/
bool rxrpc_kernel_check_life(const struct socket *sock,
- const struct rxrpc_call *call,
- u32 *_life)
+ const struct rxrpc_call *call)
{
- *_life = call->acks_latest;
return call->state != RXRPC_CALL_COMPLETE;
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
/**
- * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
- * @sock: The socket the call is on
- * @call: The call to check
- *
- * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
- * find out whether a call is still alive by pinging it. This should cause the
- * life state to be bumped in about 2*RTT.
- *
- * The must be called in TASK_RUNNING state on pain of might_sleep() objecting.
- */
-void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
-{
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
- rxrpc_propose_ack_ping_for_check_life);
- rxrpc_send_ack_packet(call, true, NULL);
-}
-EXPORT_SYMBOL(rxrpc_kernel_probe_life);
-
-/**
* rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
* @sock: The socket the call is on
* @call: The call to query
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7d730c438404..3eb1ab40ca5c 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -489,7 +489,6 @@ enum rxrpc_call_flag {
RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */
- RXRPC_CALL_IS_INTR, /* The call is interruptible */
RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */
};
@@ -598,6 +597,7 @@ struct rxrpc_call {
atomic_t usage;
u16 service_id; /* service ID */
u8 security_ix; /* Security type */
+ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
int debug_id; /* debug ID for printks */
@@ -675,7 +675,6 @@ struct rxrpc_call {
/* transmission-phase ACK management */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
- rxrpc_serial_t acks_latest; /* serial number of latest ACK received */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
@@ -721,7 +720,7 @@ struct rxrpc_call_params {
u32 normal; /* Max time since last call packet (msec) */
} timeouts;
u8 nr_timeouts; /* Number of timeouts specified */
- bool intr; /* The call is interruptible */
+ enum rxrpc_interruptibility interruptibility; /* How interruptible is the call? */
};
struct rxrpc_send_params {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index c9f34b0a11df..f07970207b54 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -237,8 +237,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
return call;
}
- if (p->intr)
- __set_bit(RXRPC_CALL_IS_INTR, &call->flags);
+ call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
atomic_read(&call->usage),
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index ea7d4c21f889..f2a1a5dbb5a7 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -655,13 +655,20 @@ static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp)
add_wait_queue_exclusive(&call->waitq, &myself);
for (;;) {
- if (test_bit(RXRPC_CALL_IS_INTR, &call->flags))
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ case RXRPC_PREINTERRUPTIBLE:
set_current_state(TASK_INTERRUPTIBLE);
- else
+ break;
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
set_current_state(TASK_UNINTERRUPTIBLE);
+ break;
+ }
if (call->call_id)
break;
- if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
+ if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
+ call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
signal_pending(current)) {
ret = -ERESTARTSYS;
break;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index ef10fbf71b15..69e09d69c896 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -882,7 +882,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
before(prev_pkt, call->ackr_prev_seq))
goto out;
call->acks_latest_ts = skb->tstamp;
- call->acks_latest = sp->hdr.serial;
call->ackr_first_seq = first_soft_ack;
call->ackr_prev_seq = prev_pkt;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 813fd6888142..0fcf157aa09f 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -18,6 +18,21 @@
#include "ar-internal.h"
/*
+ * Return true if there's sufficient Tx queue space.
+ *
+ * The usable window is the smaller of call->tx_winsize and
+ * call->cong_cwnd + call->cong_extra.  There is space while the distance
+ * from the sampled call->tx_hard_ack to call->tx_top is below that window.
+ *
+ * If @_tx_win is non-NULL, the tx_hard_ack value sampled for this check is
+ * stored through it for the caller.
+ */
+static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
+{
+ unsigned int win_size =
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra);
+ rxrpc_seq_t tx_win = READ_ONCE(call->tx_hard_ack);
+
+ if (_tx_win)
+ *_tx_win = tx_win;
+ return call->tx_top - tx_win < win_size;
+}
+
+/*
* Wait for space to appear in the Tx queue or a signal to occur.
*/
static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
@@ -26,9 +41,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
{
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- if (call->tx_top - call->tx_hard_ack <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
+ if (rxrpc_check_tx_space(call, NULL))
return 0;
if (call->state >= RXRPC_CALL_COMPLETE)
@@ -49,7 +62,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
* Wait for space to appear in the Tx queue uninterruptibly, but with
* a timeout of 2*RTT if no progress was made and a signal occurred.
*/
-static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
struct rxrpc_call *call)
{
rxrpc_seq_t tx_start, tx_win;
@@ -58,8 +71,8 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
rtt = READ_ONCE(call->peer->rtt);
rtt2 = nsecs_to_jiffies64(rtt) * 2;
- if (rtt2 < 1)
- rtt2 = 1;
+ if (rtt2 < 2)
+ rtt2 = 2;
timeout = rtt2;
tx_start = READ_ONCE(call->tx_hard_ack);
@@ -68,16 +81,13 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
set_current_state(TASK_UNINTERRUPTIBLE);
tx_win = READ_ONCE(call->tx_hard_ack);
- if (call->tx_top - tx_win <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
+ if (rxrpc_check_tx_space(call, &tx_win))
return 0;
if (call->state >= RXRPC_CALL_COMPLETE)
return call->error;
- if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
- timeout == 0 &&
+ if (timeout == 0 &&
tx_win == tx_start && signal_pending(current))
return -EINTR;
@@ -92,6 +102,26 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
}
/*
+ * Wait for space to appear in the Tx queue uninterruptibly.
+ *
+ * Sleeps in TASK_UNINTERRUPTIBLE and never tests for pending signals.
+ * Returns 0 once rxrpc_check_tx_space() reports room, or call->error once
+ * call->state has reached RXRPC_CALL_COMPLETE or beyond.  *timeo is
+ * overwritten with the value returned by schedule_timeout() on every pass.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (rxrpc_check_tx_space(call, NULL))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ /* NOTE(review): nothing here exits the loop when *timeo reaches 0 - confirm that is intended */
+ *timeo = schedule_timeout(*timeo);
+ }
+}
+
+/*
* wait for space to appear in the transmit/ACK window
* - caller holds the socket locked
*/
@@ -108,10 +138,19 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
add_wait_queue(&call->waitq, &myself);
- if (waitall)
- ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
- else
- ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ if (waitall)
+ ret = rxrpc_wait_for_tx_window_waitall(rx, call);
+ else
+ ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
+ break;
+ case RXRPC_PREINTERRUPTIBLE:
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
+ ret = rxrpc_wait_for_tx_window_nonintr(rx, call, timeo);
+ break;
+ }
remove_wait_queue(&call->waitq, &myself);
set_current_state(TASK_RUNNING);
@@ -302,9 +341,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
_debug("alloc");
- if (call->tx_top - call->tx_hard_ack >=
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra)) {
+ if (!rxrpc_check_tx_space(call, NULL)) {
ret = -EAGAIN;
if (msg->msg_flags & MSG_DONTWAIT)
goto maybe_error;
@@ -619,7 +656,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
.call.tx_total_len = -1,
.call.user_call_ID = 0,
.call.nr_timeouts = 0,
- .call.intr = true,
+ .call.interruptibility = RXRPC_INTERRUPTIBLE,
.abort_code = 0,
.command = RXRPC_CMD_SEND_DATA,
.exclusive = false,
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f685c0d73708..41114b463161 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -739,7 +739,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (params)
- kfree_rcu(params, rcu);
+ call_rcu(&params->rcu, tcf_ct_params_free);
if (res == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1ad300e6dbc0..83dd82fc9f40 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -284,10 +284,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
/* mirror is always swallowed */
if (is_redirect) {
- skb2->tc_redirected = 1;
- skb2->tc_from_ingress = skb2->tc_at_ingress;
- if (skb2->tc_from_ingress)
- skb2->tstamp = 0;
+ skb_set_redirected(skb2, skb2->tc_at_ingress);
+
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 6f8786b06bde..5efa3e7ace15 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -534,8 +534,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
fp = &b->ht[h];
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
- if (pfp == f) {
- *fp = f->next;
+ if (pfp == fold) {
+ rcu_assign_pointer(*fp, fold->next);
break;
}
}
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 09b7dc5fe7e0..9904299424a1 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -261,8 +261,10 @@ static void tcindex_partial_destroy_work(struct work_struct *work)
struct tcindex_data,
rwork);
+ rtnl_lock();
kfree(p->perfect);
kfree(p);
+ rtnl_unlock();
}
static void tcindex_free_perfect_hash(struct tcindex_data *cp)
@@ -357,6 +359,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
+ cp->alloc_hash = cp->hash;
for (i = 0; i < min(cp->hash, p->hash); i++)
cp->perfect[i].res = p->perfect[i].res;
balloc = 1;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index b2905b03a432..2eaac2ff380f 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -181,6 +181,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
s64 credits;
int len;
+ /* The previous packet is still being sent */
+ if (now < q->last) {
+ qdisc_watchdog_schedule_ns(&q->watchdog, q->last);
+ return NULL;
+ }
if (q->credits < 0) {
credits = timediff_to_credits(now - q->last, q->idleslope);
@@ -212,7 +217,12 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
credits += q->credits;
q->credits = max_t(s64, credits, q->locredit);
- q->last = now;
+ /* Estimate of the transmission of the last byte of the packet in ns */
+ if (unlikely(atomic64_read(&q->port_rate) == 0))
+ q->last = now;
+ else
+ q->last = now + div64_s64(len * NSEC_PER_SEC,
+ atomic64_read(&q->port_rate));
return skb;
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a5a295477ecc..371ad84def3b 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -744,6 +744,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
+ [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 },
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
};
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 660fc45ee40f..b1eb12d33b9a 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -564,8 +564,10 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
prio = skb->priority;
tc = netdev_get_prio_tc_map(dev, prio);
- if (!(gate_mask & BIT(tc)))
+ if (!(gate_mask & BIT(tc))) {
+ skb = NULL;
continue;
+ }
len = qdisc_pkt_len(skb);
guard = ktime_add_ns(taprio_get_time(q),
@@ -575,13 +577,17 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
* guard band ...
*/
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- ktime_after(guard, entry->close_time))
+ ktime_after(guard, entry->close_time)) {
+ skb = NULL;
continue;
+ }
/* ... and no budget. */
if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- atomic_sub_return(len, &entry->budget) < 0)
+ atomic_sub_return(len, &entry->budget) < 0) {
+ skb = NULL;
continue;
+ }
skb = child->ops->dequeue(child);
if (unlikely(!skb))
@@ -768,6 +774,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
+ [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
};
static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 8a15146faaeb..1069d7af3672 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -237,15 +237,11 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
addrcnt++;
return nla_total_size(sizeof(struct sctp_info))
- + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
- + nla_total_size(1) /* INET_DIAG_TOS */
- + nla_total_size(1) /* INET_DIAG_TCLASS */
- + nla_total_size(4) /* INET_DIAG_MARK */
- + nla_total_size(4) /* INET_DIAG_CLASS_ID */
+ nla_total_size(addrlen * asoc->peer.transport_count)
+ nla_total_size(addrlen * addrcnt)
- + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ + inet_diag_msg_attrs_size()
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ 64;
}
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index d6ba186f67e2..05b825b3cfa4 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -582,6 +582,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
smc_smcr_terminate_all(smcibdev);
smc_ib_cleanup_per_ibdev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
+ cancel_work_sync(&smcibdev->port_event_work);
kfree(smcibdev);
}
diff --git a/net/socket.c b/net/socket.c
index b79a05de7c6e..2dd739fba866 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1707,7 +1707,8 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags)
+ int __user *upeer_addrlen, int flags,
+ unsigned long nofile)
{
struct socket *sock, *newsock;
struct file *newfile;
@@ -1738,7 +1739,7 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
*/
__module_get(newsock->ops->owner);
- newfd = get_unused_fd_flags(flags);
+ newfd = __get_unused_fd_flags(flags, nofile);
if (unlikely(newfd < 0)) {
err = newfd;
sock_release(newsock);
@@ -1807,7 +1808,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
f = fdget(fd);
if (f.file) {
ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
- upeer_addrlen, flags);
+ upeer_addrlen, flags,
+ rlimit(RLIMIT_NOFILE));
if (f.flags)
fput(f.file);
}
@@ -2226,10 +2228,10 @@ struct used_address {
unsigned int name_len;
};
-static int copy_msghdr_from_user(struct msghdr *kmsg,
- struct user_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- struct iovec **iov)
+int __copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec __user **uiov, size_t *nsegs)
{
struct user_msghdr msg;
ssize_t err;
@@ -2271,6 +2273,23 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
return -EMSGSIZE;
kmsg->msg_iocb = NULL;
+ *uiov = msg.msg_iov;
+ *nsegs = msg.msg_iovlen;
+ return 0;
+}
+
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec **iov)
+{
+ struct user_msghdr msg;
+ ssize_t err;
+
+ err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
+ &msg.msg_iovlen);
+ if (err)
+ return err;
err = import_iovec(save_addr ? READ : WRITE,
msg.msg_iov, msg.msg_iovlen,
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 7c35094c20b8..bb9862410e68 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -116,6 +116,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
[TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
[TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
[TIPC_NLA_PROP_WIN] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_MTU] = { .type = NLA_U32 },
[TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 },
[TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 }
};
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5b19e9fac4aa..f0af23c1634a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -470,6 +470,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED },
[NL80211_ATTR_STA_PLINK_STATE] =
NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_STATES - 1),
+ [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 },
+ [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG },
[NL80211_ATTR_MESH_PEER_AID] =
NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
[NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 },
@@ -531,6 +533,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MDID] = { .type = NLA_U16 },
[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
+ [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 },
+ [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 },
[NL80211_ATTR_PEER_AID] =
NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
@@ -561,6 +565,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_UPS - 1),
[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
+ [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 },
[NL80211_ATTR_MAC_MASK] = {
.type = NLA_EXACT_LEN_WARN,
.len = ETH_ALEN
@@ -16411,7 +16416,7 @@ void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
goto nla_put_failure;
if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
- nla_put_u8(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
+ nla_put_u32(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
goto nla_put_failure;
if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index aef240fdf8df..328402ab64a3 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -2022,7 +2022,11 @@ void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
spin_lock_bh(&rdev->bss_lock);
- if (WARN_ON(cbss->pub.channel == chan))
+ /*
+ * Some APs use CSA also for bandwidth changes, i.e., without actually
+ * changing the control channel, so no need to update in such a case.
+ */
+ if (cbss->pub.channel == chan)
goto done;
/* use transmitting bss */
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 50f567a88f45..e2db468cf50e 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -78,8 +78,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
int err;
unsigned long flags;
struct xfrm_state *x;
- struct sk_buff *skb2, *nskb;
struct softnet_data *sd;
+ struct sk_buff *skb2, *nskb, *pskb = NULL;
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
@@ -168,14 +168,14 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
} else {
if (skb == skb2)
skb = nskb;
-
- if (!skb)
- return NULL;
+ else
+ pskb->next = nskb;
continue;
}
skb_push(skb2, skb2->data - skb_mac_header(skb2));
+ pskb = skb2;
}
return skb;
@@ -383,6 +383,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
return xfrm_dev_feat_change(dev);
case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
return xfrm_dev_down(dev);
}
return NOTIFY_DONE;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index dbda08ec566e..8a4af86a285e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -434,7 +434,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ write_lock_bh(&policy->lock);
policy->walk.dead = 1;
+ write_unlock_bh(&policy->lock);
atomic_inc(&policy->genid);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b88ba45ff1ac..e6cfaa680ef3 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -110,7 +110,8 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs)
return 0;
uctx = nla_data(rt);
- if (uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len))
+ if (uctx->len > nla_len(rt) ||
+ uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len))
return -EINVAL;
return 0;
@@ -2275,6 +2276,9 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
err = verify_newpolicy_info(&ua->policy);
if (err)
goto free_state;
+ err = verify_sec_ctx_len(attrs);
+ if (err)
+ goto free_state;
/* build an XP */
xp = xfrm_policy_construct(net, &ua->policy, attrs, &err);
diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include
index 85334dc8c997..496d11c92c97 100644
--- a/scripts/Kconfig.include
+++ b/scripts/Kconfig.include
@@ -44,3 +44,10 @@ $(error-if,$(success, $(LD) -v | grep -q gold), gold linker '$(LD)' not supporte
# gcc version including patch level
gcc-version := $(shell,$(srctree)/scripts/gcc-version.sh $(CC))
+
+# machine bit flags
+# $(m32-flag): -m32 if the compiler supports it, or an empty string otherwise.
+# $(m64-flag): -m64 if the compiler supports it, or an empty string otherwise.
+cc-option-bit = $(if-success,$(CC) -Werror $(1) -E -x c /dev/null -o /dev/null,$(1))
+m32-flag := $(cc-option-bit,-m32)
+m64-flag := $(cc-option-bit,-m64)
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index ecddf83ac142..ca08f2fe7c34 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -48,6 +48,7 @@ KBUILD_CFLAGS += -Wno-initializer-overrides
KBUILD_CFLAGS += -Wno-format
KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -Wno-format-zero-length
+KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
endif
endif
diff --git a/scripts/check-sysctl-docs b/scripts/check-sysctl-docs
new file mode 100755
index 000000000000..8bcb9e26c7bc
--- /dev/null
+++ b/scripts/check-sysctl-docs
@@ -0,0 +1,181 @@
+#!/usr/bin/gawk -f
+# SPDX-License-Identifier: GPL-2.0
+
+# Script to check sysctl documentation against source files
+#
+# Copyright (c) 2020 Stephen Kitt
+
+# Example invocation:
+# scripts/check-sysctl-docs -vtable="kernel" \
+# Documentation/admin-guide/sysctl/kernel.rst \
+# $(git grep -l register_sysctl_)
+#
+# Specify -vdebug=1 to see debugging information
+
+# Refuse to run unless the table variable was supplied (e.g. -vtable="kernel");
+# it selects which sysctl table the report is about.
+BEGIN {
+ if (!table) {
+ print "Please specify the table to look for using the table variable" > "/dev/stderr"
+ exit 1
+ }
+}
+
+# The following globals are used:
+# children: maps ctl_table names and procnames to child ctl_table names
+# documented: maps documented entries (each key is an entry)
+# entries: maps ctl_table names and procnames to counts (so
+# enumerating the subkeys for a given ctl_table lists its
+# procnames)
+# file: maps procnames to source file names
+# paths: maps ctl_path names to paths
+# curpath: the name of the current ctl_path struct
+# curtable: the name of the current ctl_table struct
+# curentry: the name of the current proc entry (procname when parsing
+# a ctl_table, constructed path when parsing a ctl_path)
+
+
+# Strip decorating punctuation from a token: any leading run of '"' or '&',
+# then any trailing run of ']', '[', '"', '&', ',' or '}'.
+function trimpunct(value) {
+	sub(/^["&]+/, "", value)
+	sub(/[]["&,}]+$/, "", value)
+	return value
+}
+
+# Report one sysctl entry: mark it as seen and print its name, the source
+# file it was found in and, when it has a documentation heading, a
+# "(documented)" suffix.  "note" is a local variable.
+function printentry(entry,    note) {
+	seen[entry]++
+	note = documented[entry] ? " (documented)" : ""
+	print "* " entry " from " file[entry] note
+}
+
+
+# Stage 1: build the list of documented entries
+#
+# Only the first input file (FNR == NR) is treated as documentation.  A line
+# made solely of '=' underlines the title held in prevline.
+FNR == NR && /^=+$/ {
+	# The underline of the document's main title names no entries
+	if (prevline ~ /Documentation for/)
+		next
+
+	# Re-split the section title into fields; every word outside
+	# parentheses, other than "and", is recorded as a documented entry
+	$0 = prevline
+	if (debug) print "Parsing " $0
+	inbrackets = 0
+	for (i = 1; i <= NF; i++) {
+		if (length($i) == 0)
+			continue
+		if (!inbrackets && substr($i, 1, 1) == "(")
+			inbrackets = 1
+		if (!inbrackets) {
+			token = trimpunct($i)
+			if (length(token) > 0 && token != "and") {
+				if (debug) print token
+				documented[token]++
+			}
+		}
+		if (inbrackets && substr($i, length($i), 1) == ")")
+			inbrackets = 0
+	}
+}
+
+# Remember the current line for the next underline check, and keep the
+# documentation file away from the stage 2 rules
+FNR == NR {
+	prevline = $0
+	next
+}
+
+
+# Stage 2: process each file and find all sysctl tables
+#
+# BEGINFILE is a gawk extension run at the start of every input file.  The
+# per-file parsing state (children, entries, paths and the cur* trackers) is
+# reset here; documented, seen and file are left alone and accumulate across
+# files.
+BEGINFILE {
+ delete children
+ delete entries
+ delete paths
+ curpath = ""
+ curtable = ""
+ curentry = ""
+ if (debug) print "Processing file " FILENAME
+}
+
+# Start of a ctl_path definition: remember its name (the text up to the first
+# bracket) in curpath
+/^static struct ctl_path/ {
+ match($0, /static struct ctl_path ([^][]+)/, tables)
+ curpath = tables[1]
+ if (debug) print "Processing path " curpath
+}
+
+# Start of a ctl_table definition: remember its name (the text up to the first
+# bracket) in curtable
+/^static struct ctl_table/ {
+ match($0, /static struct ctl_table ([^][]+)/, tables)
+ curtable = tables[1]
+ if (debug) print "Processing table " curtable
+}
+
+# A line consisting of "};" ends whichever definition was being parsed
+/^};$/ {
+ curpath = ""
+ curtable = ""
+ curentry = ""
+}
+
+# Inside a ctl_path: each .procname adds one component to the path being
+# assembled in curentry ("a", then "a/b", ...); the result so far is stored
+# under the ctl_path's name.
+curpath && /\.procname[\t ]*=[\t ]*".+"/ {
+	# The dot is escaped so the capture anchors on the same ".procname"
+	# text that the rule's guard pattern requires
+	match($0, /\.procname[\t ]*=[\t ]*"([^"]+)"/, names)
+	if (curentry) {
+		curentry = curentry "/" names[1]
+	} else {
+		curentry = names[1]
+	}
+	if (debug) print "Setting path " curpath " to " curentry
+	paths[curpath] = curentry
+}
+
+# Inside a ctl_table: each .procname is an entry of the current table; count
+# it and remember which source file it came from.
+curtable && /\.procname[\t ]*=[\t ]*".+"/ {
+	# Escaped dot, as in the guard pattern above
+	match($0, /\.procname[\t ]*=[\t ]*"([^"]+)"/, names)
+	curentry = names[1]
+	if (debug) print "Adding entry " curentry " to table " curtable
+	entries[curtable][curentry]++
+	file[curentry] = FILENAME
+}
+
+# A .child initialiser links the current table entry to another ctl_table.
+# The child's name is the last field on the line with punctuation stripped.
+/\.child[\t ]*=/ {
+ child = trimpunct($NF)
+ if (debug) print "Linking child " child " to table " curtable " entry " curentry
+ children[curtable][curentry] = child
+}
+
+# register_sysctl_table(T): if table T has an entry named after the requested
+# table and that entry has a child table, print every entry of the child.
+/register_sysctl_table\(.*\)/ {
+ match($0, /register_sysctl_table\(([^)]+)\)/, tables)
+ if (debug) print "Registering table " tables[1]
+ if (children[tables[1]][table]) {
+ for (entry in entries[children[tables[1]][table]]) {
+ printentry(entry)
+ }
+ }
+}
+
+# register_sysctl_paths(P, T): if the path assembled for ctl_path P is exactly
+# the requested table, print every entry of table T.  If the path is deeper
+# and starts with the requested table, only its first subdirectory is marked
+# as seen.
+/register_sysctl_paths\(.*\)/ {
+ match($0, /register_sysctl_paths\(([^)]+), ([^)]+)\)/, tables)
+ if (debug) print "Attaching table " tables[2] " to path " tables[1]
+ if (paths[tables[1]] == table) {
+ for (entry in entries[tables[2]]) {
+ printentry(entry)
+ }
+ }
+ split(paths[tables[1]], components, "/")
+ if (length(components) > 1 && components[1] == table) {
+ # Count the first subdirectory as seen
+ seen[components[2]]++
+ }
+}
+
+
+# Final report: any documented name that was never marked as seen while
+# scanning the source files has no implementation that this script found.
+END {
+ for (entry in documented) {
+ if (!seen[entry]) {
+ print "No implementation for " entry
+ }
+ }
+}
diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check
index 7784c54aa38b..997202a18ddb 100755
--- a/scripts/documentation-file-ref-check
+++ b/scripts/documentation-file-ref-check
@@ -51,7 +51,9 @@ open IN, "git grep ':doc:\`' Documentation/|"
or die "Failed to run git grep";
while (<IN>) {
next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,);
+ next if (m,sphinx/,);
+ my $file = $1;
my $d = $1;
my $doc_ref = $2;
@@ -60,7 +62,12 @@ while (<IN>) {
$d =~ s,(.*/).*,$1,;
$f =~ s,.*\<([^\>]+)\>,$1,;
- $f ="$d$f.rst";
+ if ($f =~ m,^/,) {
+ $f = "$f.rst";
+ $f =~ s,^/,Documentation/,;
+ } else {
+ $f = "$d$f.rst";
+ }
next if (grep -e, glob("$f"));
@@ -69,7 +76,7 @@ while (<IN>) {
}
$doc_fix++;
- print STDERR "$f: :doc:`$doc_ref`\n";
+ print STDERR "$file: :doc:`$doc_ref`\n";
}
close IN;
diff --git a/scripts/dtc/dtc-lexer.l b/scripts/dtc/dtc-lexer.l
index 5c6c3fd557d7..b3b7270300de 100644
--- a/scripts/dtc/dtc-lexer.l
+++ b/scripts/dtc/dtc-lexer.l
@@ -23,7 +23,6 @@ LINECOMMENT "//".*\n
#include "srcpos.h"
#include "dtc-parser.tab.h"
-YYLTYPE yylloc;
extern bool treesource_error;
/* CAUTION: this will stop working if we ever use yyless() or yyunput() */
diff --git a/scripts/export_report.pl b/scripts/export_report.pl
index 548330e8c4e7..feb3d5542a62 100755
--- a/scripts/export_report.pl
+++ b/scripts/export_report.pl
@@ -94,7 +94,7 @@ if (defined $opt{'o'}) {
#
while ( <$module_symvers> ) {
chomp;
- my (undef, $symbol, $namespace, $module, $gpl) = split('\t');
+ my (undef, $symbol, $module, $gpl, $namespace) = split('\t');
$SYMBOL { $symbol } = [ $module , "0" , $symbol, $gpl];
}
close($module_symvers);
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index e3569543bdac..f8ca236d6165 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -23,7 +23,7 @@ menuconfig GCC_PLUGINS
GCC plugins are loadable modules that provide extra features to the
compiler. They are useful for runtime instrumentation and static analysis.
- See Documentation/core-api/gcc-plugins.rst for details.
+ See Documentation/kbuild/gcc-plugins.rst for details.
if GCC_PLUGINS
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 0133dfaaf352..3e8dea6e0a95 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -195,13 +195,13 @@ static struct sym_entry *read_symbol(FILE *in)
return NULL;
}
- if (is_ignored_symbol(name, type))
- return NULL;
-
- /* Ignore most absolute/undefined (?) symbols. */
if (strcmp(name, "_text") == 0)
_text = addr;
+ /* Ignore most absolute/undefined (?) symbols. */
+ if (is_ignored_symbol(name, type))
+ return NULL;
+
check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges));
check_symbol_range(name, addr, &percpu_range, 1);
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 054405b90ba4..d3c237b9b7c0 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -145,6 +145,13 @@ int main(void)
DEVID(i2c_device_id);
DEVID_FIELD(i2c_device_id, name);
+ DEVID(i3c_device_id);
+ DEVID_FIELD(i3c_device_id, match_flags);
+ DEVID_FIELD(i3c_device_id, dcr);
+ DEVID_FIELD(i3c_device_id, manuf_id);
+ DEVID_FIELD(i3c_device_id, part_id);
+ DEVID_FIELD(i3c_device_id, extra_info);
+
DEVID(spi_device_id);
DEVID_FIELD(spi_device_id, name);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index c91eba751804..f81cbe021a47 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -919,6 +919,24 @@ static int do_i2c_entry(const char *filename, void *symval,
return 1;
}
+/*
+ * Build the module alias for one i3c_device_id entry: start from "i3c:" and
+ * append the "dcr", "manuf", "part" and "ext" components, each keyed on the
+ * matching I3C_MATCH_* bit of match_flags.  Always returns 1.
+ * NOTE(review): the text ADD() emits for a set or unset flag is defined
+ * elsewhere in this file - confirm before relying on the exact alias format.
+ */
+static int do_i3c_entry(const char *filename, void *symval,
+ char *alias)
+{
+ DEF_FIELD(symval, i3c_device_id, match_flags);
+ DEF_FIELD(symval, i3c_device_id, dcr);
+ DEF_FIELD(symval, i3c_device_id, manuf_id);
+ DEF_FIELD(symval, i3c_device_id, part_id);
+ DEF_FIELD(symval, i3c_device_id, extra_info);
+
+ strcpy(alias, "i3c:");
+ ADD(alias, "dcr", match_flags & I3C_MATCH_DCR, dcr);
+ ADD(alias, "manuf", match_flags & I3C_MATCH_MANUF, manuf_id);
+ ADD(alias, "part", match_flags & I3C_MATCH_PART, part_id);
+ ADD(alias, "ext", match_flags & I3C_MATCH_EXTRA_INFO, extra_info);
+
+ return 1;
+}
+
/* Looks like: spi:S */
static int do_spi_entry(const char *filename, void *symval,
char *alias)
@@ -1386,6 +1404,7 @@ static const struct devtable devtable[] = {
{"vmbus", SIZE_hv_vmbus_device_id, do_vmbus_entry},
{"rpmsg", SIZE_rpmsg_device_id, do_rpmsg_entry},
{"i2c", SIZE_i2c_device_id, do_i2c_entry},
+ {"i3c", SIZE_i3c_device_id, do_i3c_entry},
{"spi", SIZE_spi_device_id, do_spi_entry},
{"dmi", SIZE_dmi_system_id, do_dmi_entry},
{"platform", SIZE_platform_device_id, do_platform_entry},
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 7edfdb2f4497..55a0a2eccbd2 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -308,7 +308,8 @@ static const char *sec_name(struct elf_info *elf, int secindex)
static void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym)
{
- Elf_Shdr *sechdr = &info->sechdrs[sym->st_shndx];
+ unsigned int secindex = get_secindex(info, sym);
+ Elf_Shdr *sechdr = &info->sechdrs[secindex];
unsigned long offset;
offset = sym->st_value;
@@ -2427,7 +2428,7 @@ static void write_if_changed(struct buffer *b, const char *fname)
}
/* parse Module.symvers file. line format:
- * 0x12345678<tab>symbol<tab>module[[<tab>export]<tab>something]
+ * 0x12345678<tab>symbol<tab>module<tab>export<tab>namespace
**/
static void read_dump(const char *fname, unsigned int kernel)
{
@@ -2440,7 +2441,7 @@ static void read_dump(const char *fname, unsigned int kernel)
return;
while ((line = get_next_line(&pos, file, size))) {
- char *symname, *namespace, *modname, *d, *export, *end;
+ char *symname, *namespace, *modname, *d, *export;
unsigned int crc;
struct module *mod;
struct symbol *s;
@@ -2448,16 +2449,16 @@ static void read_dump(const char *fname, unsigned int kernel)
if (!(symname = strchr(line, '\t')))
goto fail;
*symname++ = '\0';
- if (!(namespace = strchr(symname, '\t')))
- goto fail;
- *namespace++ = '\0';
- if (!(modname = strchr(namespace, '\t')))
+ if (!(modname = strchr(symname, '\t')))
goto fail;
*modname++ = '\0';
- if ((export = strchr(modname, '\t')) != NULL)
- *export++ = '\0';
- if (export && ((end = strchr(export, '\t')) != NULL))
- *end = '\0';
+ if (!(export = strchr(modname, '\t')))
+ goto fail;
+ *export++ = '\0';
+ if (!(namespace = strchr(export, '\t')))
+ goto fail;
+ *namespace++ = '\0';
+
crc = strtoul(line, &d, 16);
if (*symname == '\0' || *modname == '\0' || *d != '\0')
goto fail;
@@ -2508,9 +2509,9 @@ static void write_dump(const char *fname)
namespace = symbol->namespace;
buf_printf(&buf, "0x%08x\t%s\t%s\t%s\t%s\n",
symbol->crc, symbol->name,
- namespace ? namespace : "",
symbol->module->name,
- export_str(symbol->export));
+ export_str(symbol->export),
+ namespace ? namespace : "");
}
symbol = symbol->next;
}
diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl
index 255cef1b098d..2ca4eb3f190d 100755
--- a/scripts/parse-maintainers.pl
+++ b/scripts/parse-maintainers.pl
@@ -8,13 +8,14 @@ my $input_file = "MAINTAINERS";
my $output_file = "MAINTAINERS.new";
my $output_section = "SECTION.new";
my $help = 0;
-
+my $order = 0;
my $P = $0;
if (!GetOptions(
'input=s' => \$input_file,
'output=s' => \$output_file,
'section=s' => \$output_section,
+ 'order!' => \$order,
'h|help|usage' => \$help,
)) {
die "$P: invalid argument - use --help if necessary\n";
@@ -32,6 +33,22 @@ usage: $P [options] <pattern matching regexes>
--input => MAINTAINERS file to read (default: MAINTAINERS)
--output => sorted MAINTAINERS file to write (default: MAINTAINERS.new)
--section => new sorted MAINTAINERS file to write to (default: SECTION.new)
+ --order => Use the preferred section content output ordering (default: 0)
+ Preferred ordering of section output is:
+ M: Person acting as a maintainer
+ R: Person acting as a patch reviewer
+ L: Mailing list where patches should be sent
+ S: Maintenance status
+ W: URI for general information
+ Q: URI for patchwork tracking
+ B: URI for bug tracking/submission
+ C: URI for chat
+ P: URI or file for subsystem specific coding styles
+ T: SCM tree type and location
+ F: File and directory pattern
+ X: File and directory exclusion pattern
+ N: File glob
+ K: Keyword - patch content regex
If <pattern match regexes> exist, then the sections that match the
regexes are not written to the output file but are written to the
@@ -56,7 +73,7 @@ sub by_category($$) {
sub by_pattern($$) {
my ($a, $b) = @_;
- my $preferred_order = 'MRPLSWTQBCFXNK';
+ my $preferred_order = 'MRLSWQBCPTFXNK';
my $a1 = uc(substr($a, 0, 1));
my $b1 = uc(substr($b, 0, 1));
@@ -105,8 +122,14 @@ sub alpha_output {
print $file $separator;
}
print $file $key . "\n";
- foreach my $pattern (sort by_pattern split('\n', %$hashref{$key})) {
- print $file ($pattern . "\n");
+ if ($order) {
+ foreach my $pattern (sort by_pattern split('\n', %$hashref{$key})) {
+ print $file ($pattern . "\n");
+ }
+ } else {
+ foreach my $pattern (split('\n', %$hashref{$key})) {
+ print $file ($pattern . "\n");
+ }
}
}
}
diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install
index a8f0c002a340..fa3fb05cd54b 100755
--- a/scripts/sphinx-pre-install
+++ b/scripts/sphinx-pre-install
@@ -701,11 +701,26 @@ sub check_needs()
} else {
my $rec_activate = "$virtenv_dir/bin/activate";
my $virtualenv = findprog("virtualenv-3");
+ my $rec_python3 = "";
$virtualenv = findprog("virtualenv-3.5") if (!$virtualenv);
$virtualenv = findprog("virtualenv") if (!$virtualenv);
$virtualenv = "virtualenv" if (!$virtualenv);
- printf "\t$virtualenv $virtenv_dir\n";
+ my $rel = "";
+ if (index($system_release, "Ubuntu") != -1) {
+ $rel = $1 if ($system_release =~ /Ubuntu\s+(\d+)[.]/);
+ if ($rel && $rel >= 16) {
+ $rec_python3 = " -p python3";
+ }
+ }
+ if (index($system_release, "Debian") != -1) {
+ $rel = $1 if ($system_release =~ /Debian\s+(\d+)/);
+ if ($rel && $rel >= 7) {
+ $rec_python3 = " -p python3";
+ }
+ }
+
+ printf "\t$virtualenv$rec_python3 $virtenv_dir\n";
printf "\t. $rec_activate\n";
printf "\tpip install -r $requirement_file\n";
deactivate_help();
diff --git a/security/keys/key.c b/security/keys/key.c
index 718bf7217420..e959b3c96b48 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -382,7 +382,7 @@ int key_payload_reserve(struct key *key, size_t datalen)
spin_lock(&key->user->lock);
if (delta > 0 &&
- (key->user->qnbytes + delta >= maxbytes ||
+ (key->user->qnbytes + delta > maxbytes ||
key->user->qnbytes + delta < key->user->qnbytes)) {
ret = -EDQUOT;
}
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 9b898c969558..d1a3dea58dee 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -937,8 +937,8 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group)
key_quota_root_maxbytes : key_quota_maxbytes;
spin_lock(&newowner->lock);
- if (newowner->qnkeys + 1 >= maxkeys ||
- newowner->qnbytes + key->quotalen >= maxbytes ||
+ if (newowner->qnkeys + 1 > maxkeys ||
+ newowner->qnbytes + key->quotalen > maxbytes ||
newowner->qnbytes + key->quotalen <
newowner->qnbytes)
goto quota_overrun;
diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
index 240e4702c098..752d078908e9 100644
--- a/sound/core/oss/pcm_plugin.c
+++ b/sound/core/oss/pcm_plugin.c
@@ -111,7 +111,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames)
while (plugin->next) {
if (plugin->dst_frames)
frames = plugin->dst_frames(plugin, frames);
- if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0))
+ if ((snd_pcm_sframes_t)frames <= 0)
return -ENXIO;
plugin = plugin->next;
err = snd_pcm_plugin_alloc(plugin, frames);
@@ -123,7 +123,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames)
while (plugin->prev) {
if (plugin->src_frames)
frames = plugin->src_frames(plugin, frames);
- if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0))
+ if ((snd_pcm_sframes_t)frames <= 0)
return -ENXIO;
plugin = plugin->prev;
err = snd_pcm_plugin_alloc(plugin, frames);
@@ -209,6 +209,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p
if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
plugin = snd_pcm_plug_last(plug);
while (plugin && drv_frames > 0) {
+ if (drv_frames > plugin->buf_frames)
+ drv_frames = plugin->buf_frames;
plugin_prev = plugin->prev;
if (plugin->src_frames)
drv_frames = plugin->src_frames(plugin, drv_frames);
@@ -220,6 +222,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p
plugin_next = plugin->next;
if (plugin->dst_frames)
drv_frames = plugin->dst_frames(plugin, drv_frames);
+ if (drv_frames > plugin->buf_frames)
+ drv_frames = plugin->buf_frames;
plugin = plugin_next;
}
} else
@@ -248,11 +252,15 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc
if (frames < 0)
return frames;
}
+ if (frames > plugin->buf_frames)
+ frames = plugin->buf_frames;
plugin = plugin_next;
}
} else if (stream == SNDRV_PCM_STREAM_CAPTURE) {
plugin = snd_pcm_plug_last(plug);
while (plugin) {
+ if (frames > plugin->buf_frames)
+ frames = plugin->buf_frames;
plugin_prev = plugin->prev;
if (plugin->src_frames) {
frames = plugin->src_frames(plugin, frames);
diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c
index a88c235b2ea3..2ddfe2226651 100644
--- a/sound/core/seq/oss/seq_oss_midi.c
+++ b/sound/core/seq/oss/seq_oss_midi.c
@@ -602,6 +602,7 @@ send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq
len = snd_seq_oss_timer_start(dp->timer);
if (ev->type == SNDRV_SEQ_EVENT_SYSEX) {
snd_seq_oss_readq_sysex(dp->readq, mdev->seq_device, ev);
+ snd_midi_event_reset_decode(mdev->coder);
} else {
len = snd_midi_event_decode(mdev->coder, msg, sizeof(msg), ev);
if (len > 0)
diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c
index 626d87c1539b..77d7037d1476 100644
--- a/sound/core/seq/seq_virmidi.c
+++ b/sound/core/seq/seq_virmidi.c
@@ -81,6 +81,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev,
if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE)
continue;
snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream);
+ snd_midi_event_reset_decode(vmidi->parser);
} else {
len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev);
if (len > 0)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0ac06ff1a17c..63e1a56f705b 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8051,6 +8051,8 @@ static int patch_alc269(struct hda_codec *codec)
spec->gen.mixer_nid = 0;
break;
case 0x10ec0225:
+ codec->power_save_node = 1;
+ /* fall through */
case 0x10ec0295:
case 0x10ec0299:
spec->codec_variant = ALC269_TYPE_ALC225;
@@ -8610,6 +8612,8 @@ enum {
ALC669_FIXUP_ACER_ASPIRE_ETHOS,
ALC669_FIXUP_ACER_ASPIRE_ETHOS_HEADSET,
ALC671_FIXUP_HP_HEADSET_MIC2,
+ ALC662_FIXUP_ACER_X2660G_HEADSET_MODE,
+ ALC662_FIXUP_ACER_NITRO_HEADSET_MODE,
};
static const struct hda_fixup alc662_fixups[] = {
@@ -8955,6 +8959,25 @@ static const struct hda_fixup alc662_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc671_fixup_hp_headset_mic2,
},
+ [ALC662_FIXUP_ACER_X2660G_HEADSET_MODE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x1a, 0x02a1113c }, /* use as headset mic, without its own jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC662_FIXUP_USI_FUNC
+ },
+ [ALC662_FIXUP_ACER_NITRO_HEADSET_MODE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x1a, 0x01a11140 }, /* use as headset mic, without its own jack detect */
+ { 0x1b, 0x0221144f },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC662_FIXUP_USI_FUNC
+ },
};
static const struct snd_pci_quirk alc662_fixup_tbl[] = {
@@ -8966,6 +8989,8 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1025, 0x034a, "Gateway LT27", ALC662_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE),
+ SND_PCI_QUIRK(0x1025, 0x123c, "Acer Nitro N50-600", ALC662_FIXUP_ACER_NITRO_HEADSET_MODE),
+ SND_PCI_QUIRK(0x1025, 0x124e, "Acer 2660G", ALC662_FIXUP_ACER_X2660G_HEADSET_MODE),
SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x05fe, "Dell XPS 15", ALC668_FIXUP_DELL_XPS13),
diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
index b5a3f754a4f1..4f096685ed65 100644
--- a/sound/usb/line6/driver.c
+++ b/sound/usb/line6/driver.c
@@ -305,7 +305,7 @@ static void line6_data_received(struct urb *urb)
line6_midibuf_read(mb, line6->buffer_message,
LINE6_MIDI_MESSAGE_MAXLEN);
- if (done == 0)
+ if (done <= 0)
break;
line6->message_length = done;
diff --git a/sound/usb/line6/midibuf.c b/sound/usb/line6/midibuf.c
index 8d6eefa0d936..6a70463f82c4 100644
--- a/sound/usb/line6/midibuf.c
+++ b/sound/usb/line6/midibuf.c
@@ -159,7 +159,7 @@ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data,
int midi_length_prev =
midibuf_message_length(this->command_prev);
- if (midi_length_prev > 0) {
+ if (midi_length_prev > 1) {
midi_length = midi_length_prev - 1;
repeat = 1;
} else
diff --git a/Documentation/EDID/1024x768.S b/tools/edid/1024x768.S
index 4aed3f9ab88a..4aed3f9ab88a 100644
--- a/Documentation/EDID/1024x768.S
+++ b/tools/edid/1024x768.S
diff --git a/Documentation/EDID/1280x1024.S b/tools/edid/1280x1024.S
index b26dd424cad7..b26dd424cad7 100644
--- a/Documentation/EDID/1280x1024.S
+++ b/tools/edid/1280x1024.S
diff --git a/Documentation/EDID/1600x1200.S b/tools/edid/1600x1200.S
index 0d091b282768..0d091b282768 100644
--- a/Documentation/EDID/1600x1200.S
+++ b/tools/edid/1600x1200.S
diff --git a/Documentation/EDID/1680x1050.S b/tools/edid/1680x1050.S
index 7dfed9a33eab..7dfed9a33eab 100644
--- a/Documentation/EDID/1680x1050.S
+++ b/tools/edid/1680x1050.S
diff --git a/Documentation/EDID/1920x1080.S b/tools/edid/1920x1080.S
index d6ffbba28e95..d6ffbba28e95 100644
--- a/Documentation/EDID/1920x1080.S
+++ b/tools/edid/1920x1080.S
diff --git a/Documentation/EDID/800x600.S b/tools/edid/800x600.S
index a5616588de08..a5616588de08 100644
--- a/Documentation/EDID/800x600.S
+++ b/tools/edid/800x600.S
diff --git a/Documentation/EDID/Makefile b/tools/edid/Makefile
index 85a927dfab02..85a927dfab02 100644
--- a/Documentation/EDID/Makefile
+++ b/tools/edid/Makefile
diff --git a/Documentation/EDID/edid.S b/tools/edid/edid.S
index c3d13815526d..c3d13815526d 100644
--- a/Documentation/EDID/edid.S
+++ b/tools/edid/edid.S
diff --git a/Documentation/EDID/hex b/tools/edid/hex
index 8873ebb618af..8873ebb618af 100644
--- a/Documentation/EDID/hex
+++ b/tools/edid/hex
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
index ce3c5945a1c4..637189ec1ab9 100644
--- a/tools/include/uapi/asm/errno.h
+++ b/tools/include/uapi/asm/errno.h
@@ -1,18 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if defined(__i386__) || defined(__x86_64__)
-#include "../../arch/x86/include/uapi/asm/errno.h"
+#include "../../../arch/x86/include/uapi/asm/errno.h"
#elif defined(__powerpc__)
-#include "../../arch/powerpc/include/uapi/asm/errno.h"
+#include "../../../arch/powerpc/include/uapi/asm/errno.h"
#elif defined(__sparc__)
-#include "../../arch/sparc/include/uapi/asm/errno.h"
+#include "../../../arch/sparc/include/uapi/asm/errno.h"
#elif defined(__alpha__)
-#include "../../arch/alpha/include/uapi/asm/errno.h"
+#include "../../../arch/alpha/include/uapi/asm/errno.h"
#elif defined(__mips__)
-#include "../../arch/mips/include/uapi/asm/errno.h"
+#include "../../../arch/mips/include/uapi/asm/errno.h"
#elif defined(__ia64__)
-#include "../../arch/ia64/include/uapi/asm/errno.h"
+#include "../../../arch/ia64/include/uapi/asm/errno.h"
#elif defined(__xtensa__)
-#include "../../arch/xtensa/include/uapi/asm/errno.h"
+#include "../../../arch/xtensa/include/uapi/asm/errno.h"
#else
#include <asm-generic/errno.h>
#endif
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 1521073b6348..8533bf07450f 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -74,6 +74,8 @@ enum {
#define IPPROTO_UDPLITE IPPROTO_UDPLITE
IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
#define IPPROTO_MPLS IPPROTO_MPLS
+ IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
+#define IPPROTO_ETHERNET IPPROTO_ETHERNET
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7902a5681fc8..b8fc7d972be9 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -35,7 +35,7 @@ endif
# Only pass canonical directory names as the output directory:
#
ifneq ($(O),)
- FULL_O := $(shell readlink -f $(O) || echo $(O))
+ FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O))
endif
#
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 8d6821d9c3f6..27653be24447 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -11,17 +11,17 @@
#include <linux/zalloc.h>
#include <time.h>
-#include "../../util/cpumap.h"
-#include "../../util/event.h"
-#include "../../util/evsel.h"
-#include "../../util/evlist.h"
-#include "../../util/session.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/event.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evlist.h"
+#include "../../../util/session.h"
#include <internal/lib.h> // page_size
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/auxtrace.h"
-#include "../../util/record.h"
-#include "../../util/arm-spe.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/record.h"
+#include "../../../util/arm-spe.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 2864e2e3776d..2833e101a7c6 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include "../../util/perf_regs.h"
+#include "../../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index e9c436eeffc9..0a5242900248 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -4,8 +4,8 @@
#include <regex.h>
#include <linux/zalloc.h>
-#include "../../util/perf_regs.h"
-#include "../../util/debug.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/debug.h"
#include <linux/kernel.h>
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 7abc9fd4cbec..3da506e13f49 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -7,13 +7,13 @@
#include <errno.h>
#include <stdbool.h>
-#include "../../util/header.h"
-#include "../../util/debug.h"
-#include "../../util/pmu.h"
-#include "../../util/auxtrace.h"
-#include "../../util/intel-pt.h"
-#include "../../util/intel-bts.h"
-#include "../../util/evlist.h"
+#include "../../../util/header.h"
+#include "../../../util/debug.h"
+#include "../../../util/pmu.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/intel-pt.h"
+#include "../../../util/intel-bts.h"
+#include "../../../util/evlist.h"
static
struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index ac45015cc6ba..047dc00eafa6 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -3,12 +3,12 @@
#include <linux/string.h>
#include <linux/zalloc.h>
-#include "../../util/event.h"
-#include "../../util/synthetic-events.h"
-#include "../../util/machine.h"
-#include "../../util/tool.h"
-#include "../../util/map.h"
-#include "../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
+#include "../../../util/machine.h"
+#include "../../../util/tool.h"
+#include "../../../util/map.h"
+#include "../../../util/debug.h"
#if defined(__x86_64__)
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index aa6deb463bf3..578c8c568ffd 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -7,8 +7,8 @@
#include <string.h>
#include <regex.h>
-#include "../../util/debug.h"
-#include "../../util/header.h"
+#include "../../../util/debug.h"
+#include "../../../util/header.h"
static inline void
cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 26cee1052179..09f93800bffd 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -11,18 +11,18 @@
#include <linux/log2.h>
#include <linux/zalloc.h>
-#include "../../util/cpumap.h"
-#include "../../util/event.h"
-#include "../../util/evsel.h"
-#include "../../util/evlist.h"
-#include "../../util/mmap.h"
-#include "../../util/session.h"
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/record.h"
-#include "../../util/tsc.h"
-#include "../../util/auxtrace.h"
-#include "../../util/intel-bts.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/event.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evlist.h"
+#include "../../../util/mmap.h"
+#include "../../../util/session.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/record.h"
+#include "../../../util/tsc.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/intel-bts.h"
#include <internal/lib.h> // page_size
#define KiB(x) ((x) * 1024)
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 7eea4fd7ce58..1643aed8c4c8 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -13,23 +13,23 @@
#include <linux/zalloc.h>
#include <cpuid.h>
-#include "../../util/session.h"
-#include "../../util/event.h"
-#include "../../util/evlist.h"
-#include "../../util/evsel.h"
-#include "../../util/evsel_config.h"
-#include "../../util/cpumap.h"
-#include "../../util/mmap.h"
+#include "../../../util/session.h"
+#include "../../../util/event.h"
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evsel_config.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/mmap.h"
#include <subcmd/parse-options.h>
-#include "../../util/parse-events.h"
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/auxtrace.h"
-#include "../../util/record.h"
-#include "../../util/target.h"
-#include "../../util/tsc.h"
+#include "../../../util/parse-events.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/record.h"
+#include "../../../util/target.h"
+#include "../../../util/tsc.h"
#include <internal/lib.h> // page_size
-#include "../../util/intel-pt.h"
+#include "../../../util/intel-pt.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
index e17e080e76f4..31679c35d493 100644
--- a/tools/perf/arch/x86/util/machine.c
+++ b/tools/perf/arch/x86/util/machine.c
@@ -5,9 +5,9 @@
#include <stdlib.h>
#include <internal/lib.h> // page_size
-#include "../../util/machine.h"
-#include "../../util/map.h"
-#include "../../util/symbol.h"
+#include "../../../util/machine.h"
+#include "../../../util/map.h"
+#include "../../../util/symbol.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index c218b83e063b..fca81b39b09f 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -5,10 +5,10 @@
#include <linux/kernel.h>
#include <linux/zalloc.h>
-#include "../../perf-sys.h"
-#include "../../util/perf_regs.h"
-#include "../../util/debug.h"
-#include "../../util/event.h"
+#include "../../../perf-sys.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/debug.h"
+#include "../../../util/event.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index e33ef5bc31c5..d48d608517fd 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -4,9 +4,9 @@
#include <linux/stddef.h>
#include <linux/perf_event.h>
-#include "../../util/intel-pt.h"
-#include "../../util/intel-bts.h"
-#include "../../util/pmu.h"
+#include "../../../util/intel-pt.h"
+#include "../../../util/intel-bts.h"
+#include "../../../util/pmu.h"
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index fddb3ced9db6..4aa6de1aa67d 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -2,6 +2,10 @@
#ifndef BENCH_H
#define BENCH_H
+#include <sys/time.h>
+
+extern struct timeval bench__start, bench__end, bench__runtime;
+
/*
* The madvise transparent hugepage constants were added in glibc
* 2.13. For compatibility with older versions of glibc, define these
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index bb617e568841..cadc18d42aa4 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -35,7 +35,6 @@
static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
-struct timeval start, end, runtime;
static bool done, __verbose, randomize;
/*
@@ -94,8 +93,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void nest_epollfd(void)
@@ -313,6 +312,7 @@ int bench_epoll_ctl(int argc, const char **argv)
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -361,7 +361,7 @@ int bench_epoll_ctl(int argc, const char **argv)
threads_starting = nthreads;
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
do_threads(worker, cpu);
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 7af694437f4e..f938c585d512 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -90,7 +90,6 @@
static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
-struct timeval start, end, runtime;
static bool wdone, done, __verbose, randomize, nonblocking;
/*
@@ -276,8 +275,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void print_summary(void)
@@ -287,7 +286,7 @@ static void print_summary(void)
printf("\nAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ (int)bench__runtime.tv_sec);
}
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
@@ -427,6 +426,7 @@ int bench_epoll_wait(int argc, const char **argv)
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -479,7 +479,7 @@ int bench_epoll_wait(int argc, const char **argv)
threads_starting = nthreads;
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
do_threads(worker, cpu);
@@ -519,7 +519,7 @@ int bench_epoll_wait(int argc, const char **argv)
qsort(worker, nthreads, sizeof(struct worker), cmpworker);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops/runtime.tv_sec;
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
update_stats(&throughput_stats, t);
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 8ba0c3330a9a..65eebe06c04d 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -37,7 +37,7 @@ static unsigned int nfutexes = 1024;
static bool fshared = false, done = false, silent = false;
static int futex_flag = 0;
-struct timeval start, end, runtime;
+struct timeval bench__start, bench__end, bench__runtime;
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
@@ -103,8 +103,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void print_summary(void)
@@ -114,7 +114,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ (int)bench__runtime.tv_sec);
}
int bench_futex_hash(int argc, const char **argv)
@@ -137,6 +137,7 @@ int bench_futex_hash(int argc, const char **argv)
if (!cpu)
goto errmem;
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -161,7 +162,7 @@ int bench_futex_hash(int argc, const char **argv)
threads_starting = nthreads;
pthread_attr_init(&thread_attr);
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
for (i = 0; i < nthreads; i++) {
worker[i].tid = i;
worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
@@ -204,7 +205,7 @@ int bench_futex_hash(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops/runtime.tv_sec;
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
update_stats(&throughput_stats, t);
if (!silent) {
if (nfutexes == 1)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index d0cae8125423..89fd8f325f38 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -37,7 +37,6 @@ static bool silent = false, multi = false;
static bool done = false, fshared = false;
static unsigned int nthreads = 0;
static int futex_flag = 0;
-struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
@@ -64,7 +63,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ (int)bench__runtime.tv_sec);
}
static void toggle_done(int sig __maybe_unused,
@@ -73,8 +72,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void *workerfn(void *arg)
@@ -161,6 +160,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
if (!cpu)
err(EXIT_FAILURE, "calloc");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -185,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
threads_starting = nthreads;
pthread_attr_init(&thread_attr);
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
create_threads(worker, thread_attr, cpu);
pthread_attr_destroy(&thread_attr);
@@ -211,7 +211,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops/runtime.tv_sec;
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
update_stats(&throughput_stats, t);
if (!silent)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index a00a6891447a..7a15c2e61022 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -128,6 +128,7 @@ int bench_futex_requeue(int argc, const char **argv)
if (!cpu)
err(EXIT_FAILURE, "cpu_map__new");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index a053cf2b7039..cd2b81a845ac 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -234,6 +234,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index df810096abfe..2dfcef3e371e 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -43,7 +43,7 @@ static bool done = false, silent = false, fshared = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats waketime_stats, wakeup_stats;
-static unsigned int ncpus, threads_starting, nthreads = 0;
+static unsigned int threads_starting, nthreads = 0;
static int futex_flag = 0;
static const struct option options[] = {
@@ -136,12 +136,13 @@ int bench_futex_wake(int argc, const char **argv)
if (!cpu)
err(EXIT_FAILURE, "calloc");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads)
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index f8b6ae557d8b..c03c36fde7e2 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1312,7 +1312,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair,
end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
he->ms.sym);
- if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+ if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
+ (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld",
start_line, end_line, block_he->diff.cycles);
} else {
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f6dd1a63f159..d2539b793f9d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -684,7 +684,9 @@ repeat:
delay_msecs = top->delay_secs * MSEC_PER_SEC;
set_term_quiet_input(&save);
/* trash return*/
- getc(stdin);
+ clearerr(stdin);
+ if (poll(&stdin_poll, 1, 0) > 0)
+ getc(stdin);
while (!done) {
perf_top__print_sym_table(top);
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 079c77b6a2fd..27b4da80f751 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -1082,10 +1082,9 @@ static int process_one_file(const char *fpath, const struct stat *sb,
*/
int main(int argc, char *argv[])
{
- int rc;
+ int rc, ret = 0;
int maxfds;
char ldirname[PATH_MAX];
-
const char *arch;
const char *output_file;
const char *start_dirname;
@@ -1156,7 +1155,8 @@ int main(int argc, char *argv[])
/* Make build fail */
fclose(eventsfp);
free_arch_std_events();
- return 1;
+ ret = 1;
+ goto out_free_mapfile;
} else if (rc) {
goto empty_map;
}
@@ -1174,14 +1174,17 @@ int main(int argc, char *argv[])
/* Make build fail */
fclose(eventsfp);
free_arch_std_events();
- return 1;
+ ret = 1;
}
- return 0;
+
+ goto out_free_mapfile;
empty_map:
fclose(eventsfp);
create_empty_mapping(output_file);
free_arch_std_events();
- return 0;
+out_free_mapfile:
+ free(mapfile);
+ return ret;
}
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index d0b935356274..489b50604cf2 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -19,7 +19,7 @@
#include "../perf-sys.h"
#include "cloexec.h"
-volatile long the_var;
+static volatile long the_var;
static noinline int test_function(void)
{
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index c4b030bf6ec2..fbbb6d640dad 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -295,7 +295,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
he->ms.sym);
- if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+ if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
+ (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
scnprintf(buf, sizeof(buf), "[%s -> %s]",
start_line, end_line);
} else {
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 6242a9215df7..4154f944f474 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -343,11 +343,11 @@ static const char *normalize_arch(char *arch)
const char *perf_env__arch(struct perf_env *env)
{
- struct utsname uts;
char *arch_name;
if (!env || !env->arch) { /* Assume local operation */
- if (uname(&uts) < 0)
+ static struct utsname uts = { .machine[0] = '\0', };
+ if (uts.machine[0] == '\0' && uname(&uts) < 0)
return NULL;
arch_name = uts.machine;
} else
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index a08ca276098e..b342f744b1fc 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
return true;
}
- if (!strncmp(filename, "/system/lib/", 11)) {
+ if (!strncmp(filename, "/system/lib/", 12)) {
char *ndk, *app;
const char *arch;
size_t ndk_length;
@@ -431,7 +431,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
if (map && map->dso) {
char *srcline = map__srcline(map, addr, NULL);
- if (srcline != SRCLINE_UNKNOWN)
+ if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)
ret = fprintf(fp, "%s%s", prefix, srcline);
free_srcline(srcline);
}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c01ba6f8fdad..a7dc0b096974 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -257,21 +257,15 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
path = zalloc(sizeof(*path));
if (!path)
return NULL;
- path->system = malloc(MAX_EVENT_LENGTH);
- if (!path->system) {
+ if (asprintf(&path->system, "%.*s", MAX_EVENT_LENGTH, sys_dirent->d_name) < 0) {
free(path);
return NULL;
}
- path->name = malloc(MAX_EVENT_LENGTH);
- if (!path->name) {
+ if (asprintf(&path->name, "%.*s", MAX_EVENT_LENGTH, evt_dirent->d_name) < 0) {
zfree(&path->system);
free(path);
return NULL;
}
- strncpy(path->system, sys_dirent->d_name,
- MAX_EVENT_LENGTH);
- strncpy(path->name, evt_dirent->d_name,
- MAX_EVENT_LENGTH);
return path;
}
}
@@ -1219,7 +1213,7 @@ static int config_attr(struct perf_event_attr *attr,
static int get_config_terms(struct list_head *head_config,
struct list_head *head_terms __maybe_unused)
{
-#define ADD_CONFIG_TERM(__type) \
+#define ADD_CONFIG_TERM(__type, __weak) \
struct perf_evsel_config_term *__t; \
\
__t = zalloc(sizeof(*__t)); \
@@ -1228,18 +1222,18 @@ static int get_config_terms(struct list_head *head_config,
\
INIT_LIST_HEAD(&__t->list); \
__t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
- __t->weak = term->weak; \
+ __t->weak = __weak; \
list_add_tail(&__t->list, head_terms)
-#define ADD_CONFIG_TERM_VAL(__type, __name, __val) \
+#define ADD_CONFIG_TERM_VAL(__type, __name, __val, __weak) \
do { \
- ADD_CONFIG_TERM(__type); \
+ ADD_CONFIG_TERM(__type, __weak); \
__t->val.__name = __val; \
} while (0)
-#define ADD_CONFIG_TERM_STR(__type, __val) \
+#define ADD_CONFIG_TERM_STR(__type, __val, __weak) \
do { \
- ADD_CONFIG_TERM(__type); \
+ ADD_CONFIG_TERM(__type, __weak); \
__t->val.str = strdup(__val); \
if (!__t->val.str) { \
zfree(&__t); \
@@ -1253,62 +1247,62 @@ do { \
list_for_each_entry(term, head_config, list) {
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
- ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num);
+ ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
- ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num);
+ ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_TIME:
- ADD_CONFIG_TERM_VAL(TIME, time, term->val.num);
+ ADD_CONFIG_TERM_VAL(TIME, time, term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
- ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str);
+ ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
- ADD_CONFIG_TERM_STR(BRANCH, term->val.str);
+ ADD_CONFIG_TERM_STR(BRANCH, term->val.str, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
ADD_CONFIG_TERM_VAL(STACK_USER, stack_user,
- term->val.num);
+ term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_INHERIT:
ADD_CONFIG_TERM_VAL(INHERIT, inherit,
- term->val.num ? 1 : 0);
+ term->val.num ? 1 : 0, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
ADD_CONFIG_TERM_VAL(INHERIT, inherit,
- term->val.num ? 0 : 1);
+ term->val.num ? 0 : 1, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack,
- term->val.num);
+ term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events,
- term->val.num);
+ term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
- term->val.num ? 1 : 0);
+ term->val.num ? 1 : 0, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
- term->val.num ? 0 : 1);
+ term->val.num ? 0 : 1, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
- ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str);
+ ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_PERCORE:
ADD_CONFIG_TERM_VAL(PERCORE, percore,
- term->val.num ? true : false);
+ term->val.num ? true : false, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output,
- term->val.num ? 1 : 0);
+ term->val.num ? 1 : 0, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
- term->val.num);
+ term->val.num, term->weak);
break;
default:
break;
@@ -1345,7 +1339,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
}
if (bits)
- ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits);
+ ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits, false);
#undef ADD_CONFIG_TERM
return 0;
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 0f5fda11675f..8c852948513e 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -206,6 +206,9 @@ static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
} else
ret = strlist__add(sl, tev.event);
clear_probe_trace_event(&tev);
+ /* Skip if there is same name multi-probe event in the list */
+ if (ret == -EEXIST)
+ ret = 0;
if (ret < 0)
break;
}
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 1c817add6ca4..e4cff49384f4 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -637,14 +637,19 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
return -EINVAL;
}
- /* Try to get actual symbol name from symtab */
- symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+ if (dwarf_entrypc(sp_die, &eaddr) == 0) {
+ /* If the DIE has entrypc, use it. */
+ symbol = dwarf_diename(sp_die);
+ } else {
+ /* Try to get actual symbol name and address from symtab */
+ symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+ eaddr = sym.st_value;
+ }
if (!symbol) {
pr_warning("Failed to find symbol at 0x%lx\n",
(unsigned long)paddr);
return -ENOENT;
}
- eaddr = sym.st_value;
tp->offset = (unsigned long)(paddr - eaddr);
tp->address = (unsigned long)paddr;
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index aa344a163eaf..8a065a6f9713 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -2,11 +2,13 @@ from os import getenv
from subprocess import Popen, PIPE
from re import sub
+cc = getenv("CC")
+cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline()
+
def clang_has_option(option):
- return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+ return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
-cc = getenv("CC")
-if cc == "clang":
+if cc_is_clang:
from distutils.sysconfig import get_config_vars
vars = get_config_vars()
for var in ('CFLAGS', 'OPT'):
@@ -40,7 +42,7 @@ class install_lib(_install_lib):
cflags = getenv('CFLAGS', '').split()
# switch off several checks (need to be at the end of cflags list)
cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
-if cc != "clang":
+if not cc_is_clang:
cflags += ['-Wno-cast-function-type' ]
src_perf = getenv('srctree') + '/tools/perf'
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 1077013d8ce2..26bc6a0096ce 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1622,7 +1622,12 @@ int dso__load(struct dso *dso, struct map *map)
goto out;
}
- if (dso->kernel) {
+ kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+
+ if (dso->kernel && !kmod) {
if (dso->kernel == DSO_TYPE_KERNEL)
ret = dso__load_kernel_sym(dso, map);
else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
@@ -1650,12 +1655,6 @@ int dso__load(struct dso *dso, struct map *map)
if (!name)
goto out;
- kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
- dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
- dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
- dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
-
-
/*
* Read the build id if possible. This is required for
* DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
index 33dc34db4f3c..20f46348271b 100644
--- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -82,7 +82,7 @@ static struct pci_access *pci_acc;
static struct pci_dev *amd_fam14h_pci_dev;
static int nbp1_entered;
-struct timespec start_time;
+static struct timespec start_time;
static unsigned long long timediff;
#ifdef DEBUG
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index 3c4cee160b0e..a65f7d011513 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -19,7 +19,7 @@ struct cpuidle_monitor cpuidle_sysfs_monitor;
static unsigned long long **previous_count;
static unsigned long long **current_count;
-struct timespec start_time;
+static struct timespec start_time;
static unsigned long long timediff;
static int cpuidle_get_count_percent(unsigned int id, double *percent,
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index 6d44fec55ad5..7c77045fef52 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -27,6 +27,8 @@ struct cpuidle_monitor *all_monitors[] = {
0
};
+int cpu_count;
+
static struct cpuidle_monitor *monitors[MONITORS_MAX];
static unsigned int avail_monitors;
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
index 5b5eb1da0cce..c559d3115330 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -25,7 +25,7 @@
#endif
#define CSTATE_DESC_LEN 60
-int cpu_count;
+extern int cpu_count;
/* Hard to define the right names ...: */
enum power_range_e {
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 13f1e8b9ac52..2b6551269e43 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -16,7 +16,7 @@ override CFLAGS += -D_FORTIFY_SOURCE=2
%: %.c
@mkdir -p $(BUILD_OUTPUT)
- $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS)
+ $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap
.PHONY : clean
clean :
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 31c1ca0bb3ee..33b370865d16 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -30,7 +30,7 @@
#include <sched.h>
#include <time.h>
#include <cpuid.h>
-#include <linux/capability.h>
+#include <sys/capability.h>
#include <errno.h>
#include <math.h>
@@ -304,6 +304,10 @@ int *irqs_per_cpu; /* indexed by cpu_num */
void setup_all_buffers(void);
+char *sys_lpi_file;
+char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
+char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
+
int cpu_is_not_present(int cpu)
{
return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
@@ -2916,8 +2920,6 @@ int snapshot_gfx_mhz(void)
*
* record snapshot of
* /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
- *
- * return 1 if config change requires a restart, else return 0
*/
int snapshot_cpu_lpi_us(void)
{
@@ -2941,17 +2943,14 @@ int snapshot_cpu_lpi_us(void)
/*
* snapshot_sys_lpi()
*
- * record snapshot of
- * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
- *
- * return 1 if config change requires a restart, else return 0
+ * record snapshot of sys_lpi_file
*/
int snapshot_sys_lpi_us(void)
{
FILE *fp;
int retval;
- fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
+ fp = fopen_or_die(sys_lpi_file, "r");
retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
if (retval != 1) {
@@ -3151,28 +3150,42 @@ void check_dev_msr()
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
-void check_permissions()
+/*
+ * check for CAP_SYS_RAWIO
+ * return 0 on success
+ * return 1 on fail
+ */
+int check_for_cap_sys_rawio(void)
{
- struct __user_cap_header_struct cap_header_data;
- cap_user_header_t cap_header = &cap_header_data;
- struct __user_cap_data_struct cap_data_data;
- cap_user_data_t cap_data = &cap_data_data;
- extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
- int do_exit = 0;
- char pathname[32];
+ cap_t caps;
+ cap_flag_value_t cap_flag_value;
- /* check for CAP_SYS_RAWIO */
- cap_header->pid = getpid();
- cap_header->version = _LINUX_CAPABILITY_VERSION;
- if (capget(cap_header, cap_data) < 0)
- err(-6, "capget(2) failed");
+ caps = cap_get_proc();
+ if (caps == NULL)
+ err(-6, "cap_get_proc\n");
- if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
- do_exit++;
+ if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
+ err(-6, "cap_get\n");
+
+ if (cap_flag_value != CAP_SET) {
warnx("capget(CAP_SYS_RAWIO) failed,"
" try \"# setcap cap_sys_rawio=ep %s\"", progname);
+ return 1;
}
+ if (cap_free(caps) == -1)
+ err(-6, "cap_free\n");
+
+ return 0;
+}
+void check_permissions(void)
+{
+ int do_exit = 0;
+ char pathname[32];
+
+ /* check for CAP_SYS_RAWIO */
+ do_exit += check_for_cap_sys_rawio();
+
/* test file permissions */
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (euidaccess(pathname, R_OK)) {
@@ -3265,6 +3278,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
+ case INTEL_FAM6_ATOM_TREMONT: /* EHL */
pkg_cstate_limits = glm_pkg_cstate_limits;
break;
default:
@@ -3336,6 +3350,17 @@ int is_skx(unsigned int family, unsigned int model)
}
return 0;
}
+int is_ehl(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_TREMONT:
+ return 1;
+ }
+ return 0;
+}
int has_turbo_ratio_limit(unsigned int family, unsigned int model)
{
@@ -3478,6 +3503,23 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
dump_nhm_cst_cfg();
}
+static void dump_sysfs_file(char *path)
+{
+ FILE *input;
+ char cpuidle_buf[64];
+
+ input = fopen(path, "r");
+ if (input == NULL) {
+ if (debug)
+ fprintf(outf, "NSFOD %s\n", path);
+ return;
+ }
+ if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
+ err(1, "%s: failed to read file", path);
+ fclose(input);
+
+ fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
+}
static void
dump_sysfs_cstate_config(void)
{
@@ -3491,6 +3533,15 @@ dump_sysfs_cstate_config(void)
if (!DO_BIC(BIC_sysfs))
return;
+ if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
+ fprintf(outf, "cpuidle not loaded\n");
+ return;
+ }
+
+ dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
+ dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
+ dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
+
for (state = 0; state < 10; ++state) {
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
@@ -3894,6 +3945,20 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
else
BIC_PRESENT(BIC_PkgWatt);
break;
+ case INTEL_FAM6_ATOM_TREMONT: /* EHL */
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_RAM_J);
+ BIC_PRESENT(BIC_GFX_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ BIC_PRESENT(BIC_GFXWatt);
+ }
+ break;
case INTEL_FAM6_SKYLAKE_L: /* SKL */
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
@@ -4295,6 +4360,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
+ case INTEL_FAM6_ATOM_TREMONT: /* EHL */
return 1;
}
return 0;
@@ -4324,6 +4390,7 @@ int has_c8910_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ case INTEL_FAM6_ATOM_TREMONT: /* EHL */
return 1;
}
return 0;
@@ -4610,14 +4677,24 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_SKYLAKE:
case INTEL_FAM6_KABYLAKE_L:
case INTEL_FAM6_KABYLAKE:
+ case INTEL_FAM6_COMETLAKE_L:
+ case INTEL_FAM6_COMETLAKE:
return INTEL_FAM6_SKYLAKE_L;
case INTEL_FAM6_ICELAKE_L:
case INTEL_FAM6_ICELAKE_NNPI:
+ case INTEL_FAM6_TIGERLAKE_L:
+ case INTEL_FAM6_TIGERLAKE:
return INTEL_FAM6_CANNONLAKE_L;
case INTEL_FAM6_ATOM_TREMONT_D:
return INTEL_FAM6_ATOM_GOLDMONT_D;
+
+ case INTEL_FAM6_ATOM_TREMONT_L:
+ return INTEL_FAM6_ATOM_TREMONT;
+
+ case INTEL_FAM6_ICELAKE_X:
+ return INTEL_FAM6_SKYLAKE_X;
}
return model;
}
@@ -4872,7 +4949,8 @@ void process_cpuid()
do_slm_cstates = is_slm(family, model);
do_knl_cstates = is_knl(family, model);
- if (do_slm_cstates || do_knl_cstates || is_cnl(family, model))
+ if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) ||
+ is_ehl(family, model))
BIC_NOT_PRESENT(BIC_CPU_c3);
if (!quiet)
@@ -4907,10 +4985,16 @@ void process_cpuid()
else
BIC_NOT_PRESENT(BIC_CPU_LPI);
- if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
+ if (!access(sys_lpi_file_sysfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_sysfs;
BIC_PRESENT(BIC_SYS_LPI);
- else
+ } else if (!access(sys_lpi_file_debugfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_debugfs;
+ BIC_PRESENT(BIC_SYS_LPI);
+ } else {
+ sys_lpi_file_sysfs = NULL;
BIC_NOT_PRESENT(BIC_SYS_LPI);
+ }
if (!quiet)
decode_misc_feature_control();
@@ -5306,7 +5390,7 @@ int get_and_dump_counters(void)
}
void print_version() {
- fprintf(outf, "turbostat version 19.08.31"
+ fprintf(outf, "turbostat version 20.03.20"
" - Len Brown <lenb@kernel.org>\n");
}
@@ -5323,9 +5407,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
}
msrp->msr_num = msr_num;
- strncpy(msrp->name, name, NAME_BYTES);
+ strncpy(msrp->name, name, NAME_BYTES - 1);
if (path)
- strncpy(msrp->path, path, PATH_BYTES);
+ strncpy(msrp->path, path, PATH_BYTES - 1);
msrp->width = width;
msrp->type = type;
msrp->format = format;
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index ded7a950dc40..6d2f3a1b2249 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
ifneq ($(O),)
ifeq ($(origin O), command line)
- dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
- ABSOLUTE_O := $(shell cd $(O) ; pwd)
+ dummy := $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
+ ABSOLUTE_O := $(shell cd $(PWD); cd $(O) ; pwd)
OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/)
COMMAND_O := O=$(ABSOLUTE_O)
ifeq ($(objtree),)
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 220d04f958a6..7570e36d636d 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -30,7 +30,7 @@ my %default = (
"EMAIL_WHEN_STARTED" => 0,
"NUM_TESTS" => 1,
"TEST_TYPE" => "build",
- "BUILD_TYPE" => "randconfig",
+ "BUILD_TYPE" => "oldconfig",
"MAKE_CMD" => "make",
"CLOSE_CONSOLE_SIGNAL" => "INT",
"TIMEOUT" => 120,
@@ -1030,7 +1030,7 @@ sub __read_config {
}
if (!$skip && $rest !~ /^\s*$/) {
- die "$name: $.: Gargbage found after $type\n$_";
+ die "$name: $.: Garbage found after $type\n$_";
}
if ($skip && $type eq "TEST_START") {
@@ -1063,7 +1063,7 @@ sub __read_config {
}
if ($rest !~ /^\s*$/) {
- die "$name: $.: Gargbage found after DEFAULTS\n$_";
+ die "$name: $.: Garbage found after DEFAULTS\n$_";
}
} elsif (/^\s*INCLUDE\s+(\S+)/) {
@@ -1154,7 +1154,7 @@ sub __read_config {
# on of these sections that have SKIP defined.
# The save variable can be
# defined multiple times and the new one simply overrides
- # the prevous one.
+ # the previous one.
set_variable($lvalue, $rvalue);
} else {
@@ -1234,7 +1234,7 @@ sub read_config {
foreach my $option (keys %not_used) {
print "$option\n";
}
- print "Set IGRNORE_UNUSED = 1 to have ktest ignore unused variables\n";
+ print "Set IGNORE_UNUSED = 1 to have ktest ignore unused variables\n";
if (!read_yn "Do you want to continue?") {
exit -1;
}
@@ -1345,7 +1345,7 @@ sub eval_option {
# Check for recursive evaluations.
# 100 deep should be more than enough.
if ($r++ > 100) {
- die "Over 100 evaluations accurred with $option\n" .
+ die "Over 100 evaluations occurred with $option\n" .
"Check for recursive variables\n";
}
$prev = $option;
@@ -1383,7 +1383,7 @@ sub reboot {
} else {
# Make sure everything has been written to disk
- run_ssh("sync");
+ run_ssh("sync", 10);
if (defined($time)) {
start_monitor;
@@ -1461,7 +1461,7 @@ sub get_test_name() {
sub dodie {
- # avoid recusion
+ # avoid recursion
return if ($in_die);
$in_die = 1;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index c3bc933d437b..27666b8007ed 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -10,7 +10,7 @@
#
# Options set in the beginning of the file are considered to be
-# default options. These options can be overriden by test specific
+# default options. These options can be overridden by test specific
# options, with the following exceptions:
#
# LOG_FILE
@@ -204,7 +204,7 @@
#
# This config file can also contain "config variables".
# These are assigned with ":=" instead of the ktest option
-# assigment "=".
+# assignment "=".
#
# The difference between ktest options and config variables
# is that config variables can be used multiple times,
@@ -263,7 +263,7 @@
#### Using options in other options ####
#
# Options that are defined in the config file may also be used
-# by other options. All options are evaulated at time of
+# by other options. All options are evaluated at time of
# use (except that config variables are evaluated at config
# processing time).
#
@@ -505,7 +505,7 @@
#TEST = ssh user@machine /root/run_test
# The build type is any make config type or special command
-# (default randconfig)
+# (default oldconfig)
# nobuild - skip the clean and build step
# useconfig:/path/to/config - use the given config and run
# oldconfig on it.
@@ -707,7 +707,7 @@
# Line to define a successful boot up in console output.
# This is what the line contains, not the entire line. If you need
-# the entire line to match, then use regural expression syntax like:
+# the entire line to match, then use regular expression syntax like:
# (do not add any quotes around it)
#
# SUCCESS_LINE = ^MyBox Login:$
@@ -839,7 +839,7 @@
# (ignored if POWEROFF_ON_SUCCESS is set)
#REBOOT_ON_SUCCESS = 1
-# In case there are isses with rebooting, you can specify this
+# In case there are issues with rebooting, you can specify this
# to always powercycle after this amount of time after calling
# reboot.
# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just
@@ -848,7 +848,7 @@
# (default undefined)
#POWERCYCLE_AFTER_REBOOT = 5
-# In case there's isses with halting, you can specify this
+# In case there's issues with halting, you can specify this
# to always poweroff after this amount of time after calling
# halt.
# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just
@@ -972,7 +972,7 @@
#
# PATCHCHECK_START is required and is the first patch to
# test (the SHA1 of the commit). You may also specify anything
-# that git checkout allows (branch name, tage, HEAD~3).
+# that git checkout allows (branch name, tag, HEAD~3).
#
# PATCHCHECK_END is the last patch to check (default HEAD)
#
@@ -994,7 +994,7 @@
# IGNORE_WARNINGS is set for the given commit's sha1
#
# IGNORE_WARNINGS can be used to disable the failure of patchcheck
-# on a particuler commit (SHA1). You can add more than one commit
+# on a particular commit (SHA1). You can add more than one commit
# by adding a list of SHA1s that are space delimited.
#
# If BUILD_NOCLEAN is set, then make mrproper will not be run on
@@ -1093,7 +1093,7 @@
# whatever reason. (Can't reboot, want to inspect each iteration)
# Doing a BISECT_MANUAL will have the test wait for you to
# tell it if the test passed or failed after each iteration.
-# This is basicall the same as running git bisect yourself
+# This is basically the same as running git bisect yourself
# but ktest will rebuild and install the kernel for you.
#
# BISECT_CHECK = 1 (optional, default 0)
@@ -1239,7 +1239,7 @@
#
# CONFIG_BISECT_EXEC (optional)
# The config bisect is a separate program that comes with ktest.pl.
-# By befault, it will look for:
+# By default, it will look for:
# `pwd`/config-bisect.pl # the location ktest.pl was executed from.
# If it does not find it there, it will look for:
# `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 6ec503912bea..b93fa645ee54 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -33,6 +33,7 @@ TARGETS += memory-hotplug
TARGETS += mount
TARGETS += mqueue
TARGETS += net
+TARGETS += net/forwarding
TARGETS += net/mptcp
TARGETS += netfilter
TARGETS += networking/timestamping
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
new file mode 100644
index 000000000000..189a34a7addb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "test_send_signal_kern.skel.h"
+
+static void sigusr1_handler(int signum)
+{
+}
+
+#define THREAD_COUNT 100
+
+static void *worker(void *p)
+{
+ int i;
+
+ for ( i = 0; i < 1000; i++)
+ usleep(1);
+
+ return NULL;
+}
+
+void test_send_signal_sched_switch(void)
+{
+ struct test_send_signal_kern *skel;
+ pthread_t threads[THREAD_COUNT];
+ u32 duration = 0;
+ int i, err;
+
+ signal(SIGUSR1, sigusr1_handler);
+
+ skel = test_send_signal_kern__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+ return;
+
+ skel->bss->pid = getpid();
+ skel->bss->sig = SIGUSR1;
+
+ err = test_send_signal_kern__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
+ goto destroy_skel;
+
+ for (i = 0; i < THREAD_COUNT; i++) {
+ err = pthread_create(threads + i, NULL, worker, NULL);
+ if (CHECK(err, "pthread_create", "Error creating thread, %s\n",
+ strerror(errno)))
+ goto destroy_skel;
+ }
+
+ for (i = 0; i < THREAD_COUNT; i++)
+ pthread_join(threads[i], NULL);
+
+destroy_skel:
+ test_send_signal_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index 1acc91e87bfc..b4233d3efac2 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -31,6 +31,12 @@ int send_signal_tp(void *ctx)
return bpf_send_signal_test(ctx);
}
+SEC("tracepoint/sched/sched_switch")
+int send_signal_tp_sched(void *ctx)
+{
+ return bpf_send_signal_test(ctx);
+}
+
SEC("perf_event")
int send_signal_perf(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 93040ca83e60..8da77cda5f4a 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -1062,6 +1062,48 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Member exceeds struct_size",
},
+/* Test member unexceeds the size of struct
+ *
+ * enum E {
+ * E0,
+ * E1,
+ * };
+ *
+ * struct A {
+ * char m;
+ * enum E __attribute__((packed)) n;
+ * };
+ */
+{
+ .descr = "size check test #5",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* char */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),
+ /* enum E { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 1),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ /* } */
+ /* struct A { */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 2),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* char m; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 8),/* enum E __attribute__((packed)) n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0E\0E0\0E1\0A\0m\0n",
+ .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check5_map",
+ .key_size = sizeof(int),
+ .value_size = 2,
+ .key_type_id = 1,
+ .value_type_id = 4,
+ .max_entries = 4,
+},
+
/* typedef const void * const_void_ptr;
* struct A {
* const_void_ptr m;
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index bf0322eb5346..bd5cae4a7f73 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -62,6 +62,21 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "jset32: ignores upper bits",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_7, 0x8000000000000000),
+ BPF_LD_IMM64(BPF_REG_8, 0x8000000000000000),
+ BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP32_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+},
+{
"jset32: min/max deduction",
.insns = {
BPF_RAND_UEXT_R7,
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 287ae916ec0b..4c1bd03ffa1c 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -11,7 +11,9 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh
+TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
+TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
+TEST_PROGS += route_localnet.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 60273f1bc7d9..b7616704b55e 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1041,6 +1041,27 @@ ipv6_addr_metric_test()
fi
log_test $rc 0 "Prefix route with metric on link up"
+ # verify peer metric added correctly
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy2"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::1 peer 2001:db8:104::2 metric 260"
+ set +e
+
+ # log each check_route6 result exactly once; $? is only meaningful
+ # directly after the command whose status is being reported
+ check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260"
+ log_test $? 0 "Set metric with peer route on local side"
+ check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260"
+ log_test $? 0 "Set metric with peer route on peer side"
+
+ set -e
+ run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::1 peer 2001:db8:104::3 metric 261"
+ set +e
+
+ check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 261"
+ log_test $? 0 "Modify metric and peer address on local side"
+ check_route6 "2001:db8:104::3 dev dummy2 proto kernel metric 261"
+ log_test $? 0 "Modify metric and peer address on peer side"
+
$IP li del dummy1
$IP li del dummy2
cleanup
@@ -1457,13 +1478,20 @@ ipv4_addr_metric_test()
run_cmd "$IP addr flush dev dummy2"
run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
- run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261"
rc=$?
if [ $rc -eq 0 ]; then
- check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+ check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 260"
+ rc=$?
+ fi
+ log_test $rc 0 "Set metric of address with peer route"
+
+ run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.3 metric 261"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.3 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
rc=$?
fi
- log_test $rc 0 "Modify metric of address with peer route"
+ log_test $rc 0 "Modify metric and peer address for peer route"
$IP li del dummy1
$IP li del dummy2
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
new file mode 100644
index 000000000000..250fbb2d1625
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+# Test scripts provided by this directory.
+# NOTE(review): presumably consumed by ../../lib.mk, which is included at the
+# end of this Makefile - confirm how lib.mk runs/installs them.
+TEST_PROGS = bridge_igmp.sh \
+ bridge_port_isolation.sh \
+ bridge_sticky_fdb.sh \
+ bridge_vlan_aware.sh \
+ bridge_vlan_unaware.sh \
+ ethtool.sh \
+ gre_inner_v4_multipath.sh \
+ gre_inner_v6_multipath.sh \
+ gre_multipath.sh \
+ ip6gre_inner_v4_multipath.sh \
+ ip6gre_inner_v6_multipath.sh \
+ ipip_flat_gre_key.sh \
+ ipip_flat_gre_keys.sh \
+ ipip_flat_gre.sh \
+ ipip_hier_gre_key.sh \
+ ipip_hier_gre_keys.sh \
+ ipip_hier_gre.sh \
+ loopback.sh \
+ mirror_gre_bound.sh \
+ mirror_gre_bridge_1d.sh \
+ mirror_gre_bridge_1d_vlan.sh \
+ mirror_gre_bridge_1q_lag.sh \
+ mirror_gre_bridge_1q.sh \
+ mirror_gre_changes.sh \
+ mirror_gre_flower.sh \
+ mirror_gre_lag_lacp.sh \
+ mirror_gre_neigh.sh \
+ mirror_gre_nh.sh \
+ mirror_gre.sh \
+ mirror_gre_vlan_bridge_1q.sh \
+ mirror_gre_vlan.sh \
+ mirror_vlan.sh \
+ router_bridge.sh \
+ router_bridge_vlan.sh \
+ router_broadcast.sh \
+ router_mpath_nh.sh \
+ router_multicast.sh \
+ router_multipath.sh \
+ router.sh \
+ router_vid_1.sh \
+ sch_ets.sh \
+ sch_tbf_ets.sh \
+ sch_tbf_prio.sh \
+ sch_tbf_root.sh \
+ tc_actions.sh \
+ tc_chains.sh \
+ tc_flower_router.sh \
+ tc_flower.sh \
+ tc_shblocks.sh \
+ tc_vlan_modify.sh \
+ vxlan_asymmetric.sh \
+ vxlan_bridge_1d_port_8472.sh \
+ vxlan_bridge_1d.sh \
+ vxlan_bridge_1q_port_8472.sh \
+ vxlan_bridge_1q.sh \
+ vxlan_symmetric.sh
+
+# Shared helper scripts and the sample configuration file.
+# NOTE(review): presumably files that must accompany the tests without being
+# run as tests themselves - confirm against ../../lib.mk.
+TEST_PROGS_EXTENDED := devlink_lib.sh \
+ ethtool_lib.sh \
+ fib_offload_lib.sh \
+ forwarding.config.sample \
+ ipip_lib.sh \
+ lib.sh \
+ mirror_gre_lib.sh \
+ mirror_gre_topo_lib.sh \
+ mirror_lib.sh \
+ mirror_topo_lib.sh \
+ sch_ets_core.sh \
+ sch_ets_tests.sh \
+ sch_tbf_core.sh \
+ sch_tbf_etsprio.sh \
+ tc_common.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
index 925d229a59d8..925d229a59d8 100755..100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index c6233935fed1..b8475cb29be7 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -21,6 +21,10 @@
#include <sys/socket.h>
#include <unistd.h>
+/* Provide SOL_DCCP locally when the included headers do not define it. */
+#ifndef SOL_DCCP
+#define SOL_DCCP 269
+#endif
+
static const char *IP4_ADDR = "127.0.0.1";
static const char *IP6_ADDR = "::1";
static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 08194aa44006..9c0f758310fe 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -3,6 +3,10 @@
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
- nft_concat_range.sh
+ nft_concat_range.sh \
+ nft_queue.sh
+
+LDLIBS = -lmnl
+TEST_GEN_FILES = nf-queue
include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
index 59caa8f71cd8..4faf2ce021d9 100644
--- a/tools/testing/selftests/netfilter/config
+++ b/tools/testing/selftests/netfilter/config
@@ -1,2 +1,8 @@
CONFIG_NET_NS=y
CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NF_CT_NETLINK=m
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
new file mode 100644
index 000000000000..29c73bce38fa
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_queue.h>
+
+/* Command-line configuration, filled in by parse_opts(). */
+struct options {
+ bool count_packets; /* -c: count packets per hook and print totals */
+ int verbose; /* incremented once for every -v */
+ unsigned int queue_num; /* -q: queue to bind to (reset to 0 if > 0xffff) */
+ unsigned int timeout; /* -t: used as tv_sec of the socket receive timeout */
+};
+
+/* Per-hook packet counters, indexed by the hook number of each packet. */
+static unsigned int queue_stats[5];
+static struct options opts;
+
+/* Print the usage line for program name @p on standard output. */
+static void help(const char *p)
+{
+	fprintf(stdout,
+		"Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+}
+
+/*
+ * Attribute callback for mnl_attr_parse().
+ *
+ * Validates the attributes this program knows about and stores each
+ * accepted attribute in the table passed through @data, indexed by
+ * attribute type. Attribute types beyond NFQA_MAX are silently skipped.
+ *
+ * Returns MNL_CB_OK to continue parsing, MNL_CB_ERROR when validation of
+ * a known attribute fails.
+ */
+static int parse_attr_cb(const struct nlattr *attr, void *data)
+{
+	const struct nlattr **table = data;
+	const char *failed_call = NULL;
+	int attr_type = mnl_attr_get_type(attr);
+
+	/* skip unsupported attribute in user-space */
+	if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
+		return MNL_CB_OK;
+
+	switch (attr_type) {
+	case NFQA_MARK:
+	case NFQA_IFINDEX_INDEV:
+	case NFQA_IFINDEX_OUTDEV:
+	case NFQA_IFINDEX_PHYSINDEV:
+	case NFQA_IFINDEX_PHYSOUTDEV:
+		if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
+			failed_call = "mnl_attr_validate";
+		break;
+	case NFQA_TIMESTAMP:
+		if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+		    sizeof(struct nfqnl_msg_packet_timestamp)) < 0)
+			failed_call = "mnl_attr_validate2";
+		break;
+	case NFQA_HWADDR:
+		if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+		    sizeof(struct nfqnl_msg_packet_hw)) < 0)
+			failed_call = "mnl_attr_validate2";
+		break;
+	default:
+		/* NFQA_PAYLOAD and all other types are stored unchecked */
+		break;
+	}
+
+	if (failed_call) {
+		perror(failed_call);
+		return MNL_CB_ERROR;
+	}
+
+	table[attr_type] = attr;
+	return MNL_CB_OK;
+}
+
+/*
+ * mnl_cb_run() callback for one queued-packet message.
+ *
+ * Parses the message attributes, optionally prints packet details (-v) and
+ * bumps the per-hook counter (-c). The packet id is passed back to the
+ * caller encoded in the return value as MNL_CB_OK + id (id stays 0 when the
+ * message has no NFQA_PACKET_HDR); mainloop() subtracts MNL_CB_OK again.
+ *
+ * Returns MNL_CB_ERROR when the hook number does not fit queue_stats[].
+ */
+static int queue_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *tb[NFQA_MAX+1] = { 0 };
+	struct nfqnl_msg_packet_hdr *ph = NULL;
+	uint32_t id = 0;
+
+	(void)data;
+
+	mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb);
+	if (tb[NFQA_PACKET_HDR]) {
+		ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]);
+		id = ntohl(ph->packet_id);
+
+		/* argument order must follow the format: hook first, then hwproto */
+		if (opts.verbose > 0)
+			printf("packet hook=%u, hwproto 0x%x",
+				ph->hook, ntohs(ph->hw_protocol));
+
+		/* queue_stats[] only has room for hooks 0..4 */
+		if (ph->hook >= 5) {
+			fprintf(stderr, "Unknown hook %d\n", ph->hook);
+			return MNL_CB_ERROR;
+		}
+
+		if (opts.verbose > 0) {
+			uint32_t skbinfo = 0;
+
+			if (tb[NFQA_SKB_INFO])
+				skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO]));
+			if (skbinfo & NFQA_SKB_CSUMNOTREADY)
+				printf(" csumnotready");
+			if (skbinfo & NFQA_SKB_GSO)
+				printf(" gso");
+			if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED)
+				printf(" csumnotverified");
+			/* terminate the line started by the "packet hook=" printf */
+			puts("");
+		}
+
+		if (opts.count_packets)
+			queue_stats[ph->hook]++;
+	}
+
+	return MNL_CB_OK + id;
+}
+
+/*
+ * Build an NFQNL_MSG_CONFIG request in @buf that carries @command for
+ * queue @queue_num. Returns the message header obtained from
+ * mnl_nlmsg_put_header().
+ */
+static struct nlmsghdr *
+nfq_build_cfg_request(char *buf, uint8_t command, int queue_num)
+{
+	struct nfqnl_msg_config_cmd cfg_cmd = {
+		.pf = htons(AF_INET),
+		.command = command,
+	};
+	struct nfgenmsg *genmsg;
+	struct nlmsghdr *hdr;
+
+	hdr = mnl_nlmsg_put_header(buf);
+	hdr->nlmsg_flags = NLM_F_REQUEST;
+	hdr->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+
+	genmsg = mnl_nlmsg_put_extra_header(hdr, sizeof(*genmsg));
+	genmsg->res_id = htons(queue_num);
+	genmsg->version = NFNETLINK_V0;
+	genmsg->nfgen_family = AF_UNSPEC;
+
+	mnl_attr_put(hdr, NFQA_CFG_CMD, sizeof(cfg_cmd), &cfg_cmd);
+
+	return hdr;
+}
+
+/*
+ * Build an NFQNL_MSG_CONFIG request in @buf that sets copy mode @mode and
+ * copy range @range for queue @queue_num. Returns the message header
+ * obtained from mnl_nlmsg_put_header().
+ */
+static struct nlmsghdr *
+nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
+{
+	struct nfqnl_msg_config_params cfg = {
+		.copy_mode = mode,
+		.copy_range = htonl(range),
+	};
+	struct nfgenmsg *genmsg;
+	struct nlmsghdr *hdr;
+
+	hdr = mnl_nlmsg_put_header(buf);
+	hdr->nlmsg_flags = NLM_F_REQUEST;
+	hdr->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+
+	genmsg = mnl_nlmsg_put_extra_header(hdr, sizeof(*genmsg));
+	genmsg->res_id = htons(queue_num);
+	genmsg->version = NFNETLINK_V0;
+	genmsg->nfgen_family = AF_UNSPEC;
+
+	mnl_attr_put(hdr, NFQA_CFG_PARAMS, sizeof(cfg), &cfg);
+
+	return hdr;
+}
+
+/*
+ * Build an NFQNL_MSG_VERDICT message in @buf that applies verdict @verd to
+ * packet @id on queue @queue_num. Returns the message header obtained from
+ * mnl_nlmsg_put_header().
+ */
+static struct nlmsghdr *
+nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+{
+	struct nfqnl_msg_verdict_hdr verdict_hdr = {
+		.id = htonl(id),
+		.verdict = htonl(verd),
+	};
+	struct nlmsghdr *hdr = mnl_nlmsg_put_header(buf);
+	struct nfgenmsg *genmsg;
+
+	hdr->nlmsg_flags = NLM_F_REQUEST;
+	hdr->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT;
+
+	genmsg = mnl_nlmsg_put_extra_header(hdr, sizeof(*genmsg));
+	genmsg->res_id = htons(queue_num);
+	genmsg->version = NFNETLINK_V0;
+	genmsg->nfgen_family = AF_UNSPEC;
+
+	mnl_attr_put(hdr, NFQA_VERDICT_HDR, sizeof(verdict_hdr), &verdict_hdr);
+
+	return hdr;
+}
+
+/*
+ * Print one counter line per hook followed by the sum over all hooks.
+ * Does nothing unless packet counting (-c) was requested.
+ */
+static void print_stats(void)
+{
+	/* derive the bound from the array so the two cannot drift apart */
+	const int nr_hooks = sizeof(queue_stats) / sizeof(queue_stats[0]);
+	unsigned int total = 0;
+	int i;
+
+	if (!opts.count_packets)
+		return;
+
+	for (i = 0; i < nr_hooks; i++) {
+		printf("hook %d packets %08u\n", i, queue_stats[i]);
+		total += queue_stats[i];
+	}
+
+	printf("%u packets total\n", total);
+}
+
+/*
+ * Open a NETLINK_NETFILTER socket, bind it to queue opts.queue_num and
+ * configure that queue (copy mode, flags and, if requested, a receive
+ * timeout). Every failure is reported with perror() and terminates the
+ * program. Returns the configured socket.
+ */
+struct mnl_socket *open_queue(void)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ unsigned int queue_num;
+ struct mnl_socket *nl;
+ struct nlmsghdr *nlh;
+ struct timeval tv;
+ uint32_t flags;
+
+ nl = mnl_socket_open(NETLINK_NETFILTER);
+ if (nl == NULL) {
+ perror("mnl_socket_open");
+ exit(EXIT_FAILURE);
+ }
+
+ if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+ perror("mnl_socket_bind");
+ exit(EXIT_FAILURE);
+ }
+
+ /* first request: NFQNL_CFG_CMD_BIND for the selected queue */
+ queue_num = opts.queue_num;
+ nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num);
+
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+
+ /* second request: NFQNL_COPY_PACKET mode with a copy range of 0xFFFF */
+ nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
+
+ /* the same bits are sent both as flag values and as the flag mask */
+ flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+ mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
+ mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
+
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+
+ /* SO_RCVTIMEO is only installed when a non-zero timeout was given */
+ memset(&tv, 0, sizeof(tv));
+ tv.tv_sec = opts.timeout;
+ if (opts.timeout && setsockopt(mnl_socket_get_fd(nl),
+ SOL_SOCKET, SO_RCVTIMEO,
+ &tv, sizeof(tv))) {
+ perror("setsockopt(SO_RCVTIMEO)");
+ exit(EXIT_FAILURE);
+ }
+
+ return nl;
+}
+
+/*
+ * Receive messages from the queue and answer each with an NF_ACCEPT verdict.
+ *
+ * The loop ends when mnl_socket_recvfrom() fails with EAGAIN (e.g. the
+ * receive timeout installed by open_queue() expired); 0 is returned in that
+ * case. ENOBUFS is ignored and the receive is retried. Any other receive,
+ * parse or send error is reported with perror() and terminates the program.
+ */
+static int mainloop(void)
+{
+	unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
+	struct mnl_socket *nl;
+	struct nlmsghdr *nlh;
+	unsigned int portid;
+	char *buf;
+	int ret;
+
+	buf = malloc(buflen);
+	if (!buf) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+
+	nl = open_queue();
+	portid = mnl_socket_get_portid(nl);
+
+	for (;;) {
+		uint32_t id;
+
+		ret = mnl_socket_recvfrom(nl, buf, buflen);
+		if (ret == -1) {
+			if (errno == ENOBUFS)
+				continue;
+
+			if (errno == EAGAIN) {
+				errno = 0;
+				ret = 0;
+				break;
+			}
+
+			perror("mnl_socket_recvfrom");
+			exit(EXIT_FAILURE);
+		}
+
+		ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL);
+		if (ret < 0) {
+			perror("mnl_cb_run");
+			exit(EXIT_FAILURE);
+		}
+
+		/* queue_cb() returns MNL_CB_OK + packet id */
+		id = ret - MNL_CB_OK;
+		/* the receive buffer is reused to build the verdict message */
+		nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+		if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+			perror("mnl_socket_sendto");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	mnl_socket_close(nl);
+	/* release the receive buffer; it was leaked on this path before */
+	free(buf);
+
+	return ret;
+}
+
+/*
+ * Fill the global opts from the command line.
+ * -h prints the usage line and exits with status 0.
+ */
+static void parse_opts(int argc, char **argv)
+{
+	int opt;
+
+	for (;;) {
+		opt = getopt(argc, argv, "chvt:q:");
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		case 'h':
+			help(argv[0]);
+			exit(0);
+		case 'c':
+			opts.count_packets = true;
+			break;
+		case 'v':
+			opts.verbose++;
+			break;
+		case 't':
+			opts.timeout = atoi(optarg);
+			break;
+		case 'q':
+			opts.queue_num = atoi(optarg);
+			/* values above 0xffff fall back to queue 0 */
+			if (opts.queue_num > 0xffff)
+				opts.queue_num = 0;
+			break;
+		}
+	}
+}
+
+/*
+ * Parse the options, run the receive loop and, with -c, print the packet
+ * counters. The exit status is the return value of mainloop().
+ */
+int main(int argc, char *argv[])
+{
+	int status;
+
+	parse_opts(argc, argv);
+	status = mainloop();
+
+	if (opts.count_packets)
+		print_stats();
+
+	return status;
+}
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
new file mode 100755
index 000000000000..6898448b4266
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+#
+# This tests nf_queue:
+# 1. can process packets from all hooks
+# 2. support running nfqueue from more than one base chain
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+
+# EXIT handler: delete the three namespaces and the listener output files.
+cleanup()
+{
+	local netns
+
+	for netns in ${ns1} ${ns2} ${nsrouter}; do
+		ip netns del $netns
+	done
+
+	rm -f "$TMPFILE0"
+	rm -f "$TMPFILE1"
+}
+
+# Prerequisites: exit with the kselftest SKIP code (not a failure) when the
+# nft or ip tool cannot be run or a network namespace cannot be created.
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace"
+ exit $ksft_skip
+fi
+
+# output files of the two nf-queue listeners started by test_queue
+TMPFILE0=$(mktemp)
+TMPFILE1=$(mktemp)
+# from here on every exit path runs cleanup
+trap cleanup EXIT
+
+# Topology built below (ns1 and ns2 use nsrouter as default gateway):
+#
+#   ns1:eth0 -------- veth0:nsrouter:veth1 -------- eth0:ns2
+#   10.0.1.99/24      10.0.1.1/24  10.0.2.1/24      10.0.2.99/24
+#   dead:1::99/64     dead:1::1/64 dead:2::1/64     dead:2::99/64
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+# load_ruleset NAME PRIO
+#
+# Load table "inet NAME" into the router namespace. It has one base chain per
+# hook (prerouting, input, forward, output, postrouting), all at priority
+# PRIO, and each of them jumps to the shared chain "nfq", which queues ICMP
+# with "bypass" (no queue number given) and ICMPv6 echo request/reply to
+# queue 1 with "bypass". In addition the forward chain queues tcp dport 12345
+# to queue 2 and the output chain queues it to queue 3, both without bypass.
+load_ruleset() {
+ local name=$1
+ local prio=$2
+
+ip netns exec ${nsrouter} nft -f - <<EOF
+table inet $name {
+ chain nfq {
+ ip protocol icmp queue bypass
+ icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
+ }
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ jump nfq
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ jump nfq
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ tcp dport 12345 queue num 2
+ jump nfq
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ tcp dport 12345 queue num 3
+ jump nfq
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ jump nfq
+ }
+}
+EOF
+}
+
+# load_counter_ruleset PRIO
+#
+# Load table "inet countrules" into the router namespace: one base chain per
+# hook at priority PRIO, each containing only a counter statement.
+load_counter_ruleset() {
+ local prio=$1
+
+ip netns exec ${nsrouter} nft -f - <<EOF
+table inet countrules {
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ counter
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ counter
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ counter
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ counter
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ counter
+ }
+}
+EOF
+}
+
+test_ping() {
+ ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ if [ $? -ne 0 ];then
+ return 1
+ fi
+
+ ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ if [ $? -ne 0 ];then
+ return 1
+ fi
+
+ return 0
+}
+
+test_ping_router() {
+ ip netns exec ${ns1} ping -c 1 -q 10.0.2.1 > /dev/null
+ if [ $? -ne 0 ];then
+ return 1
+ fi
+
+ ip netns exec ${ns1} ping -c 1 -q dead:2::1 > /dev/null
+ if [ $? -ne 0 ];then
+ return 1
+ fi
+
+ return 0
+}
+
+# test_queue_blackhole PROTO   (PROTO is "ip" or "ip6")
+#
+# Load a temporary table "PROTO blackh" whose forward chain queues everything
+# to queue 600 without "bypass", then ping ns2 from ns1. The ping is expected
+# to fail; a successful ping, or a failure to delete the table afterwards,
+# terminates the script with exit status 1.
+test_queue_blackhole() {
+ local proto=$1
+
+ip netns exec ${nsrouter} nft -f - <<EOF
+table $proto blackh {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ queue num 600
+ }
+}
+EOF
+ if [ $proto = "ip" ] ;then
+ ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ lret=$?
+ elif [ $proto = "ip6" ]; then
+ ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ lret=$?
+ else
+ # unknown protocol: no ping is sent, any non-zero value counts as "dropped"
+ lret=111
+ fi
+
+ # queue without bypass keyword should drop traffic if no listener exists.
+ if [ $lret -eq 0 ];then
+ echo "FAIL: $proto expected failure, got $lret" 1>&2
+ exit 1
+ fi
+
+ ip netns exec ${nsrouter} nft delete table $proto blackh
+ if [ $? -ne 0 ] ;then
+ echo "FAIL: $proto: Could not delete blackh table"
+ exit 1
+ fi
+
+ echo "PASS: $proto: statement with no listener results in packet drop"
+}
+
+test_queue()
+{
+ local expected=$1
+ local last=""
+
+ # spawn nf-queue listeners
+ ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+ sleep 1
+ test_ping
+ ret=$?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: netns routing/connectivity with active listener on queue $queue: $ret" 1>&2
+ exit $ret
+ fi
+
+ test_ping_router
+ ret=$?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: netns router unreachable listener on queue $queue: $ret" 1>&2
+ exit $ret
+ fi
+
+ wait
+ ret=$?
+
+ for file in $TMPFILE0 $TMPFILE1; do
+ last=$(tail -n1 "$file")
+ if [ x"$last" != x"$expected packets total" ]; then
+ echo "FAIL: Expected $expected packets total, but got $last" 1>&2
+ cat "$file" 1>&2
+
+ ip netns exec ${nsrouter} nft list ruleset
+ exit 1
+ fi
+ done
+
+ echo "PASS: Expected and received $last"
+}
+
+test_tcp_forward()
+{
+ ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+ local nfqpid=$!
+
+ tmpfile=$(mktemp) || exit 1
+ dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+ ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ sleep 1
+ ip netns exec ${ns1} nc -w 5 10.0.2.99 12345 <"$tmpfile" >/dev/null &
+
+ rm -f "$tmpfile"
+
+ wait $rpid
+ wait $lpid
+ [ $? -eq 0 ] && echo "PASS: tcp and nfqueue in forward chain"
+}
+
+# Send a 900MB file over TCP port 12345 across the router namespace's
+# loopback device, with a netem qdisc configured for 1% random loss on lo and
+# an nf-queue listener on queue 3 (the queue used by the output chain rule).
+# Prints the PASS line when the listening nc exits with status 0.
+test_tcp_localhost()
+{
+ tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
+
+ tmpfile=$(mktemp) || exit 1
+
+ dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+ ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+ # NOTE(review): nfqpid is never used afterwards; the listener is left to
+ # exit through its own 30 second timeout
+ local nfqpid=$!
+
+ sleep 1
+ # the client runs in the foreground, so it is done before the file is removed
+ ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+ rm -f "$tmpfile"
+
+ wait $rpid
+ [ $? -eq 0 ] && echo "PASS: tcp via loopback"
+}
+
+# enable forwarding in the router namespace (IPv6 globally, IPv4 per veth)
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+load_ruleset "filter" 0
+
+sleep 3
+
+# no nf-queue listener is running yet
+test_ping
+ret=$?
+if [ $ret -eq 0 ];then
+ # queue bypass works (rules were skipped, no listener)
+ echo "PASS: ${ns1} can reach ${ns2}"
+else
+ echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+ exit $ret
+fi
+
+test_queue_blackhole ip
+test_queue_blackhole ip6
+
+# dummy ruleset to add base chains between the
+# queueing rules. We don't want the second reinject
+# to re-execute the old hooks.
+load_counter_ruleset 10
+
+# we are hooking all: prerouting/input/forward/output/postrouting.
+# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
+# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
+# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
+# so we expect that userspace program receives 10 packets.
+test_queue 10
+
+# same. We queue to a second program as well.
+load_ruleset "filter2" 20
+test_queue 20
+
+test_tcp_forward
+test_tcp_localhost
+
+# ret was last assigned inside test_queue; the two tcp tests do not change it
+exit $ret
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index ee1b727ede04..a9ad3bd8b2ad 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -212,6 +212,10 @@ struct seccomp_notif_sizes {
#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
#endif
+/* Define the flag locally when the included headers do not provide it. */
+#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -2187,7 +2191,8 @@ TEST(detect_seccomp_filter_flags)
unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
SECCOMP_FILTER_FLAG_LOG,
SECCOMP_FILTER_FLAG_SPEC_ALLOW,
- SECCOMP_FILTER_FLAG_NEW_LISTENER };
+ SECCOMP_FILTER_FLAG_NEW_LISTENER,
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
unsigned int exclusive[] = {
SECCOMP_FILTER_FLAG_TSYNC,
SECCOMP_FILTER_FLAG_NEW_LISTENER };
@@ -2645,6 +2650,55 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}
+/*
+ * TSYNC with one diverged sibling and SECCOMP_FILTER_FLAG_TSYNC_ESRCH set:
+ * the synchronizing seccomp() call is expected to fail with -1 and
+ * errno == ESRCH, and both siblings are expected to exit unkilled.
+ */
+TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
+{
+ long ret, flags;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* install the root filter before any sibling is started */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ /* only sibling 0 is marked as diverging */
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ /* one sem_wait() on "started" per sibling */
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ flags = SECCOMP_FILTER_FLAG_TSYNC | \
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
+ ASSERT_EQ(ESRCH, errno) {
+ TH_LOG("Did not return ESRCH for diverged sibling.");
+ }
+ ASSERT_EQ(-1, ret) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+
+ /* Wake the threads */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both unkilled. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
TEST_F(TSYNC, two_siblings_not_under_filter)
{
long ret, sib;
@@ -3196,6 +3250,24 @@ TEST(user_notification_basic)
EXPECT_EQ(0, WEXITSTATUS(status));
}
+/*
+ * NEW_LISTENER combined with TSYNC is expected to be rejected with EINVAL,
+ * and to be accepted (non-negative return value) once TSYNC_ESRCH is added.
+ */
+TEST(user_notification_with_tsync)
+{
+ int ret;
+ unsigned int flags;
+
+ /* these were exclusive */
+ flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_TSYNC;
+ ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* but now they're not */
+ flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+ ret = user_trap_syscall(__NR_getppid, flags);
+ /*
+  * NOTE(review): close() runs before the assertion, presumably so the
+  * descriptor is not leaked if the assertion aborts the test - confirm
+  */
+ close(ret);
+ ASSERT_LE(0, ret);
+}
+
TEST(user_notification_kill_in_middle)
{
pid_t pid;
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 477bc61b374a..c03af4600281 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -57,3 +57,4 @@ CONFIG_NET_IFE_SKBMARK=m
CONFIG_NET_IFE_SKBPRIO=m
CONFIG_NET_IFE_SKBTCINDEX=m
CONFIG_NET_SCH_FIFO=y
+CONFIG_NET_SCH_ETS=m
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 138d46b3f330..936e1ca9410e 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -527,11 +527,16 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
n0 wg set wg0 peer "$pub2" allowed-ips ::/0
n0 wg set wg0 peer "$pub2" remove
-low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
-n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
-[[ -z $(n0 wg show wg0 peers) ]]
-n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
-[[ -z $(n0 wg show wg0 peers) ]]
+for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do
+ n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111
+done
+[[ -n $(n0 wg show wg0 peers) ]]
+exec 4< <(n0 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns0 $ncat_pid
+ip0 link set wg0 up
+! read -r -n 1 -t 2 <&4 || false
+kill $ncat_pid
ip0 link del wg0
declare -A objects
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 28d477683e8a..90598a425c18 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -41,7 +41,7 @@ $(DISTFILES_PATH)/$(1):
flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
endef
-$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
+$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 90bc9813cadc..c9698120ac9d 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -13,7 +13,6 @@
#include <fcntl.h>
#include <sys/wait.h>
#include <sys/mount.h>
-#include <sys/types.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/io.h>
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index af9323a0b6e0..d531de13c95b 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -56,7 +56,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_HZ_PERIODIC=n
CONFIG_HIGH_RES_TIMERS=y
-CONFIG_COMPAT_32BIT_TIME=y
CONFIG_ARCH_RANDOM=y
CONFIG_FILE_LOCKING=y
CONFIG_POSIX_TIMERS=y
diff --git a/usr/Kconfig b/usr/Kconfig
index bdf5bbd40727..96afb03b65f9 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -124,17 +124,6 @@ choice
If in doubt, select 'None'
-config INITRAMFS_COMPRESSION_NONE
- bool "None"
- help
- Do not compress the built-in initramfs at all. This may sound wasteful
- in space, but, you should be aware that the built-in initramfs will be
- compressed at a later stage anyways along with the rest of the kernel,
- on those architectures that support this. However, not compressing the
- initramfs may lead to slightly higher memory consumption during a
- short time at boot, while both the cpio image and the unpacked
- filesystem image will be present in memory simultaneously
-
config INITRAMFS_COMPRESSION_GZIP
bool "Gzip"
depends on RD_GZIP
@@ -207,4 +196,15 @@ config INITRAMFS_COMPRESSION_LZ4
If you choose this, keep in mind that most distros don't provide lz4
by default which could cause a build failure.
+config INITRAMFS_COMPRESSION_NONE
+ bool "None"
+ help
+ Do not compress the built-in initramfs at all. This may sound wasteful
+ in space, but, you should be aware that the built-in initramfs will be
+ compressed at a later stage anyways along with the rest of the kernel,
+ on those architectures that support this. However, not compressing the
+ initramfs may lead to slightly higher memory consumption during a
+ short time at boot, while both the cpio image and the unpacked
+ filesystem image will be present in memory simultaneously
+
endchoice