summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu1
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst1
-rw-r--r--Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst246
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt37
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml5
-rw-r--r--Documentation/networking/ip-sysctl.rst37
-rw-r--r--Documentation/networking/phy.rst2
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts5
-rw-r--r--arch/arm64/include/asm/kvm_host.h5
-rw-r--r--arch/arm64/include/asm/virt.h3
-rw-r--r--arch/arm64/kernel/cpufeature.c10
-rw-r--r--arch/arm64/kvm/arch_timer.c3
-rw-r--r--arch/arm64/kvm/arm.c10
-rw-r--r--arch/arm64/kvm/fpsimd.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c42
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v2.c4
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c40
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c40
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.h3
-rw-r--r--arch/arm64/kvm/vmid.c2
-rw-r--r--arch/riscv/Kconfig9
-rw-r--r--arch/riscv/Kconfig.erratas1
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs.dtsi9
-rw-r--r--arch/riscv/kernel/cpufeature.c5
-rw-r--r--arch/riscv/kvm/vmid.c2
-rw-r--r--arch/x86/hyperv/hv_init.c6
-rw-r--r--arch/x86/hyperv/ivm.c84
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h67
-rw-r--r--arch/x86/include/asm/mshyperv.h4
-rw-r--r--arch/x86/include/asm/msr-index.h25
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/kernel/cpu/bugs.c235
-rw-r--r--arch/x86/kernel/cpu/common.c52
-rw-r--r--arch/x86/kvm/lapic.c27
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/kvm/svm/avic.c171
-rw-r--r--arch/x86/kvm/svm/nested.c39
-rw-r--r--arch/x86/kvm/svm/svm.c8
-rw-r--r--arch/x86/kvm/svm/svm.h4
-rw-r--r--arch/x86/kvm/vmx/vmx.c76
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/kvm/x86.c5
-rw-r--r--block/bfq-iosched.c6
-rw-r--r--block/blk-mq-sched.c1
-rw-r--r--block/blk-mq.c29
-rw-r--r--block/kyber-iosched.c3
-rw-r--r--block/mq-deadline.c3
-rw-r--r--certs/.gitignore2
-rw-r--r--certs/Makefile20
-rw-r--r--certs/blacklist_hashes.c4
-rw-r--r--crypto/Kconfig1
-rw-r--r--crypto/Makefile2
-rw-r--r--drivers/base/cpu.c8
-rw-r--r--drivers/base/init.c2
-rw-r--r--drivers/char/lp.c2
-rw-r--r--drivers/clocksource/hyperv_timer.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c8
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.c6
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_mic.c42
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.c29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c2
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.c17
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c48
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c22
-rw-r--r--drivers/gpu/drm/ttm/ttm_device.c6
-rw-r--r--drivers/gpu/drm/ttm/ttm_resource.c52
-rw-r--r--drivers/hid/hid-hyperv.c5
-rw-r--r--drivers/hv/channel_mgmt.c18
-rw-r--r--drivers/hv/hv_kvp.c2
-rw-r--r--drivers/hv/vmbus_drv.c4
-rw-r--r--drivers/hwmon/asus-ec-sensors.c2
-rw-r--r--drivers/hwmon/occ/common.c5
-rw-r--r--drivers/md/dm-log.c3
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm.c24
-rw-r--r--drivers/md/md.c14
-rw-r--r--drivers/md/md.h2
-rw-r--r--drivers/md/raid5-ppl.c4
-rw-r--r--drivers/misc/atmel-ssc.c4
-rw-r--r--drivers/misc/eeprom/at25.c93
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-platform.c4
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-bcma.c1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c18
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c101
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c25
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c49
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h31
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c53
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h2
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet.h51
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c29
-rw-r--r--drivers/net/usb/ax88179_178a.c26
-rw-r--r--drivers/nvme/host/core.c5
-rw-r--r--drivers/nvme/host/nvme.h28
-rw-r--r--drivers/nvme/host/pci.c43
-rw-r--r--drivers/staging/olpc_dcon/Kconfig2
-rw-r--r--drivers/staging/r8188eu/core/rtw_xmit.c20
-rw-r--r--drivers/staging/r8188eu/os_dep/ioctl_linux.c2
-rw-r--r--drivers/staging/rtl8723bs/os_dep/ioctl_linux.c6
-rw-r--r--drivers/tty/goldfish.c2
-rw-r--r--drivers/tty/n_gsm.c2
-rw-r--r--drivers/tty/serial/8250/8250_port.c2
-rw-r--r--drivers/tty/serial/qcom_geni_serial.c1
-rw-r--r--drivers/tty/serial/serial_core.c9
-rw-r--r--drivers/usb/cdns3/cdnsp-ring.c19
-rw-r--r--drivers/usb/dwc2/hcd.c2
-rw-r--r--drivers/usb/dwc3/core.c9
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c1
-rw-r--r--drivers/usb/dwc3/gadget.c26
-rw-r--r--drivers/usb/gadget/function/f_fs.c40
-rw-r--r--drivers/usb/gadget/function/u_ether.c12
-rw-r--r--drivers/usb/gadget/udc/lpc32xx_udc.c1
-rw-r--r--drivers/usb/host/xhci.c15
-rw-r--r--drivers/usb/serial/io_ti.c2
-rw-r--r--drivers/usb/serial/io_usbvend.h1
-rw-r--r--drivers/usb/serial/option.c6
-rw-r--r--drivers/watchdog/gxp-wdt.c1
-rw-r--r--fs/attr.c26
-rw-r--r--fs/ext2/dir.c9
-rw-r--r--fs/io_uring.c347
-rw-r--r--include/drm/drm_atomic.h1
-rw-r--r--include/drm/ttm/ttm_resource.h8
-rw-r--r--include/linux/backing-dev.h2
-rw-r--r--include/linux/blkdev.h4
-rw-r--r--include/linux/cpu.h3
-rw-r--r--include/linux/netfs.h2
-rw-r--r--include/linux/serial_core.h1
-rw-r--r--include/linux/visorbus.h344
-rw-r--r--include/linux/vmalloc.h1
-rw-r--r--include/net/inet_connection_sock.h3
-rw-r--r--include/net/inet_hashtables.h68
-rw-r--r--include/net/sock.h14
-rw-r--r--include/uapi/linux/io_uring.h6
-rw-r--r--kernel/auditsc.c2
-rw-r--r--kernel/cfi.c22
-rw-r--r--lib/Kconfig3
-rw-r--r--lib/Makefile1
-rw-r--r--lib/crypto/Kconfig1
-rw-r--r--lib/memneq.c (renamed from crypto/memneq.c)0
-rw-r--r--mm/backing-dev.c11
-rw-r--r--mm/usercopy.c26
-rw-r--r--mm/vmalloc.c2
-rw-r--r--net/ax25/af_ax25.c33
-rw-r--r--net/dccp/proto.c33
-rw-r--r--net/ipv4/inet_connection_sock.c247
-rw-r--r--net/ipv4/inet_hashtables.c193
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--security/selinux/hooks.c11
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h1
-rw-r--r--tools/arch/x86/include/asm/msr-index.h25
-rw-r--r--tools/testing/selftests/kvm/Makefile49
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c10
-rw-r--r--tools/testing/selftests/kvm/include/perf_test_util.h9
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h25
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h6
-rw-r--r--tools/testing/selftests/kvm/lib/perf_test_util.c53
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c112
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c31
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c149
-rw-r--r--tools/testing/selftests/kvm/max_guest_memory_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmu_role_test.c2
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/bind_bhash_test.c119
-rw-r--r--virt/kvm/kvm_main.c8
182 files changed, 2534 insertions, 2031 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 2ad01cad7f1c..bcc974d276dc 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -526,6 +526,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/srbds
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 8cbc711cda93..4df436e7c417 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -17,3 +17,4 @@ are configurable at compile, boot or run time.
special-register-buffer-data-sampling.rst
core-scheduling.rst
l1d_flush.rst
+ processor_mmio_stale_data.rst
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
new file mode 100644
index 000000000000..9393c50b5afc
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@@ -0,0 +1,246 @@
+=========================================
+Processor MMIO Stale Data Vulnerabilities
+=========================================
+
+Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O
+(MMIO) vulnerabilities that can expose data. The sequences of operations for
+exposing data range from simple to very complex. Because most of the
+vulnerabilities require the attacker to have access to MMIO, many environments
+are not affected. System environments using virtualization where MMIO access is
+provided to untrusted guests may need mitigation. These vulnerabilities are
+not transient execution attacks. However, these vulnerabilities may propagate
+stale data into core fill buffers where the data can subsequently be inferred
+by an unmitigated transient execution attack. Mitigation for these
+vulnerabilities includes a combination of microcode update and software
+changes, depending on the platform and usage model. Some of these mitigations
+are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or
+those used to mitigate Special Register Buffer Data Sampling (SRBDS).
+
+Data Propagators
+================
+Propagators are operations that result in stale data being copied or moved from
+one microarchitectural buffer or register to another. Processor MMIO Stale Data
+Vulnerabilities are operations that may result in stale data being directly
+read into an architectural, software-visible state or sampled from a buffer or
+register.
+
+Fill Buffer Stale Data Propagator (FBSDP)
+-----------------------------------------
+Stale data may propagate from fill buffers (FB) into the non-coherent portion
+of the uncore on some non-coherent writes. Fill buffer propagation by itself
+does not make stale data architecturally visible. Stale data must be propagated
+to a location where it is subject to reading or sampling.
+
+Sideband Stale Data Propagator (SSDP)
+-------------------------------------
+The sideband stale data propagator (SSDP) is limited to the client (including
+Intel Xeon server E3) uncore implementation. The sideband response buffer is
+shared by all client cores. For non-coherent reads that go to sideband
+destinations, the uncore logic returns 64 bytes of data to the core, including
+both requested data and unrequested stale data, from a transaction buffer and
+the sideband response buffer. As a result, stale data from the sideband
+response and transaction buffers may now reside in a core fill buffer.
+
+Primary Stale Data Propagator (PSDP)
+------------------------------------
+The primary stale data propagator (PSDP) is limited to the client (including
+Intel Xeon server E3) uncore implementation. Similar to the sideband response
+buffer, the primary response buffer is shared by all client cores. For some
+processors, MMIO primary reads will return 64 bytes of data to the core fill
+buffer including both requested data and unrequested stale data. This is
+similar to the sideband stale data propagator.
+
+Vulnerabilities
+===============
+Device Register Partial Write (DRPW) (CVE-2022-21166)
+-----------------------------------------------------
+Some endpoint MMIO registers incorrectly handle writes that are smaller than
+the register size. Instead of aborting the write or only copying the correct
+subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than
+specified by the write transaction may be written to the register. On
+processors affected by FBSDP, this may expose stale data from the fill buffers
+of the core that created the write transaction.
+
+Shared Buffers Data Sampling (SBDS) (CVE-2022-21125)
+----------------------------------------------------
+After propagators may have moved data around the uncore and copied stale data
+into client core fill buffers, processors affected by MFBDS can leak data from
+the fill buffer. It is limited to the client (including Intel Xeon server E3)
+uncore implementation.
+
+Shared Buffers Data Read (SBDR) (CVE-2022-21123)
+------------------------------------------------
+It is similar to Shared Buffer Data Sampling (SBDS) except that the data is
+directly read into the architectural software-visible state. It is limited to
+the client (including Intel Xeon server E3) uncore implementation.
+
+Affected Processors
+===================
+Not all the CPUs are affected by all the variants. For instance, most
+processors for the server market (excluding Intel Xeon E3 processors) are
+impacted by only Device Register Partial Write (DRPW).
+
+Below is the list of affected Intel processors [#f1]_:
+
+ =================== ============ =========
+ Common name Family_Model Steppings
+ =================== ============ =========
+ HASWELL_X 06_3FH 2,4
+ SKYLAKE_L 06_4EH 3
+ BROADWELL_X 06_4FH All
+ SKYLAKE_X 06_55H 3,4,6,7,11
+ BROADWELL_D 06_56H 3,4,5
+ SKYLAKE 06_5EH 3
+ ICELAKE_X 06_6AH 4,5,6
+ ICELAKE_D 06_6CH 1
+ ICELAKE_L 06_7EH 5
+ ATOM_TREMONT_D 06_86H All
+ LAKEFIELD 06_8AH 1
+ KABYLAKE_L 06_8EH 9 to 12
+ ATOM_TREMONT 06_96H 1
+ ATOM_TREMONT_L 06_9CH 0
+ KABYLAKE 06_9EH 9 to 13
+ COMETLAKE 06_A5H 2,3,5
+ COMETLAKE_L 06_A6H 0,1
+ ROCKETLAKE 06_A7H 1
+ =================== ============ =========
+
+If a CPU is in the affected processor list, but not affected by a variant, it
+is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later
+section, mitigation largely remains the same for all the variants, i.e. to
+clear the CPU fill buffers via VERW instruction.
+
+New bits in MSRs
+================
+Newer processors and microcode update on existing affected processors added new
+bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
+specific variants of Processor MMIO Stale Data vulnerabilities and mitigation
+capability.
+
+MSR IA32_ARCH_CAPABILITIES
+--------------------------
+Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the
+ Shared Buffers Data Read (SBDR) vulnerability or the sideband stale
+ data propagator (SSDP).
+Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer
+ Stale Data Propagator (FBSDP).
+Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data
+ Propagator (PSDP).
+Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer
+ values as part of MD_CLEAR operations. Processors that do not
+ enumerate MDS_NO (meaning they are affected by MDS) but that do
+ enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate
+ FB_CLEAR as part of their MD_CLEAR support.
+Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR
+ IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS
+ bit can be set to cause the VERW instruction to not perform the
+ FB_CLEAR action. Not all processors that support FB_CLEAR will support
+ FB_CLEAR_CTRL.
+
+MSR IA32_MCU_OPT_CTRL
+---------------------
+Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR
+action. This may be useful to reduce the performance impact of FB_CLEAR in
+cases where system software deems it warranted (for example, when performance
+is more critical, or the untrusted software has no MMIO access). Note that
+FB_CLEAR_DIS has no impact on enumeration (for example, it does not change
+FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors
+that enumerate FB_CLEAR.
+
+Mitigation
+==========
+Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the
+same mitigation strategy to force the CPU to clear the affected buffers before
+an attacker can extract the secrets.
+
+This is achieved by using the otherwise unused and obsolete VERW instruction in
+combination with a microcode update. The microcode clears the affected CPU
+buffers when the VERW instruction is executed.
+
+Kernel reuses the MDS function to invoke the buffer clearing:
+
+ mds_clear_cpu_buffers()
+
+On MDS affected CPUs, the kernel already invokes CPU buffer clear on
+kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
+additional mitigation is needed on such CPUs.
+
+For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker
+with MMIO capability. Therefore, VERW is not required for kernel/userspace. For
+virtualization case, VERW is only needed at VMENTER for a guest with MMIO
+capability.
+
+Mitigation points
+-----------------
+Return to user space
+^^^^^^^^^^^^^^^^^^^^
+Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation
+needed.
+
+C-State transition
+^^^^^^^^^^^^^^^^^^
+Control register writes by CPU during C-state transition can propagate data
+from fill buffer to uncore buffers. Execute VERW before C-state transition to
+clear CPU fill buffers.
+
+Guest entry point
+^^^^^^^^^^^^^^^^^
+Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise
+execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by
+MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO
+Stale Data vulnerabilities, so there is no need to execute VERW for such guests.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The kernel command line allows to control the Processor MMIO Stale Data
+mitigations at boot time with the option "mmio_stale_data=". The valid
+arguments for this option are:
+
+ ========== =================================================================
+ full If the CPU is vulnerable, enable mitigation; CPU buffer clearing
+ on exit to userspace and when entering a VM. Idle transitions are
+ protected as well. It does not automatically disable SMT.
+ full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the
+ complete mitigation.
+ off Disables mitigation completely.
+ ========== =================================================================
+
+If the CPU is affected and mmio_stale_data=off is not supplied on the kernel
+command line, then the kernel selects the appropriate mitigation.
+
+Mitigation status information
+-----------------------------
+The Linux kernel provides a sysfs interface to enumerate the current
+vulnerability status of the system: whether the system is vulnerable, and
+which mitigations are active. The relevant sysfs file is:
+
+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable
+ * - 'Vulnerable'
+ - The processor is vulnerable, but no mitigation enabled
+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+ - The processor is vulnerable, but microcode is not updated. The
+ mitigation is enabled on a best effort basis.
+ * - 'Mitigation: Clear CPU buffers'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
+
+If the processor is vulnerable then the following information is appended to
+the above information:
+
+ ======================== ===========================================
+ 'SMT vulnerable' SMT is enabled
+ 'SMT disabled' SMT is disabled
+ 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
+ ======================== ===========================================
+
+References
+----------
+.. [#f1] Affected Processors
+ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8090130b544b..2522b11e593f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2469,7 +2469,6 @@
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
- Not valid if the kernel is running in EL2.
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
@@ -3176,6 +3175,7 @@
srbds=off [X86,INTEL]
no_entry_flush [PPC]
no_uaccess_flush [PPC]
+ mmio_stale_data=off [X86]
Exceptions:
This does not have any effect on
@@ -3197,6 +3197,7 @@
Equivalent to: l1tf=flush,nosmt [X86]
mds=full,nosmt [X86]
tsx_async_abort=full,nosmt [X86]
+ mmio_stale_data=full,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@@ -3206,6 +3207,40 @@
log everything. Information is printed at KERN_DEBUG
so loglevel=8 may also need to be specified.
+ mmio_stale_data=
+ [X86,INTEL] Control mitigation for the Processor
+ MMIO Stale Data vulnerabilities.
+
+ Processor MMIO Stale Data is a class of
+ vulnerabilities that may expose data after an MMIO
+ operation. Exposed data could originate or end in
+ the same CPU buffers as affected by MDS and TAA.
+ Therefore, similar to MDS and TAA, the mitigation
+ is to clear the affected CPU buffers.
+
+ This parameter controls the mitigation. The
+ options are:
+
+ full - Enable mitigation on vulnerable CPUs
+
+ full,nosmt - Enable mitigation and disable SMT on
+ vulnerable CPUs.
+
+ off - Unconditionally disable mitigation
+
+ On MDS or TAA affected machines,
+ mmio_stale_data=off can be prevented by an active
+ MDS or TAA mitigation as these vulnerabilities are
+ mitigated with the same mechanism so in order to
+ disable this mitigation, you need to specify
+ mds=off and tsx_async_abort=off too.
+
+ Not specifying this option is equivalent to
+ mmio_stale_data=full.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+
module.sig_enforce
[KNL] When CONFIG_MODULE_SIG is set, this means that
modules without (valid) signatures will fail to load.
diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
index fe0ac08faa1a..0e8ddf0ad789 100644
--- a/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
+++ b/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
@@ -40,9 +40,8 @@ properties:
value to be used for converting remote channel measurements to
temperature.
$ref: /schemas/types.yaml#/definitions/int32
- items:
- minimum: -128
- maximum: 127
+ minimum: -128
+ maximum: 127
ti,beta-compensation:
description:
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 04216564a03c..9f41961d11d5 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2925,6 +2925,43 @@ plpmtud_probe_interval - INTEGER
Default: 0
+reconf_enable - BOOLEAN
+ Enable or disable extension of Stream Reconfiguration functionality
+ specified in RFC6525. This extension provides the ability to "reset"
+ a stream, and it includes the Parameters of "Outgoing/Incoming SSN
+ Reset", "SSN/TSN Reset" and "Add Outgoing/Incoming Streams".
+
+ - 1: Enable extension.
+ - 0: Disable extension.
+
+ Default: 0
+
+intl_enable - BOOLEAN
+ Enable or disable extension of User Message Interleaving functionality
+ specified in RFC8260. This extension allows the interleaving of user
+ messages sent on different streams. With this feature enabled, I-DATA
+ chunk will replace DATA chunk to carry user messages if also supported
+ by the peer. Note that to use this feature, one needs to set this option
+ to 1 and also needs to set socket options SCTP_FRAGMENT_INTERLEAVE to 2
+ and SCTP_INTERLEAVING_SUPPORTED to 1.
+
+ - 1: Enable extension.
+ - 0: Disable extension.
+
+ Default: 0
+
+ecn_enable - BOOLEAN
+ Control use of Explicit Congestion Notification (ECN) by SCTP.
+ Like in TCP, ECN is used only when both ends of the SCTP connection
+ indicate support for it. This feature is useful in avoiding losses
+ due to congestion by allowing supporting routers to signal congestion
+ before having to drop packets.
+
+ 1: Enable ecn.
+ 0: Disable ecn.
+
+ Default: 1
+
``/proc/sys/net/core/*``
========================
diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index d43da709bf40..704f31da5167 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -104,7 +104,7 @@ Whenever possible, use the PHY side RGMII delay for these reasons:
* PHY device drivers in PHYLIB being reusable by nature, being able to
configure correctly a specified delay enables more designs with similar delay
- requirements to be operate correctly
+ requirements to be operated correctly
For cases where the PHY is not capable of providing this delay, but the
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
diff --git a/MAINTAINERS b/MAINTAINERS
index 1fc9ead83d2a..f52543aedd61 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10872,7 +10872,6 @@ F: arch/riscv/include/asm/kvm*
F: arch/riscv/include/uapi/asm/kvm*
F: arch/riscv/kvm/
F: tools/testing/selftests/kvm/*/riscv/
-F: tools/testing/selftests/kvm/riscv/
KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@linux.ibm.com>
@@ -13801,6 +13800,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
F: Documentation/devicetree/bindings/net/
F: drivers/connector/
F: drivers/net/
+F: include/dt-bindings/net/
F: include/linux/etherdevice.h
F: include/linux/fcdevice.h
F: include/linux/fddidevice.h
diff --git a/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts b/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts
index 443e8b022897..14af1fd6d247 100644
--- a/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts
+++ b/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts
@@ -120,26 +120,31 @@
port@0 {
reg = <0>;
label = "lan1";
+ phy-mode = "internal";
};
port@1 {
reg = <1>;
label = "lan2";
+ phy-mode = "internal";
};
port@2 {
reg = <2>;
label = "lan3";
+ phy-mode = "internal";
};
port@3 {
reg = <3>;
label = "lan4";
+ phy-mode = "internal";
};
port@4 {
reg = <4>;
label = "lan5";
+ phy-mode = "internal";
};
port@5 {
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 47a1e25e25bb..de32152cea04 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -363,11 +363,6 @@ struct kvm_vcpu_arch {
struct kvm_pmu pmu;
/*
- * Anything that is not used directly from assembly code goes
- * here.
- */
-
- /*
* Guest registers we preserve during guest debugging.
*
* These shadow registers are updated by the kvm_handle_sys_reg
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 3c8af033a997..0e80db4327b6 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -113,6 +113,9 @@ static __always_inline bool has_vhe(void)
/*
* Code only run in VHE/NVHE hyp context can assume VHE is present or
* absent. Otherwise fall back to caps.
+ * This allows the compiler to discard VHE-specific code from the
+ * nVHE object, reducing the number of external symbol references
+ * needed to link.
*/
if (is_vhe_hyp_code())
return true;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 42ea2bd856c6..79fac13ab2ef 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1974,15 +1974,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
#ifdef CONFIG_KVM
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
{
- if (kvm_get_mode() != KVM_MODE_PROTECTED)
- return false;
-
- if (is_kernel_in_hyp_mode()) {
- pr_warn("Protected KVM not available with VHE\n");
- return false;
- }
-
- return true;
+ return kvm_get_mode() == KVM_MODE_PROTECTED;
}
#endif /* CONFIG_KVM */
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 4e39ace073af..3b8d062e30ea 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -1230,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid)
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct arch_timer_context *timer;
+ if (WARN(!vcpu, "No vcpu context!\n"))
+ return false;
+
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
timer = vcpu_vtimer(vcpu);
else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 400bb0fe2745..a0188144a122 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -150,8 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_free_stage2_pgd;
- if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
+ ret = -ENOMEM;
goto out_free_stage2_pgd;
+ }
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
kvm_vgic_early_init(kvm);
@@ -2271,7 +2273,11 @@ static int __init early_kvm_mode_cfg(char *arg)
return -EINVAL;
if (strcmp(arg, "protected") == 0) {
- kvm_mode = KVM_MODE_PROTECTED;
+ if (!is_kernel_in_hyp_mode())
+ kvm_mode = KVM_MODE_PROTECTED;
+ else
+ pr_warn_once("Protected KVM not available with VHE\n");
+
return 0;
}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 3d251a4d2cf7..6012b08ecb14 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -80,6 +80,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+ vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
@@ -93,6 +94,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* operations. Do this for ZA as well for now for simplicity.
*/
if (system_supports_sme()) {
+ vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 78edf077fa3b..1e78acf9662e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -314,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
enum kvm_pgtable_prot prot)
{
- hyp_assert_lock_held(&host_kvm.lock);
-
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
- hyp_assert_lock_held(&host_kvm.lock);
-
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
addr, size, &host_s2_pool, owner_id);
}
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index b6d86e423319..35a4331ba5f3 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -243,15 +243,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
case SYS_ID_AA64MMFR2_EL1:
return get_pvm_id_aa64mmfr2(vcpu);
default:
- /*
- * Should never happen because all cases are covered in
- * pvm_sys_reg_descs[].
- */
- WARN_ON(1);
- break;
+ /* Unhandled ID register, RAZ */
+ return 0;
}
-
- return 0;
}
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
@@ -332,6 +326,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
/* Mark the specified system register as an AArch64 feature id register. */
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
+/*
+ * sys_reg_desc initialiser for architecturally unallocated cpufeature ID
+ * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
+ * (1 <= crm < 8, 0 <= Op2 < 8).
+ */
+#define ID_UNALLOCATED(crm, op2) { \
+ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
+ .access = pvm_access_id_aarch64, \
+}
+
/* Mark the specified system register as Read-As-Zero/Write-Ignored */
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
@@ -375,24 +379,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
AARCH32(SYS_MVFR0_EL1),
AARCH32(SYS_MVFR1_EL1),
AARCH32(SYS_MVFR2_EL1),
+ ID_UNALLOCATED(3,3),
AARCH32(SYS_ID_PFR2_EL1),
AARCH32(SYS_ID_DFR1_EL1),
AARCH32(SYS_ID_MMFR5_EL1),
+ ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
/* CRm=4 */
AARCH64(SYS_ID_AA64PFR0_EL1),
AARCH64(SYS_ID_AA64PFR1_EL1),
+ ID_UNALLOCATED(4,2),
+ ID_UNALLOCATED(4,3),
AARCH64(SYS_ID_AA64ZFR0_EL1),
+ ID_UNALLOCATED(4,5),
+ ID_UNALLOCATED(4,6),
+ ID_UNALLOCATED(4,7),
AARCH64(SYS_ID_AA64DFR0_EL1),
AARCH64(SYS_ID_AA64DFR1_EL1),
+ ID_UNALLOCATED(5,2),
+ ID_UNALLOCATED(5,3),
AARCH64(SYS_ID_AA64AFR0_EL1),
AARCH64(SYS_ID_AA64AFR1_EL1),
+ ID_UNALLOCATED(5,6),
+ ID_UNALLOCATED(5,7),
AARCH64(SYS_ID_AA64ISAR0_EL1),
AARCH64(SYS_ID_AA64ISAR1_EL1),
+ AARCH64(SYS_ID_AA64ISAR2_EL1),
+ ID_UNALLOCATED(6,3),
+ ID_UNALLOCATED(6,4),
+ ID_UNALLOCATED(6,5),
+ ID_UNALLOCATED(6,6),
+ ID_UNALLOCATED(6,7),
AARCH64(SYS_ID_AA64MMFR0_EL1),
AARCH64(SYS_ID_AA64MMFR1_EL1),
AARCH64(SYS_ID_AA64MMFR2_EL1),
+ ID_UNALLOCATED(7,3),
+ ID_UNALLOCATED(7,4),
+ ID_UNALLOCATED(7,5),
+ ID_UNALLOCATED(7,6),
+ ID_UNALLOCATED(7,7),
/* Scalable Vector Registers are restricted. */
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index 77a67e9d3d14..e070cda86e12 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
@@ -429,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- NULL, vgic_uaccess_write_spending, 1,
+ vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
- NULL, vgic_uaccess_write_cpending, 1,
+ vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
vgic_mmio_read_active, vgic_mmio_write_sactive,
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index f7aa7bcd6fb8..f15e29cc63ce 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -353,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
return 0;
}
-static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- u32 value = 0;
- int i;
-
- /*
- * pending state of interrupt is latched in pending_latch variable.
- * Userspace will save and restore pending state and line_level
- * separately.
- * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
- * for handling of ISPENDR and ICPENDR.
- */
- for (i = 0; i < len * 8; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- bool state = irq->pending_latch;
-
- if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
- int err;
-
- err = irq_get_irqchip_state(irq->host_irq,
- IRQCHIP_STATE_PENDING,
- &state);
- WARN_ON(err);
- }
-
- if (state)
- value |= (1U << i);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return value;
-}
-
static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
@@ -666,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
@@ -750,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index 49837d3a3ef5..997d0fce2088 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
return 0;
}
-unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
+static unsigned long __read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ bool is_user)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 value = 0;
@@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
unsigned long flags;
bool val;
+ /*
+ * When used from userspace with a GICv3 model:
+ *
+ * Pending state of interrupt is latched in pending_latch
+ * variable. Userspace will save and restore pending state
+ * and line_level separately.
+ * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
+ * for handling of ISPENDR and ICPENDR.
+ */
raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
int err;
@@ -248,10 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
IRQCHIP_STATE_PENDING,
&val);
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
- } else if (vgic_irq_is_mapped_level(irq)) {
+ } else if (!is_user && vgic_irq_is_mapped_level(irq)) {
val = vgic_get_phys_line_level(irq);
} else {
- val = irq_is_pending(irq);
+ switch (vcpu->kvm->arch.vgic.vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
+ if (is_user) {
+ val = irq->pending_latch;
+ break;
+ }
+ fallthrough;
+ default:
+ val = irq_is_pending(irq);
+ break;
+ }
}
value |= ((u32)val << i);
@@ -263,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
+unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return __read_pending(vcpu, addr, len, false);
+}
+
+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return __read_pending(vcpu, addr, len, true);
+}
+
static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
return (vgic_irq_is_sgi(irq->intid) &&
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h
index 3fa696f198a3..6082d4b66d39 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.h
+++ b/arch/arm64/kvm/vgic/vgic-mmio.h
@@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
index 8d5f0506fd87..d78ae63d7c15 100644
--- a/arch/arm64/kvm/vmid.c
+++ b/arch/arm64/kvm/vmid.c
@@ -66,7 +66,7 @@ static void flush_context(void)
* the next context-switch, we broadcast TLB flush + I-cache
* invalidation over the inner shareable domain on rollover.
*/
- kvm_call_hyp(__kvm_flush_vm_context);
+ kvm_call_hyp(__kvm_flush_vm_context);
}
static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c22f58155948..32ffef9f6e5b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -364,8 +364,13 @@ config RISCV_ISA_SVPBMT
select RISCV_ALTERNATIVE
default y
help
- Adds support to dynamically detect the presence of the SVPBMT extension
- (Supervisor-mode: page-based memory types) and enable its usage.
+ Adds support to dynamically detect the presence of the SVPBMT
+ ISA-extension (Supervisor-mode: page-based memory types) and
+ enable its usage.
+
+ The memory type for a page contains a combination of attributes
+ that indicate the cacheability, idempotency, and ordering
+ properties for access to that page.
The SVPBMT extension is only available on 64Bit cpus.
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index ebfcd5cc6eaf..457ac72c9b36 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -35,6 +35,7 @@ config ERRATA_SIFIVE_CIP_1200
config ERRATA_THEAD
bool "T-HEAD errata"
+ depends on !XIP_KERNEL
select RISCV_ALTERNATIVE
help
All T-HEAD errata Kconfig depend on this Kconfig. Disabling
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
index 8c3259134194..3095d08453a1 100644
--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -192,6 +192,15 @@
riscv,ndev = <186>;
};
+ pdma: dma-controller@3000000 {
+ compatible = "sifive,fu540-c000-pdma", "sifive,pdma0";
+ reg = <0x0 0x3000000 0x0 0x8000>;
+ interrupt-parent = <&plic>;
+ interrupts = <5 6>, <7 8>, <9 10>, <11 12>;
+ dma-channels = <4>;
+ #dma-cells = <1>;
+ };
+
clkcfg: clkcfg@20002000 {
compatible = "microchip,mpfs-clkcfg";
reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index a6f62a6d1edd..12b05ce164bb 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -293,7 +293,6 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
unsigned int stage)
{
u32 cpu_req_feature = cpufeature_probe(stage);
- u32 cpu_apply_feature = 0;
struct alt_entry *alt;
u32 tmp;
@@ -307,10 +306,8 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
}
tmp = (1U << alt->errata_id);
- if (cpu_req_feature & tmp) {
+ if (cpu_req_feature & tmp)
patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
- cpu_apply_feature |= tmp;
- }
}
}
#endif
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index 9f764df125db..6cd93995fb65 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -97,7 +97,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
* We ran out of VMIDs so we increment vmid_version and
* start assigning VMIDs from 1.
*
- * This also means existing VMIDs assignement to all Guest
+ * This also means existing VMIDs assignment to all Guest
* instances is invalid and we have force VMID re-assignement
* for all Guest instances. The Guest instances that were not
* running will automatically pick-up new VMIDs because will
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 8b392b6b7b93..3de6d8b53367 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -13,6 +13,7 @@
#include <linux/io.h>
#include <asm/apic.h>
#include <asm/desc.h>
+#include <asm/sev.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
@@ -405,6 +406,11 @@ void __init hyperv_init(void)
}
if (hv_isolation_type_snp()) {
+ /* Negotiate GHCB Version. */
+ if (!hv_ghcb_negotiate_protocol())
+ hv_ghcb_terminate(SEV_TERM_SET_GEN,
+ GHCB_SEV_ES_PROT_UNSUPPORTED);
+
hv_ghcb_pg = alloc_percpu(union hv_ghcb *);
if (!hv_ghcb_pg)
goto free_vp_assist_page;
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 2b994117581e..1dbcbd9da74d 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -53,6 +53,8 @@ union hv_ghcb {
} hypercall;
} __packed __aligned(HV_HYP_PAGE_SIZE);
+static u16 hv_ghcb_version __ro_after_init;
+
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
{
union hv_ghcb *hv_ghcb;
@@ -96,12 +98,85 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
return status;
}
+static inline u64 rd_ghcb_msr(void)
+{
+ return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
+}
+
+static inline void wr_ghcb_msr(u64 val)
+{
+ native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val);
+}
+
+static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code,
+ u64 exit_info_1, u64 exit_info_2)
+{
+ /* Fill in protocol and format specifiers */
+ ghcb->protocol_version = hv_ghcb_version;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, exit_code);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ VMGEXIT();
+
+ if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0))
+ return ES_VMM_ERROR;
+ else
+ return ES_OK;
+}
+
+void hv_ghcb_terminate(unsigned int set, unsigned int reason)
+{
+ u64 val = GHCB_MSR_TERM_REQ;
+
+ /* Tell the hypervisor what went wrong. */
+ val |= GHCB_SEV_TERM_REASON(set, reason);
+
+ /* Request Guest Termination from Hypvervisor */
+ wr_ghcb_msr(val);
+ VMGEXIT();
+
+ while (true)
+ asm volatile("hlt\n" : : : "memory");
+}
+
+bool hv_ghcb_negotiate_protocol(void)
+{
+ u64 ghcb_gpa;
+ u64 val;
+
+ /* Save ghcb page gpa. */
+ ghcb_gpa = rd_ghcb_msr();
+
+ /* Do the GHCB protocol version negotiation */
+ wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
+ VMGEXIT();
+ val = rd_ghcb_msr();
+
+ if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
+ return false;
+
+ if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
+ GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
+ return false;
+
+ hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val),
+ GHCB_PROTOCOL_MAX);
+
+ /* Write ghcb page back after negotiating protocol. */
+ wr_ghcb_msr(ghcb_gpa);
+ VMGEXIT();
+
+ return true;
+}
+
void hv_ghcb_msr_write(u64 msr, u64 value)
{
union hv_ghcb *hv_ghcb;
void **ghcb_base;
unsigned long flags;
- struct es_em_ctxt ctxt;
if (!hv_ghcb_pg)
return;
@@ -120,8 +195,7 @@ void hv_ghcb_msr_write(u64 msr, u64 value)
ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value));
- if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt,
- SVM_EXIT_MSR, 1, 0))
+ if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0))
pr_warn("Fail to write msr via ghcb %llx.\n", msr);
local_irq_restore(flags);
@@ -133,7 +207,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
union hv_ghcb *hv_ghcb;
void **ghcb_base;
unsigned long flags;
- struct es_em_ctxt ctxt;
/* Check size of union hv_ghcb here. */
BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE);
@@ -152,8 +225,7 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
}
ghcb_set_rcx(&hv_ghcb->ghcb, msr);
- if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt,
- SVM_EXIT_MSR, 0, 0))
+ if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0))
pr_warn("Fail to read msr via ghcb %llx.\n", msr);
else
*value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 393f2bbb5e3a..03acc823838a 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -446,5 +446,6 @@
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3a240a64ac68..9217bd6cf0d1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1047,14 +1047,77 @@ struct kvm_x86_msr_filter {
};
enum kvm_apicv_inhibit {
+
+ /********************************************************************/
+ /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
+ /********************************************************************/
+
+ /*
+ * APIC acceleration is disabled by a module parameter
+ * and/or not supported in hardware.
+ */
APICV_INHIBIT_REASON_DISABLE,
+
+ /*
+ * APIC acceleration is inhibited because AutoEOI feature is
+ * being used by a HyperV guest.
+ */
APICV_INHIBIT_REASON_HYPERV,
+
+ /*
+ * APIC acceleration is inhibited because the userspace didn't yet
+ * enable the kernel/split irqchip.
+ */
+ APICV_INHIBIT_REASON_ABSENT,
+
+ /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
+ * (out of band, debug measure of blocking all interrupts on this vCPU)
+ * was enabled, to avoid AVIC/APICv bypassing it.
+ */
+ APICV_INHIBIT_REASON_BLOCKIRQ,
+
+ /*
+ * For simplicity, the APIC acceleration is inhibited
+ * first time either APIC ID or APIC base are changed by the guest
+ * from their reset values.
+ */
+ APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,
+
+ /******************************************************/
+ /* INHIBITs that are relevant only to the AMD's AVIC. */
+ /******************************************************/
+
+ /*
+ * AVIC is inhibited on a vCPU because it runs a nested guest.
+ *
+ * This is needed because unlike APICv, the peers of this vCPU
+ * cannot use the doorbell mechanism to signal interrupts via AVIC when
+ * a vCPU runs nested.
+ */
APICV_INHIBIT_REASON_NESTED,
+
+ /*
+ * On SVM, the wait for the IRQ window is implemented with pending vIRQ,
+ * which cannot be injected when the AVIC is enabled, thus AVIC
+ * is inhibited while KVM waits for IRQ window.
+ */
APICV_INHIBIT_REASON_IRQWIN,
+
+ /*
+ * PIT (i8254) 're-inject' mode, relies on EOI intercept,
+ * which AVIC doesn't support for edge triggered interrupts.
+ */
APICV_INHIBIT_REASON_PIT_REINJ,
+
+ /*
+ * AVIC is inhibited because the guest has x2apic in its CPUID.
+ */
APICV_INHIBIT_REASON_X2APIC,
- APICV_INHIBIT_REASON_BLOCKIRQ,
- APICV_INHIBIT_REASON_ABSENT,
+
+ /*
+ * AVIC is disabled because SEV doesn't support it.
+ */
APICV_INHIBIT_REASON_SEV,
};
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index a82f603d4312..61f0c206bff0 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -179,9 +179,13 @@ int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible);
#ifdef CONFIG_AMD_MEM_ENCRYPT
void hv_ghcb_msr_write(u64 msr, u64 value);
void hv_ghcb_msr_read(u64 msr, u64 *value);
+bool hv_ghcb_negotiate_protocol(void);
+void hv_ghcb_terminate(unsigned int set, unsigned int reason);
#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
+static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
+static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
#endif
extern bool hv_isolation_type_snp(void);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 403e83b4adc8..d27e0581b777 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -116,6 +116,30 @@
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
+ * Not susceptible to SBDR and SSDP
+ * variants of Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FBSDP_NO BIT(14) /*
+ * Not susceptible to FBSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_PSDP_NO BIT(15) /*
+ * Not susceptible to PSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FB_CLEAR BIT(17) /*
+ * VERW clears CPU fill buffer
+ * even on MDS_NO CPUs.
+ */
+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
+ * bit available to control VERW
+ * behavior.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -133,6 +157,7 @@
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
#define RTM_ALLOW BIT(1) /* TSX development mode */
+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index acbaeaf83b61..da251a5645b0 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -269,6 +269,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+
#include <asm/segment.h>
/**
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d879a6c93609..74c62cc47a5f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -41,8 +41,10 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
-static void __init mds_print_mitigation(void);
+static void __init md_clear_update_mitigation(void);
+static void __init md_clear_select_mitigation(void);
static void __init taa_select_mitigation(void);
+static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
@@ -85,6 +87,10 @@ EXPORT_SYMBOL_GPL(mds_idle_clear);
*/
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */
+DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
+
void __init check_bugs(void)
{
identify_boot_cpu();
@@ -117,17 +123,10 @@ void __init check_bugs(void)
spectre_v2_select_mitigation();
ssb_select_mitigation();
l1tf_select_mitigation();
- mds_select_mitigation();
- taa_select_mitigation();
+ md_clear_select_mitigation();
srbds_select_mitigation();
l1d_flush_select_mitigation();
- /*
- * As MDS and TAA mitigations are inter-related, print MDS
- * mitigation until after TAA mitigation selection is done.
- */
- mds_print_mitigation();
-
arch_smt_update();
#ifdef CONFIG_X86_32
@@ -267,14 +266,6 @@ static void __init mds_select_mitigation(void)
}
}
-static void __init mds_print_mitigation(void)
-{
- if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())
- return;
-
- pr_info("%s\n", mds_strings[mds_mitigation]);
-}
-
static int __init mds_cmdline(char *str)
{
if (!boot_cpu_has_bug(X86_BUG_MDS))
@@ -329,7 +320,7 @@ static void __init taa_select_mitigation(void)
/* TSX previously disabled by tsx=off */
if (!boot_cpu_has(X86_FEATURE_RTM)) {
taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
- goto out;
+ return;
}
if (cpu_mitigations_off()) {
@@ -343,7 +334,7 @@ static void __init taa_select_mitigation(void)
*/
if (taa_mitigation == TAA_MITIGATION_OFF &&
mds_mitigation == MDS_MITIGATION_OFF)
- goto out;
+ return;
if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
taa_mitigation = TAA_MITIGATION_VERW;
@@ -375,18 +366,6 @@ static void __init taa_select_mitigation(void)
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
-
- /*
- * Update MDS mitigation, if necessary, as the mds_user_clear is
- * now enabled for TAA mitigation.
- */
- if (mds_mitigation == MDS_MITIGATION_OFF &&
- boot_cpu_has_bug(X86_BUG_MDS)) {
- mds_mitigation = MDS_MITIGATION_FULL;
- mds_select_mitigation();
- }
-out:
- pr_info("%s\n", taa_strings[taa_mitigation]);
}
static int __init tsx_async_abort_parse_cmdline(char *str)
@@ -411,6 +390,151 @@ static int __init tsx_async_abort_parse_cmdline(char *str)
early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "MMIO Stale Data: " fmt
+
+enum mmio_mitigations {
+ MMIO_MITIGATION_OFF,
+ MMIO_MITIGATION_UCODE_NEEDED,
+ MMIO_MITIGATION_VERW,
+};
+
+/* Default mitigation for Processor MMIO Stale Data vulnerabilities */
+static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW;
+static bool mmio_nosmt __ro_after_init = false;
+
+static const char * const mmio_strings[] = {
+ [MMIO_MITIGATION_OFF] = "Vulnerable",
+ [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
+ [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers",
+};
+
+static void __init mmio_select_mitigation(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
+ cpu_mitigations_off()) {
+ mmio_mitigation = MMIO_MITIGATION_OFF;
+ return;
+ }
+
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return;
+
+ ia32_cap = x86_read_arch_cap_msr();
+
+ /*
+ * Enable CPU buffer clear mitigation for host and VMM, if also affected
+ * by MDS or TAA. Otherwise, enable mitigation for VMM only.
+ */
+ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
+ boot_cpu_has(X86_FEATURE_RTM)))
+ static_branch_enable(&mds_user_clear);
+ else
+ static_branch_enable(&mmio_stale_data_clear);
+
+ /*
+ * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can
+ * be propagated to uncore buffers, clearing the Fill buffers on idle
+ * is required irrespective of SMT state.
+ */
+ if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ static_branch_enable(&mds_idle_clear);
+
+ /*
+ * Check if the system has the right microcode.
+ *
+ * CPU Fill buffer clear mitigation is enumerated by either an explicit
+ * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
+ * affected systems.
+ */
+ if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
+ boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
+ !(ia32_cap & ARCH_CAP_MDS_NO)))
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ else
+ mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
+
+ if (mmio_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+}
+
+static int __init mmio_stale_data_parse_cmdline(char *str)
+{
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ return 0;
+
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off")) {
+ mmio_mitigation = MMIO_MITIGATION_OFF;
+ } else if (!strcmp(str, "full")) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ } else if (!strcmp(str, "full,nosmt")) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ mmio_nosmt = true;
+ }
+
+ return 0;
+}
+early_param("mmio_stale_data", mmio_stale_data_parse_cmdline);
+
+#undef pr_fmt
+#define pr_fmt(fmt) "" fmt
+
+static void __init md_clear_update_mitigation(void)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!static_key_enabled(&mds_user_clear))
+ goto out;
+
+ /*
+ * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
+ * mitigation, if necessary.
+ */
+ if (mds_mitigation == MDS_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MDS)) {
+ mds_mitigation = MDS_MITIGATION_FULL;
+ mds_select_mitigation();
+ }
+ if (taa_mitigation == TAA_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_TAA)) {
+ taa_mitigation = TAA_MITIGATION_VERW;
+ taa_select_mitigation();
+ }
+ if (mmio_mitigation == MMIO_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ mmio_select_mitigation();
+ }
+out:
+ if (boot_cpu_has_bug(X86_BUG_MDS))
+ pr_info("MDS: %s\n", mds_strings[mds_mitigation]);
+ if (boot_cpu_has_bug(X86_BUG_TAA))
+ pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
+ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
+}
+
+static void __init md_clear_select_mitigation(void)
+{
+ mds_select_mitigation();
+ taa_select_mitigation();
+ mmio_select_mitigation();
+
+ /*
+ * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update
+ * and print their mitigation after MDS, TAA and MMIO Stale Data
+ * mitigation selection is done.
+ */
+ md_clear_update_mitigation();
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) "SRBDS: " fmt
enum srbds_mitigations {
@@ -478,11 +602,13 @@ static void __init srbds_select_mitigation(void)
return;
/*
- * Check to see if this is one of the MDS_NO systems supporting
- * TSX that are only exposed to SRBDS when TSX is enabled.
+ * Check to see if this is one of the MDS_NO systems supporting TSX that
+ * are only exposed to SRBDS when TSX is enabled or when CPU is affected
+ * by Processor MMIO Stale Data vulnerability.
*/
ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM))
+ if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;
@@ -1116,6 +1242,8 @@ static void update_indir_branch_cond(void)
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
+ u64 ia32_cap = x86_read_arch_cap_msr();
+
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
@@ -1127,14 +1255,17 @@ static void update_mds_branch_idle(void)
if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
return;
- if (sched_smt_active())
+ if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
- else
+ } else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
+ (ia32_cap & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
+ }
}
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
+#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
void cpu_bugs_smt_update(void)
{
@@ -1179,6 +1310,16 @@ void cpu_bugs_smt_update(void)
break;
}
+ switch (mmio_mitigation) {
+ case MMIO_MITIGATION_VERW:
+ case MMIO_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(MMIO_MSG_SMT);
+ break;
+ case MMIO_MITIGATION_OFF:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
@@ -1781,6 +1922,20 @@ static ssize_t tsx_async_abort_show_state(char *buf)
sched_smt_active() ? "vulnerable" : "disabled");
}
+static ssize_t mmio_stale_data_show_state(char *buf)
+{
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ return sysfs_emit(buf, "%s; SMT Host state unknown\n",
+ mmio_strings[mmio_mitigation]);
+ }
+
+ return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation],
+ sched_smt_active() ? "vulnerable" : "disabled");
+}
+
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
@@ -1881,6 +2036,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_SRBDS:
return srbds_show_state(buf);
+ case X86_BUG_MMIO_STALE_DATA:
+ return mmio_stale_data_show_state(buf);
+
default:
break;
}
@@ -1932,4 +2090,9 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
{
return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
}
+
+ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c296cb1c0113..4730b0a58f24 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1211,18 +1211,42 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
X86_FEATURE_ANY, issues)
#define SRBDS BIT(0)
+/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
+#define MMIO BIT(1)
+/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
+#define MMIO_SBDS BIT(2)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
+ BIT(7) | BIT(0xB), MMIO),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
{}
};
@@ -1243,6 +1267,13 @@ u64 x86_read_arch_cap_msr(void)
return ia32_cap;
}
+static bool arch_cap_mmio_immune(u64 ia32_cap)
+{
+ return (ia32_cap & ARCH_CAP_FBSDP_NO &&
+ ia32_cap & ARCH_CAP_PSDP_NO &&
+ ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = x86_read_arch_cap_msr();
@@ -1296,12 +1327,27 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
/*
* SRBDS affects CPUs which support RDRAND or RDSEED and are listed
* in the vulnerability blacklist.
+ *
+ * Some of the implications and mitigation of Shared Buffers Data
+ * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as
+ * SRBDS.
*/
if ((cpu_has(c, X86_FEATURE_RDRAND) ||
cpu_has(c, X86_FEATURE_RDSEED)) &&
- cpu_matches(cpu_vuln_blacklist, SRBDS))
+ cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))
setup_force_cpu_bug(X86_BUG_SRBDS);
+ /*
+ * Processor MMIO Stale Data bug enumeration
+ *
+ * Affected CPU list is generally enough to enumerate the vulnerability,
+ * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
+ * not want the guest to enumerate the bug.
+ */
+ if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
+ !arch_cap_mmio_immune(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f1bdac3f5aa8..0e68b4c937fc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
}
}
+static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
+{
+ struct kvm *kvm = apic->vcpu->kvm;
+
+ if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
+ return;
+
+ if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
+ return;
+
+ kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
+}
+
static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
int ret = 0;
@@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
switch (reg) {
case APIC_ID: /* Local APIC ID */
- if (!apic_x2apic_mode(apic))
+ if (!apic_x2apic_mode(apic)) {
kvm_apic_set_xapic_id(apic, val >> 24);
- else
+ kvm_lapic_xapic_id_updated(apic);
+ } else {
ret = 1;
+ }
break;
case APIC_TASKPRI:
@@ -2336,8 +2351,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
MSR_IA32_APICBASE_BASE;
if ((value & MSR_IA32_APICBASE_ENABLE) &&
- apic->base_address != APIC_DEFAULT_PHYS_BASE)
- pr_warn_once("APIC base relocation is unsupported by KVM");
+ apic->base_address != APIC_DEFAULT_PHYS_BASE) {
+ kvm_set_apicv_inhibit(apic->vcpu->kvm,
+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
+ }
}
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
@@ -2648,6 +2665,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
}
+ } else {
+ kvm_lapic_xapic_id_updated(vcpu->arch.apic);
}
return 0;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e826ee9138fa..17252f39bd7c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3411,7 +3411,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT),
i << 30, PT32_ROOT_LEVEL, true);
mmu->pae_root[i] = root | PT_PRESENT_MASK |
- shadow_me_mask;
+ shadow_me_value;
}
mmu->root.hpa = __pa(mmu->pae_root);
} else {
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 54fe03714f8a..d1bc5820ea46 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -291,58 +291,91 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh, u32 index)
{
- u32 dest, apic_id;
- struct kvm_vcpu *vcpu;
+ u32 l1_physical_id, dest;
+ struct kvm_vcpu *target_vcpu;
int dest_mode = icrl & APIC_DEST_MASK;
int shorthand = icrl & APIC_SHORT_MASK;
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
- u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
if (shorthand != APIC_DEST_NOSHORT)
return -EINVAL;
- /*
- * The AVIC incomplete IPI #vmexit info provides index into
- * the physical APIC ID table, which can be used to derive
- * guest physical APIC ID.
- */
+ if (apic_x2apic_mode(source))
+ dest = icrh;
+ else
+ dest = GET_APIC_DEST_FIELD(icrh);
+
if (dest_mode == APIC_DEST_PHYSICAL) {
- apic_id = index;
+ /* broadcast destination, use slow path */
+ if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST)
+ return -EINVAL;
+ if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
+ return -EINVAL;
+
+ l1_physical_id = dest;
+
+ if (WARN_ON_ONCE(l1_physical_id != index))
+ return -EINVAL;
+
} else {
- if (!apic_x2apic_mode(source)) {
- /* For xAPIC logical mode, the index is for logical APIC table. */
- apic_id = avic_logical_id_table[index] & 0x1ff;
+ u32 bitmap, cluster;
+ int logid_index;
+
+ if (apic_x2apic_mode(source)) {
+ /* 16 bit dest mask, 16 bit cluster id */
+ bitmap = dest & 0xFFFF0000;
+ cluster = (dest >> 16) << 4;
+ } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
+ /* 8 bit dest mask*/
+ bitmap = dest;
+ cluster = 0;
} else {
- return -EINVAL;
+ /* 4 bit desk mask, 4 bit cluster id */
+ bitmap = dest & 0xF;
+ cluster = (dest >> 4) << 2;
}
- }
- /*
- * Assuming vcpu ID is the same as physical apic ID,
- * and use it to retrieve the target vCPU.
- */
- vcpu = kvm_get_vcpu_by_id(kvm, apic_id);
- if (!vcpu)
- return -EINVAL;
+ if (unlikely(!bitmap))
+ /* guest bug: nobody to send the logical interrupt to */
+ return 0;
- if (apic_x2apic_mode(vcpu->arch.apic))
- dest = icrh;
- else
- dest = GET_APIC_DEST_FIELD(icrh);
+ if (!is_power_of_2(bitmap))
+ /* multiple logical destinations, use slow path */
+ return -EINVAL;
- /*
- * Try matching the destination APIC ID with the vCPU.
- */
- if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) {
- vcpu->arch.apic->irr_pending = true;
- svm_complete_interrupt_delivery(vcpu,
- icrl & APIC_MODE_MASK,
- icrl & APIC_INT_LEVELTRIG,
- icrl & APIC_VECTOR_MASK);
- return 0;
+ logid_index = cluster + __ffs(bitmap);
+
+ if (apic_x2apic_mode(source)) {
+ l1_physical_id = logid_index;
+ } else {
+ u32 *avic_logical_id_table =
+ page_address(kvm_svm->avic_logical_id_table_page);
+
+ u32 logid_entry = avic_logical_id_table[logid_index];
+
+ if (WARN_ON_ONCE(index != logid_index))
+ return -EINVAL;
+
+ /* guest bug: non existing/reserved logical destination */
+ if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
+ return 0;
+
+ l1_physical_id = logid_entry &
+ AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+ }
}
- return -EINVAL;
+ target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
+ if (unlikely(!target_vcpu))
+ /* guest bug: non existing vCPU is a target of this IPI*/
+ return 0;
+
+ target_vcpu->arch.apic->irr_pending = true;
+ svm_complete_interrupt_delivery(target_vcpu,
+ icrl & APIC_MODE_MASK,
+ icrl & APIC_INT_LEVELTRIG,
+ icrl & APIC_VECTOR_MASK);
+ return 0;
}
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
@@ -508,35 +541,6 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
return ret;
}
-static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
-{
- u64 *old, *new;
- struct vcpu_svm *svm = to_svm(vcpu);
- u32 id = kvm_xapic_id(vcpu->arch.apic);
-
- if (vcpu->vcpu_id == id)
- return 0;
-
- old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
- new = avic_get_physical_id_entry(vcpu, id);
- if (!new || !old)
- return 1;
-
- /* We need to move physical_id_entry to new offset */
- *new = *old;
- *old = 0ULL;
- to_svm(vcpu)->avic_physical_id_cache = new;
-
- /*
- * Also update the guest physical APIC ID in the logical
- * APIC ID table entry if already setup the LDR.
- */
- if (svm->ldr_reg)
- avic_handle_ldr_update(vcpu);
-
- return 0;
-}
-
static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -555,10 +559,6 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
AVIC_UNACCEL_ACCESS_OFFSET_MASK;
switch (offset) {
- case APIC_ID:
- if (avic_handle_apic_id_update(vcpu))
- return 0;
- break;
case APIC_LDR:
if (avic_handle_ldr_update(vcpu))
return 0;
@@ -650,8 +650,6 @@ int avic_init_vcpu(struct vcpu_svm *svm)
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
{
- if (avic_handle_apic_id_update(vcpu) != 0)
- return;
avic_handle_dfr_update(vcpu);
avic_handle_ldr_update(vcpu);
}
@@ -910,7 +908,9 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
BIT(APICV_INHIBIT_REASON_X2APIC) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
- BIT(APICV_INHIBIT_REASON_SEV);
+ BIT(APICV_INHIBIT_REASON_SEV) |
+ BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
+ BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
return supported & BIT(reason);
}
@@ -946,7 +946,7 @@ out:
return ret;
}
-void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
int h_physical_id = kvm_cpu_get_apicid(cpu);
@@ -978,7 +978,7 @@ void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}
-void __avic_vcpu_put(struct kvm_vcpu *vcpu)
+void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
@@ -997,25 +997,6 @@ void __avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-static void avic_vcpu_load(struct kvm_vcpu *vcpu)
-{
- int cpu = get_cpu();
-
- WARN_ON(cpu != vcpu->cpu);
-
- __avic_vcpu_load(vcpu, cpu);
-
- put_cpu();
-}
-
-static void avic_vcpu_put(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
-
- __avic_vcpu_put(vcpu);
-
- preempt_enable();
-}
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
@@ -1042,7 +1023,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
vmcb_mark_dirty(vmcb, VMCB_AVIC);
if (activated)
- avic_vcpu_load(vcpu);
+ avic_vcpu_load(vcpu, vcpu->cpu);
else
avic_vcpu_put(vcpu);
@@ -1075,5 +1056,5 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
if (!kvm_vcpu_apicv_active(vcpu))
return;
- avic_vcpu_load(vcpu);
+ avic_vcpu_load(vcpu, vcpu->cpu);
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3361258640a2..ba7cd26f438f 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -616,6 +616,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+ u32 pause_count12;
+ u32 pause_thresh12;
/*
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
@@ -671,27 +673,25 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
if (!nested_vmcb_needs_vls_intercept(svm))
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
+ pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
- /* use guest values since host doesn't use them */
- vmcb02->control.pause_filter_count =
- svm->pause_filter_enabled ?
- svm->nested.ctl.pause_filter_count : 0;
+ /* use guest values since host doesn't intercept PAUSE */
+ vmcb02->control.pause_filter_count = pause_count12;
+ vmcb02->control.pause_filter_thresh = pause_thresh12;
- vmcb02->control.pause_filter_thresh =
- svm->pause_threshold_enabled ?
- svm->nested.ctl.pause_filter_thresh : 0;
-
- } else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
- /* use host values when guest doesn't use them */
+ } else {
+ /* start from host values otherwise */
vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
- } else {
- /*
- * Intercept every PAUSE otherwise and
- * ignore both host and guest values
- */
- vmcb02->control.pause_filter_count = 0;
- vmcb02->control.pause_filter_thresh = 0;
+
+ /* ... but ensure filtering is disabled if so requested. */
+ if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
+ if (!pause_count12)
+ vmcb02->control.pause_filter_count = 0;
+ if (!pause_thresh12)
+ vmcb02->control.pause_filter_thresh = 0;
+ }
}
nested_svm_transition_tlb_flush(vcpu);
@@ -951,8 +951,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.event_inj = svm->nested.ctl.event_inj;
vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
- if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count)
+ if (!kvm_pause_in_guest(vcpu->kvm)) {
vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
+ vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
+
+ }
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1dc02cdf6960..87da90360bc7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -921,7 +921,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm) || !old)
+ if (kvm_pause_in_guest(vcpu->kvm))
return;
control->pause_filter_count = __grow_ple_window(old,
@@ -942,7 +942,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm) || !old)
+ if (kvm_pause_in_guest(vcpu->kvm))
return;
control->pause_filter_count =
@@ -1400,13 +1400,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
indirect_branch_prediction_barrier();
}
if (kvm_vcpu_apicv_active(vcpu))
- __avic_vcpu_load(vcpu, cpu);
+ avic_vcpu_load(vcpu, cpu);
}
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
if (kvm_vcpu_apicv_active(vcpu))
- __avic_vcpu_put(vcpu);
+ avic_vcpu_put(vcpu);
svm_prepare_host_switch(vcpu);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 500348c1cb35..1bddd336a27e 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -610,8 +610,8 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
int avic_init_vcpu(struct vcpu_svm *svm);
-void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void __avic_vcpu_put(struct kvm_vcpu *vcpu);
+void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9bd86ecccdab..3a919e49129b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -229,6 +229,9 @@ static const struct {
#define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages;
+/* Control for disabling CPU Fill buffer clear */
+static bool __read_mostly vmx_fb_clear_ctrl_available;
+
static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
struct page *page;
@@ -360,6 +363,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
+static void vmx_setup_fb_clear_ctrl(void)
+{
+ u64 msr;
+
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
+ !boot_cpu_has_bug(X86_BUG_MDS) &&
+ !boot_cpu_has_bug(X86_BUG_TAA)) {
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
+ if (msr & ARCH_CAP_FB_CLEAR_CTRL)
+ vmx_fb_clear_ctrl_available = true;
+ }
+}
+
+static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
+{
+ u64 msr;
+
+ if (!vmx->disable_fb_clear)
+ return;
+
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ msr |= FB_CLEAR_DIS;
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ /* Cache the MSR value to avoid reading it later */
+ vmx->msr_ia32_mcu_opt_ctrl = msr;
+}
+
+static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
+{
+ if (!vmx->disable_fb_clear)
+ return;
+
+ vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+}
+
+static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+{
+ vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+
+ /*
+ * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
+ * at VMEntry. Skip the MSR read/write when a guest has no use case to
+ * execute VERW.
+ */
+ if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) ||
+ ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO)))
+ vmx->disable_fb_clear = false;
+}
+
static const struct kernel_param_ops vmentry_l1d_flush_ops = {
.set = vmentry_l1d_flush_set,
.get = vmentry_l1d_flush_get,
@@ -2252,6 +2309,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
ret = kvm_set_msr_common(vcpu, msr_info);
}
+ /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */
+ if (msr_index == MSR_IA32_ARCH_CAPABILITIES)
+ vmx_update_fb_clear_dis(vcpu, vmx);
+
return ret;
}
@@ -4553,6 +4614,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
vpid_sync_context(vmx->vpid);
+
+ vmx_update_fb_clear_dis(vcpu, vmx);
}
static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
@@ -6772,6 +6835,11 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mds_user_clear))
mds_clear_cpu_buffers();
+ else if (static_branch_unlikely(&mmio_stale_data_clear) &&
+ kvm_arch_has_assigned_device(vcpu->kvm))
+ mds_clear_cpu_buffers();
+
+ vmx_disable_fb_clear(vmx);
if (vcpu->arch.cr2 != native_read_cr2())
native_write_cr2(vcpu->arch.cr2);
@@ -6781,6 +6849,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vcpu->arch.cr2 = native_read_cr2();
+ vmx_enable_fb_clear(vmx);
+
guest_state_exit_irqoff();
}
@@ -7709,7 +7779,9 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
BIT(APICV_INHIBIT_REASON_ABSENT) |
BIT(APICV_INHIBIT_REASON_HYPERV) |
- BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
+ BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
+ BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
return supported & BIT(reason);
}
@@ -8212,6 +8284,8 @@ static int __init vmx_init(void)
return r;
}
+ vmx_setup_fb_clear_ctrl();
+
for_each_possible_cpu(cpu) {
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index b98c7e96697a..8d2342ede0c5 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -348,6 +348,8 @@ struct vcpu_vmx {
u64 msr_ia32_feature_control_valid_bits;
/* SGX Launch Control public key hash */
u64 msr_ia32_sgxlepubkeyhash[4];
+ u64 msr_ia32_mcu_opt_ctrl;
+ bool disable_fb_clear;
struct pt_desc pt_desc;
struct lbr_desc lbr_desc;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03fbfbbec460..1910e1e78b15 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1617,6 +1617,9 @@ static u64 kvm_get_arch_capabilities(void)
*/
}
+ /* Guests don't need to know "Fill buffer clear control" exists */
+ data &= ~ARCH_CAP_FB_CLEAR_CTRL;
+
return data;
}
@@ -9850,6 +9853,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
return;
down_read(&vcpu->kvm->arch.apicv_update_lock);
+ preempt_disable();
activate = kvm_vcpu_apicv_activated(vcpu);
@@ -9870,6 +9874,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
out:
+ preempt_enable();
up_read(&vcpu->kvm->arch.apicv_update_lock);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0d46cb728bbf..e6d7e6b01a05 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7046,6 +7046,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
spin_unlock_irq(&bfqd->lock);
#endif
+ blk_stat_disable_accounting(bfqd->queue);
wbt_enable_default(bfqd->queue);
kfree(bfqd);
@@ -7188,7 +7189,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfq_init_root_group(bfqd->root_group, bfqd);
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+ /* We dispatch from request queue wide instead of hw queue */
+ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+
wbt_disable_default(q);
+ blk_stat_enable_accounting(q);
+
return 0;
out_free:
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 9e56a69422b6..eb3c65a21362 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -564,6 +564,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
int ret;
if (!e) {
+ blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
q->elevator = NULL;
q->nr_requests = q->tag_set->queue_depth;
return 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e9bf950983c7..33145ba52c96 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -579,6 +579,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
if (!blk_mq_hw_queue_mapped(data.hctx))
goto out_queue_exit;
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ goto out_queue_exit;
data.ctx = __blk_mq_get_ctx(q, cpu);
if (!q->elevator)
@@ -2141,20 +2143,6 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
EXPORT_SYMBOL(blk_mq_run_hw_queue);
/*
- * Is the request queue handled by an IO scheduler that does not respect
- * hardware queues when dispatching?
- */
-static bool blk_mq_has_sqsched(struct request_queue *q)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e && e->type->ops.dispatch_request &&
- !(e->type->elevator_features & ELEVATOR_F_MQ_AWARE))
- return true;
- return false;
-}
-
-/*
* Return prefered queue to dispatch from (if any) for non-mq aware IO
* scheduler.
*/
@@ -2186,7 +2174,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
unsigned long i;
sq_hctx = NULL;
- if (blk_mq_has_sqsched(q))
+ if (blk_queue_sq_sched(q))
sq_hctx = blk_mq_get_sq_hctx(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (blk_mq_hctx_stopped(hctx))
@@ -2214,7 +2202,7 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
unsigned long i;
sq_hctx = NULL;
- if (blk_mq_has_sqsched(q))
+ if (blk_queue_sq_sched(q))
sq_hctx = blk_mq_get_sq_hctx(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (blk_mq_hctx_stopped(hctx))
@@ -3443,8 +3431,9 @@ static void blk_mq_exit_hctx(struct request_queue *q,
if (blk_mq_hw_queue_mapped(hctx))
blk_mq_tag_idle(hctx);
- blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
- set->queue_depth, flush_rq);
+ if (blk_queue_init_done(q))
+ blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
+ set->queue_depth, flush_rq);
if (set->ops->exit_request)
set->ops->exit_request(set, flush_rq, hctx_idx);
@@ -4438,12 +4427,14 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
if (!qe)
return false;
+ /* q->elevator needs protection from ->sysfs_lock */
+ mutex_lock(&q->sysfs_lock);
+
INIT_LIST_HEAD(&qe->node);
qe->q = q;
qe->type = q->elevator->type;
list_add(&qe->node, head);
- mutex_lock(&q->sysfs_lock);
/*
* After elevator_switch_mq, the previous elevator_queue will be
* released by elevator_release. The reference of the io scheduler
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 70ff2a599ef6..8f7c745b4a57 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -421,6 +421,8 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
blk_stat_enable_accounting(q);
+ blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
+
eq->elevator_data = kqd;
q->elevator = eq;
@@ -1033,7 +1035,6 @@ static struct elevator_type kyber_sched = {
#endif
.elevator_attrs = kyber_sched_attrs,
.elevator_name = "kyber",
- .elevator_features = ELEVATOR_F_MQ_AWARE,
.elevator_owner = THIS_MODULE,
};
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 6ed602b2f80a..1a9e835e816c 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -642,6 +642,9 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
spin_lock_init(&dd->lock);
spin_lock_init(&dd->zone_lock);
+ /* We dispatch from request queue wide instead of hw queue */
+ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+
q->elevator = eq;
return 0;
diff --git a/certs/.gitignore b/certs/.gitignore
index 56637aceaf81..cec5465f31c1 100644
--- a/certs/.gitignore
+++ b/certs/.gitignore
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-/blacklist_hashes_checked
+/blacklist_hash_list
/extract-cert
/x509_certificate_list
/x509_revocation_list
diff --git a/certs/Makefile b/certs/Makefile
index cb1a9da3fc58..a8d628fd5f7b 100644
--- a/certs/Makefile
+++ b/certs/Makefile
@@ -7,22 +7,22 @@ obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o c
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o
obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o
ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),)
-quiet_cmd_check_blacklist_hashes = CHECK $(patsubst "%",%,$(2))
- cmd_check_blacklist_hashes = $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(2); touch $@
-$(eval $(call config_filename,SYSTEM_BLACKLIST_HASH_LIST))
+$(obj)/blacklist_hashes.o: $(obj)/blacklist_hash_list
+CFLAGS_blacklist_hashes.o := -I $(obj)
-$(obj)/blacklist_hashes.o: $(obj)/blacklist_hashes_checked
+quiet_cmd_check_and_copy_blacklist_hash_list = GEN $@
+ cmd_check_and_copy_blacklist_hash_list = \
+ $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) >&2; \
+ cat $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) > $@
-CFLAGS_blacklist_hashes.o += -I$(srctree)
-
-targets += blacklist_hashes_checked
-$(obj)/blacklist_hashes_checked: $(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(SYSTEM_BLACKLIST_HASH_LIST_FILENAME) scripts/check-blacklist-hashes.awk FORCE
- $(call if_changed,check_blacklist_hashes,$(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(CONFIG_SYSTEM_BLACKLIST_HASH_LIST))
+$(obj)/blacklist_hash_list: $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) FORCE
+ $(call if_changed,check_and_copy_blacklist_hash_list)
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o
else
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_nohashes.o
endif
+targets += blacklist_hash_list
quiet_cmd_extract_certs = CERT $@
cmd_extract_certs = $(obj)/extract-cert $(extract-cert-in) $@
@@ -33,7 +33,7 @@ $(obj)/system_certificates.o: $(obj)/x509_certificate_list
$(obj)/x509_certificate_list: $(CONFIG_SYSTEM_TRUSTED_KEYS) $(obj)/extract-cert FORCE
$(call if_changed,extract_certs)
-targets += x509_certificate_list blacklist_hashes_checked
+targets += x509_certificate_list
# If module signing is requested, say by allyesconfig, but a key has not been
# supplied, then one will need to be generated to make sure the build does not
diff --git a/certs/blacklist_hashes.c b/certs/blacklist_hashes.c
index 344892337be0..86d66fe11348 100644
--- a/certs/blacklist_hashes.c
+++ b/certs/blacklist_hashes.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "blacklist.h"
-const char __initdata *const blacklist_hashes[] = {
-#include CONFIG_SYSTEM_BLACKLIST_HASH_LIST
+const char __initconst *const blacklist_hashes[] = {
+#include "blacklist_hash_list"
, NULL
};
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 19197469cfab..1d44893a997b 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -15,6 +15,7 @@ source "crypto/async_tx/Kconfig"
#
menuconfig CRYPTO
tristate "Cryptographic API"
+ select LIB_MEMNEQ
help
This option provides the core Cryptographic API.
diff --git a/crypto/Makefile b/crypto/Makefile
index 43bc33e247d1..ceaaa9f34145 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -4,7 +4,7 @@
#
obj-$(CONFIG_CRYPTO) += crypto.o
-crypto-y := api.o cipher.o compress.o memneq.o
+crypto-y := api.o cipher.o compress.o
obj-$(CONFIG_CRYPTO_ENGINE) += crypto_engine.o
obj-$(CONFIG_CRYPTO_FIPS) += fips.o
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 2ef23fce0860..a97776ea9d99 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -564,6 +564,12 @@ ssize_t __weak cpu_show_srbds(struct device *dev,
return sysfs_emit(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
@@ -573,6 +579,7 @@ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
+static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -584,6 +591,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_tsx_async_abort.attr,
&dev_attr_itlb_multihit.attr,
&dev_attr_srbds.attr,
+ &dev_attr_mmio_stale_data.attr,
NULL
};
diff --git a/drivers/base/init.c b/drivers/base/init.c
index d8d0fe687111..397eb9880cec 100644
--- a/drivers/base/init.c
+++ b/drivers/base/init.c
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/memory.h>
#include <linux/of.h>
+#include <linux/backing-dev.h>
#include "base.h"
@@ -20,6 +21,7 @@
void __init driver_init(void)
{
/* These are the core pieces */
+ bdi_init(&noop_backing_dev_info);
devtmpfs_init();
devices_init();
buses_init();
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 0e22e3b0a04e..38aad99ebb61 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -1019,7 +1019,7 @@ static struct parport_driver lp_driver = {
static int __init lp_init(void)
{
- int i, err = 0;
+ int i, err;
if (parport_nr[0] == LP_PARPORT_OFF)
return 0;
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index ff188ab68496..bb47610bbd1c 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -565,4 +565,3 @@ void __init hv_init_clocksource(void)
hv_sched_clock_offset = hv_read_reference_counter();
hv_setup_sched_clock(read_hv_sched_clock_msr);
}
-EXPORT_SYMBOL_GPL(hv_init_clocksource);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 801f6fa692e9..6de63ea6687e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
atomic64_read(&adev->visible_pin_size),
vram_gtt.vram_size);
vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
- vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
@@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
mem.gtt.total_heap_size = gtt_man->size;
- mem.gtt.total_heap_size *= PAGE_SIZE;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
atomic64_read(&adev->gart_pin_size);
mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 70be67a56673..39b425d83bb1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2812,7 +2812,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
{
- u32 max_cll, min_cll, max, min, q, r;
+ u32 max_avg, min_cll, max, min, q, r;
struct amdgpu_dm_backlight_caps *caps;
struct amdgpu_display_manager *dm;
struct drm_connector *conn_base;
@@ -2842,7 +2842,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
caps = &dm->backlight_caps[i];
caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
caps->aux_support = false;
- max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
+ max_avg = conn_base->hdr_sink_metadata.hdmi_type1.max_fall;
min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
if (caps->ext_caps->bits.oled == 1 /*||
@@ -2870,8 +2870,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
* The results of the above expressions can be verified at
* pre_computed_values.
*/
- q = max_cll >> 5;
- r = max_cll % 32;
+ q = max_avg >> 5;
+ r = max_avg % 32;
max = (1 << q) * pre_computed_values[r];
// min luminance: maxLum * (CV/255)^2 / 100
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 424ea23eec32..16c539657f73 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -177,15 +177,15 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = {
DRV_PTR(mixer_driver, CONFIG_DRM_EXYNOS_MIXER),
DRM_COMPONENT_DRIVER
}, {
- DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC),
- DRM_COMPONENT_DRIVER
- }, {
DRV_PTR(dp_driver, CONFIG_DRM_EXYNOS_DP),
DRM_COMPONENT_DRIVER
}, {
DRV_PTR(dsi_driver, CONFIG_DRM_EXYNOS_DSI),
DRM_COMPONENT_DRIVER
}, {
+ DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC),
+ DRM_COMPONENT_DRIVER
+ }, {
DRV_PTR(hdmi_driver, CONFIG_DRM_EXYNOS_HDMI),
DRM_COMPONENT_DRIVER
}, {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index 9e06f8e2a863..09ce28ee08d9 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -26,6 +26,7 @@
#include <drm/drm_print.h>
#include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
/* Sysreg registers for MIC */
#define DSD_CFG_MUX 0x1004
@@ -100,9 +101,7 @@ struct exynos_mic {
bool i80_mode;
struct videomode vm;
- struct drm_encoder *encoder;
struct drm_bridge bridge;
- struct drm_bridge *next_bridge;
bool enabled;
};
@@ -229,8 +228,6 @@ static void mic_set_reg_on(struct exynos_mic *mic, bool enable)
writel(reg, mic->reg + MIC_OP);
}
-static void mic_disable(struct drm_bridge *bridge) { }
-
static void mic_post_disable(struct drm_bridge *bridge)
{
struct exynos_mic *mic = bridge->driver_private;
@@ -297,34 +294,30 @@ unlock:
mutex_unlock(&mic_mutex);
}
-static void mic_enable(struct drm_bridge *bridge) { }
-
-static int mic_attach(struct drm_bridge *bridge,
- enum drm_bridge_attach_flags flags)
-{
- struct exynos_mic *mic = bridge->driver_private;
-
- return drm_bridge_attach(bridge->encoder, mic->next_bridge,
- &mic->bridge, flags);
-}
-
static const struct drm_bridge_funcs mic_bridge_funcs = {
- .disable = mic_disable,
.post_disable = mic_post_disable,
.mode_set = mic_mode_set,
.pre_enable = mic_pre_enable,
- .enable = mic_enable,
- .attach = mic_attach,
};
static int exynos_mic_bind(struct device *dev, struct device *master,
void *data)
{
struct exynos_mic *mic = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_crtc *crtc = exynos_drm_crtc_get_by_type(drm_dev,
+ EXYNOS_DISPLAY_TYPE_LCD);
+ struct drm_encoder *e, *encoder = NULL;
+
+ drm_for_each_encoder(e, drm_dev)
+ if (e->possible_crtcs == drm_crtc_mask(&crtc->base))
+ encoder = e;
+ if (!encoder)
+ return -ENODEV;
mic->bridge.driver_private = mic;
- return 0;
+ return drm_bridge_attach(encoder, &mic->bridge, NULL, 0);
}
static void exynos_mic_unbind(struct device *dev, struct device *master,
@@ -388,7 +381,6 @@ static int exynos_mic_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct exynos_mic *mic;
- struct device_node *remote;
struct resource res;
int ret, i;
@@ -432,16 +424,6 @@ static int exynos_mic_probe(struct platform_device *pdev)
}
}
- remote = of_graph_get_remote_node(dev->of_node, 1, 0);
- mic->next_bridge = of_drm_find_bridge(remote);
- if (IS_ERR(mic->next_bridge)) {
- DRM_DEV_ERROR(dev, "mic: Failed to find next bridge\n");
- ret = PTR_ERR(mic->next_bridge);
- goto err;
- }
-
- of_node_put(remote);
-
platform_set_drvdata(pdev, mic);
mic->bridge.funcs = &mic_bridge_funcs;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c326bd2b444f..30fe847c6664 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -999,7 +999,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
}
}
- err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
+ /* Reserve enough slots to accommodate composite fences */
+ err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 53307ca0eed0..51a0fe60c050 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
+ intel_gt_sysfs_unregister(gt);
intel_rps_driver_unregister(&gt->rps);
intel_gsc_fini(&gt->gsc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index 8ec8bc660c8c..9e4ebf53379b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
static struct intel_gt *kobj_to_gt(struct kobject *kobj)
{
- return container_of(kobj, struct kobj_gt, base)->gt;
+ return container_of(kobj, struct intel_gt, sysfs_gt);
}
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
@@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
};
ATTRIBUTE_GROUPS(id);
+/* A kobject needs a release() method even if it does nothing */
static void kobj_gt_release(struct kobject *kobj)
{
- kfree(kobj);
}
static struct kobj_type kobj_gt_type = {
@@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
void intel_gt_sysfs_register(struct intel_gt *gt)
{
- struct kobj_gt *kg;
-
/*
* We need to make things right with the
* ABI compatibility. The files were originally
@@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
if (gt_is_root(gt))
intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
- kg = kzalloc(sizeof(*kg), GFP_KERNEL);
- if (!kg)
+ /* init and xfer ownership to sysfs tree */
+ if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
+ gt->i915->sysfs_gt, "gt%d", gt->info.id))
goto exit_fail;
- kobject_init(&kg->base, &kobj_gt_type);
- kg->gt = gt;
-
- /* xfer ownership to sysfs tree */
- if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
- goto exit_kobj_put;
-
- intel_gt_sysfs_pm_init(gt, &kg->base);
+ intel_gt_sysfs_pm_init(gt, &gt->sysfs_gt);
return;
-exit_kobj_put:
- kobject_put(&kg->base);
-
exit_fail:
+ kobject_put(&gt->sysfs_gt);
drm_warn(&gt->i915->drm,
"failed to initialize gt%d sysfs root\n", gt->info.id);
}
+
+void intel_gt_sysfs_unregister(struct intel_gt *gt)
+{
+ kobject_put(&gt->sysfs_gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
index 9471b26752cf..a99aa7e8b01a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
@@ -13,11 +13,6 @@
struct intel_gt;
-struct kobj_gt {
- struct kobject base;
- struct intel_gt *gt;
-};
-
bool is_object_gt(struct kobject *kobj);
struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
@@ -28,6 +23,7 @@ intel_gt_create_kobj(struct intel_gt *gt,
const char *name);
void intel_gt_sysfs_register(struct intel_gt *gt);
+void intel_gt_sysfs_unregister(struct intel_gt *gt);
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
const char *name);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index b06611c1d4ad..edd7a3cf5f5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -224,6 +224,9 @@ struct intel_gt {
} mocs;
struct intel_pxp pxp;
+
+ /* gt/gtN sysfs */
+ struct kobject sysfs_gt;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index d078f884b5e3..f0d7b57b741e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -156,7 +156,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
};
- static const struct uc_fw_platform_requirement *fw_blobs;
+ const struct uc_fw_platform_requirement *fw_blobs;
enum intel_platform p = INTEL_INFO(i915)->platform;
u32 fw_count;
u8 rev = INTEL_REVID(i915);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 8521daba212a..1e2750210831 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -166,7 +166,14 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
struct device *kdev = kobj_to_dev(kobj);
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
struct i915_gpu_coredump *gpu;
- ssize_t ret;
+ ssize_t ret = 0;
+
+ /*
+ * FIXME: Concurrent clients triggering resets and reading + clearing
+ * dumps can cause inconsistent sysfs reads when a user calls in with a
+ * non-zero offset to complete a prior partial read but the
+ * gpu_coredump has been cleared or replaced.
+ */
gpu = i915_first_error_state(i915);
if (IS_ERR(gpu)) {
@@ -178,8 +185,10 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
const char *str = "No error state collected\n";
size_t len = strlen(str);
- ret = min_t(size_t, count, len - off);
- memcpy(buf, str + off, ret);
+ if (off < len) {
+ ret = min_t(size_t, count, len - off);
+ memcpy(buf, str + off, ret);
+ }
}
return ret;
@@ -259,4 +268,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
device_remove_bin_file(kdev, &dpf_attrs_1);
device_remove_bin_file(kdev, &dpf_attrs);
+
+ kobject_put(dev_priv->sysfs_gt);
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4f6db539571a..0bffb70b3c5f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -23,6 +23,7 @@
*/
#include <linux/sched/mm.h>
+#include <linux/dma-fence-array.h>
#include <drm/drm_gem.h>
#include "display/intel_frontbuffer.h"
@@ -1823,6 +1824,21 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
if (unlikely(err))
return err;
+ /*
+ * Reserve fences slot early to prevent an allocation after preparing
+ * the workload and associating fences with dma_resv.
+ */
+ if (fence && !(flags & __EXEC_OBJECT_NO_RESERVE)) {
+ struct dma_fence *curr;
+ int idx;
+
+ dma_fence_array_for_each(curr, idx, fence)
+ ;
+ err = dma_resv_reserve_fences(vma->obj->base.resv, idx);
+ if (unlikely(err))
+ return err;
+ }
+
if (flags & EXEC_OBJECT_WRITE) {
struct intel_frontbuffer *front;
@@ -1832,31 +1848,23 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
i915_active_add_request(&front->write, rq);
intel_frontbuffer_put(front);
}
+ }
- if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
- err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
- if (unlikely(err))
- return err;
- }
+ if (fence) {
+ struct dma_fence *curr;
+ enum dma_resv_usage usage;
+ int idx;
- if (fence) {
- dma_resv_add_fence(vma->obj->base.resv, fence,
- DMA_RESV_USAGE_WRITE);
+ obj->read_domains = 0;
+ if (flags & EXEC_OBJECT_WRITE) {
+ usage = DMA_RESV_USAGE_WRITE;
obj->write_domain = I915_GEM_DOMAIN_RENDER;
- obj->read_domains = 0;
- }
- } else {
- if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
- err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
- if (unlikely(err))
- return err;
+ } else {
+ usage = DMA_RESV_USAGE_READ;
}
- if (fence) {
- dma_resv_add_fence(vma->obj->base.resv, fence,
- DMA_RESV_USAGE_READ);
- obj->write_domain = 0;
- }
+ dma_fence_array_for_each(curr, idx, fence)
+ dma_resv_add_fence(vma->obj->base.resv, curr, usage);
}
if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 75d308ec173d..406e9c324e76 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -109,11 +109,11 @@ void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
return;
spin_lock(&bo->bdev->lru_lock);
- if (bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+ if (bo->resource)
+ ttm_resource_del_bulk_move(bo->resource, bo);
bo->bulk_move = bulk;
- if (bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+ if (bo->resource)
+ ttm_resource_add_bulk_move(bo->resource, bo);
spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_bo_set_bulk_move);
@@ -689,8 +689,11 @@ void ttm_bo_pin(struct ttm_buffer_object *bo)
{
dma_resv_assert_held(bo->base.resv);
WARN_ON_ONCE(!kref_read(&bo->kref));
- if (!(bo->pin_count++) && bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+ spin_lock(&bo->bdev->lru_lock);
+ if (bo->resource)
+ ttm_resource_del_bulk_move(bo->resource, bo);
+ ++bo->pin_count;
+ spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_bo_pin);
@@ -707,8 +710,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
if (WARN_ON_ONCE(!bo->pin_count))
return;
- if (!(--bo->pin_count) && bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+ spin_lock(&bo->bdev->lru_lock);
+ --bo->pin_count;
+ if (bo->resource)
+ ttm_resource_add_bulk_move(bo->resource, bo);
+ spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_bo_unpin);
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index a0562ab386f5..e7147e304637 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -156,8 +156,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
ttm_resource_manager_for_each_res(man, &cursor, res) {
struct ttm_buffer_object *bo = res->bo;
- uint32_t num_pages = PFN_UP(bo->base.size);
+ uint32_t num_pages;
+ if (!bo)
+ continue;
+
+ num_pages = PFN_UP(bo->base.size);
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
if (!ret)
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 65889b3caf50..20f9adcc3235 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,8 +91,8 @@ static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos,
}
/* Add the resource to a bulk_move cursor */
-void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res)
+static void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
+ struct ttm_resource *res)
{
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
@@ -105,8 +105,8 @@ void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
}
/* Remove the resource from a bulk_move range */
-void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res)
+static void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
+ struct ttm_resource *res)
{
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
@@ -122,6 +122,22 @@ void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
}
}
+/* Add the resource to a bulk move if the BO is configured for it */
+void ttm_resource_add_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo)
+{
+ if (bo->bulk_move && !bo->pin_count)
+ ttm_lru_bulk_move_add(bo->bulk_move, res);
+}
+
+/* Remove the resource from a bulk move if the BO is configured for it */
+void ttm_resource_del_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo)
+{
+ if (bo->bulk_move && !bo->pin_count)
+ ttm_lru_bulk_move_del(bo->bulk_move, res);
+}
+
/* Move a resource to the LRU or bulk tail */
void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
{
@@ -169,15 +185,14 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
res->bus.is_iomem = false;
res->bus.caching = ttm_cached;
res->bo = bo;
- INIT_LIST_HEAD(&res->lru);
man = ttm_manager_type(bo->bdev, place->mem_type);
spin_lock(&bo->bdev->lru_lock);
- man->usage += res->num_pages << PAGE_SHIFT;
- if (bo->bulk_move)
- ttm_lru_bulk_move_add(bo->bulk_move, res);
+ if (bo->pin_count)
+ list_add_tail(&res->lru, &bo->bdev->pinned);
else
- ttm_resource_move_to_lru_tail(res);
+ list_add_tail(&res->lru, &man->lru[bo->priority]);
+ man->usage += res->num_pages << PAGE_SHIFT;
spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_resource_init);
@@ -210,8 +225,16 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo,
{
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, place->mem_type);
+ int ret;
+
+ ret = man->func->alloc(man, bo, place, res_ptr);
+ if (ret)
+ return ret;
- return man->func->alloc(man, bo, place, res_ptr);
+ spin_lock(&bo->bdev->lru_lock);
+ ttm_resource_add_bulk_move(*res_ptr, bo);
+ spin_unlock(&bo->bdev->lru_lock);
+ return 0;
}
void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
@@ -221,12 +244,9 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
if (!*res)
return;
- if (bo->bulk_move) {
- spin_lock(&bo->bdev->lru_lock);
- ttm_lru_bulk_move_del(bo->bulk_move, *res);
- spin_unlock(&bo->bdev->lru_lock);
- }
-
+ spin_lock(&bo->bdev->lru_lock);
+ ttm_resource_del_bulk_move(*res, bo);
+ spin_unlock(&bo->bdev->lru_lock);
man = ttm_manager_type(bo->bdev, (*res)->mem_type);
man->func->free(man, *res);
*res = NULL;
diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index 978ee2aab2d4..e0bc73124196 100644
--- a/drivers/hid/hid-hyperv.c
+++ b/drivers/hid/hid-hyperv.c
@@ -199,7 +199,8 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device,
if (!input_device->hid_desc)
goto cleanup;
- input_device->report_desc_size = desc->desc[0].wDescriptorLength;
+ input_device->report_desc_size = le16_to_cpu(
+ desc->desc[0].wDescriptorLength);
if (input_device->report_desc_size == 0) {
input_device->dev_info_status = -EINVAL;
goto cleanup;
@@ -217,7 +218,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device,
memcpy(input_device->report_desc,
((unsigned char *)desc) + desc->bLength,
- desc->desc[0].wDescriptorLength);
+ le16_to_cpu(desc->desc[0].wDescriptorLength));
/* Send the ack */
memset(&ack, 0, sizeof(struct mousevsc_prt_msg));
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index b60f13481bdc..5b120402d405 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -21,6 +21,7 @@
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>
+#include <linux/sched/isolation.h>
#include "hyperv_vmbus.h"
@@ -638,6 +639,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
*/
if (newchannel->offermsg.offer.sub_channel_index == 0) {
mutex_unlock(&vmbus_connection.channel_mutex);
+ cpus_read_unlock();
/*
* Don't call free_channel(), because newchannel->kobj
* is not initialized yet.
@@ -728,16 +730,20 @@ static void init_vp_index(struct vmbus_channel *channel)
u32 i, ncpu = num_online_cpus();
cpumask_var_t available_mask;
struct cpumask *allocated_mask;
+ const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
u32 target_cpu;
int numa_node;
if (!perf_chn ||
- !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
+ !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
+ cpumask_empty(hk_mask)) {
/*
* If the channel is not a performance critical
* channel, bind it to VMBUS_CONNECT_CPU.
* In case alloc_cpumask_var() fails, bind it to
* VMBUS_CONNECT_CPU.
+ * If all the cpus are isolated, bind it to
+ * VMBUS_CONNECT_CPU.
*/
channel->target_cpu = VMBUS_CONNECT_CPU;
if (perf_chn)
@@ -758,17 +764,19 @@ static void init_vp_index(struct vmbus_channel *channel)
}
allocated_mask = &hv_context.hv_numa_map[numa_node];
- if (cpumask_equal(allocated_mask, cpumask_of_node(numa_node))) {
+retry:
+ cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
+ cpumask_and(available_mask, available_mask, hk_mask);
+
+ if (cpumask_empty(available_mask)) {
/*
* We have cycled through all the CPUs in the node;
* reset the allocated map.
*/
cpumask_clear(allocated_mask);
+ goto retry;
}
- cpumask_xor(available_mask, allocated_mask,
- cpumask_of_node(numa_node));
-
target_cpu = cpumask_first(available_mask);
cpumask_set_cpu(target_cpu, allocated_mask);
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index c698592b83e4..d35b60c06114 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -394,7 +394,7 @@ kvp_send_key(struct work_struct *dummy)
in_msg = kvp_transaction.kvp_msg;
/*
- * The key/value strings sent from the host are encoded in
+ * The key/value strings sent from the host are encoded
* in utf16; convert it to utf8 strings.
* The host assures us that the utf16 strings will not exceed
* the max lengths specified. We will however, reserve room
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 714d549b7b46..547ae334e5cd 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -21,6 +21,7 @@
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
+#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>
#include <linux/delay.h>
@@ -1770,6 +1771,9 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (target_cpu >= nr_cpumask_bits)
return -EINVAL;
+ if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
+ return -EINVAL;
+
/* No CPUs should come up or down during this. */
cpus_read_lock();
diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index 57e11b2bab74..3633ab691662 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -259,7 +259,7 @@ static const struct ec_board_info board_info[] = {
},
{
.board_names = {
- "ROG CROSSHAIR VIII FORMULA"
+ "ROG CROSSHAIR VIII FORMULA",
"ROG CROSSHAIR VIII HERO",
"ROG CROSSHAIR VIII HERO (WI-FI)",
},
diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index d78f4bebc718..ea070b91e5b9 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -1228,10 +1228,15 @@ EXPORT_SYMBOL_GPL(occ_setup);
void occ_shutdown(struct occ *occ)
{
+ mutex_lock(&occ->lock);
+
occ_shutdown_sysfs(occ);
if (occ->hwmon)
hwmon_device_unregister(occ->hwmon);
+ occ->hwmon = NULL;
+
+ mutex_unlock(&occ->lock);
}
EXPORT_SYMBOL_GPL(occ_shutdown);
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 06f328928a7f..2dda05aada23 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -415,8 +415,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
/*
* Work out how many "unsigned long"s we need to hold the bitset.
*/
- bitset_size = dm_round_up(region_count,
- sizeof(*lc->clean_bits) << BYTE_SHIFT);
+ bitset_size = dm_round_up(region_count, BITS_PER_LONG);
bitset_size >>= BYTE_SHIFT;
lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5e41fbae3f6b..9526ccbedafb 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3725,7 +3725,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, false);
+ md_reap_sync_thread(mddev);
}
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
return -EBUSY;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d8f16183bf27..b6b25d319ef7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -555,6 +555,10 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone)
unsigned long flags;
/* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */
spin_lock_irqsave(&io->lock, flags);
+ if (dm_io_flagged(io, DM_IO_ACCOUNTED)) {
+ spin_unlock_irqrestore(&io->lock, flags);
+ return;
+ }
dm_io_set_flag(io, DM_IO_ACCOUNTED);
spin_unlock_irqrestore(&io->lock, flags);
}
@@ -711,18 +715,18 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
}
static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md,
- int *srcu_idx, struct bio *bio)
+ int *srcu_idx, unsigned bio_opf)
{
- if (bio->bi_opf & REQ_NOWAIT)
+ if (bio_opf & REQ_NOWAIT)
return dm_get_live_table_fast(md);
else
return dm_get_live_table(md, srcu_idx);
}
static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx,
- struct bio *bio)
+ unsigned bio_opf)
{
- if (bio->bi_opf & REQ_NOWAIT)
+ if (bio_opf & REQ_NOWAIT)
dm_put_live_table_fast(md);
else
dm_put_live_table(md, srcu_idx);
@@ -1609,7 +1613,12 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
ti = dm_table_find_target(ci->map, ci->sector);
if (unlikely(!ti))
return BLK_STS_IOERR;
- else if (unlikely(ci->is_abnormal_io))
+
+ if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
+ unlikely(!dm_target_supports_nowait(ti->type)))
+ return BLK_STS_NOTSUPP;
+
+ if (unlikely(ci->is_abnormal_io))
return __process_abnormal_io(ci, ti);
/*
@@ -1711,8 +1720,9 @@ static void dm_submit_bio(struct bio *bio)
struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
int srcu_idx;
struct dm_table *map;
+ unsigned bio_opf = bio->bi_opf;
- map = dm_get_live_table_bio(md, &srcu_idx, bio);
+ map = dm_get_live_table_bio(md, &srcu_idx, bio_opf);
/* If suspended, or map not yet available, queue this IO for later */
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
@@ -1728,7 +1738,7 @@ static void dm_submit_bio(struct bio *bio)
dm_split_and_process_bio(md, map, bio);
out:
- dm_put_live_table_bio(md, srcu_idx, bio);
+ dm_put_live_table_bio(md, srcu_idx, bio_opf);
}
static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8273ac5eef06..c7ecb0bffda0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4831,7 +4831,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
}
mddev_unlock(mddev);
}
@@ -6197,7 +6197,7 @@ static void __md_stop_writes(struct mddev *mddev)
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
}
del_timer_sync(&mddev->safemode_timer);
@@ -9303,7 +9303,7 @@ void md_check_recovery(struct mddev *mddev)
* ->spare_active and clear saved_raid_disk
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
@@ -9338,7 +9338,7 @@ void md_check_recovery(struct mddev *mddev)
goto unlock;
}
if (mddev->sync_thread) {
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
goto unlock;
}
/* Set RUNNING before clearing NEEDED to avoid
@@ -9411,18 +9411,14 @@ void md_check_recovery(struct mddev *mddev)
}
EXPORT_SYMBOL(md_check_recovery);
-void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held)
+void md_reap_sync_thread(struct mddev *mddev)
{
struct md_rdev *rdev;
sector_t old_dev_sectors = mddev->dev_sectors;
bool is_reshaped = false;
- if (reconfig_mutex_held)
- mddev_unlock(mddev);
/* resync has finished, collect result */
md_unregister_thread(&mddev->sync_thread);
- if (reconfig_mutex_held)
- mddev_lock_nointr(mddev);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
mddev->degraded != mddev->raid_disks) {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 5f62c46ac2d3..cf2cbb17acbd 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -719,7 +719,7 @@ extern struct md_thread *md_register_thread(
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
-extern void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held);
+extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 973e2e06f19c..0a2e4806b1ec 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -629,9 +629,9 @@ static void ppl_do_flush(struct ppl_io_unit *io)
if (bdev) {
struct bio *bio;
- bio = bio_alloc_bioset(bdev, 0, GFP_NOIO,
+ bio = bio_alloc_bioset(bdev, 0,
REQ_OP_WRITE | REQ_PREFLUSH,
- &ppl_conf->flush_bs);
+ GFP_NOIO, &ppl_conf->flush_bs);
bio->bi_private = io;
bio->bi_end_io = ppl_flush_endio;
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
index d6cd5537126c..69f9b0336410 100644
--- a/drivers/misc/atmel-ssc.c
+++ b/drivers/misc/atmel-ssc.c
@@ -232,9 +232,9 @@ static int ssc_probe(struct platform_device *pdev)
clk_disable_unprepare(ssc->clk);
ssc->irq = platform_get_irq(pdev, 0);
- if (!ssc->irq) {
+ if (ssc->irq < 0) {
dev_dbg(&pdev->dev, "could not get irq\n");
- return -ENXIO;
+ return ssc->irq;
}
mutex_lock(&user_lock);
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 8d169a35cf13..c9c56fd194c1 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -79,6 +79,11 @@ static int at25_ee_read(void *priv, unsigned int offset,
{
struct at25_data *at25 = priv;
char *buf = val;
+ size_t max_chunk = spi_max_transfer_size(at25->spi);
+ size_t num_msgs = DIV_ROUND_UP(count, max_chunk);
+ size_t nr_bytes = 0;
+ unsigned int msg_offset;
+ size_t msg_count;
u8 *cp;
ssize_t status;
struct spi_transfer t[2];
@@ -92,54 +97,59 @@ static int at25_ee_read(void *priv, unsigned int offset,
if (unlikely(!count))
return -EINVAL;
- cp = at25->command;
+ msg_offset = (unsigned int)offset;
+ msg_count = min(count, max_chunk);
+ while (num_msgs) {
+ cp = at25->command;
- instr = AT25_READ;
- if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
- if (offset >= BIT(at25->addrlen * 8))
- instr |= AT25_INSTR_BIT3;
+ instr = AT25_READ;
+ if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
+ if (msg_offset >= BIT(at25->addrlen * 8))
+ instr |= AT25_INSTR_BIT3;
- mutex_lock(&at25->lock);
+ mutex_lock(&at25->lock);
- *cp++ = instr;
-
- /* 8/16/24-bit address is written MSB first */
- switch (at25->addrlen) {
- default: /* case 3 */
- *cp++ = offset >> 16;
- fallthrough;
- case 2:
- *cp++ = offset >> 8;
- fallthrough;
- case 1:
- case 0: /* can't happen: for better code generation */
- *cp++ = offset >> 0;
- }
+ *cp++ = instr;
- spi_message_init(&m);
- memset(t, 0, sizeof(t));
+ /* 8/16/24-bit address is written MSB first */
+ switch (at25->addrlen) {
+ default: /* case 3 */
+ *cp++ = msg_offset >> 16;
+ fallthrough;
+ case 2:
+ *cp++ = msg_offset >> 8;
+ fallthrough;
+ case 1:
+ case 0: /* can't happen: for better code generation */
+ *cp++ = msg_offset >> 0;
+ }
- t[0].tx_buf = at25->command;
- t[0].len = at25->addrlen + 1;
- spi_message_add_tail(&t[0], &m);
+ spi_message_init(&m);
+ memset(t, 0, sizeof(t));
- t[1].rx_buf = buf;
- t[1].len = count;
- spi_message_add_tail(&t[1], &m);
+ t[0].tx_buf = at25->command;
+ t[0].len = at25->addrlen + 1;
+ spi_message_add_tail(&t[0], &m);
- /*
- * Read it all at once.
- *
- * REVISIT that's potentially a problem with large chips, if
- * other devices on the bus need to be accessed regularly or
- * this chip is clocked very slowly.
- */
- status = spi_sync(at25->spi, &m);
- dev_dbg(&at25->spi->dev, "read %zu bytes at %d --> %zd\n",
- count, offset, status);
+ t[1].rx_buf = buf + nr_bytes;
+ t[1].len = msg_count;
+ spi_message_add_tail(&t[1], &m);
- mutex_unlock(&at25->lock);
- return status;
+ status = spi_sync(at25->spi, &m);
+
+ mutex_unlock(&at25->lock);
+
+ if (status)
+ return status;
+
+ --num_msgs;
+ msg_offset += msg_count;
+ nr_bytes += msg_count;
+ }
+
+ dev_dbg(&at25->spi->dev, "read %zu bytes at %d\n",
+ count, offset);
+ return 0;
}
/* Read extra registers as ID or serial number */
@@ -190,6 +200,7 @@ ATTRIBUTE_GROUPS(sernum);
static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
{
struct at25_data *at25 = priv;
+ size_t maxsz = spi_max_transfer_size(at25->spi);
const char *buf = val;
int status = 0;
unsigned buf_size;
@@ -253,6 +264,8 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
segment = buf_size - (offset % buf_size);
if (segment > count)
segment = count;
+ if (segment > maxsz)
+ segment = maxsz;
memcpy(cp, buf, segment);
status = spi_write(at25->spi, bounce,
segment + at25->addrlen + 1);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index 4ebd2410185a..4d790a89fe77 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -338,7 +338,7 @@ static int xgbe_platform_probe(struct platform_device *pdev)
* the PHY resources listed last
*/
phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3;
- phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1;
+ phy_irqnum = platform_irq_count(pdev) - 1;
dma_irqnum = 1;
dma_irqend = phy_irqnum;
} else {
@@ -348,7 +348,7 @@ static int xgbe_platform_probe(struct platform_device *pdev)
phy_memnum = 0;
phy_irqnum = 0;
dma_irqnum = 1;
- dma_irqend = xgbe_resource_count(pdev, IORESOURCE_IRQ);
+ dma_irqend = platform_irq_count(pdev);
}
/* Obtain the mmio areas for the device */
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index e6f48786949c..02bd3cf9a260 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -332,7 +332,6 @@ static void bgmac_remove(struct bcma_device *core)
bcma_mdio_mii_unregister(bgmac->mii_bus);
bgmac_enet_remove(bgmac);
bcma_set_drvdata(core, NULL);
- kfree(bgmac);
}
static struct bcma_driver bgmac_bcma_driver = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 8a3a446219f7..94f80e1c4020 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -769,6 +769,7 @@ struct hnae3_tc_info {
u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */
u16 tqp_count[HNAE3_MAX_TC];
u16 tqp_offset[HNAE3_MAX_TC];
+ u8 max_tc; /* Total number of TCs */
u8 num_tc; /* Total number of enabled TCs */
bool mqprio_active;
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 6d20974519fe..4c7988e308a2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1129,7 +1129,7 @@ hns3_is_ringparam_changed(struct net_device *ndev,
if (old_ringparam->tx_desc_num == new_ringparam->tx_desc_num &&
old_ringparam->rx_desc_num == new_ringparam->rx_desc_num &&
old_ringparam->rx_buf_len == new_ringparam->rx_buf_len) {
- netdev_info(ndev, "ringparam not changed\n");
+ netdev_info(ndev, "descriptor number and rx buffer length not changed\n");
return false;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 1ebad0e50e6a..fae79764dc44 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3268,7 +3268,7 @@ static int hclge_tp_port_init(struct hclge_dev *hdev)
static int hclge_update_port_info(struct hclge_dev *hdev)
{
struct hclge_mac *mac = &hdev->hw.mac;
- int speed = HCLGE_MAC_SPEED_UNKNOWN;
+ int speed;
int ret;
/* get the port info from SFP cmd if not copper port */
@@ -3279,10 +3279,13 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
if (!hdev->support_sfp_query)
return 0;
- if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
+ if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
+ speed = mac->speed;
ret = hclge_get_sfp_info(hdev, mac);
- else
+ } else {
+ speed = HCLGE_MAC_SPEED_UNKNOWN;
ret = hclge_get_sfp_speed(hdev, &speed);
+ }
if (ret == -EOPNOTSUPP) {
hdev->support_sfp_query = false;
@@ -3294,6 +3297,8 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
if (mac->speed_type == QUERY_ACTIVE_SPEED) {
hclge_update_port_capability(hdev, mac);
+ if (mac->speed != speed)
+ (void)hclge_tm_port_shaper_cfg(hdev);
return 0;
}
return hclge_cfg_mac_speed_dup(hdev, mac->speed,
@@ -3376,6 +3381,12 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
link_state_old = vport->vf_info.link_state;
vport->vf_info.link_state = link_state;
+ /* return success directly if the VF is unalive, VF will
+ * query link state itself when it starts work.
+ */
+ if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+ return 0;
+
ret = hclge_push_vf_link_status(vport);
if (ret) {
vport->vf_info.link_state = link_state_old;
@@ -10117,6 +10128,7 @@ static int hclge_modify_port_base_vlan_tag(struct hclge_vport *vport,
if (ret)
return ret;
+ vport->port_base_vlan_cfg.tbl_sta = false;
/* remove old VLAN tag */
if (old_info->vlan_tag == 0)
ret = hclge_set_vf_vlan_common(hdev, vport->vport_id,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 1f87a8a3fe32..2f33b036a47a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -282,8 +282,8 @@ static int hclge_tm_pg_to_pri_map_cfg(struct hclge_dev *hdev,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev,
- u16 qs_id, u8 pri)
+static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev, u16 qs_id, u8 pri,
+ bool link_vld)
{
struct hclge_qs_to_pri_link_cmd *map;
struct hclge_desc desc;
@@ -294,7 +294,7 @@ static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev,
map->qs_id = cpu_to_le16(qs_id);
map->priority = pri;
- map->link_vld = HCLGE_TM_QS_PRI_LINK_VLD_MSK;
+ map->link_vld = link_vld ? HCLGE_TM_QS_PRI_LINK_VLD_MSK : 0;
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
@@ -420,7 +420,7 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
+int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
{
struct hclge_port_shapping_cmd *shap_cfg_cmd;
struct hclge_shaper_ir_para ir_para;
@@ -642,11 +642,13 @@ static void hclge_tm_update_kinfo_rss_size(struct hclge_vport *vport)
* one tc for VF for simplicity. VF's vport_id is non zero.
*/
if (vport->vport_id) {
+ kinfo->tc_info.max_tc = 1;
kinfo->tc_info.num_tc = 1;
vport->qs_offset = HNAE3_MAX_TC +
vport->vport_id - HCLGE_VF_VPORT_START_NUM;
vport_max_rss_size = hdev->vf_rss_size_max;
} else {
+ kinfo->tc_info.max_tc = hdev->tc_max;
kinfo->tc_info.num_tc =
min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc);
vport->qs_offset = 0;
@@ -679,7 +681,9 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
kinfo->num_tqps = hclge_vport_get_tqp_num(vport);
vport->dwrr = 100; /* 100 percent as init */
vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit;
- hdev->rss_cfg.rss_size = kinfo->rss_size;
+
+ if (vport->vport_id == PF_VPORT_ID)
+ hdev->rss_cfg.rss_size = kinfo->rss_size;
/* when enable mqprio, the tc_info has been updated. */
if (kinfo->tc_info.mqprio_active)
@@ -714,14 +718,22 @@ static void hclge_tm_vport_info_update(struct hclge_dev *hdev)
static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
{
- u8 i;
+ u8 i, tc_sch_mode;
+ u32 bw_limit;
+
+ for (i = 0; i < hdev->tc_max; i++) {
+ if (i < hdev->tm_info.num_tc) {
+ tc_sch_mode = HCLGE_SCH_MODE_DWRR;
+ bw_limit = hdev->tm_info.pg_info[0].bw_limit;
+ } else {
+ tc_sch_mode = HCLGE_SCH_MODE_SP;
+ bw_limit = 0;
+ }
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
hdev->tm_info.tc_info[i].tc_id = i;
- hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR;
+ hdev->tm_info.tc_info[i].tc_sch_mode = tc_sch_mode;
hdev->tm_info.tc_info[i].pgid = 0;
- hdev->tm_info.tc_info[i].bw_limit =
- hdev->tm_info.pg_info[0].bw_limit;
+ hdev->tm_info.tc_info[i].bw_limit = bw_limit;
}
for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
@@ -926,10 +938,13 @@ static int hclge_tm_pri_q_qs_cfg_tc_base(struct hclge_dev *hdev)
for (k = 0; k < hdev->num_alloc_vport; k++) {
struct hnae3_knic_private_info *kinfo = &vport[k].nic.kinfo;
- for (i = 0; i < kinfo->tc_info.num_tc; i++) {
+ for (i = 0; i < kinfo->tc_info.max_tc; i++) {
+ u8 pri = i < kinfo->tc_info.num_tc ? i : 0;
+ bool link_vld = i < kinfo->tc_info.num_tc;
+
ret = hclge_tm_qs_to_pri_map_cfg(hdev,
vport[k].qs_offset + i,
- i);
+ pri, link_vld);
if (ret)
return ret;
}
@@ -949,7 +964,7 @@ static int hclge_tm_pri_q_qs_cfg_vnet_base(struct hclge_dev *hdev)
for (i = 0; i < HNAE3_MAX_TC; i++) {
ret = hclge_tm_qs_to_pri_map_cfg(hdev,
vport[k].qs_offset + i,
- k);
+ k, true);
if (ret)
return ret;
}
@@ -989,33 +1004,39 @@ static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev)
{
u32 max_tm_rate = hdev->ae_dev->dev_specs.max_tm_rate;
struct hclge_shaper_ir_para ir_para;
- u32 shaper_para;
+ u32 shaper_para_c, shaper_para_p;
int ret;
u32 i;
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
u32 rate = hdev->tm_info.tc_info[i].bw_limit;
- ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PRI,
- &ir_para, max_tm_rate);
- if (ret)
- return ret;
+ if (rate) {
+ ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PRI,
+ &ir_para, max_tm_rate);
+ if (ret)
+ return ret;
+
+ shaper_para_c = hclge_tm_get_shapping_para(0, 0, 0,
+ HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ shaper_para_p = hclge_tm_get_shapping_para(ir_para.ir_b,
+ ir_para.ir_u,
+ ir_para.ir_s,
+ HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ } else {
+ shaper_para_c = 0;
+ shaper_para_p = 0;
+ }
- shaper_para = hclge_tm_get_shapping_para(0, 0, 0,
- HCLGE_SHAPER_BS_U_DEF,
- HCLGE_SHAPER_BS_S_DEF);
ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i,
- shaper_para, rate);
+ shaper_para_c, rate);
if (ret)
return ret;
- shaper_para = hclge_tm_get_shapping_para(ir_para.ir_b,
- ir_para.ir_u,
- ir_para.ir_s,
- HCLGE_SHAPER_BS_U_DEF,
- HCLGE_SHAPER_BS_S_DEF);
ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i,
- shaper_para, rate);
+ shaper_para_p, rate);
if (ret)
return ret;
}
@@ -1125,7 +1146,7 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
int ret;
u32 i, k;
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
pg_info =
&hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid];
dwrr = pg_info->tc_dwrr[i];
@@ -1135,9 +1156,15 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
return ret;
for (k = 0; k < hdev->num_alloc_vport; k++) {
+ struct hnae3_knic_private_info *kinfo = &vport[k].nic.kinfo;
+
+ if (i >= kinfo->tc_info.max_tc)
+ continue;
+
+ dwrr = i < kinfo->tc_info.num_tc ? vport[k].dwrr : 0;
ret = hclge_tm_qs_weight_cfg(
hdev, vport[k].qs_offset + i,
- vport[k].dwrr);
+ dwrr);
if (ret)
return ret;
}
@@ -1303,6 +1330,7 @@ static int hclge_tm_schd_mode_tc_base_cfg(struct hclge_dev *hdev, u8 pri_id)
{
struct hclge_vport *vport = hdev->vport;
int ret;
+ u8 mode;
u16 i;
ret = hclge_tm_pri_schd_mode_cfg(hdev, pri_id);
@@ -1310,9 +1338,16 @@ static int hclge_tm_schd_mode_tc_base_cfg(struct hclge_dev *hdev, u8 pri_id)
return ret;
for (i = 0; i < hdev->num_alloc_vport; i++) {
+ struct hnae3_knic_private_info *kinfo = &vport[i].nic.kinfo;
+
+ if (pri_id >= kinfo->tc_info.max_tc)
+ continue;
+
+ mode = pri_id < kinfo->tc_info.num_tc ? HCLGE_SCH_MODE_DWRR :
+ HCLGE_SCH_MODE_SP;
ret = hclge_tm_qs_schd_mode_cfg(hdev,
vport[i].qs_offset + pri_id,
- HCLGE_SCH_MODE_DWRR);
+ mode);
if (ret)
return ret;
}
@@ -1353,7 +1388,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
u8 i;
if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
ret = hclge_tm_schd_mode_tc_base_cfg(hdev, i);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 619cc30a2dfc..d943943912f7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -237,6 +237,7 @@ int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
+int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev);
int hclge_tm_get_qset_num(struct hclge_dev *hdev, u16 *qset_num);
int hclge_tm_get_pri_num(struct hclge_dev *hdev, u8 *pri_num);
int hclge_tm_get_qset_map_pri(struct hclge_dev *hdev, u16 qset_id, u8 *priority,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 610f00cbaff9..19704f5c8291 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2586,15 +2586,16 @@ static void i40e_diag_test(struct net_device *netdev,
set_bit(__I40E_TESTING, pf->state);
+ if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+ test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) {
+ dev_warn(&pf->pdev->dev,
+ "Cannot start offline testing when PF is in reset state.\n");
+ goto skip_ol_tests;
+ }
+
if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) {
dev_warn(&pf->pdev->dev,
"Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
- data[I40E_ETH_TEST_REG] = 1;
- data[I40E_ETH_TEST_EEPROM] = 1;
- data[I40E_ETH_TEST_INTR] = 1;
- data[I40E_ETH_TEST_LINK] = 1;
- eth_test->flags |= ETH_TEST_FL_FAILED;
- clear_bit(__I40E_TESTING, pf->state);
goto skip_ol_tests;
}
@@ -2641,9 +2642,17 @@ static void i40e_diag_test(struct net_device *netdev,
data[I40E_ETH_TEST_INTR] = 0;
}
-skip_ol_tests:
-
netif_info(pf, drv, netdev, "testing finished\n");
+ return;
+
+skip_ol_tests:
+ data[I40E_ETH_TEST_REG] = 1;
+ data[I40E_ETH_TEST_EEPROM] = 1;
+ data[I40E_ETH_TEST_INTR] = 1;
+ data[I40E_ETH_TEST_LINK] = 1;
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ clear_bit(__I40E_TESTING, pf->state);
+ netif_info(pf, drv, netdev, "testing failed\n");
}
static void i40e_get_wol(struct net_device *netdev,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 332a608dbaa6..72576bb3e94d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8542,6 +8542,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
return -EOPNOTSUPP;
}
+ if (!tc) {
+ dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination");
+ return -EINVAL;
+ }
+
if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
return -EBUSY;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 2606e8f0f19b..033ea71763e3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2282,7 +2282,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
}
if (vf->adq_enabled) {
- for (i = 0; i < I40E_MAX_VF_VSI; i++)
+ for (i = 0; i < vf->num_tc; i++)
num_qps_all += vf->ch[i].num_qps;
if (num_qps_all != qci->num_queue_pairs) {
aq_ret = I40E_ERR_PARAM;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 7dfcf78b57fb..f3ecb3bca33d 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -984,7 +984,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
list_add_tail(&f->list, &adapter->mac_filter_list);
f->add = true;
f->is_new_mac = true;
- f->is_primary = false;
+ f->is_primary = ether_addr_equal(macaddr, adapter->hw.mac.addr);
adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
} else {
f->remove = false;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index e1cae253412c..c1ac2f746714 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5763,25 +5763,38 @@ static netdev_features_t
ice_fix_features(struct net_device *netdev, netdev_features_t features)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
- netdev_features_t supported_vlan_filtering;
- netdev_features_t requested_vlan_filtering;
- struct ice_vsi *vsi = np->vsi;
-
- requested_vlan_filtering = features & NETIF_VLAN_FILTERING_FEATURES;
-
- /* make sure supported_vlan_filtering works for both SVM and DVM */
- supported_vlan_filtering = NETIF_F_HW_VLAN_CTAG_FILTER;
- if (ice_is_dvm_ena(&vsi->back->hw))
- supported_vlan_filtering |= NETIF_F_HW_VLAN_STAG_FILTER;
-
- if (requested_vlan_filtering &&
- requested_vlan_filtering != supported_vlan_filtering) {
- if (requested_vlan_filtering & NETIF_F_HW_VLAN_CTAG_FILTER) {
- netdev_warn(netdev, "cannot support requested VLAN filtering settings, enabling all supported VLAN filtering settings\n");
- features |= supported_vlan_filtering;
+ netdev_features_t req_vlan_fltr, cur_vlan_fltr;
+ bool cur_ctag, cur_stag, req_ctag, req_stag;
+
+ cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
+ cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+ cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+ req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
+ req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+ req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+ if (req_vlan_fltr != cur_vlan_fltr) {
+ if (ice_is_dvm_ena(&np->vsi->back->hw)) {
+ if (req_ctag && req_stag) {
+ features |= NETIF_VLAN_FILTERING_FEATURES;
+ } else if (!req_ctag && !req_stag) {
+ features &= ~NETIF_VLAN_FILTERING_FEATURES;
+ } else if ((!cur_ctag && req_ctag && !cur_stag) ||
+ (!cur_stag && req_stag && !cur_ctag)) {
+ features |= NETIF_VLAN_FILTERING_FEATURES;
+ netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
+ } else if ((cur_ctag && !req_ctag && cur_stag) ||
+ (cur_stag && !req_stag && cur_ctag)) {
+ features &= ~NETIF_VLAN_FILTERING_FEATURES;
+ netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
+ }
} else {
- netdev_warn(netdev, "cannot support requested VLAN filtering settings, clearing all supported VLAN filtering settings\n");
- features &= ~supported_vlan_filtering;
+ if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
+ netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n");
+
+ if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
+ features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 662947c882e8..ef9344ef0d8e 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -2271,7 +2271,7 @@ static int
ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
{
tx->quad = port / ICE_PORTS_PER_QUAD;
- tx->quad_offset = tx->quad * INDEX_PER_PORT;
+ tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT;
tx->len = INDEX_PER_PORT;
return ice_ptp_alloc_tx_tracker(tx);
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index afd048d69959..10e396abf130 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -49,6 +49,37 @@ struct ice_perout_channel {
* To allow multiple ports to access the shared register block independently,
* the blocks are split up so that indexes are assigned to each port based on
* hardware logical port number.
+ *
+ * The timestamp blocks are handled differently for E810- and E822-based
+ * devices. In E810 devices, each port has its own block of timestamps, while in
+ * E822 there is a need to logically break the block of registers into smaller
+ * chunks based on the port number to avoid collisions.
+ *
+ * Example for port 5 in E810:
+ * +--------+--------+--------+--------+--------+--------+--------+--------+
+ * |register|register|register|register|register|register|register|register|
+ * | block | block | block | block | block | block | block | block |
+ * | for | for | for | for | for | for | for | for |
+ * | port 0 | port 1 | port 2 | port 3 | port 4 | port 5 | port 6 | port 7 |
+ * +--------+--------+--------+--------+--------+--------+--------+--------+
+ * ^^
+ * ||
+ * |--- quad offset is always 0
+ * ---- quad number
+ *
+ * Example for port 5 in E822:
+ * +-----------------------------+-----------------------------+
+ * | register block for quad 0 | register block for quad 1 |
+ * |+------+------+------+------+|+------+------+------+------+|
+ * ||port 0|port 1|port 2|port 3|||port 0|port 1|port 2|port 3||
+ * |+------+------+------+------+|+------+------+------+------+|
+ * +-----------------------------+-------^---------------------+
+ * ^ |
+ * | --- quad offset*
+ * ---- quad number
+ *
+ * * PHY port 5 is port 1 in quad 1
+ *
*/
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index cd8e6b50968c..7adf9ddf129e 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -504,6 +504,11 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
}
if (ice_is_vf_disabled(vf)) {
+ vsi = ice_get_vf_vsi(vf);
+ if (WARN_ON(!vsi))
+ return -EINVAL;
+ ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+ ice_vsi_stop_all_rx_rings(vsi);
dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
vf->vf_id);
return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index 1d9b84c3937a..4547bc1f7cee 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -1569,35 +1569,27 @@ error_param:
*/
static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
struct virtchnl_vsi_queue_config_info *qci =
(struct virtchnl_vsi_queue_config_info *)msg;
struct virtchnl_queue_pair_info *qpi;
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
- int i, q_idx;
+ int i = -1, q_idx;
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
goto error_param;
- }
- if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id))
goto error_param;
- }
vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (!vsi)
goto error_param;
- }
if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1610,7 +1602,6 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
!ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
!ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
!ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1620,7 +1611,6 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
* for selected "vsi"
*/
if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1630,14 +1620,13 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
vsi->tx_rings[i]->count = qpi->txq.ring_len;
/* Disable any existing queue first */
- if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx))
goto error_param;
- }
/* Configure a queue with the requested settings */
if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n",
+ vf->vf_id, i);
goto error_param;
}
}
@@ -1651,17 +1640,13 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
if (qpi->rxq.databuffer_size != 0 &&
(qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
- qpi->rxq.databuffer_size < 1024)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ qpi->rxq.databuffer_size < 1024))
goto error_param;
- }
vsi->rx_buf_len = qpi->rxq.databuffer_size;
vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;
if (qpi->rxq.max_pkt_size > max_frame_size ||
- qpi->rxq.max_pkt_size < 64) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ qpi->rxq.max_pkt_size < 64)
goto error_param;
- }
vsi->max_frame = qpi->rxq.max_pkt_size;
/* add space for the port VLAN since the VF driver is
@@ -1672,16 +1657,30 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
vsi->max_frame += VLAN_HLEN;
if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",
+ vf->vf_id, i);
goto error_param;
}
}
}
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_SUCCESS, NULL, 0);
error_param:
+ /* disable whatever we can */
+ for (; i >= 0; i--) {
+ if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n",
+ vf->vf_id, i);
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, i))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n",
+ vf->vf_id, i);
+ }
+
/* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret,
- NULL, 0);
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_ERR_PARAM, NULL, 0);
}
/**
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index bc614a4def9e..3f60a80e34c8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -1390,7 +1390,8 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev,
static const struct ethtool_ops otx2vf_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_MAX_FRAMES,
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
.supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN |
ETHTOOL_RING_USE_CQE_SIZE,
.get_link = otx2_get_link,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
index a68d931090dd..15c8d4de8350 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
@@ -8,8 +8,8 @@
#include "spectrum.h"
enum mlxsw_sp_counter_sub_pool_id {
- MLXSW_SP_COUNTER_SUB_POOL_FLOW,
MLXSW_SP_COUNTER_SUB_POOL_RIF,
+ MLXSW_SP_COUNTER_SUB_POOL_FLOW,
};
int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index 4225efbeda3d..f2e2261b4b7d 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -547,6 +547,57 @@ static inline void axienet_iow(struct axienet_local *lp, off_t offset,
iowrite32(value, lp->regs + offset);
}
+/**
+ * axienet_dma_out32 - Memory mapped Axi DMA register write.
+ * @lp: Pointer to axienet local structure
+ * @reg: Address offset from the base address of the Axi DMA core
+ * @value: Value to be written into the Axi DMA register
+ *
+ * This function writes the desired value into the corresponding Axi DMA
+ * register.
+ */
+
+static inline void axienet_dma_out32(struct axienet_local *lp,
+ off_t reg, u32 value)
+{
+ iowrite32(value, lp->dma_regs + reg);
+}
+
+#if defined(CONFIG_64BIT) && defined(iowrite64)
+/**
+ * axienet_dma_out64 - Memory mapped Axi DMA register write.
+ * @lp: Pointer to axienet local structure
+ * @reg: Address offset from the base address of the Axi DMA core
+ * @value: Value to be written into the Axi DMA register
+ *
+ * This function writes the desired value into the corresponding Axi DMA
+ * register.
+ */
+static inline void axienet_dma_out64(struct axienet_local *lp,
+ off_t reg, u64 value)
+{
+ iowrite64(value, lp->dma_regs + reg);
+}
+
+static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,
+ dma_addr_t addr)
+{
+ if (lp->features & XAE_FEATURE_DMA_64BIT)
+ axienet_dma_out64(lp, reg, addr);
+ else
+ axienet_dma_out32(lp, reg, lower_32_bits(addr));
+}
+
+#else /* CONFIG_64BIT */
+
+static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,
+ dma_addr_t addr)
+{
+ axienet_dma_out32(lp, reg, lower_32_bits(addr));
+}
+
+#endif /* CONFIG_64BIT */
+
/* Function prototypes visible in xilinx_axienet_mdio.c for other files */
int axienet_mdio_enable(struct axienet_local *lp);
void axienet_mdio_disable(struct axienet_local *lp);
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 93c9f305bba4..1760930ec0c4 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -133,30 +133,6 @@ static inline u32 axienet_dma_in32(struct axienet_local *lp, off_t reg)
return ioread32(lp->dma_regs + reg);
}
-/**
- * axienet_dma_out32 - Memory mapped Axi DMA register write.
- * @lp: Pointer to axienet local structure
- * @reg: Address offset from the base address of the Axi DMA core
- * @value: Value to be written into the Axi DMA register
- *
- * This function writes the desired value into the corresponding Axi DMA
- * register.
- */
-static inline void axienet_dma_out32(struct axienet_local *lp,
- off_t reg, u32 value)
-{
- iowrite32(value, lp->dma_regs + reg);
-}
-
-static void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,
- dma_addr_t addr)
-{
- axienet_dma_out32(lp, reg, lower_32_bits(addr));
-
- if (lp->features & XAE_FEATURE_DMA_64BIT)
- axienet_dma_out32(lp, reg + 4, upper_32_bits(addr));
-}
-
static void desc_set_phys_addr(struct axienet_local *lp, dma_addr_t addr,
struct axidma_bd *desc)
{
@@ -2061,6 +2037,11 @@ static int axienet_probe(struct platform_device *pdev)
iowrite32(0x0, desc);
}
}
+ if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) {
+ dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n");
+ ret = -EINVAL;
+ goto cleanup_clk;
+ }
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width));
if (ret) {
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 7a8c11a26eb5..4704ed6f00ef 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1750,7 +1750,7 @@ static const struct driver_info ax88179_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1763,7 +1763,7 @@ static const struct driver_info ax88178a_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1776,7 +1776,7 @@ static const struct driver_info cypress_GX3_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1789,7 +1789,7 @@ static const struct driver_info dlink_dub1312_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1802,7 +1802,7 @@ static const struct driver_info sitecom_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1815,7 +1815,7 @@ static const struct driver_info samsung_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1828,7 +1828,7 @@ static const struct driver_info lenovo_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1841,7 +1841,7 @@ static const struct driver_info belkin_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1854,7 +1854,7 @@ static const struct driver_info toshiba_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1867,7 +1867,7 @@ static const struct driver_info mct_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1880,7 +1880,7 @@ static const struct driver_info at_umc2000_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1893,7 +1893,7 @@ static const struct driver_info at_umc200_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
@@ -1906,7 +1906,7 @@ static const struct driver_info at_umc2000sp_info = {
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 24165daee3c8..3ab2cfd254a4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3285,8 +3285,8 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
* we have no UUID set
*/
if (uuid_is_null(&ids->uuid)) {
- printk_ratelimited(KERN_WARNING
- "No UUID available providing old NGUID\n");
+ dev_warn_ratelimited(dev,
+ "No UUID available providing old NGUID\n");
return sysfs_emit(buf, "%pU\n", ids->nguid);
}
return sysfs_emit(buf, "%pU\n", &ids->uuid);
@@ -3863,6 +3863,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
if (ret) {
dev_err(ctrl->device,
"globally duplicate IDs for nsid %d\n", nsid);
+ nvme_print_device_info(ctrl);
return ret;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9b72b6ecf33c..0da94b233fed 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -503,6 +503,7 @@ struct nvme_ctrl_ops {
void (*submit_async_event)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
+ void (*print_device_info)(struct nvme_ctrl *ctrl);
};
/*
@@ -548,6 +549,33 @@ static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags,
return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id));
}
+/*
+ * Return the length of the string without the space padding
+ */
+static inline int nvme_strlen(char *s, int len)
+{
+ while (s[len - 1] == ' ')
+ len--;
+ return len;
+}
+
+static inline void nvme_print_device_info(struct nvme_ctrl *ctrl)
+{
+ struct nvme_subsystem *subsys = ctrl->subsys;
+
+ if (ctrl->ops->print_device_info) {
+ ctrl->ops->print_device_info(ctrl);
+ return;
+ }
+
+ dev_err(ctrl->device,
+ "VID:%04x model:%.*s firmware:%.*s\n", subsys->vendor_id,
+ nvme_strlen(subsys->model, sizeof(subsys->model)),
+ subsys->model, nvme_strlen(subsys->firmware_rev,
+ sizeof(subsys->firmware_rev)),
+ subsys->firmware_rev);
+}
+
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
const char *dev_name);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 48f4f6eb877b..c7012e85d035 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1334,6 +1334,14 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
dev_warn(dev->ctrl.device,
"controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
csts, result);
+
+ if (csts != ~0)
+ return;
+
+ dev_warn(dev->ctrl.device,
+ "Does your device have a faulty power saving mode enabled?\n");
+ dev_warn(dev->ctrl.device,
+ "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off\" and report a bug\n");
}
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
@@ -2976,6 +2984,21 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
return snprintf(buf, size, "%s\n", dev_name(&pdev->dev));
}
+
+static void nvme_pci_print_device_info(struct nvme_ctrl *ctrl)
+{
+ struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
+ struct nvme_subsystem *subsys = ctrl->subsys;
+
+ dev_err(ctrl->device,
+ "VID:DID %04x:%04x model:%.*s firmware:%.*s\n",
+ pdev->vendor, pdev->device,
+ nvme_strlen(subsys->model, sizeof(subsys->model)),
+ subsys->model, nvme_strlen(subsys->firmware_rev,
+ sizeof(subsys->firmware_rev)),
+ subsys->firmware_rev);
+}
+
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
@@ -2987,6 +3010,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
.get_address = nvme_pci_get_address,
+ .print_device_info = nvme_pci_print_device_info,
};
static int nvme_dev_map(struct nvme_dev *dev)
@@ -3421,7 +3445,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */
- .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
+ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
+ NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
NVME_QUIRK_NO_NS_DESC_LIST, },
@@ -3437,6 +3462,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
NVME_QUIRK_DISABLE_WRITE_ZEROES|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
@@ -3449,10 +3476,20 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1d97, 0x2263), /* SPCC */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x144d, 0xa80b), /* Samsung PM9B1 256G and 512G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x1cc4, 0x6303), /* UMIS RPJTJ512MGE1QDY 512G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x1cc4, 0x6302), /* UMIS RPJTJ256MGE1QDY 256G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
@@ -3463,6 +3500,10 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
diff --git a/drivers/staging/olpc_dcon/Kconfig b/drivers/staging/olpc_dcon/Kconfig
index d1a0dea09ef0..d0ba34cc32f7 100644
--- a/drivers/staging/olpc_dcon/Kconfig
+++ b/drivers/staging/olpc_dcon/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
config FB_OLPC_DCON
tristate "One Laptop Per Child Display CONtroller support"
- depends on OLPC && FB
+ depends on OLPC && FB && BROKEN
depends on I2C
depends on GPIO_CS5535 && ACPI
select BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c
index 3d8e9dea7651..7135d89caac1 100644
--- a/drivers/staging/r8188eu/core/rtw_xmit.c
+++ b/drivers/staging/r8188eu/core/rtw_xmit.c
@@ -178,8 +178,7 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
pxmitpriv->free_xmit_extbuf_cnt = num_xmit_extbuf;
- res = rtw_alloc_hwxmits(padapter);
- if (res) {
+ if (rtw_alloc_hwxmits(padapter)) {
res = _FAIL;
goto exit;
}
@@ -1483,19 +1482,10 @@ int rtw_alloc_hwxmits(struct adapter *padapter)
hwxmits = pxmitpriv->hwxmits;
- if (pxmitpriv->hwxmit_entry == 5) {
- hwxmits[0] .sta_queue = &pxmitpriv->bm_pending;
- hwxmits[1] .sta_queue = &pxmitpriv->vo_pending;
- hwxmits[2] .sta_queue = &pxmitpriv->vi_pending;
- hwxmits[3] .sta_queue = &pxmitpriv->bk_pending;
- hwxmits[4] .sta_queue = &pxmitpriv->be_pending;
- } else if (pxmitpriv->hwxmit_entry == 4) {
- hwxmits[0] .sta_queue = &pxmitpriv->vo_pending;
- hwxmits[1] .sta_queue = &pxmitpriv->vi_pending;
- hwxmits[2] .sta_queue = &pxmitpriv->be_pending;
- hwxmits[3] .sta_queue = &pxmitpriv->bk_pending;
- } else {
- }
+ hwxmits[0].sta_queue = &pxmitpriv->vo_pending;
+ hwxmits[1].sta_queue = &pxmitpriv->vi_pending;
+ hwxmits[2].sta_queue = &pxmitpriv->be_pending;
+ hwxmits[3].sta_queue = &pxmitpriv->bk_pending;
return 0;
}
diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
index 1b09462ca908..8dd280e2739a 100644
--- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
@@ -403,7 +403,7 @@ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param,
if (wep_key_len > 0) {
wep_key_len = wep_key_len <= 5 ? 5 : 13;
- wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, KeyMaterial);
+ wep_total_len = wep_key_len + sizeof(*pwep);
pwep = kzalloc(wep_total_len, GFP_KERNEL);
if (!pwep)
goto exit;
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
index ece97e37ac91..30374a820496 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
@@ -90,7 +90,8 @@ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param,
if (wep_key_len > 0) {
wep_key_len = wep_key_len <= 5 ? 5 : 13;
wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, key_material);
- pwep = kzalloc(wep_total_len, GFP_KERNEL);
+ /* Allocate a full structure to avoid potentially running off the end. */
+ pwep = kzalloc(sizeof(*pwep), GFP_KERNEL);
if (!pwep) {
ret = -ENOMEM;
goto exit;
@@ -582,7 +583,8 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
if (wep_key_len > 0) {
wep_key_len = wep_key_len <= 5 ? 5 : 13;
wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, key_material);
- pwep = kzalloc(wep_total_len, GFP_KERNEL);
+ /* Allocate a full structure to avoid potentially running off the end. */
+ pwep = kzalloc(sizeof(*pwep), GFP_KERNEL);
if (!pwep)
goto exit;
diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c
index c7968aecd870..d02de3f0326f 100644
--- a/drivers/tty/goldfish.c
+++ b/drivers/tty/goldfish.c
@@ -426,7 +426,7 @@ static int goldfish_tty_remove(struct platform_device *pdev)
tty_unregister_device(goldfish_tty_driver, qtty->console.index);
iounmap(qtty->base);
qtty->base = NULL;
- free_irq(qtty->irq, pdev);
+ free_irq(qtty->irq, qtty);
tty_port_destroy(&qtty->port);
goldfish_tty_current_line_count--;
if (goldfish_tty_current_line_count == 0)
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 137eebdcfda9..fd4d24f61c46 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -455,7 +455,7 @@ static void gsm_hex_dump_bytes(const char *fname, const u8 *data,
return;
}
- prefix = kasprintf(GFP_KERNEL, "%s: ", fname);
+ prefix = kasprintf(GFP_ATOMIC, "%s: ", fname);
if (!prefix)
return;
print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, 16, 1, data, len,
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 78b6dedc43e6..8f32fe9e149e 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1517,6 +1517,8 @@ static inline void __stop_tx(struct uart_8250_port *p)
unsigned char lsr = serial_in(p, UART_LSR);
u64 stop_delay = 0;
+ p->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
+
if (!(lsr & UART_LSR_THRE))
return;
/*
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 4733a233bd0c..f8f950641ad9 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1306,6 +1306,7 @@ static const struct uart_ops qcom_geni_console_pops = {
.stop_tx = qcom_geni_serial_stop_tx,
.start_tx = qcom_geni_serial_start_tx,
.stop_rx = qcom_geni_serial_stop_rx,
+ .start_rx = qcom_geni_serial_start_rx,
.set_termios = qcom_geni_serial_set_termios,
.startup = qcom_geni_serial_startup,
.request_port = qcom_geni_serial_request_port,
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 9a85b41caa0a..338ebadfd44b 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2214,11 +2214,12 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport)
/*
* Nothing to do if the console is not suspending
* except stop_rx to prevent any asynchronous data
- * over RX line. Re-start_rx, when required, is
- * done by set_termios in resume sequence
+ * over RX line. However ensure that we will be
+ * able to Re-start_rx later.
*/
if (!console_suspend_enabled && uart_console(uport)) {
- uport->ops->stop_rx(uport);
+ if (uport->ops->start_rx)
+ uport->ops->stop_rx(uport);
goto unlock;
}
@@ -2310,6 +2311,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
if (console_suspend_enabled)
uart_change_pm(state, UART_PM_STATE_ON);
uport->ops->set_termios(uport, &termios, NULL);
+ if (!console_suspend_enabled && uport->ops->start_rx)
+ uport->ops->start_rx(uport);
if (console_suspend_enabled)
console_start(uport->cons);
}
diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c
index e45c3d6e1536..794e413800ae 100644
--- a/drivers/usb/cdns3/cdnsp-ring.c
+++ b/drivers/usb/cdns3/cdnsp-ring.c
@@ -1941,13 +1941,16 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
}
if (enqd_len + trb_buff_len >= full_len) {
- if (need_zero_pkt)
- zero_len_trb = !zero_len_trb;
-
- field &= ~TRB_CHAIN;
- field |= TRB_IOC;
- more_trbs_coming = false;
- preq->td.last_trb = ring->enqueue;
+ if (need_zero_pkt && !zero_len_trb) {
+ zero_len_trb = true;
+ } else {
+ zero_len_trb = false;
+ field &= ~TRB_CHAIN;
+ field |= TRB_IOC;
+ more_trbs_coming = false;
+ need_zero_pkt = false;
+ preq->td.last_trb = ring->enqueue;
+ }
}
/* Only set interrupt on short packet for OUT endpoints. */
@@ -1962,7 +1965,7 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) |
TRB_INTR_TARGET(0);
- cdnsp_queue_trb(pdev, ring, more_trbs_coming | zero_len_trb,
+ cdnsp_queue_trb(pdev, ring, more_trbs_coming,
lower_32_bits(send_addr),
upper_32_bits(send_addr),
length_field,
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index f63a27d11fac..3f107a06817d 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -5190,7 +5190,7 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
retval = -EINVAL;
- goto error1;
+ goto error2;
}
hcd->rsrc_start = res->start;
hcd->rsrc_len = resource_size(res);
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index e027c0420dc3..573421984948 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1644,13 +1644,8 @@ static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc)
* This device property is for kernel internal use only and
* is expected to be set by the glue code.
*/
- if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) {
- edev = extcon_get_extcon_dev(name);
- if (!edev)
- return ERR_PTR(-EPROBE_DEFER);
-
- return edev;
- }
+ if (device_property_read_string(dev, "linux,extcon-name", &name) == 0)
+ return extcon_get_extcon_dev(name);
/*
* Try to get an extcon device from the USB PHY controller's "port"
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index ba51de7dd760..6b018048fe2e 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -127,6 +127,7 @@ static const struct property_entry dwc3_pci_intel_phy_charger_detect_properties[
PROPERTY_ENTRY_STRING("dr_mode", "peripheral"),
PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"),
PROPERTY_ENTRY_BOOL("linux,phy_charger_detect"),
+ PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"),
{}
};
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 00427d108ab9..8716bece1072 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2976,6 +2976,7 @@ static int dwc3_gadget_init_in_endpoint(struct dwc3_ep *dep)
struct dwc3 *dwc = dep->dwc;
u32 mdwidth;
int size;
+ int maxpacket;
mdwidth = dwc3_mdwidth(dwc);
@@ -2988,21 +2989,24 @@ static int dwc3_gadget_init_in_endpoint(struct dwc3_ep *dep)
else
size = DWC31_GTXFIFOSIZ_TXFDEP(size);
- /* FIFO Depth is in MDWDITH bytes. Multiply */
- size *= mdwidth;
-
/*
- * To meet performance requirement, a minimum TxFIFO size of 3x
- * MaxPacketSize is recommended for endpoints that support burst and a
- * minimum TxFIFO size of 2x MaxPacketSize for endpoints that don't
- * support burst. Use those numbers and we can calculate the max packet
- * limit as below.
+ * maxpacket size is determined as part of the following, after assuming
+ * a mult value of one maxpacket:
+ * DWC3 revision 280A and prior:
+ * fifo_size = mult * (max_packet / mdwidth) + 1;
+ * maxpacket = mdwidth * (fifo_size - 1);
+ *
+ * DWC3 revision 290A and onwards:
+ * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
+ * maxpacket = mdwidth * ((fifo_size - 1) - 1) - mdwidth;
*/
- if (dwc->maximum_speed >= USB_SPEED_SUPER)
- size /= 3;
+ if (DWC3_VER_IS_PRIOR(DWC3, 290A))
+ maxpacket = mdwidth * (size - 1);
else
- size /= 2;
+ maxpacket = mdwidth * ((size - 1) - 1) - mdwidth;
+ /* Functionally, space for one max packet is sufficient */
+ size = min_t(int, maxpacket, 1024);
usb_ep_set_maxpacket_limit(&dep->endpoint, size);
dep->endpoint.max_streams = 16;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 4585ee3a444a..e0fa4b186ec6 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -122,8 +122,6 @@ struct ffs_ep {
struct usb_endpoint_descriptor *descs[3];
u8 num;
-
- int status; /* P: epfile->mutex */
};
struct ffs_epfile {
@@ -227,6 +225,9 @@ struct ffs_io_data {
bool use_sg;
struct ffs_data *ffs;
+
+ int status;
+ struct completion done;
};
struct ffs_desc_helper {
@@ -707,12 +708,15 @@ static const struct file_operations ffs_ep0_operations = {
static void ffs_epfile_io_complete(struct usb_ep *_ep, struct usb_request *req)
{
+ struct ffs_io_data *io_data = req->context;
+
ENTER();
- if (req->context) {
- struct ffs_ep *ep = _ep->driver_data;
- ep->status = req->status ? req->status : req->actual;
- complete(req->context);
- }
+ if (req->status)
+ io_data->status = req->status;
+ else
+ io_data->status = req->actual;
+
+ complete(&io_data->done);
}
static ssize_t ffs_copy_to_iter(void *data, int data_len, struct iov_iter *iter)
@@ -1050,7 +1054,6 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
WARN(1, "%s: data_len == -EINVAL\n", __func__);
ret = -EINVAL;
} else if (!io_data->aio) {
- DECLARE_COMPLETION_ONSTACK(done);
bool interrupted = false;
req = ep->req;
@@ -1066,7 +1069,8 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
io_data->buf = data;
- req->context = &done;
+ init_completion(&io_data->done);
+ req->context = io_data;
req->complete = ffs_epfile_io_complete;
ret = usb_ep_queue(ep->ep, req, GFP_ATOMIC);
@@ -1075,7 +1079,12 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
spin_unlock_irq(&epfile->ffs->eps_lock);
- if (wait_for_completion_interruptible(&done)) {
+ if (wait_for_completion_interruptible(&io_data->done)) {
+ spin_lock_irq(&epfile->ffs->eps_lock);
+ if (epfile->ep != ep) {
+ ret = -ESHUTDOWN;
+ goto error_lock;
+ }
/*
* To avoid race condition with ffs_epfile_io_complete,
* dequeue the request first then check
@@ -1083,17 +1092,18 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
* condition with req->complete callback.
*/
usb_ep_dequeue(ep->ep, req);
- wait_for_completion(&done);
- interrupted = ep->status < 0;
+ spin_unlock_irq(&epfile->ffs->eps_lock);
+ wait_for_completion(&io_data->done);
+ interrupted = io_data->status < 0;
}
if (interrupted)
ret = -EINTR;
- else if (io_data->read && ep->status > 0)
- ret = __ffs_epfile_read_data(epfile, data, ep->status,
+ else if (io_data->read && io_data->status > 0)
+ ret = __ffs_epfile_read_data(epfile, data, io_data->status,
&io_data->data);
else
- ret = ep->status;
+ ret = io_data->status;
goto error_mutex;
} else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) {
ret = -ENOMEM;
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 6f5d45ef2e39..f51694f29de9 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -775,9 +775,13 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
dev->qmult = qmult;
snprintf(net->name, sizeof(net->name), "%s%%d", netname);
- if (get_ether_addr(dev_addr, addr))
+ if (get_ether_addr(dev_addr, addr)) {
+ net->addr_assign_type = NET_ADDR_RANDOM;
dev_warn(&g->dev,
"using random %s ethernet address\n", "self");
+ } else {
+ net->addr_assign_type = NET_ADDR_SET;
+ }
eth_hw_addr_set(net, addr);
if (get_ether_addr(host_addr, dev->host_mac))
dev_warn(&g->dev,
@@ -844,6 +848,10 @@ struct net_device *gether_setup_name_default(const char *netname)
eth_random_addr(dev->dev_mac);
pr_warn("using random %s ethernet address\n", "self");
+
+ /* by default we always have a random MAC address */
+ net->addr_assign_type = NET_ADDR_RANDOM;
+
eth_random_addr(dev->host_mac);
pr_warn("using random %s ethernet address\n", "host");
@@ -871,7 +879,6 @@ int gether_register_netdev(struct net_device *net)
dev = netdev_priv(net);
g = dev->gadget;
- net->addr_assign_type = NET_ADDR_RANDOM;
eth_hw_addr_set(net, dev->dev_mac);
status = register_netdev(net);
@@ -912,6 +919,7 @@ int gether_set_dev_addr(struct net_device *net, const char *dev_addr)
if (get_ether_addr(dev_addr, new_addr))
return -EINVAL;
memcpy(dev->dev_mac, new_addr, ETH_ALEN);
+ net->addr_assign_type = NET_ADDR_SET;
return 0;
}
EXPORT_SYMBOL_GPL(gether_set_dev_addr);
diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c
index 6117ae8e7242..cea10cdb83ae 100644
--- a/drivers/usb/gadget/udc/lpc32xx_udc.c
+++ b/drivers/usb/gadget/udc/lpc32xx_udc.c
@@ -3016,6 +3016,7 @@ static int lpc32xx_udc_probe(struct platform_device *pdev)
}
udc->isp1301_i2c_client = isp1301_get_client(isp1301_node);
+ of_node_put(isp1301_node);
if (!udc->isp1301_i2c_client) {
return -EPROBE_DEFER;
}
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index f0ab63138016..9ac56e9ffc64 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1107,7 +1107,6 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
{
u32 command, temp = 0;
struct usb_hcd *hcd = xhci_to_hcd(xhci);
- struct usb_hcd *secondary_hcd;
int retval = 0;
bool comp_timer_running = false;
bool pending_portevent = false;
@@ -1214,23 +1213,19 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
* first with the primary HCD, and then with the secondary HCD.
* If we don't do the same, the host will never be started.
*/
- if (!usb_hcd_is_primary_hcd(hcd))
- secondary_hcd = hcd;
- else
- secondary_hcd = xhci->shared_hcd;
-
xhci_dbg(xhci, "Initialize the xhci_hcd\n");
- retval = xhci_init(hcd->primary_hcd);
+ retval = xhci_init(hcd);
if (retval)
return retval;
comp_timer_running = true;
xhci_dbg(xhci, "Start the primary HCD\n");
- retval = xhci_run(hcd->primary_hcd);
- if (!retval && secondary_hcd) {
+ retval = xhci_run(hcd);
+ if (!retval && xhci->shared_hcd) {
xhci_dbg(xhci, "Start the secondary HCD\n");
- retval = xhci_run(secondary_hcd);
+ retval = xhci_run(xhci->shared_hcd);
}
+
hcd->state = HC_STATE_SUSPENDED;
if (xhci->shared_hcd)
xhci->shared_hcd->state = HC_STATE_SUSPENDED;
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index a7b3c15957ba..feba2a8d1233 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -166,6 +166,7 @@ static const struct usb_device_id edgeport_2port_id_table[] = {
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) },
{ }
};
@@ -204,6 +205,7 @@ static const struct usb_device_id id_table_combined[] = {
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) },
{ }
};
diff --git a/drivers/usb/serial/io_usbvend.h b/drivers/usb/serial/io_usbvend.h
index 52cbc353051f..9a6f742ad3ab 100644
--- a/drivers/usb/serial/io_usbvend.h
+++ b/drivers/usb/serial/io_usbvend.h
@@ -212,6 +212,7 @@
//
// Definitions for other product IDs
#define ION_DEVICE_ID_MT4X56USB 0x1403 // OEM device
+#define ION_DEVICE_ID_E5805A 0x1A01 // OEM device (rebranded Edgeport/4)
#define GENERATION_ID_FROM_USB_PRODUCT_ID(ProductId) \
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index e60425bbf537..ed1e50d83cca 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -432,6 +432,8 @@ static void option_instat_callback(struct urb *urb);
#define CINTERION_PRODUCT_CLS8 0x00b0
#define CINTERION_PRODUCT_MV31_MBIM 0x00b3
#define CINTERION_PRODUCT_MV31_RMNET 0x00b7
+#define CINTERION_PRODUCT_MV31_2_MBIM 0x00b8
+#define CINTERION_PRODUCT_MV31_2_RMNET 0x00b9
#define CINTERION_PRODUCT_MV32_WA 0x00f1
#define CINTERION_PRODUCT_MV32_WB 0x00f2
@@ -1979,6 +1981,10 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(3)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff),
.driver_info = RSVD(0)},
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_MBIM, 0xff),
+ .driver_info = RSVD(3)},
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_RMNET, 0xff),
+ .driver_info = RSVD(0)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff),
.driver_info = RSVD(3)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff),
diff --git a/drivers/watchdog/gxp-wdt.c b/drivers/watchdog/gxp-wdt.c
index b0b2d7a6fdde..2fd85be88278 100644
--- a/drivers/watchdog/gxp-wdt.c
+++ b/drivers/watchdog/gxp-wdt.c
@@ -172,3 +172,4 @@ module_platform_driver(gxp_wdt_driver);
MODULE_AUTHOR("Nick Hawkins <nick.hawkins@hpe.com>");
MODULE_AUTHOR("Jean-Marie Verdun <verdun@hpe.com>");
MODULE_DESCRIPTION("Driver for GXP watchdog timer");
+MODULE_LICENSE("GPL");
diff --git a/fs/attr.c b/fs/attr.c
index 66899b6e9bd8..dbe996b0dedf 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -61,9 +61,15 @@ static bool chgrp_ok(struct user_namespace *mnt_userns,
const struct inode *inode, kgid_t gid)
{
kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
- if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&
- (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
- return true;
+ if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
+ kgid_t mapped_gid;
+
+ if (gid_eq(gid, inode->i_gid))
+ return true;
+ mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid);
+ if (in_group_p(mapped_gid))
+ return true;
+ }
if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
if (gid_eq(kgid, INVALID_GID) &&
@@ -123,12 +129,20 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
+ kgid_t mapped_gid;
+
if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
+
+ if (ia_valid & ATTR_GID)
+ mapped_gid = mapped_kgid_fs(mnt_userns,
+ i_user_ns(inode), attr->ia_gid);
+ else
+ mapped_gid = i_gid_into_mnt(mnt_userns, inode);
+
/* Also check the setgid bit! */
- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
- i_gid_into_mnt(mnt_userns, inode)) &&
- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ if (!in_group_p(mapped_gid) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2c2f179b6977..43de293cef56 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -672,17 +672,14 @@ int ext2_empty_dir (struct inode * inode)
void *page_addr = NULL;
struct page *page = NULL;
unsigned long i, npages = dir_pages(inode);
- int dir_has_error = 0;
for (i = 0; i < npages; i++) {
char *kaddr;
ext2_dirent * de;
- page = ext2_get_page(inode, i, dir_has_error, &page_addr);
+ page = ext2_get_page(inode, i, 0, &page_addr);
- if (IS_ERR(page)) {
- dir_has_error = 1;
- continue;
- }
+ if (IS_ERR(page))
+ goto not_empty;
kaddr = page_addr;
de = (ext2_dirent *)kaddr;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3aab4182fd89..d3ee4fc532fa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -298,8 +298,8 @@ struct io_buffer_list {
/* below is for ring provided buffers */
__u16 buf_nr_pages;
__u16 nr_entries;
- __u32 head;
- __u32 mask;
+ __u16 head;
+ __u16 mask;
};
struct io_buffer {
@@ -576,7 +576,6 @@ struct io_close {
struct file *file;
int fd;
u32 file_slot;
- u32 flags;
};
struct io_timeout_data {
@@ -784,12 +783,6 @@ struct io_msg {
u32 len;
};
-struct io_nop {
- struct file *file;
- u64 extra1;
- u64 extra2;
-};
-
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -851,6 +844,7 @@ enum {
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
REQ_F_PARTIAL_IO_BIT,
+ REQ_F_CQE32_INIT_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
@@ -920,6 +914,8 @@ enum {
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
/* fast poll multishot mode */
REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
+ /* ->extra1 and ->extra2 are initialised */
+ REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT),
};
struct async_poll {
@@ -994,7 +990,6 @@ struct io_kiocb {
struct io_msg msg;
struct io_xattr xattr;
struct io_socket sock;
- struct io_nop nop;
struct io_uring_cmd uring_cmd;
};
@@ -1121,7 +1116,6 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_NOP] = {
.audit_skip = 1,
.iopoll = 1,
- .buffer_select = 1,
},
[IORING_OP_READV] = {
.needs_file = 1,
@@ -1729,9 +1723,16 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
return;
- /* don't recycle if we already did IO to this buffer */
- if (req->flags & REQ_F_PARTIAL_IO)
+ /*
+ * For legacy provided buffer mode, don't recycle if we already did
+ * IO to this buffer. For ring-mapped provided buffer mode, we should
+ * increment ring->head to explicitly monopolize the buffer to avoid
+ * multiple use.
+ */
+ if ((req->flags & REQ_F_BUFFER_SELECTED) &&
+ (req->flags & REQ_F_PARTIAL_IO))
return;
+
/*
* We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
* the flag and hence ensure that bl->head doesn't get incremented.
@@ -1739,8 +1740,13 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
*/
if (req->flags & REQ_F_BUFFER_RING) {
if (req->buf_list) {
- req->buf_index = req->buf_list->bgid;
- req->flags &= ~REQ_F_BUFFER_RING;
+ if (req->flags & REQ_F_PARTIAL_IO) {
+ req->buf_list->head++;
+ req->buf_list = NULL;
+ } else {
+ req->buf_index = req->buf_list->bgid;
+ req->flags &= ~REQ_F_BUFFER_RING;
+ }
}
return;
}
@@ -2441,94 +2447,66 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
return true;
}
-static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
+static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
{
struct io_uring_cqe *cqe;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, user_data);
- WRITE_ONCE(cqe->res, res);
- WRITE_ONCE(cqe->flags, cflags);
- return true;
- }
- return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
-}
+ if (!(ctx->flags & IORING_SETUP_CQE32)) {
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, 0, 0);
-static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(*cqe));
+ return true;
+ }
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ 0, 0);
+ } else {
+ u64 extra1 = 0, extra2 = 0;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(*cqe));
- return true;
- }
- return io_cqring_event_overflow(ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
-}
+ if (req->flags & REQ_F_CQE32_INIT) {
+ extra1 = req->extra1;
+ extra2 = req->extra2;
+ }
-static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
- u64 extra1 = req->extra1;
- u64 extra2 = req->extra2;
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, extra1, extra2);
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, extra1, extra2);
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
+ WRITE_ONCE(cqe->big_cqe[0], extra1);
+ WRITE_ONCE(cqe->big_cqe[1], extra2);
+ return true;
+ }
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
- cqe->big_cqe[0] = extra1;
- cqe->big_cqe[1] = extra2;
- return true;
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ extra1, extra2);
}
-
- return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res,
- req->cqe.flags, extra1, extra2);
-}
-
-static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
-{
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0);
- return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
}
-static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags,
- u64 extra1, u64 extra2)
+static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
+ s32 res, u32 cflags)
{
- struct io_ring_ctx *ctx = req->ctx;
struct io_uring_cqe *cqe;
- if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32)))
- return;
- if (req->flags & REQ_F_CQE_SKIP)
- return;
-
- trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags,
- extra1, extra2);
+ ctx->cq_extra++;
+ trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
/*
* If we can't get a cq entry, userspace overflowed the
@@ -2537,23 +2515,17 @@ static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags
*/
cqe = io_get_cqe(ctx);
if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, req->cqe.user_data);
+ WRITE_ONCE(cqe->user_data, user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, cflags);
- WRITE_ONCE(cqe->big_cqe[0], extra1);
- WRITE_ONCE(cqe->big_cqe[1], extra2);
- return;
- }
- io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2);
-}
-
-static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
-{
- ctx->cq_extra++;
- trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
- return __io_fill_cqe(ctx, user_data, res, cflags);
+ if (ctx->flags & IORING_SETUP_CQE32) {
+ WRITE_ONCE(cqe->big_cqe[0], 0);
+ WRITE_ONCE(cqe->big_cqe[1], 0);
+ }
+ return true;
+ }
+ return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
}
static void __io_req_complete_put(struct io_kiocb *req)
@@ -2590,16 +2562,11 @@ static void __io_req_complete_put(struct io_kiocb *req)
static void __io_req_complete_post(struct io_kiocb *req, s32 res,
u32 cflags)
{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe_req(req, res, cflags);
- __io_req_complete_put(req);
-}
-
-static void __io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe32_req(req, res, cflags, extra1, extra2);
+ if (!(req->flags & REQ_F_CQE_SKIP)) {
+ req->cqe.res = res;
+ req->cqe.flags = cflags;
+ __io_fill_cqe_req(req->ctx, req);
+ }
__io_req_complete_put(req);
}
@@ -2614,18 +2581,6 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags)
io_cqring_ev_posted(ctx);
}
-static void io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- struct io_ring_ctx *ctx = req->ctx;
-
- spin_lock(&ctx->completion_lock);
- __io_req_complete_post32(req, res, cflags, extra1, extra2);
- io_commit_cqring(ctx);
- spin_unlock(&ctx->completion_lock);
- io_cqring_ev_posted(ctx);
-}
-
static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
u32 cflags)
{
@@ -2643,19 +2598,6 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
io_req_complete_post(req, res, cflags);
}
-static inline void __io_req_complete32(struct io_kiocb *req,
- unsigned int issue_flags, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
- io_req_complete_state(req, res, cflags);
- req->extra1 = extra1;
- req->extra2 = extra2;
- } else {
- io_req_complete_post32(req, res, cflags, extra1, extra2);
- }
-}
-
static inline void io_req_complete(struct io_kiocb *req, s32 res)
{
if (res < 0)
@@ -3202,12 +3144,8 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
- if (!(req->flags & REQ_F_CQE_SKIP)) {
- if (!(ctx->flags & IORING_SETUP_CQE32))
- __io_fill_cqe_req_filled(ctx, req);
- else
- __io_fill_cqe32_req_filled(ctx, req);
- }
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe_req(ctx, req);
}
io_commit_cqring(ctx);
@@ -3326,7 +3264,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
nr_events++;
if (unlikely(req->flags & REQ_F_CQE_SKIP))
continue;
- __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
+
+ req->cqe.flags = io_put_kbuf(req, 0);
+ __io_fill_cqe_req(req->ctx, req);
}
if (unlikely(!nr_events))
@@ -3677,6 +3617,20 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int ret;
kiocb->ki_pos = READ_ONCE(sqe->off);
+ /* used for fixed read/write too - just read unconditionally */
+ req->buf_index = READ_ONCE(sqe->buf_index);
+
+ if (req->opcode == IORING_OP_READ_FIXED ||
+ req->opcode == IORING_OP_WRITE_FIXED) {
+ struct io_ring_ctx *ctx = req->ctx;
+ u16 index;
+
+ if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+ return -EFAULT;
+ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
+ req->imu = ctx->user_bufs[index];
+ io_req_set_rsrc_node(req, ctx, 0);
+ }
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
@@ -3689,12 +3643,9 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
kiocb->ki_ioprio = get_current_ioprio();
}
- req->imu = NULL;
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
req->rw.flags = READ_ONCE(sqe->rw_flags);
- /* used for fixed read/write too - just read unconditionally */
- req->buf_index = READ_ONCE(sqe->buf_index);
return 0;
}
@@ -3826,20 +3777,9 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter
static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
unsigned int issue_flags)
{
- struct io_mapped_ubuf *imu = req->imu;
- u16 index, buf_index = req->buf_index;
-
- if (likely(!imu)) {
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(buf_index >= ctx->nr_user_bufs))
- return -EFAULT;
- io_req_set_rsrc_node(req, ctx, issue_flags);
- index = array_index_nospec(buf_index, ctx->nr_user_bufs);
- imu = READ_ONCE(ctx->user_bufs[index]);
- req->imu = imu;
- }
- return __io_import_fixed(req, rw, iter, imu);
+ if (WARN_ON_ONCE(!req->imu))
+ return -EFAULT;
+ return __io_import_fixed(req, rw, iter, req->imu);
}
static int io_buffer_add_list(struct io_ring_ctx *ctx,
@@ -3876,19 +3816,17 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
{
struct io_uring_buf_ring *br = bl->buf_ring;
struct io_uring_buf *buf;
- __u32 head = bl->head;
+ __u16 head = bl->head;
- if (unlikely(smp_load_acquire(&br->tail) == head)) {
- io_ring_submit_unlock(req->ctx, issue_flags);
+ if (unlikely(smp_load_acquire(&br->tail) == head))
return NULL;
- }
head &= bl->mask;
if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
buf = &br->bufs[head];
} else {
int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
- int index = head / IO_BUFFER_LIST_BUF_PER_PAGE - 1;
+ int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
buf = page_address(bl->buf_pages[index]);
buf += off;
}
@@ -3898,7 +3836,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
req->buf_list = bl;
req->buf_index = buf->bid;
- if (issue_flags & IO_URING_F_UNLOCKED) {
+ if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) {
/*
* If we came in unlocked, we have no choice but to consume the
* buffer here. This does mean it'll be pinned until the IO
@@ -5079,10 +5017,18 @@ void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
req->uring_cmd.task_work_cb = task_work_cb;
req->io_task_work.func = io_uring_cmd_work;
- io_req_task_prio_work_add(req);
+ io_req_task_work_add(req);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
+ u64 extra1, u64 extra2)
+{
+ req->extra1 = extra1;
+ req->extra2 = extra2;
+ req->flags |= REQ_F_CQE32_INIT;
+}
+
/*
* Called by consumers of io_uring_cmd, if they originally returned
* -EIOCBQUEUED upon receiving the command.
@@ -5093,10 +5039,10 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
if (ret < 0)
req_set_fail(req);
+
if (req->ctx->flags & IORING_SETUP_CQE32)
- __io_req_complete32(req, 0, ret, 0, res2, 0);
- else
- io_req_complete(req, ret);
+ io_req_set_cqe32_extra(req, res2, 0);
+ io_req_complete(req, ret);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
@@ -5258,14 +5204,6 @@ done:
static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- /*
- * If the ring is setup with CQE32, relay back addr/addr
- */
- if (req->ctx->flags & IORING_SETUP_CQE32) {
- req->nop.extra1 = READ_ONCE(sqe->addr);
- req->nop.extra2 = READ_ONCE(sqe->addr2);
- }
-
return 0;
}
@@ -5274,23 +5212,7 @@ static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
*/
static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
{
- unsigned int cflags;
- void __user *buf;
-
- if (req->flags & REQ_F_BUFFER_SELECT) {
- size_t len = 1;
-
- buf = io_buffer_select(req, &len, issue_flags);
- if (!buf)
- return -ENOBUFS;
- }
-
- cflags = io_put_kbuf(req, issue_flags);
- if (!(req->ctx->flags & IORING_SETUP_CQE32))
- __io_req_complete(req, issue_flags, 0, cflags);
- else
- __io_req_complete32(req, issue_flags, 0, cflags,
- req->nop.extra1, req->nop.extra2);
+ __io_req_complete(req, issue_flags, 0, 0);
return 0;
}
@@ -5988,18 +5910,14 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->off || sqe->addr || sqe->len || sqe->buf_index)
+ if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
req->close.file_slot = READ_ONCE(sqe->file_index);
- req->close.flags = READ_ONCE(sqe->close_flags);
- if (req->close.flags & ~IORING_CLOSE_FD_AND_FILE_SLOT)
- return -EINVAL;
- if (!(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT) &&
- req->close.file_slot && req->close.fd)
+ if (req->close.file_slot && req->close.fd)
return -EINVAL;
return 0;
@@ -6015,8 +5933,7 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
if (req->close.file_slot) {
ret = io_close_fixed(req, issue_flags);
- if (ret || !(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT))
- goto err;
+ goto err;
}
spin_lock(&files->file_lock);
@@ -8063,8 +7980,8 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req,
if (ret < 0)
break;
if (copy_to_user(&fds[done], &ret, sizeof(ret))) {
- ret = -EFAULT;
__io_close_fixed(req, issue_flags, ret);
+ ret = -EFAULT;
break;
}
}
@@ -8773,6 +8690,7 @@ static void io_queue_async(struct io_kiocb *req, int ret)
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
+ io_kbuf_recycle(req, 0);
io_queue_iowq(req, NULL);
break;
case IO_APOLL_OK:
@@ -9788,11 +9706,19 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_files;
int ret;
if (!ctx->file_data)
return -ENXIO;
+
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_files = 0;
ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+ ctx->nr_user_files = nr;
if (!ret)
__io_sqe_files_unregister(ctx);
return ret;
@@ -10690,12 +10616,19 @@ static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_bufs;
int ret;
if (!ctx->buf_data)
return -ENXIO;
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_bufs = 0;
ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
+ ctx->nr_user_bufs = nr;
if (!ret)
__io_sqe_buffers_unregister(ctx);
return ret;
@@ -13002,6 +12935,10 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (!is_power_of_2(reg.ring_entries))
return -EINVAL;
+ /* cannot disambiguate full vs empty due to head/tail size */
+ if (reg.ring_entries >= 65536)
+ return -EINVAL;
+
if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
int ret = io_init_bl_list(ctx);
if (ret)
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 0777725085df..10b1990bc1f6 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -1022,6 +1022,7 @@ void drm_state_dump(struct drm_device *dev, struct drm_printer *p);
for ((__i) = 0; \
(__i) < (__state)->num_private_objs && \
((obj) = (__state)->private_objs[__i].ptr, \
+ (void)(obj) /* Only to avoid unused-but-set-variable warning */, \
(new_obj_state) = (__state)->private_objs[__i].new_state, 1); \
(__i)++)
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 441653693970..ca89a48c2460 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -311,12 +311,12 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
}
void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
-void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res);
-void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res);
void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
+void ttm_resource_add_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo);
+void ttm_resource_del_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo);
void ttm_resource_move_to_lru_tail(struct ttm_resource *res);
void ttm_resource_init(struct ttm_buffer_object *bo,
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2bd073fa6bb5..d452071db572 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -119,6 +119,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
extern struct backing_dev_info noop_backing_dev_info;
+int bdi_init(struct backing_dev_info *bdi);
+
/**
* writeback_in_progress - determine whether there is writeback in progress
* @wb: bdi_writeback of interest
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 608d577734c2..bb6e3c31b3b7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -575,6 +575,7 @@ struct request_queue {
#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */
#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
+#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
@@ -616,6 +617,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only)
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
+#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
@@ -1006,8 +1008,6 @@ void disk_set_independent_access_ranges(struct gendisk *disk,
*/
/* Supports zoned block devices sequential write constraint */
#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0)
-/* Supports scheduling on multiple hardware queues */
-#define ELEVATOR_F_MQ_AWARE (1U << 1)
extern void blk_queue_required_elevator_features(struct request_queue *q,
unsigned int features);
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 54dc2f9a2d56..2c7477354744 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -65,6 +65,9 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
extern ssize_t cpu_show_itlb_multihit(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 097cdd644665..1773e5df8e65 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -304,7 +304,7 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode)
/**
* netfs_inode_init - Initialise a netfslib inode context
- * @inode: The netfs inode to initialise
+ * @ctx: The netfs inode to initialise
* @ops: The netfs's operations list
*
* Initialise the netfs library context struct. This is expected to follow on
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index cbd5070bc87f..657a0fc68a3f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -45,6 +45,7 @@ struct uart_ops {
void (*unthrottle)(struct uart_port *);
void (*send_xchar)(struct uart_port *, char ch);
void (*stop_rx)(struct uart_port *);
+ void (*start_rx)(struct uart_port *);
void (*enable_ms)(struct uart_port *);
void (*break_ctl)(struct uart_port *, int ctl);
int (*startup)(struct uart_port *);
diff --git a/include/linux/visorbus.h b/include/linux/visorbus.h
deleted file mode 100644
index 0d8bd6769b13..000000000000
--- a/include/linux/visorbus.h
+++ /dev/null
@@ -1,344 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2010 - 2013 UNISYS CORPORATION
- * All rights reserved.
- */
-
-/*
- * This header file is to be included by other kernel mode components that
- * implement a particular kind of visor_device. Each of these other kernel
- * mode components is called a visor device driver. Refer to visortemplate
- * for a minimal sample visor device driver.
- *
- * There should be nothing in this file that is private to the visorbus
- * bus implementation itself.
- */
-
-#ifndef __VISORBUS_H__
-#define __VISORBUS_H__
-
-#include <linux/device.h>
-
-#define VISOR_CHANNEL_SIGNATURE ('L' << 24 | 'N' << 16 | 'C' << 8 | 'E')
-
-/*
- * enum channel_serverstate
- * @CHANNELSRV_UNINITIALIZED: Channel is in an undefined state.
- * @CHANNELSRV_READY: Channel has been initialized by server.
- */
-enum channel_serverstate {
- CHANNELSRV_UNINITIALIZED = 0,
- CHANNELSRV_READY = 1
-};
-
-/*
- * enum channel_clientstate
- * @CHANNELCLI_DETACHED:
- * @CHANNELCLI_DISABLED: Client can see channel but is NOT allowed to use it
- * unless given TBD* explicit request
- * (should actually be < DETACHED).
- * @CHANNELCLI_ATTACHING: Legacy EFI client request for EFI server to attach.
- * @CHANNELCLI_ATTACHED: Idle, but client may want to use channel any time.
- * @CHANNELCLI_BUSY: Client either wants to use or is using channel.
- * @CHANNELCLI_OWNED: "No worries" state - client can access channel
- * anytime.
- */
-enum channel_clientstate {
- CHANNELCLI_DETACHED = 0,
- CHANNELCLI_DISABLED = 1,
- CHANNELCLI_ATTACHING = 2,
- CHANNELCLI_ATTACHED = 3,
- CHANNELCLI_BUSY = 4,
- CHANNELCLI_OWNED = 5
-};
-
-/*
- * Values for VISOR_CHANNEL_PROTOCOL.Features: This define exists so that
- * a guest can look at the FeatureFlags in the io channel, and configure the
- * driver to use interrupts or not based on this setting. All feature bits for
- * all channels should be defined here. The io channel feature bits are defined
- * below.
- */
-#define VISOR_DRIVER_ENABLES_INTS (0x1ULL << 1)
-#define VISOR_CHANNEL_IS_POLLING (0x1ULL << 3)
-#define VISOR_IOVM_OK_DRIVER_DISABLING_INTS (0x1ULL << 4)
-#define VISOR_DRIVER_DISABLES_INTS (0x1ULL << 5)
-#define VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING (0x1ULL << 6)
-
-/*
- * struct channel_header - Common Channel Header
- * @signature: Signature.
- * @legacy_state: DEPRECATED - being replaced by.
- * @header_size: sizeof(struct channel_header).
- * @size: Total size of this channel in bytes.
- * @features: Flags to modify behavior.
- * @chtype: Channel type: data, bus, control, etc..
- * @partition_handle: ID of guest partition.
- * @handle: Device number of this channel in client.
- * @ch_space_offset: Offset in bytes to channel specific area.
- * @version_id: Struct channel_header Version ID.
- * @partition_index: Index of guest partition.
- * @zone_uuid: Guid of Channel's zone.
- * @cli_str_offset: Offset from channel header to null-terminated
- * ClientString (0 if ClientString not present).
- * @cli_state_boot: CHANNEL_CLIENTSTATE of pre-boot EFI client of this
- * channel.
- * @cmd_state_cli: CHANNEL_COMMANDSTATE (overloaded in Windows drivers, see
- * ServerStateUp, ServerStateDown, etc).
- * @cli_state_os: CHANNEL_CLIENTSTATE of Guest OS client of this channel.
- * @ch_characteristic: CHANNEL_CHARACTERISTIC_<xxx>.
- * @cmd_state_srv: CHANNEL_COMMANDSTATE (overloaded in Windows drivers, see
- * ServerStateUp, ServerStateDown, etc).
- * @srv_state: CHANNEL_SERVERSTATE.
- * @cli_error_boot: Bits to indicate err states for boot clients, so err
- * messages can be throttled.
- * @cli_error_os: Bits to indicate err states for OS clients, so err
- * messages can be throttled.
- * @filler: Pad out to 128 byte cacheline.
- * @recover_channel: Please add all new single-byte values below here.
- */
-struct channel_header {
- u64 signature;
- u32 legacy_state;
- /* SrvState, CliStateBoot, and CliStateOS below */
- u32 header_size;
- u64 size;
- u64 features;
- guid_t chtype;
- u64 partition_handle;
- u64 handle;
- u64 ch_space_offset;
- u32 version_id;
- u32 partition_index;
- guid_t zone_guid;
- u32 cli_str_offset;
- u32 cli_state_boot;
- u32 cmd_state_cli;
- u32 cli_state_os;
- u32 ch_characteristic;
- u32 cmd_state_srv;
- u32 srv_state;
- u8 cli_error_boot;
- u8 cli_error_os;
- u8 filler[1];
- u8 recover_channel;
-} __packed;
-
-#define VISOR_CHANNEL_ENABLE_INTS (0x1ULL << 0)
-
-/*
- * struct signal_queue_header - Subheader for the Signal Type variation of the
- * Common Channel.
- * @version: SIGNAL_QUEUE_HEADER Version ID.
- * @chtype: Queue type: storage, network.
- * @size: Total size of this queue in bytes.
- * @sig_base_offset: Offset to signal queue area.
- * @features: Flags to modify behavior.
- * @num_sent: Total # of signals placed in this queue.
- * @num_overflows: Total # of inserts failed due to full queue.
- * @signal_size: Total size of a signal for this queue.
- * @max_slots: Max # of slots in queue, 1 slot is always empty.
- * @max_signals: Max # of signals in queue (MaxSignalSlots-1).
- * @head: Queue head signal #.
- * @num_received: Total # of signals removed from this queue.
- * @tail: Queue tail signal.
- * @reserved1: Reserved field.
- * @reserved2: Reserved field.
- * @client_queue:
- * @num_irq_received: Total # of Interrupts received. This is incremented by the
- * ISR in the guest windows driver.
- * @num_empty: Number of times that visor_signal_remove is called and
- * returned Empty Status.
- * @errorflags: Error bits set during SignalReinit to denote trouble with
- * client's fields.
- * @filler: Pad out to 64 byte cacheline.
- */
-struct signal_queue_header {
- /* 1st cache line */
- u32 version;
- u32 chtype;
- u64 size;
- u64 sig_base_offset;
- u64 features;
- u64 num_sent;
- u64 num_overflows;
- u32 signal_size;
- u32 max_slots;
- u32 max_signals;
- u32 head;
- /* 2nd cache line */
- u64 num_received;
- u32 tail;
- u32 reserved1;
- u64 reserved2;
- u64 client_queue;
- u64 num_irq_received;
- u64 num_empty;
- u32 errorflags;
- u8 filler[12];
-} __packed;
-
-/* VISORCHANNEL Guids */
-/* {414815ed-c58c-11da-95a9-00e08161165f} */
-#define VISOR_VHBA_CHANNEL_GUID \
- GUID_INIT(0x414815ed, 0xc58c, 0x11da, \
- 0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
-#define VISOR_VHBA_CHANNEL_GUID_STR \
- "414815ed-c58c-11da-95a9-00e08161165f"
-struct visorchipset_state {
- u32 created:1;
- u32 attached:1;
- u32 configured:1;
- u32 running:1;
- /* Remaining bits in this 32-bit word are reserved. */
-};
-
-/**
- * struct visor_device - A device type for things "plugged" into the visorbus
- * bus
- * @visorchannel: Points to the channel that the device is
- * associated with.
- * @channel_type_guid: Identifies the channel type to the bus driver.
- * @device: Device struct meant for use by the bus driver
- * only.
- * @list_all: Used by the bus driver to enumerate devices.
- * @timer: Timer fired periodically to do interrupt-type
- * activity.
- * @being_removed: Indicates that the device is being removed from
- * the bus. Private bus driver use only.
- * @visordriver_callback_lock: Used by the bus driver to lock when adding and
- * removing devices.
- * @pausing: Indicates that a change towards a paused state.
- * is in progress. Only modified by the bus driver.
- * @resuming: Indicates that a change towards a running state
- * is in progress. Only modified by the bus driver.
- * @chipset_bus_no: Private field used by the bus driver.
- * @chipset_dev_no: Private field used the bus driver.
- * @state: Used to indicate the current state of the
- * device.
- * @inst: Unique GUID for this instance of the device.
- * @name: Name of the device.
- * @pending_msg_hdr: For private use by bus driver to respond to
- * hypervisor requests.
- * @vbus_hdr_info: A pointer to header info. Private use by bus
- * driver.
- * @partition_guid: Indicates client partion id. This should be the
- * same across all visor_devices in the current
- * guest. Private use by bus driver only.
- */
-struct visor_device {
- struct visorchannel *visorchannel;
- guid_t channel_type_guid;
- /* These fields are for private use by the bus driver only. */
- struct device device;
- struct list_head list_all;
- struct timer_list timer;
- bool timer_active;
- bool being_removed;
- struct mutex visordriver_callback_lock; /* synchronize probe/remove */
- bool pausing;
- bool resuming;
- u32 chipset_bus_no;
- u32 chipset_dev_no;
- struct visorchipset_state state;
- guid_t inst;
- u8 *name;
- struct controlvm_message_header *pending_msg_hdr;
- void *vbus_hdr_info;
- guid_t partition_guid;
- struct dentry *debugfs_dir;
- struct dentry *debugfs_bus_info;
-};
-
-#define to_visor_device(x) container_of(x, struct visor_device, device)
-
-typedef void (*visorbus_state_complete_func) (struct visor_device *dev,
- int status);
-
-/*
- * This struct describes a specific visor channel, by providing its GUID, name,
- * and sizes.
- */
-struct visor_channeltype_descriptor {
- const guid_t guid;
- const char *name;
- u64 min_bytes;
- u32 version;
-};
-
-/**
- * struct visor_driver - Information provided by each visor driver when it
- * registers with the visorbus driver
- * @name: Name of the visor driver.
- * @owner: The module owner.
- * @channel_types: Types of channels handled by this driver, ending with
- * a zero GUID. Our specialized BUS.match() method knows
- * about this list, and uses it to determine whether this
- * driver will in fact handle a new device that it has
- * detected.
- * @probe: Called when a new device comes online, by our probe()
- * function specified by driver.probe() (triggered
- * ultimately by some call to driver_register(),
- * bus_add_driver(), or driver_attach()).
- * @remove: Called when a new device is removed, by our remove()
- * function specified by driver.remove() (triggered
- * ultimately by some call to device_release_driver()).
- * @channel_interrupt: Called periodically, whenever there is a possiblity
- * that "something interesting" may have happened to the
- * channel.
- * @pause: Called to initiate a change of the device's state. If
- * the return valu`e is < 0, there was an error and the
- * state transition will NOT occur. If the return value
- * is >= 0, then the state transition was INITIATED
- * successfully, and complete_func() will be called (or
- * was just called) with the final status when either the
- * state transition fails or completes successfully.
- * @resume: Behaves similar to pause.
- * @driver: Private reference to the device driver. For use by bus
- * driver only.
- */
-struct visor_driver {
- const char *name;
- struct module *owner;
- struct visor_channeltype_descriptor *channel_types;
- int (*probe)(struct visor_device *dev);
- void (*remove)(struct visor_device *dev);
- void (*channel_interrupt)(struct visor_device *dev);
- int (*pause)(struct visor_device *dev,
- visorbus_state_complete_func complete_func);
- int (*resume)(struct visor_device *dev,
- visorbus_state_complete_func complete_func);
-
- /* These fields are for private use by the bus driver only. */
- struct device_driver driver;
-};
-
-#define to_visor_driver(x) (container_of(x, struct visor_driver, driver))
-
-int visor_check_channel(struct channel_header *ch, struct device *dev,
- const guid_t *expected_uuid, char *chname,
- u64 expected_min_bytes, u32 expected_version,
- u64 expected_signature);
-
-int visorbus_register_visor_driver(struct visor_driver *drv);
-void visorbus_unregister_visor_driver(struct visor_driver *drv);
-int visorbus_read_channel(struct visor_device *dev,
- unsigned long offset, void *dest,
- unsigned long nbytes);
-int visorbus_write_channel(struct visor_device *dev,
- unsigned long offset, void *src,
- unsigned long nbytes);
-int visorbus_enable_channel_interrupts(struct visor_device *dev);
-void visorbus_disable_channel_interrupts(struct visor_device *dev);
-
-int visorchannel_signalremove(struct visorchannel *channel, u32 queue,
- void *msg);
-int visorchannel_signalinsert(struct visorchannel *channel, u32 queue,
- void *msg);
-bool visorchannel_signalempty(struct visorchannel *channel, u32 queue);
-const guid_t *visorchannel_get_guid(struct visorchannel *channel);
-
-#define BUS_ROOT_DEVICE UINT_MAX
-struct visor_device *visorbus_get_device_by_id(u32 bus_no, u32 dev_no,
- struct visor_device *from);
-#endif
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index b159c2789961..096d48aa3437 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -215,6 +215,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
void free_vm_area(struct vm_struct *area);
extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
+struct vmap_area *find_vmap_area(unsigned long addr);
static inline bool is_vm_area_hugepages(const void *addr)
{
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 077cd730ce2f..85cd695e7fd1 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -25,7 +25,6 @@
#undef INET_CSK_CLEAR_TIMERS
struct inet_bind_bucket;
-struct inet_bind2_bucket;
struct tcp_congestion_ops;
/*
@@ -58,7 +57,6 @@ struct inet_connection_sock_af_ops {
*
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
- * @icsk_bind2_hash: Bind node in the bhash2 table
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
@@ -85,7 +83,6 @@ struct inet_connection_sock {
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
- struct inet_bind2_bucket *icsk_bind2_hash;
unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index a0887b70967b..ebfa3df6f8dc 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -90,32 +90,11 @@ struct inet_bind_bucket {
struct hlist_head owners;
};
-struct inet_bind2_bucket {
- possible_net_t ib_net;
- int l3mdev;
- unsigned short port;
- union {
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr v6_rcv_saddr;
-#endif
- __be32 rcv_saddr;
- };
- /* Node in the inet2_bind_hashbucket chain */
- struct hlist_node node;
- /* List of sockets hashed to this bucket */
- struct hlist_head owners;
-};
-
static inline struct net *ib_net(struct inet_bind_bucket *ib)
{
return read_pnet(&ib->ib_net);
}
-static inline struct net *ib2_net(struct inet_bind2_bucket *ib)
-{
- return read_pnet(&ib->ib_net);
-}
-
#define inet_bind_bucket_for_each(tb, head) \
hlist_for_each_entry(tb, head, node)
@@ -124,15 +103,6 @@ struct inet_bind_hashbucket {
struct hlist_head chain;
};
-/* This is synchronized using the inet_bind_hashbucket's spinlock.
- * Instead of having separate spinlocks, the inet_bind2_hashbucket can share
- * the inet_bind_hashbucket's given that in every case where the bhash2 table
- * is useful, a lookup in the bhash table also occurs.
- */
-struct inet_bind2_hashbucket {
- struct hlist_head chain;
-};
-
/* Sockets can be hashed in established or listening table.
* We must use different 'nulls' end-of-chain value for all hash buckets :
* A socket might transition from ESTABLISH to LISTEN state without
@@ -164,12 +134,6 @@ struct inet_hashinfo {
*/
struct kmem_cache *bind_bucket_cachep;
struct inet_bind_hashbucket *bhash;
- /* The 2nd binding table hashed by port and address.
- * This is used primarily for expediting the resolution of bind
- * conflicts.
- */
- struct kmem_cache *bind2_bucket_cachep;
- struct inet_bind2_hashbucket *bhash2;
unsigned int bhash_size;
/* The 2nd listener table hashed by local port and address */
@@ -229,36 +193,6 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
-static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb,
- struct net *net,
- const unsigned short port,
- int l3mdev)
-{
- return net_eq(ib_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev;
-}
-
-struct inet_bind2_bucket *
-inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
- struct inet_bind2_hashbucket *head,
- const unsigned short port, int l3mdev,
- const struct sock *sk);
-
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
- struct inet_bind2_bucket *tb);
-
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
- const unsigned short port, int l3mdev,
- struct sock *sk,
- struct inet_bind2_hashbucket **head);
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
- struct net *net,
- const unsigned short port,
- int l3mdev,
- const struct sock *sk);
-
static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
const u32 bhash_size)
{
@@ -266,7 +200,7 @@ static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
}
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, const unsigned short snum);
+ const unsigned short snum);
/* Caller must disable local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);
diff --git a/include/net/sock.h b/include/net/sock.h
index c585ef6565d9..72ca97ccb460 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -348,7 +348,6 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
- * @sk_bind2_node: bind node in the bhash2 table
*/
struct sock {
/*
@@ -538,7 +537,6 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
- struct hlist_node sk_bind2_node;
};
enum sk_pacing {
@@ -819,16 +817,6 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_add_head(&sk->sk_bind_node, list);
}
-static inline void __sk_del_bind2_node(struct sock *sk)
-{
- __hlist_del(&sk->sk_bind2_node);
-}
-
-static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
-{
- hlist_add_head(&sk->sk_bind2_node, list);
-}
-
#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
@@ -846,8 +834,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
-#define sk_for_each_bound_bhash2(__sk, list) \
- hlist_for_each_entry(__sk, list, sk_bind2_node)
/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 776e0278f9dd..53e7dae92e42 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -47,7 +47,6 @@ struct io_uring_sqe {
__u32 unlink_flags;
__u32 hardlink_flags;
__u32 xattr_flags;
- __u32 close_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
@@ -260,11 +259,6 @@ enum io_uring_op {
#define IORING_ACCEPT_MULTISHOT (1U << 0)
/*
- * close flags, store in sqe->close_flags
- */
-#define IORING_CLOSE_FD_AND_FILE_SLOT (1U << 0)
-
-/*
* IO completion data structure (Completion Queue Entry)
*/
struct io_uring_cqe {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f3a2abd6d1a1..3a8c9d744800 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1014,10 +1014,10 @@ static void audit_reset_context(struct audit_context *ctx)
ctx->target_comm[0] = '\0';
unroll_tree_refs(ctx, NULL, 0);
WARN_ON(!list_empty(&ctx->killed_trees));
- ctx->type = 0;
audit_free_module(ctx);
ctx->fds[0] = -1;
audit_proctitle_free(ctx);
+ ctx->type = 0; /* reset last for audit_free_*() */
}
static inline struct audit_context *audit_alloc_context(enum audit_state state)
diff --git a/kernel/cfi.c b/kernel/cfi.c
index 9594cfd1cf2c..08102d19ec15 100644
--- a/kernel/cfi.c
+++ b/kernel/cfi.c
@@ -281,6 +281,8 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr)
static inline cfi_check_fn find_check_fn(unsigned long ptr)
{
cfi_check_fn fn = NULL;
+ unsigned long flags;
+ bool rcu_idle;
if (is_kernel_text(ptr))
return __cfi_check;
@@ -290,13 +292,21 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr)
* the shadow and __module_address use RCU, so we need to wake it
* up if necessary.
*/
- RCU_NONIDLE({
- if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
- fn = find_shadow_check_fn(ptr);
+ rcu_idle = !rcu_is_watching();
+ if (rcu_idle) {
+ local_irq_save(flags);
+ rcu_irq_enter();
+ }
+
+ if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
+ fn = find_shadow_check_fn(ptr);
+ if (!fn)
+ fn = find_module_check_fn(ptr);
- if (!fn)
- fn = find_module_check_fn(ptr);
- });
+ if (rcu_idle) {
+ rcu_irq_exit();
+ local_irq_restore(flags);
+ }
return fn;
}
diff --git a/lib/Kconfig b/lib/Kconfig
index 6a843639814f..eaaad4d85bf2 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -120,6 +120,9 @@ config INDIRECT_IOMEM_FALLBACK
source "lib/crypto/Kconfig"
+config LIB_MEMNEQ
+ bool
+
config CRC_CCITT
tristate "CRC-CCITT functions"
help
diff --git a/lib/Makefile b/lib/Makefile
index ea54294d73bf..f99bf61f8bbc 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -251,6 +251,7 @@ obj-$(CONFIG_DIMLIB) += dim/
obj-$(CONFIG_SIGNATURE) += digsig.o
lib-$(CONFIG_CLZ_TAB) += clz_tab.o
+lib-$(CONFIG_LIB_MEMNEQ) += memneq.o
obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o
obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 9856e291f414..2082af43d51f 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -71,6 +71,7 @@ config CRYPTO_LIB_CURVE25519
tristate "Curve25519 scalar multiplication library"
depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
+ select LIB_MEMNEQ
help
Enable the Curve25519 library interface. This interface may be
fulfilled by either the generic implementation or an arch-specific
diff --git a/crypto/memneq.c b/lib/memneq.c
index fb11608b1ec1..fb11608b1ec1 100644
--- a/crypto/memneq.c
+++ b/lib/memneq.c
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index ff60bd7d74e0..95550b8fa7fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -231,20 +231,13 @@ static __init int bdi_class_init(void)
}
postcore_initcall(bdi_class_init);
-static int bdi_init(struct backing_dev_info *bdi);
-
static int __init default_bdi_init(void)
{
- int err;
-
bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND |
WQ_SYSFS, 0);
if (!bdi_wq)
return -ENOMEM;
-
- err = bdi_init(&noop_backing_dev_info);
-
- return err;
+ return 0;
}
subsys_initcall(default_bdi_init);
@@ -781,7 +774,7 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
#endif /* CONFIG_CGROUP_WRITEBACK */
-static int bdi_init(struct backing_dev_info *bdi)
+int bdi_init(struct backing_dev_info *bdi)
{
int ret;
diff --git a/mm/usercopy.c b/mm/usercopy.c
index baeacc735b83..4e1da708699b 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -161,29 +161,27 @@ static inline void check_bogus_address(const unsigned long ptr, unsigned long n,
static inline void check_heap_object(const void *ptr, unsigned long n,
bool to_user)
{
+ uintptr_t addr = (uintptr_t)ptr;
+ unsigned long offset;
struct folio *folio;
if (is_kmap_addr(ptr)) {
- unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1);
-
- if ((unsigned long)ptr + n - 1 > page_end)
- usercopy_abort("kmap", NULL, to_user,
- offset_in_page(ptr), n);
+ offset = offset_in_page(ptr);
+ if (n > PAGE_SIZE - offset)
+ usercopy_abort("kmap", NULL, to_user, offset, n);
return;
}
if (is_vmalloc_addr(ptr)) {
- struct vm_struct *area = find_vm_area(ptr);
- unsigned long offset;
+ struct vmap_area *area = find_vmap_area(addr);
- if (!area) {
+ if (!area)
usercopy_abort("vmalloc", "no area", to_user, 0, n);
- return;
- }
- offset = ptr - area->addr;
- if (offset + n > get_vm_area_size(area))
+ if (n > area->va_end - addr) {
+ offset = addr - area->va_start;
usercopy_abort("vmalloc", NULL, to_user, offset, n);
+ }
return;
}
@@ -196,8 +194,8 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
/* Check slab allocator for flags and size. */
__check_heap_object(ptr, n, folio_slab(folio), to_user);
} else if (folio_test_large(folio)) {
- unsigned long offset = ptr - folio_address(folio);
- if (offset + n > folio_size(folio))
+ offset = ptr - folio_address(folio);
+ if (n > folio_size(folio) - offset)
usercopy_abort("page alloc", NULL, to_user, offset, n);
}
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 07db42455dd4..effd1ff6a4b4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1798,7 +1798,7 @@ static void free_unmap_vmap_area(struct vmap_area *va)
free_vmap_area_noflush(va);
}
-static struct vmap_area *find_vmap_area(unsigned long addr)
+struct vmap_area *find_vmap_area(unsigned long addr)
{
struct vmap_area *va;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 95393bb2760b..4c7030ed8d33 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1661,9 +1661,12 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ struct sk_buff_head *sk_queue;
int copied;
int err = 0;
+ int off = 0;
+ long timeo;
lock_sock(sk);
/*
@@ -1675,10 +1678,29 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
goto out;
}
- /* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags, &err);
- if (skb == NULL)
- goto out;
+ /* We need support for non-blocking reads. */
+ sk_queue = &sk->sk_receive_queue;
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
+ /* If no packet is available, release_sock(sk) and try again. */
+ if (!skb) {
+ if (err != -EAGAIN)
+ goto out;
+ release_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
+ &timeo, last)) {
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
+ &err, &last);
+ if (skb)
+ break;
+
+ if (err != -EAGAIN)
+ goto done;
+ }
+ if (!skb)
+ goto done;
+ lock_sock(sk);
+ }
if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
@@ -1725,6 +1747,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
out:
release_sock(sk);
+done:
return err;
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 2e78458900f2..eb8e128e43e8 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1120,12 +1120,6 @@ static int __init dccp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
- dccp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("dccp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
- if (!dccp_hashinfo.bind2_bucket_cachep)
- goto out_free_bind_bucket_cachep;
/*
* Size and allocate the main established and bind bucket
@@ -1156,7 +1150,7 @@ static int __init dccp_init(void)
if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind2_bucket_cachep;
+ goto out_free_bind_bucket_cachep;
}
for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
@@ -1182,23 +1176,14 @@ static int __init dccp_init(void)
goto out_free_dccp_locks;
}
- dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)
- __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
-
- if (!dccp_hashinfo.bhash2) {
- DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
- goto out_free_dccp_bhash;
- }
-
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}
rc = dccp_mib_init();
if (rc)
- goto out_free_dccp_bhash2;
+ goto out_free_dccp_bhash;
rc = dccp_ackvec_init();
if (rc)
@@ -1222,38 +1207,30 @@ out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
-out_free_dccp_bhash2:
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
-out_free_bind2_bucket_cachep:
- kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
- dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
- dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}
static void __exit dccp_fini(void)
{
- int bhash_order = get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket));
-
ccid_cleanup_builtins();
dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
+ free_pages((unsigned long)dccp_hashinfo.bhash,
+ get_order(dccp_hashinfo.bhash_size *
+ sizeof(struct inet_bind_hashbucket)));
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c0b7e6c21360..53f5f956d948 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,32 +117,6 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-static bool use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- int addr_type;
-
- if (sk->sk_family == AF_INET6) {
- addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- return addr_type != IPV6_ADDR_ANY &&
- addr_type != IPV6_ADDR_MAPPED;
- }
-#endif
- return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
-static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net,
- int port)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return ipv6_portaddr_hash(net, &nulladdr, port);
-#endif
- return ipv4_portaddr_hash(net, 0, port);
-}
-
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
@@ -156,71 +130,16 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
-static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2,
- kuid_t sk_uid, bool relax,
- bool reuseport_cb_ok, bool reuseport_ok)
-{
- int bound_dev_if2;
-
- if (sk == sk2)
- return false;
-
- bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
-
- if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
- sk->sk_bound_dev_if == bound_dev_if2) {
- if (sk->sk_reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN) {
- if (!relax || (!reuseport_ok && sk->sk_reuseport &&
- sk2->sk_reuseport && reuseport_cb_ok &&
- (sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(sk_uid, sock_i_uid(sk2)))))
- return true;
- } else if (!reuseport_ok || !sk->sk_reuseport ||
- !sk2->sk_reuseport || !reuseport_cb_ok ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(sk_uid, sock_i_uid(sk2)))) {
- return true;
- }
- }
- return false;
-}
-
-static bool check_bhash2_conflict(const struct sock *sk,
- struct inet_bind2_bucket *tb2, kuid_t sk_uid,
- bool relax, bool reuseport_cb_ok,
- bool reuseport_ok)
-{
- struct sock *sk2;
-
- sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
-
- if (bind_conflict_exist(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
- return true;
- }
- return false;
-}
-
-/* This should be called only when the corresponding inet_bind_bucket spinlock
- * is held
- */
-static int inet_csk_bind_conflict(const struct sock *sk, int port,
- struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, /* may be null */
+static int inet_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb,
bool relax, bool reuseport_ok)
{
- struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- kuid_t uid = sock_i_uid((struct sock *)sk);
- struct sock_reuseport *reuseport_cb;
- struct inet_bind2_hashbucket *head2;
- bool reuseport_cb_ok;
struct sock *sk2;
- struct net *net;
- int l3mdev;
- u32 hash;
+ bool reuseport_cb_ok;
+ bool reuse = sk->sk_reuse;
+ bool reuseport = !!sk->sk_reuseport;
+ struct sock_reuseport *reuseport_cb;
+ kuid_t uid = sock_i_uid((struct sock *)sk);
rcu_read_lock();
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
@@ -231,42 +150,40 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
- * in tb->owners and tb2->owners list belong
- * to the same net
+ * in tb->owners list belong to the same net - the
+ * one this bucket belongs to.
*/
- if (!use_bhash2_on_bind(sk)) {
- sk_for_each_bound(sk2, &tb->owners)
- if (bind_conflict_exist(sk, sk2, uid, relax,
- reuseport_cb_ok, reuseport_ok) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- return true;
+ sk_for_each_bound(sk2, &tb->owners) {
+ int bound_dev_if2;
- return false;
+ if (sk == sk2)
+ continue;
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+ if ((!sk->sk_bound_dev_if ||
+ !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2)) {
+ if (reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if ((!relax ||
+ (!reuseport_ok &&
+ reuseport && sk2->sk_reuseport &&
+ reuseport_cb_ok &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(uid, sock_i_uid(sk2))))) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ } else if (!reuseport_ok ||
+ !reuseport || !sk2->sk_reuseport ||
+ !reuseport_cb_ok ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid, sock_i_uid(sk2)))) {
+ if (inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ }
+ }
}
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- net = sock_net(sk);
-
- /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or
- * INADDR_ANY (if ipv4) socket.
- */
- hash = get_bhash2_nulladdr_hash(sk, net, port);
- head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-
- l3mdev = inet_sk_bound_l3mdev(sk);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk))
- break;
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- return false;
+ return sk2 != NULL;
}
/*
@@ -274,20 +191,16 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
* inet_bind_hashbucket lock held.
*/
static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret,
- struct inet_bind2_bucket **tb2_ret,
- struct inet_bind2_hashbucket **head2_ret, int *port_ret)
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- struct inet_bind2_hashbucket *head2;
+ int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
+ bool relax = false;
int i, low, high, attempt_half;
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
u32 remaining, offset;
- bool relax = false;
- int port = 0;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
@@ -326,12 +239,10 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
- if (!inet_csk_bind_conflict(sk, port, tb, tb2,
- relax, false))
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
+ if (!inet_csk_bind_conflict(sk, tb, relax, false))
goto success;
goto next_port;
}
@@ -361,8 +272,6 @@ next_port:
success:
*port_ret = port;
*tb_ret = tb;
- *tb2_ret = tb2;
- *head2_ret = head2;
return head;
}
@@ -458,81 +367,54 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- bool bhash_created = false, bhash2_created = false;
- struct inet_bind2_bucket *tb2 = NULL;
- struct inet_bind2_hashbucket *head2;
- struct inet_bind_bucket *tb = NULL;
+ int ret = 1, port = snum;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
- int ret = 1, port = snum;
- bool found_port = false;
+ struct inet_bind_bucket *tb = NULL;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
- head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
+ head = inet_csk_find_open_port(sk, &tb, &port);
if (!head)
return ret;
- if (tb && tb2)
- goto success;
- found_port = true;
- } else {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev))
- break;
-
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
- }
-
- if (!tb) {
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
- head, port, l3mdev);
if (!tb)
- goto fail_unlock;
- bhash_created = true;
- }
-
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
- net, head2, port, l3mdev, sk);
- if (!tb2)
- goto fail_unlock;
- bhash2_created = true;
+ goto tb_not_found;
+ goto success;
}
-
- /* If we had to find an open port, we already checked for conflicts */
- if (!found_port && !hlist_empty(&tb->owners)) {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port)
+ goto tb_found;
+tb_not_found:
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port, l3mdev);
+ if (!tb)
+ goto fail_unlock;
+tb_found:
+ if (!hlist_empty(&tb->owners)) {
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
if ((tb->fastreuse > 0 && reuse) ||
sk_reuseport_match(tb, sk))
goto success;
- if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true))
+ if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
}
success:
inet_csk_update_fastreuse(tb, sk);
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
- WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
ret = 0;
fail_unlock:
- if (ret) {
- if (bhash_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- if (bhash2_created)
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- tb2);
- }
spin_unlock_bh(&head->lock);
return ret;
}
@@ -1079,7 +961,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
- newicsk->icsk_bind2_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 545f91b6cb5e..b9d995b5ce24 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -81,41 +81,6 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
return tb;
}
-struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
- struct net *net,
- struct inet_bind2_hashbucket *head,
- const unsigned short port,
- int l3mdev,
- const struct sock *sk)
-{
- struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
-
- if (tb) {
- write_pnet(&tb->ib_net, net);
- tb->l3mdev = l3mdev;
- tb->port = port;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- else
-#endif
- tb->rcv_saddr = sk->sk_rcv_saddr;
- INIT_HLIST_HEAD(&tb->owners);
- hlist_add_head(&tb->node, &head->chain);
- }
- return tb;
-}
-
-static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return ipv6_addr_equal(&tb2->v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr);
-#endif
- return tb2->rcv_saddr == sk->sk_rcv_saddr;
-}
-
/*
* Caller must hold hashbucket lock for this tb with local BH disabled
*/
@@ -127,25 +92,12 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
}
}
-/* Caller must hold the lock for the corresponding hashbucket in the bhash table
- * with local BH disabled
- */
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
-{
- if (hlist_empty(&tb->owners)) {
- __hlist_del(&tb->node);
- kmem_cache_free(cachep, tb);
- }
-}
-
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, const unsigned short snum)
+ const unsigned short snum)
{
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
- sk_add_bind2_node(sk, &tb2->owners);
- inet_csk(sk)->icsk_bind2_hash = tb2;
}
/*
@@ -157,7 +109,6 @@ static void __inet_put_port(struct sock *sk)
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
@@ -166,13 +117,6 @@ static void __inet_put_port(struct sock *sk)
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-
- if (inet_csk(sk)->icsk_bind2_hash) {
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- __sk_del_bind2_node(sk);
- inet_csk(sk)->icsk_bind2_hash = NULL;
- inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
- }
spin_unlock(&head->lock);
}
@@ -189,19 +133,14 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
unsigned short port = inet_sk(child)->inet_num;
const int bhash = inet_bhashfn(sock_net(sk), port,
- table->bhash_size);
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
- struct inet_bind2_hashbucket *head_bhash2;
- bool created_inet_bind_bucket = false;
- struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
int l3mdev;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- if (unlikely(!tb || !tb2)) {
+ if (unlikely(!tb)) {
spin_unlock(&head->lock);
return -ENOENT;
}
@@ -214,45 +153,25 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev))
+ if (net_eq(ib_net(tb), sock_net(sk)) &&
+ tb->l3mdev == l3mdev && tb->port == port)
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- net, head, port, l3mdev);
+ sock_net(sk), head, port,
+ l3mdev);
if (!tb) {
spin_unlock(&head->lock);
return -ENOMEM;
}
- created_inet_bind_bucket = true;
}
inet_csk_update_fastreuse(tb, child);
-
- goto bhash2_find;
- } else if (!bind2_bucket_addr_match(tb2, child)) {
- l3mdev = inet_sk_bound_l3mdev(sk);
-
-bhash2_find:
- tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child,
- &head_bhash2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
- net, head_bhash2, port,
- l3mdev, child);
- if (!tb2)
- goto error;
- }
}
- inet_bind_hash(child, tb, tb2, port);
+ inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
return 0;
-
-error:
- if (created_inet_bind_bucket)
- inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
- spin_unlock(&head->lock);
- return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
@@ -756,76 +675,6 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
-static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb,
- struct net *net, unsigned short port,
- int l3mdev, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
-}
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
- struct net *net, const unsigned short port,
- int l3mdev, const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
-}
-
-static struct inet_bind2_hashbucket *
-inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk,
- const struct net *net, unsigned short port)
-{
- u32 hash;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
- else
-#endif
- hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
- return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-}
-
-/* This should only be called when the spinlock for the socket's corresponding
- * bind_hashbucket is held
- */
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
- const unsigned short port, int l3mdev, struct sock *sk,
- struct inet_bind2_hashbucket **head)
-{
- struct inet_bind2_bucket *bhash2 = NULL;
- struct inet_bind2_hashbucket *h;
-
- h = inet_bhashfn_portaddr(hinfo, sk, net, port);
- inet_bind_bucket_for_each(bhash2, &h->chain) {
- if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
- break;
- }
-
- if (head)
- *head = h;
-
- return bhash2;
-}
-
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
@@ -846,13 +695,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_timewait_sock *tw = NULL;
- struct inet_bind2_hashbucket *head2;
struct inet_bind_hashbucket *head;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
- bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
int l3mdev;
@@ -909,7 +755,8 @@ other_parity_scan:
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
@@ -927,7 +774,6 @@ other_parity_scan:
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
- tb_created = true;
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
@@ -943,17 +789,6 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- /* Find the corresponding tb2 bucket since we need to
- * add the socket to the bhash2 table as well
- */
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
- head2, port, l3mdev, sk);
- if (!tb2)
- goto error;
- }
-
/* Here we want to add a little bit of randomness to the next source
* port that will be chosen. We use a max() with a random here so that
* on low contention the randomness is maximal and on high contention
@@ -963,7 +798,7 @@ ok:
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
@@ -975,12 +810,6 @@ ok:
inet_twsk_deschedule_put(tw);
local_bh_enable();
return 0;
-
-error:
- if (tb_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
- return -ENOMEM;
}
/*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9984d23a7f3e..028513d3e2a2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4604,12 +4604,6 @@ void __init tcp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
- tcp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("tcp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC |
- SLAB_ACCOUNT,
- NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
@@ -4632,9 +4626,8 @@ void __init tcp_init(void)
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
- alloc_large_system_hash("TCP bind bhash tables",
- sizeof(struct inet_bind_hashbucket) +
- sizeof(struct inet_bind2_hashbucket),
+ alloc_large_system_hash("TCP bind",
+ sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
@@ -4643,12 +4636,9 @@ void __init tcp_init(void)
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
- tcp_hashinfo.bhash2 =
- (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index beceb89f68d9..1bbd53321d13 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2600,8 +2600,9 @@ static int selinux_sb_eat_lsm_opts(char *options, void **mnt_opts)
}
}
rc = selinux_add_opt(token, arg, mnt_opts);
+ kfree(arg);
+ arg = NULL;
if (unlikely(rc)) {
- kfree(arg);
goto free_opt;
}
} else {
@@ -2792,17 +2793,13 @@ static int selinux_fs_context_parse_param(struct fs_context *fc,
struct fs_parameter *param)
{
struct fs_parse_result result;
- int opt, rc;
+ int opt;
opt = fs_parse(fc, selinux_fs_parameters, param, &result);
if (opt < 0)
return opt;
- rc = selinux_add_opt(opt, param->string, &fc->security);
- if (!rc)
- param->string = NULL;
-
- return rc;
+ return selinux_add_opt(opt, param->string, &fc->security);
}
/* inode security operations */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 73e643ae94b6..e17de69faa54 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -443,5 +443,6 @@
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 403e83b4adc8..d27e0581b777 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -116,6 +116,30 @@
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
+ * Not susceptible to SBDR and SSDP
+ * variants of Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FBSDP_NO BIT(14) /*
+ * Not susceptible to FBSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_PSDP_NO BIT(15) /*
+ * Not susceptible to PSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FB_CLEAR BIT(17) /*
+ * VERW clears CPU fill buffer
+ * even on MDS_NO CPUs.
+ */
+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
+ * bit available to control VERW
+ * behavior.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -133,6 +157,7 @@
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
#define RTM_ALLOW BIT(1) /* TSX development mode */
+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 81470a99ed1c..22423c871ed6 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -37,11 +37,38 @@ ifeq ($(ARCH),riscv)
UNAME_M := riscv
endif
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
-LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
-LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
+LIBKVM += lib/assert.c
+LIBKVM += lib/elf.c
+LIBKVM += lib/guest_modes.c
+LIBKVM += lib/io.c
+LIBKVM += lib/kvm_util.c
+LIBKVM += lib/perf_test_util.c
+LIBKVM += lib/rbtree.c
+LIBKVM += lib/sparsebit.c
+LIBKVM += lib/test_util.c
+
+LIBKVM_x86_64 += lib/x86_64/apic.c
+LIBKVM_x86_64 += lib/x86_64/handlers.S
+LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
+LIBKVM_x86_64 += lib/x86_64/processor.c
+LIBKVM_x86_64 += lib/x86_64/svm.c
+LIBKVM_x86_64 += lib/x86_64/ucall.c
+LIBKVM_x86_64 += lib/x86_64/vmx.c
+
+LIBKVM_aarch64 += lib/aarch64/gic.c
+LIBKVM_aarch64 += lib/aarch64/gic_v3.c
+LIBKVM_aarch64 += lib/aarch64/handlers.S
+LIBKVM_aarch64 += lib/aarch64/processor.c
+LIBKVM_aarch64 += lib/aarch64/spinlock.c
+LIBKVM_aarch64 += lib/aarch64/ucall.c
+LIBKVM_aarch64 += lib/aarch64/vgic.c
+
+LIBKVM_s390x += lib/s390x/diag318_test_handler.c
+LIBKVM_s390x += lib/s390x/processor.c
+LIBKVM_s390x += lib/s390x/ucall.c
+
+LIBKVM_riscv += lib/riscv/processor.c
+LIBKVM_riscv += lib/riscv/ucall.c
TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
@@ -173,12 +200,13 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
include ../lib.mk
-STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_C := $(filter %.c,$(LIBKVM))
LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+
+EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
@@ -187,13 +215,8 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
-$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
- $(AR) crs $@ $^
-
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-all: $(STATIC_LIBS)
-$(TEST_GEN_PROGS): $(STATIC_LIBS)
+$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
cscope:
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 7b47ae4f952e..d60a34cdfaee 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -336,8 +336,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-i iterations] [-p offset] [-g]"
- "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
+ printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
+ "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
"[-x memslots]\n", name);
puts("");
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
@@ -351,6 +351,7 @@ static void help(char *name)
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
guest_modes_help();
+ printf(" -n: Run the vCPUs in nested mode (L2)\n");
printf(" -b: specify the size of the memory region which should be\n"
" dirtied by each vCPU. e.g. 10M or 3G.\n"
" (default: 1G)\n");
@@ -387,7 +388,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) {
+ while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) {
switch (opt) {
case 'g':
dirty_log_manual_caps = 0;
@@ -401,6 +402,9 @@ int main(int argc, char *argv[])
case 'm':
guest_modes_cmdline(optarg);
break;
+ case 'n':
+ perf_test_args.nested = true;
+ break;
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index a86f953d8d36..d822cb670f1c 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -30,10 +30,15 @@ struct perf_test_vcpu_args {
struct perf_test_args {
struct kvm_vm *vm;
+ /* The starting address and size of the guest test region. */
uint64_t gpa;
+ uint64_t size;
uint64_t guest_page_size;
int wr_fract;
+ /* Run vCPUs in L2 instead of L1, if the architecture supports it. */
+ bool nested;
+
struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
};
@@ -49,5 +54,9 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
void perf_test_join_vcpu_threads(int vcpus);
+void perf_test_guest_code(uint32_t vcpu_id);
+
+uint64_t perf_test_nested_pages(int nr_vcpus);
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus);
#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index d0d51adec76e..6ce185449259 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -482,13 +482,23 @@ void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
void vm_xsave_req_perm(int bit);
-enum x86_page_size {
- X86_PAGE_SIZE_4K = 0,
- X86_PAGE_SIZE_2M,
- X86_PAGE_SIZE_1G,
+enum pg_level {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+ PG_LEVEL_512G,
+ PG_LEVEL_NUM
};
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- enum x86_page_size page_size);
+
+#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
+#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
+
+#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
+#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
+#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
/*
* Basic CPU control in CR0
@@ -505,9 +515,6 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
-/* VMX_EPT_VPID_CAP bits */
-#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
-
#define XSTATE_XTILE_CFG_BIT 17
#define XSTATE_XTILE_DATA_BIT 18
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 583ceb0d1457..cc3604f8f1d3 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -96,6 +96,9 @@
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000
+#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
+
#define EXIT_REASON_FAILED_VMENTRY 0x80000000
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
@@ -606,6 +609,7 @@ bool load_vmcs(struct vmx_pages *vmx);
bool nested_vmx_supported(void);
void nested_vmx_check_supported(void);
+bool ept_1g_pages_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr);
@@ -613,6 +617,8 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t memslot);
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 722df3a28791..f989ff91f022 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -40,7 +40,7 @@ static bool all_vcpu_threads_running;
* Continuously write to the first 8 bytes of each page in the
* specified region.
*/
-static void guest_code(uint32_t vcpu_id)
+void perf_test_guest_code(uint32_t vcpu_id)
{
struct perf_test_args *pta = &perf_test_args;
struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
@@ -108,8 +108,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
{
struct perf_test_args *pta = &perf_test_args;
struct kvm_vm *vm;
- uint64_t guest_num_pages;
+ uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES;
uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
+ uint64_t region_end_gfn;
int i;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
@@ -135,33 +136,53 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
slots);
/*
+ * If using nested, allocate extra pages for the nested page tables and
+ * in-memory data structures.
+ */
+ if (pta->nested)
+ slot0_pages += perf_test_nested_pages(vcpus);
+
+ /*
* Pass guest_num_pages to populate the page tables for test memory.
* The memory is also added to memslot 0, but that's a benign side
* effect as KVM allows aliasing HVAs in meslots.
*/
- vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
- guest_num_pages, 0, guest_code, NULL);
+ vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0,
+ perf_test_guest_code, NULL);
pta->vm = vm;
+ /* Put the test region at the top guest physical memory. */
+ region_end_gfn = vm_get_max_gfn(vm) + 1;
+
+#ifdef __x86_64__
+ /*
+ * When running vCPUs in L2, restrict the test region to 48 bits to
+ * avoid needing 5-level page tables to identity map L2.
+ */
+ if (pta->nested)
+ region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
+#endif
/*
* If there should be more memory in the guest test region than there
* can be pages in the guest, it will definitely cause problems.
*/
- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+ TEST_ASSERT(guest_num_pages < region_end_gfn,
"Requested more guest memory than address space allows.\n"
" guest pages: %" PRIx64 " max gfn: %" PRIx64
" vcpus: %d wss: %" PRIx64 "]\n",
- guest_num_pages, vm_get_max_gfn(vm), vcpus,
+ guest_num_pages, region_end_gfn - 1, vcpus,
vcpu_memory_bytes);
- pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
+ pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
pta->gpa = align_down(pta->gpa, backing_src_pagesz);
#ifdef __s390x__
/* Align to 1M (segment size) */
pta->gpa = align_down(pta->gpa, 1 << 20);
#endif
- pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);
+ pta->size = guest_num_pages * pta->guest_page_size;
+ pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
+ pta->gpa, pta->gpa + pta->size);
/* Add extra memory slots for testing */
for (i = 0; i < slots; i++) {
@@ -178,6 +199,11 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
+ if (pta->nested) {
+ pr_info("Configuring vCPUs to run in L2 (nested).\n");
+ perf_test_setup_nested(vm, vcpus);
+ }
+
ucall_init(vm, NULL);
/* Export the shared variables to the guest. */
@@ -198,6 +224,17 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
sync_global_to_guest(vm, perf_test_args);
}
+uint64_t __weak perf_test_nested_pages(int nr_vcpus)
+{
+ return 0;
+}
+
+void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+{
+ pr_info("%s() not support on this architecture, skipping.\n", __func__);
+ exit(KSFT_SKIP);
+}
+
static void *vcpu_thread_main(void *data)
{
struct vcpu_thread *vcpu = data;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
new file mode 100644
index 000000000000..e258524435a0
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86_64-specific extensions to perf_test_util.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+#include "vmx.h"
+
+void perf_test_l2_guest_code(uint64_t vcpu_id)
+{
+ perf_test_guest_code(vcpu_id);
+ vmcall();
+}
+
+extern char perf_test_l2_guest_entry[];
+__asm__(
+"perf_test_l2_guest_entry:"
+" mov (%rsp), %rdi;"
+" call perf_test_l2_guest_code;"
+" ud2;"
+);
+
+static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+ GUEST_ASSERT(ept_1g_pages_supported());
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+uint64_t perf_test_nested_pages(int nr_vcpus)
+{
+ /*
+ * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+ * pages and 4-level paging, plus a few pages per-vCPU for data
+ * structures such as the VMCS.
+ */
+ return 513 + 10 * nr_vcpus;
+}
+
+void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ uint64_t start, end;
+
+ prepare_eptp(vmx, vm, 0);
+
+ /*
+ * Identity map the first 4G and the test region with 1G pages so that
+ * KVM can shadow the EPT12 with the maximum huge page size supported
+ * by the backing source.
+ */
+ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+ start = align_down(perf_test_args.gpa, PG_SIZE_1G);
+ end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G);
+ nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+{
+ struct vmx_pages *vmx, *vmx0 = NULL;
+ struct kvm_regs regs;
+ vm_vaddr_t vmx_gva;
+ int vcpu_id;
+
+ nested_vmx_check_supported();
+
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+ if (vcpu_id == 0) {
+ perf_test_setup_ept(vmx, vm);
+ vmx0 = vmx;
+ } else {
+ /* Share the same EPT table across all vCPUs. */
+ vmx->eptp = vmx0->eptp;
+ vmx->eptp_hva = vmx0->eptp_hva;
+ vmx->eptp_gpa = vmx0->eptp_gpa;
+ }
+
+ /*
+ * Override the vCPU to run perf_test_l1_guest_code() which will
+ * bounce it into L2 before calling perf_test_guest_code().
+ */
+ vcpu_regs_get(vm, vcpu_id, &regs);
+ regs.rip = (unsigned long) perf_test_l1_guest_code;
+ vcpu_regs_set(vm, vcpu_id, &regs);
+ vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id);
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 33ea5e9955d9..ead7011ee8f6 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -158,7 +158,7 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
int level)
{
uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
- int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
+ int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
return &page_table[index];
}
@@ -167,14 +167,14 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
uint64_t pt_pfn,
uint64_t vaddr,
uint64_t paddr,
- int level,
- enum x86_page_size page_size)
+ int current_level,
+ int target_level)
{
- uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+ uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);
if (!(*pte & PTE_PRESENT_MASK)) {
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
- if (level == page_size)
+ if (current_level == target_level)
*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
else
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
@@ -184,20 +184,19 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
* a hugepage at this level, and that there isn't a hugepage at
* this level.
*/
- TEST_ASSERT(level != page_size,
+ TEST_ASSERT(current_level != target_level,
"Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
- page_size, vaddr);
+ current_level, vaddr);
TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
"Cannot create page table at level: %u, vaddr: 0x%lx\n",
- level, vaddr);
+ current_level, vaddr);
}
return pte;
}
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- enum x86_page_size page_size)
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
{
- const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+ const uint64_t pg_size = PG_LEVEL_SIZE(level);
uint64_t *pml4e, *pdpe, *pde;
uint64_t *pte;
@@ -222,20 +221,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
* early if a hugepage was created.
*/
pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
- vaddr, paddr, 3, page_size);
+ vaddr, paddr, PG_LEVEL_512G, level);
if (*pml4e & PTE_LARGE_MASK)
return;
- pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+ pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
if (*pdpe & PTE_LARGE_MASK)
return;
- pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+ pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
if (*pde & PTE_LARGE_MASK)
return;
/* Fill in page table entry. */
- pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+ pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
@@ -243,7 +242,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
- __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+ __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
}
static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index d089d8b850b5..b77a01d0a271 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -198,6 +198,16 @@ bool load_vmcs(struct vmx_pages *vmx)
return true;
}
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+ return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+ return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
/*
* Initialize the control fields to the most basic settings possible.
*/
@@ -215,7 +225,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
struct eptPageTablePointer eptp = {
.memory_type = VMX_BASIC_MEM_TYPE_WB,
.page_walk_length = 3, /* + 1 */
- .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
};
@@ -392,80 +402,93 @@ void nested_vmx_check_supported(void)
}
}
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr)
+static void nested_create_pte(struct kvm_vm *vm,
+ struct eptPageTableEntry *pte,
+ uint64_t nested_paddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
+{
+ if (!pte->readable) {
+ pte->writable = true;
+ pte->readable = true;
+ pte->executable = true;
+ pte->page_size = (current_level == target_level);
+ if (pte->page_size)
+ pte->address = paddr >> vm->page_shift;
+ else
+ pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n",
+ current_level, nested_paddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, nested_paddr: 0x%lx\n",
+ current_level, nested_paddr);
+ }
+}
+
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, int target_level)
{
- uint16_t index[4];
- struct eptPageTableEntry *pml4e;
+ const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+ struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+ uint16_t index;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
- TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+ TEST_ASSERT((nested_paddr >> 48) == 0,
+ "Nested physical address 0x%lx requires 5-level paging",
+ nested_paddr);
+ TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx vm->page_size: 0x%x",
- nested_paddr, vm->page_size);
+ " nested_paddr: 0x%lx page_size: 0x%lx",
+ nested_paddr, page_size);
TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % vm->page_size) == 0,
+ TEST_ASSERT((paddr % page_size) == 0,
"Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
+ " paddr: 0x%lx page_size: 0x%lx",
+ paddr, page_size);
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- index[0] = (nested_paddr >> 12) & 0x1ffu;
- index[1] = (nested_paddr >> 21) & 0x1ffu;
- index[2] = (nested_paddr >> 30) & 0x1ffu;
- index[3] = (nested_paddr >> 39) & 0x1ffu;
-
- /* Allocate page directory pointer table if not present. */
- pml4e = vmx->eptp_hva;
- if (!pml4e[index[3]].readable) {
- pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].readable = true;
- pml4e[index[3]].executable = true;
- }
+ for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+ index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+ pte = &pt[index];
- /* Allocate page directory table if not present. */
- struct eptPageTableEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].readable) {
- pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].readable = true;
- pdpe[index[2]].executable = true;
- }
+ nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
- /* Allocate page table if not present. */
- struct eptPageTableEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].readable) {
- pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].readable = true;
- pde[index[1]].executable = true;
- }
+ if (pte->page_size)
+ break;
- /* Fill in page table entry. */
- struct eptPageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].readable = true;
- pte[index[0]].executable = true;
+ pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+ }
/*
* For now mark these as accessed and dirty because the only
* testcase we have needs that. Can be reconsidered later.
*/
- pte[index[0]].accessed = true;
- pte[index[0]].dirty = true;
+ pte->accessed = true;
+ pte->dirty = true;
+
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr)
+{
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
}
/*
@@ -476,7 +499,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* nested_paddr - Nested guest physical address to map
* paddr - VM Physical Address
* size - The size of the range to map
- * eptp_memslot - Memory region slot for new virtual translation tables
+ * level - The level at which to map the range
*
* Output Args: None
*
@@ -485,22 +508,29 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* Within the VM given by vm, creates a nested guest translation for the
* page range starting at nested_paddr to the page range starting at paddr.
*/
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+ int level)
{
- size_t page_size = vm->page_size;
+ size_t page_size = PG_LEVEL_SIZE(level);
size_t npages = size / page_size;
TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- nested_pg_map(vmx, vm, nested_paddr, paddr);
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
nested_paddr += page_size;
paddr += page_size;
}
}
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+ __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
/* Prepare an identity extended page table that maps all the
* physical pages in VM.
*/
@@ -525,6 +555,13 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
}
}
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size)
+{
+ __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 3875c4b23a04..15f046e19cb2 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -244,7 +244,7 @@ int main(int argc, char *argv[])
#ifdef __x86_64__
/* Identity map memory in the guest using 1gb pages. */
for (i = 0; i < slot_size; i += size_1gb)
- __virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G);
+ __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
#else
for (i = 0; i < slot_size; i += vm_get_page_size(vm))
virt_pg_map(vm, gpa + i, gpa + i);
diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
index da2325fcad87..bdecd532f935 100644
--- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
@@ -35,7 +35,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
run = vcpu_state(vm, VCPU_ID);
/* Map 1gb page without a backing memlot. */
- __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
+ __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G);
r = _vcpu_run(vm, VCPU_ID);
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index b984f8c8d523..a29f79618934 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -37,4 +37,3 @@ gro
ioam6_parser
toeplitz
cmsg_sender
-bind_bhash_test
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 464df13831f2..7ea54af55490 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -59,7 +59,6 @@ TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
TEST_PROGS += test_vxlan_vnifiltering.sh
-TEST_GEN_FILES += bind_bhash_test
TEST_FILES := settings
@@ -70,5 +69,4 @@ include bpf/Makefile
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
-$(OUTPUT)/bind_bhash_test: LDLIBS += -lpthread
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bind_bhash_test.c b/tools/testing/selftests/net/bind_bhash_test.c
deleted file mode 100644
index 252e73754e76..000000000000
--- a/tools/testing/selftests/net/bind_bhash_test.c
+++ /dev/null
@@ -1,119 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This times how long it takes to bind to a port when the port already
- * has multiple sockets in its bhash table.
- *
- * In the setup(), we populate the port's bhash table with
- * MAX_THREADS * MAX_CONNECTIONS number of entries.
- */
-
-#include <unistd.h>
-#include <stdio.h>
-#include <netdb.h>
-#include <pthread.h>
-
-#define MAX_THREADS 600
-#define MAX_CONNECTIONS 40
-
-static const char *bind_addr = "::1";
-static const char *port;
-
-static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
-
-static int bind_socket(int opt, const char *addr)
-{
- struct addrinfo *res, hint = {};
- int sock_fd, reuse = 1, err;
-
- sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (sock_fd < 0) {
- perror("socket fd err");
- return -1;
- }
-
- hint.ai_family = AF_INET6;
- hint.ai_socktype = SOCK_STREAM;
-
- err = getaddrinfo(addr, port, &hint, &res);
- if (err) {
- perror("getaddrinfo failed");
- return -1;
- }
-
- if (opt) {
- err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
- if (err) {
- perror("setsockopt failed");
- return -1;
- }
- }
-
- err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
- if (err) {
- perror("failed to bind to port");
- return -1;
- }
-
- return sock_fd;
-}
-
-static void *setup(void *arg)
-{
- int sock_fd, i;
- int *array = (int *)arg;
-
- for (i = 0; i < MAX_CONNECTIONS; i++) {
- sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
- if (sock_fd < 0)
- return NULL;
- array[i] = sock_fd;
- }
-
- return NULL;
-}
-
-int main(int argc, const char *argv[])
-{
- int listener_fd, sock_fd, i, j;
- pthread_t tid[MAX_THREADS];
- clock_t begin, end;
-
- if (argc != 2) {
- printf("Usage: listener <port>\n");
- return -1;
- }
-
- port = argv[1];
-
- listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
- if (listen(listener_fd, 100) < 0) {
- perror("listen failed");
- return -1;
- }
-
- /* Set up threads to populate the bhash table entry for the port */
- for (i = 0; i < MAX_THREADS; i++)
- pthread_create(&tid[i], NULL, setup, fd_array[i]);
-
- for (i = 0; i < MAX_THREADS; i++)
- pthread_join(tid[i], NULL);
-
- begin = clock();
-
- /* Bind to the same port on a different address */
- sock_fd = bind_socket(0, "2001:0db8:0:f101::1");
-
- end = clock();
-
- printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
-
- /* clean up */
- close(sock_fd);
- close(listener_fd);
- for (i = 0; i < MAX_THREADS; i++) {
- for (j = 0; i < MAX_THREADS; i++)
- close(fd_array[i][j]);
- }
-
- return 0;
-}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 44c47670447a..a49df8988cd6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3328,9 +3328,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
vcpu->stat.generic.blocking = 1;
+ preempt_disable();
kvm_arch_vcpu_blocking(vcpu);
-
prepare_to_rcuwait(wait);
+ preempt_enable();
+
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -3340,9 +3342,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
waited = true;
schedule();
}
- finish_rcuwait(wait);
+ preempt_disable();
+ finish_rcuwait(wait);
kvm_arch_vcpu_unblocking(vcpu);
+ preempt_enable();
vcpu->stat.generic.blocking = 0;