diff options
839 files changed, 9332 insertions, 4923 deletions
@@ -223,6 +223,8 @@ Dmitry Safonov <0x7f454c46@gmail.com> <d.safonov@partner.samsung.com> Dmitry Safonov <0x7f454c46@gmail.com> <dsafonov@virtuozzo.com> Domen Puncer <domen@coderock.org> Douglas Gilbert <dougg@torque.net> +Drew Fustini <fustini@kernel.org> <drew@pdp7.com> +<duje@dujemihanovic.xyz> <duje.mihanovic@skole.hr> Ed L. Cashin <ecashin@coraid.com> Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org> Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com> @@ -414,6 +416,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com> Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org> Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org> Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com> +Kirill A. Shutemov <kas@kernel.org> <kirill.shutemov@linux.intel.com> Kishon Vijay Abraham I <kishon@kernel.org> <kishon@ti.com> Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@linaro.org> Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@somainline.org> @@ -830,3 +833,6 @@ Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com> Yusuke Goda <goda.yusuke@renesas.com> Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com> Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com> +Zijun Hu <zijun.hu@oss.qualcomm.com> <quic_zijuhu@quicinc.com> +Zijun Hu <zijun.hu@oss.qualcomm.com> <zijuhu@codeaurora.org> +Zijun Hu <zijun_hu@htc.com> @@ -2981,6 +2981,11 @@ S: 521 Pleasant Valley Road S: Potsdam, New York 13676 S: USA +N: Shannon Nelson +E: sln@onemain.com +D: Worked on several network drivers including +D: ixgbe, i40e, ionic, pds_core, pds_vdpa, pds_fwctl + N: Dave Neuer E: dave.neuer@pobox.com D: Helped implement support for Compaq's H31xx series iPAQs diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 54195530e97a..d3da88b26a53 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -56,7 +56,7 @@ Date: January 2009 Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../async attribute allows the user space to - enable or diasble the device's suspend and resume callbacks to + enable or disable the device's suspend and resume callbacks to be executed asynchronously (ie. in separate threads, in parallel with the main suspend/resume thread) during system-wide power transitions (eg. suspend to RAM, hibernation). diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index bf85f4de6862..ab8cd337f43a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -584,6 +584,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/srbds + /sys/devices/system/cpu/vulnerabilities/tsa /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Date: January 2018 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index d4140dc6c5ba..615453fcc9ff 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -711,7 +711,7 @@ Description: This file shows the thin provisioning type. This is one of The file is read only. -What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count +What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count Date: February 2018 Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com> Description: This file shows the total physical memory resources. This is diff --git a/Documentation/ABI/testing/sysfs-edac-scrub b/Documentation/ABI/testing/sysfs-edac-scrub index c43be90deab4..ab6014743da5 100644 --- a/Documentation/ABI/testing/sysfs-edac-scrub +++ b/Documentation/ABI/testing/sysfs-edac-scrub @@ -49,6 +49,12 @@ Description: (RO) Supported minimum scrub cycle duration in seconds by the memory scrubber. + Device-based scrub: returns the minimum scrub cycle + supported by the memory device. + + Region-based scrub: returns the max of minimum scrub cycles + supported by individual memory devices that back the region. + What: /sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration Date: March 2025 KernelVersion: 6.15 @@ -57,6 +63,16 @@ Description: (RO) Supported maximum scrub cycle duration in seconds by the memory scrubber. + Device-based scrub: returns the maximum scrub cycle supported + by the memory device. + + Region-based scrub: returns the min of maximum scrub cycles + supported by individual memory devices that back the region. + + If the memory device does not provide maximum scrub cycle + information, return the maximum supported value of the scrub + cycle field. + What: /sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration Date: March 2025 KernelVersion: 6.15 diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 0cc35a14afbe..bd98ea3175ec 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1732,12 +1732,6 @@ The following nested keys are defined. numa_hint_faults (npn) Number of NUMA hinting faults. - numa_task_migrated (npn) - Number of task migration by NUMA balancing. - - numa_task_swapped (npn) - Number of task swap by NUMA balancing. - pgdemote_kswapd Number of pages demoted by kswapd. diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst index 1302fd1b55e8..6dba18dbb9ab 100644 --- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in combination with a microcode update. The microcode clears the affected CPU buffers when the VERW instruction is executed. -Kernel reuses the MDS function to invoke the buffer clearing: - - mds_clear_cpu_buffers() +Kernel does the buffer clearing with x86_clear_cpu_buffers(). On MDS affected CPUs, the kernel already invokes CPU buffer clear on kernel/userspace, hypervisor/guest and C-state (idle) transitions. No diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1f2c0874da9..07e22ba5bfe3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7488,6 +7488,19 @@ having this key zero'ed is acceptable. E.g. in testing scenarios. + tsa= [X86] Control mitigation for Transient Scheduler + Attacks on AMD CPUs. Search the following in your + favourite search engine for more details: + + "Technical guidance for mitigating transient scheduler + attacks". + + off - disable the mitigation + on - enable the mitigation (default) + user - mitigate only user/kernel transitions + vm - mitigate only guest/host transitions + + tsc= Disable clocksource stability checks for TSC. Format: <string> [x86] reliable: mark tsc clocksource as reliable, this diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst index 5a2e6c0ef04a..3518671e1a85 100644 --- a/Documentation/arch/x86/mds.rst +++ b/Documentation/arch/x86/mds.rst @@ -93,7 +93,7 @@ enters a C-state. The kernel provides a function to invoke the buffer clearing: - mds_clear_cpu_buffers() + x86_clear_cpu_buffers() Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. Other than CFLAGS.ZF, this macro doesn't clobber any registers. @@ -185,9 +185,9 @@ Mitigation points idle clearing would be a window dressing exercise and is therefore not activated. - The invocation is controlled by the static key mds_idle_clear which is - switched depending on the chosen mitigation mode and the SMT state of - the system. + The invocation is controlled by the static key cpu_buf_idle_clear which is + switched depending on the chosen mitigation mode and the SMT state of the + system. The buffer clear is only invoked before entering the C-State to prevent that stale data from the idling CPU from spilling to the Hyper-Thread diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst index d2343952f2cb..8606bf958a8c 100644 --- a/Documentation/bpf/map_hash.rst +++ b/Documentation/bpf/map_hash.rst @@ -233,10 +233,16 @@ attempts in order to enforce the LRU property which have increasing impacts on other CPUs involved in the following operation attempts: - Attempt to use CPU-local state to batch operations -- Attempt to fetch free nodes from global lists +- Attempt to fetch ``target_free`` free nodes from global lists - Attempt to pull any node from a global list and remove it from the hashmap - Attempt to pull any node from any CPU's list and remove it from the hashmap +The number of nodes to borrow from the global list in a batch, ``target_free``, +depends on the size of the map. Larger batch size reduces lock contention, but +may also exhaust the global structure. The value is computed at map init to +avoid exhaustion, by limiting aggregate reservation by all CPUs to half the map +size. With a minimum of a single element and maximum budget of 128 at a time. + This algorithm is described visually in the following diagram. See the description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of the corresponding operations: diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot index a0fee349d29c..ab10058f5b79 100644 --- a/Documentation/bpf/map_lru_hash_update.dot +++ b/Documentation/bpf/map_lru_hash_update.dot @@ -35,18 +35,18 @@ digraph { fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2, label="Flush local pending, Rotate Global list, move - LOCAL_FREE_TARGET + target_free from global -> local"] // Also corresponds to: // fn__local_list_flush() // fn_bpf_lru_list_rotate() fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2, - label="Able to free\nLOCAL_FREE_TARGET\nnodes?"] + label="Able to free\ntarget_free\nnodes?"] fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3, label="Shrink inactive list up to remaining - LOCAL_FREE_TARGET + target_free (global LRU -> local)"] fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2, label="> 0 entries in\nlocal free list?"] diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml index 2985c8c717d7..5403242545ab 100644 --- a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml +++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml @@ -52,6 +52,9 @@ properties: '#clock-cells': const: 1 + '#reset-cells': + const: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml index 9b5f3f3eab19..e69b6343a8eb 100644 --- a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml @@ -118,15 +118,11 @@ $defs: ti,lvds-vod-swing-clock-microvolt: description: LVDS diferential output voltage <min max> for clock lanes in microvolts. - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 2 maxItems: 2 ti,lvds-vod-swing-data-microvolt: description: LVDS diferential output voltage <min max> for data lanes in microvolts. - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 2 maxItems: 2 allOf: diff --git a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml index eddfd329c67b..69ac5db8b914 100644 --- a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml @@ -26,7 +26,8 @@ properties: - const: realtek,rtl9301-i2c reg: - description: Register offset and size this I2C controller. + items: + - description: Register offset and size this I2C controller. "#address-cells": const: 1 diff --git a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml index cb3e1801b0d3..0840e4ab28b7 100644 --- a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml +++ b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml @@ -4,14 +4,14 @@ $id: http://devicetree.org/schemas/input/elan,ekth6915.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Elan eKTH6915 touchscreen controller +title: Elan I2C-HID touchscreen controllers maintainers: - Douglas Anderson <dianders@chromium.org> description: - Supports the Elan eKTH6915 touchscreen controller. - This touchscreen controller uses the i2c-hid protocol with a reset GPIO. + Supports the Elan eKTH6915 and other I2C-HID touchscreen controllers. + These touchscreen controller use the i2c-hid protocol with a reset GPIO. allOf: - $ref: /schemas/input/touchscreen/touchscreen.yaml# @@ -23,12 +23,14 @@ properties: - enum: - elan,ekth5015m - const: elan,ekth6915 + - items: + - const: elan,ekth8d18 + - const: elan,ekth6a12nay - enum: - elan,ekth6915 - elan,ekth6a12nay - reg: - const: 0x10 + reg: true interrupts: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 7b6a2fde8175..19934d5c24e5 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -23,7 +23,7 @@ properties: - allwinner,sun20i-d1-emac - allwinner,sun50i-h6-emac - allwinner,sun50i-h616-emac0 - - allwinner,sun55i-a523-emac0 + - allwinner,sun55i-a523-gmac0 - const: allwinner,sun50i-a64-emac reg: diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml index 4dd2dc9c678b..8afbd9ebd73f 100644 --- a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml @@ -80,6 +80,8 @@ examples: interrupt-parent = <&intc>; interrupts = <296 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; + phy-handle = <&phy0>; + phy-mode = "rgmii-id"; resets = <&rst 30>; reset-names = "stmmaceth"; snps,multicast-filter-bins = <0>; @@ -91,7 +93,6 @@ examples: snps,mtl-rx-config = <&gmac0_mtl_rx_setup>; snps,mtl-tx-config = <&gmac0_mtl_tx_setup>; snps,axi-config = <&gmac0_stmmac_axi_setup>; - status = "disabled"; gmac0_mtl_rx_setup: rx-queues-config { snps,rx-queues-to-use = <8>; diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index 33d2016b6509..c6bc27709bf7 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -45,7 +45,7 @@ allOf: - ns16550 - ns16550a then: - anyOf: + oneOf: - required: [ clock-frequency ] - required: [ clocks ] diff --git a/Documentation/devicetree/bindings/serial/altera_jtaguart.txt b/Documentation/devicetree/bindings/serial/altera_jtaguart.txt deleted file mode 100644 index 55a901051e8f..000000000000 --- a/Documentation/devicetree/bindings/serial/altera_jtaguart.txt +++ /dev/null @@ -1,5 +0,0 @@ -Altera JTAG UART - -Required properties: -- compatible : should be "ALTR,juart-1.0" <DEPRECATED> -- compatible : should be "altr,juart-1.0" diff --git a/Documentation/devicetree/bindings/serial/altera_uart.txt b/Documentation/devicetree/bindings/serial/altera_uart.txt deleted file mode 100644 index 81bf7ffb1a81..000000000000 --- a/Documentation/devicetree/bindings/serial/altera_uart.txt +++ /dev/null @@ -1,8 +0,0 @@ -Altera UART - -Required properties: -- compatible : should be "ALTR,uart-1.0" <DEPRECATED> -- compatible : should be "altr,uart-1.0" - -Optional properties: -- clock-frequency : frequency of the clock input to the UART diff --git a/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml b/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml new file mode 100644 index 000000000000..02e20fa591da --- /dev/null +++ b/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/altr,juart-1.0.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Altera JTAG UART + +maintainers: + - Dinh Nguyen <dinguyen@kernel.org> + +properties: + compatible: + const: altr,juart-1.0 + +required: + - compatible + +additionalProperties: false diff --git a/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml b/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml new file mode 100644 index 000000000000..72d4972e1e22 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/altr,uart-1.0.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Altera UART + +maintainers: + - Dinh Nguyen <dinguyen@kernel.org> + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: altr,uart-1.0 + + clock-frequency: + description: Frequency of the clock input to the UART. + +required: + - compatible + +unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml index 234089b5954d..b43df10c5ef4 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas//soc/fsl/fsl,ls1028a-reset.yaml# +$id: http://devicetree.org/schemas/soc/fsl/fsl,ls1028a-reset.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale Layerscape Reset Registers Module diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 3616d7161dab..a5734bdd1cc7 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1249,3 +1249,12 @@ Using try_lookup_noperm() will require linux/namei.h to be included. Calling conventions for ->d_automount() have changed; we should *not* grab an extra reference to new mount - it should be returned with refcount 1. + +--- + +collect_mounts()/drop_collected_mounts()/iterate_mounts() are gone now. +Replacement is collect_paths()/drop_collected_path(), with no special +iterator needed. Instead of a cloned mount tree, the new interface returns +an array of struct path, one for each mount collect_mounts() would've +created. These struct path point to locations in the caller's namespace +that would be roots of the cloned mounts. diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 4cbfe666e6f5..b29d62eefa16 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -6,6 +6,9 @@ $schema: https://json-schema.org/draft-07/schema # Common defines $defs: + name: + type: string + pattern: ^[0-9a-z-]+$ uint: type: integer minimum: 0 @@ -76,7 +79,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' header: description: For C-compatible languages, header which already defines this value. type: string @@ -103,7 +106,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' value: type: integer doc: @@ -132,7 +135,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: description: The netlink attribute type enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string, binary ] @@ -169,7 +172,7 @@ properties: name: description: | Name used when referring to this space in other definitions, not used outside of the spec. - type: string + $ref: '#/$defs/name' name-prefix: description: | Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- @@ -206,7 +209,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: &attr-type description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, @@ -348,7 +351,7 @@ properties: properties: name: description: Name of the operation, also defining its C enum value in uAPI. - type: string + $ref: '#/$defs/name' doc: description: Documentation for the command. type: string diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index 40efbbad76ab..7b1ec153e834 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -6,6 +6,9 @@ $schema: https://json-schema.org/draft-07/schema # Common defines $defs: + name: + type: string + pattern: ^[0-9a-z-]+$ uint: type: integer minimum: 0 @@ -29,7 +32,7 @@ additionalProperties: False properties: name: description: Name of the genetlink family. - type: string + $ref: '#/$defs/name' doc: type: string protocol: @@ -48,7 +51,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' header: description: For C-compatible languages, header which already defines this value. type: string @@ -75,7 +78,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' value: type: integer doc: @@ -96,7 +99,7 @@ properties: name: description: | Name used when referring to this space in other definitions, not used outside of the spec. - type: string + $ref: '#/$defs/name' name-prefix: description: | Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- @@ -121,7 +124,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: &attr-type enum: [ unused, pad, flag, binary, uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, @@ -243,7 +246,7 @@ properties: properties: name: description: Name of the operation, also defining its C enum value in uAPI. - type: string + $ref: '#/$defs/name' doc: description: Documentation for the command. type: string @@ -327,7 +330,7 @@ properties: name: description: | The name for the group, used to form the define and the value of the define. - type: string + $ref: '#/$defs/name' flags: *cmd_flags kernel-family: diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index e34bf23897fa..246fa07bccf6 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -6,6 +6,12 @@ $schema: https://json-schema.org/draft-07/schema # Common defines $defs: + name: + type: string + pattern: ^[0-9a-z-]+$ + name-cap: + type: string + pattern: ^[0-9a-zA-Z-]+$ uint: type: integer minimum: 0 @@ -71,7 +77,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' header: description: For C-compatible languages, header which already defines this value. type: string @@ -98,7 +104,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' value: type: integer doc: @@ -124,7 +130,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name-cap' type: description: | The netlink attribute type. Members of type 'binary' or 'pad' @@ -183,7 +189,7 @@ properties: name: description: | Name used when referring to this space in other definitions, not used outside of the spec. - type: string + $ref: '#/$defs/name' name-prefix: description: | Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- @@ -220,7 +226,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: &attr-type description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, @@ -408,7 +414,7 @@ properties: properties: name: description: Name of the operation, also defining its C enum value in uAPI. - type: string + $ref: '#/$defs/name' doc: description: Documentation for the command. type: string diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 05fee1b7fe19..38ddc04f9e6d 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -38,15 +38,15 @@ definitions: - name: dsa - - name: pci_pf + name: pci-pf - - name: pci_vf + name: pci-vf - name: virtual - name: unused - - name: pci_sf + name: pci-sf - type: enum name: port-fn-state @@ -220,7 +220,7 @@ definitions: - name: flag - - name: nul_string + name: nul-string value: 10 - name: binary diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index 8feefeae5376..f434140b538e 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -188,7 +188,7 @@ definitions: value: 10000 - type: const - name: pin-frequency-77_5-khz + name: pin-frequency-77-5-khz value: 77500 - type: const diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 72a076b0e1b5..348c6ad548f5 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -48,7 +48,7 @@ definitions: name: started doc: The firmware flashing process has started. - - name: in_progress + name: in-progress doc: The firmware flashing process is in progress. - name: completed @@ -1422,7 +1422,7 @@ attribute-sets: name: hkey type: binary - - name: input_xfrm + name: input-xfrm type: u32 - name: start-context @@ -2238,7 +2238,7 @@ operations: - hfunc - indir - hkey - - input_xfrm + - input-xfrm dump: request: attributes: diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml index 0af5ab842c04..b02ab19817d3 100644 --- a/Documentation/netlink/specs/fou.yaml +++ b/Documentation/netlink/specs/fou.yaml @@ -15,7 +15,7 @@ kernel-policy: global definitions: - type: enum - name: encap_type + name: encap-type name-prefix: fou-encap- enum-name: entries: [ unspec, direct, gue ] @@ -43,26 +43,26 @@ attribute-sets: name: type type: u8 - - name: remcsum_nopartial + name: remcsum-nopartial type: flag - - name: local_v4 + name: local-v4 type: u32 - - name: local_v6 + name: local-v6 type: binary checks: min-len: 16 - - name: peer_v4 + name: peer-v4 type: u32 - - name: peer_v6 + name: peer-v6 type: binary checks: min-len: 16 - - name: peer_port + name: peer-port type: u16 byte-order: big-endian - @@ -90,12 +90,12 @@ operations: - port - ipproto - type - - remcsum_nopartial - - local_v4 - - peer_v4 - - local_v6 - - peer_v6 - - peer_port + - remcsum-nopartial + - local-v4 + - peer-v4 + - local-v6 + - peer-v6 + - peer-port - ifindex - @@ -112,11 +112,11 @@ operations: - af - ifindex - port - - peer_port - - local_v4 - - peer_v4 - - local_v6 - - peer_v6 + - peer-port + - local-v4 + - peer-v4 + - local-v6 + - peer-v6 - name: get diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index dfd017780d2f..fb57860fe778 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -57,21 +57,21 @@ definitions: doc: >- A new subflow has been established. 'error' should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - daddr6, sport, dport, backup, if_idx [, error]. + daddr6, sport, dport, backup, if-idx [, error]. - name: sub-closed doc: >- A subflow has been closed. An error (copy of sk_err) could be set if an error has been detected for this subflow. Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - daddr6, sport, dport, backup, if_idx [, error]. + daddr6, sport, dport, backup, if-idx [, error]. - name: sub-priority value: 13 doc: >- The priority of a subflow has changed. 'error' should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - daddr6, sport, dport, backup, if_idx [, error]. + daddr6, sport, dport, backup, if-idx [, error]. - name: listener-created value: 15 @@ -255,7 +255,7 @@ attribute-sets: name: timeout type: u32 - - name: if_idx + name: if-idx type: u32 - name: reset-reason diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml index c87658114852..8d1a3c01708f 100644 --- a/Documentation/netlink/specs/nfsd.yaml +++ b/Documentation/netlink/specs/nfsd.yaml @@ -27,7 +27,7 @@ attribute-sets: name: proc type: u32 - - name: service_time + name: service-time type: s64 - name: pad @@ -139,7 +139,7 @@ operations: - prog - version - proc - - service_time + - service-time - saddr4 - daddr4 - saddr6 diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml index 46f5d1cd8a5f..7974aa7d8905 100644 --- a/Documentation/netlink/specs/ovs_flow.yaml +++ b/Documentation/netlink/specs/ovs_flow.yaml @@ -216,7 +216,7 @@ definitions: type: struct members: - - name: nd_target + name: nd-target type: binary len: 16 byte-order: big-endian @@ -258,12 +258,12 @@ definitions: type: struct members: - - name: vlan_tpid + name: vlan-tpid type: u16 byte-order: big-endian doc: Tag protocol identifier (TPID) to push. - - name: vlan_tci + name: vlan-tci type: u16 byte-order: big-endian doc: Tag control identifier (TCI) to push. diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml index b41b31eebcae..28c4cf66517c 100644 --- a/Documentation/netlink/specs/rt-link.yaml +++ b/Documentation/netlink/specs/rt-link.yaml @@ -603,7 +603,7 @@ definitions: name: optmask type: u32 - - name: if_stats_msg + name: if-stats-msg type: struct members: - @@ -2486,7 +2486,7 @@ operations: name: getstats doc: Get / dump link stats. attribute-set: stats-attrs - fixed-header: if_stats_msg + fixed-header: if-stats-msg do: request: value: 94 diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml index cb7ea7d62e56..42d74c9aeb54 100644 --- a/Documentation/netlink/specs/tc.yaml +++ b/Documentation/netlink/specs/tc.yaml @@ -232,7 +232,7 @@ definitions: type: u8 doc: log(P_max / (qth-max - qth-min)) - - name: Scell_log + name: Scell-log type: u8 doc: cell size for idle damping - @@ -253,7 +253,7 @@ definitions: name: DPs type: u32 - - name: def_DP + name: def-DP type: u32 - name: grio diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index af7db0e91f6b..a52850602cd8 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -66,7 +66,7 @@ Admin Function driver As mentioned above RVU PF0 is called the admin function (AF), this driver supports resource provisioning and configuration of functional blocks. Doesn't handle any I/O. It sets up few basic stuff but most of the -funcionality is achieved via configuration requests from PFs and VFs. +functionality is achieved via configuration requests from PFs and VFs. PF/VFs communicates with AF via a shared memory region (mailbox). Upon receiving requests AF does resource provisioning and other HW configuration. diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index c7904a1bc167..36cc7afc2527 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -16,11 +16,13 @@ User interface Creating a TLS connection ------------------------- -First create a new TCP socket and set the TLS ULP. +First create a new TCP socket and once the connection is established set the +TLS ULP. .. code-block:: c sock = socket(AF_INET, SOCK_STREAM, 0); + connect(sock, addr, addrlen); setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls")); Setting the TLS ULP allows us to set/get TLS socket options. Currently diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 1ac62dc3a66f..e1755610b4bc 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -312,7 +312,7 @@ Posting as one thread is discouraged because it confuses patchwork (as of patchwork 2.2.2). Co-posting selftests --------------------- +~~~~~~~~~~~~~~~~~~~~ Selftests should be part of the same series as the code changes. Specifically for fixes both code change and related test should go into diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9abf93ee5f65..43ed57e048a8 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7196,6 +7196,10 @@ The valid value for 'flags' is: u64 leaf; u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + u64 ret; + u64 vector; + } setup_event_notify; }; } tdx; @@ -7210,21 +7214,24 @@ number from register R11. The remaining field of the union provide the inputs and outputs of the TDVMCALL. Currently the following values of ``nr`` are defined: -* ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote -signed by a service hosting TD-Quoting Enclave operating on the host. -Parameters and return value are in the ``get_quote`` field of the union. -The ``gpa`` field and ``size`` specify the guest physical address -(without the shared bit set) and the size of a shared-memory buffer, in -which the TDX guest passes a TD Report. The ``ret`` field represents -the return value of the GetQuote request. When the request has been -queued successfully, the TDX guest can poll the status field in the -shared-memory area to check whether the Quote generation is completed or -not. When completed, the generated Quote is returned via the same buffer. - -* ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support -status of TDVMCALLs. The output values for the given leaf should be -placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info`` -field of the union. + * ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote + signed by a service hosting TD-Quoting Enclave operating on the host. + Parameters and return value are in the ``get_quote`` field of the union. + The ``gpa`` field and ``size`` specify the guest physical address + (without the shared bit set) and the size of a shared-memory buffer, in + which the TDX guest passes a TD Report. The ``ret`` field represents + the return value of the GetQuote request. When the request has been + queued successfully, the TDX guest can poll the status field in the + shared-memory area to check whether the Quote generation is completed or + not. When completed, the generated Quote is returned via the same buffer. + + * ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support + status of TDVMCALLs. The output values for the given leaf should be + placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info`` + field of the union. + +* ``TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT``: the guest has requested to +set up a notification interrupt for vector ``vector``. KVM may add support for more values in the future that may cause a userspace exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case, diff --git a/Documentation/virt/kvm/x86/intel-tdx.rst b/Documentation/virt/kvm/x86/intel-tdx.rst index 76bdd95334d6..5efac62c92c7 100644 --- a/Documentation/virt/kvm/x86/intel-tdx.rst +++ b/Documentation/virt/kvm/x86/intel-tdx.rst @@ -79,7 +79,20 @@ to be configured to the TDX guest. struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + /* TDG.VP.VMCALL hypercalls executed in kernel and forwarded to + * userspace, respectively + */ + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + + /* TDG.VP.VMCALL instruction executions subfunctions executed in kernel + * and forwarded to userspace, respectively + */ + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/Documentation/wmi/acpi-interface.rst b/Documentation/wmi/acpi-interface.rst index f1b28835d23c..1ef003b033bf 100644 --- a/Documentation/wmi/acpi-interface.rst +++ b/Documentation/wmi/acpi-interface.rst @@ -36,7 +36,7 @@ Offset Size (in bytes) Content The WMI object flags control whether the method or notification ID is used: -- 0x1: Data block usage is expensive and must be explicitly enabled/disabled. +- 0x1: Data block is expensive to collect. - 0x2: Data block contains WMI methods. - 0x4: Data block contains ASCIZ string. - 0x8: Data block describes a WMI event, use notification ID instead @@ -83,14 +83,18 @@ event as hexadecimal value. Their first parameter is an integer with a value of 0 if the WMI event should be disabled, other values will enable the WMI event. +Those ACPI methods are always called even for WMI events not registered as +being expensive to collect to match the behavior of the Windows driver. + WCxx ACPI methods ----------------- -Similar to the ``WExx`` ACPI methods, except that it controls data collection -instead of events and thus the last two characters of the ACPI method name are -the method ID of the data block to enable/disable. +Similar to the ``WExx`` ACPI methods, except that instead of WMI events it controls +data collection of data blocks registered as being expensive to collect. Thus the +last two characters of the ACPI method name are the method ID of the data block +to enable/disable. Those ACPI methods are also called before setting data blocks to match the -behaviour of the Windows driver. +behavior of the Windows driver. _WED ACPI method ---------------- diff --git a/MAINTAINERS b/MAINTAINERS index c3f7fbd0d67a..60bba48f5479 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4181,6 +4181,7 @@ F: include/linux/cpumask_types.h F: include/linux/find.h F: include/linux/nodemask.h F: include/linux/nodemask_types.h +F: include/uapi/linux/bits.h F: include/vdso/bits.h F: lib/bitmap-str.c F: lib/bitmap.c @@ -4193,6 +4194,7 @@ F: tools/include/linux/bitfield.h F: tools/include/linux/bitmap.h F: tools/include/linux/bits.h F: tools/include/linux/find.h +F: tools/include/uapi/linux/bits.h F: tools/include/vdso/bits.h F: tools/lib/bitmap.c F: tools/lib/find_bit.c @@ -10504,7 +10506,7 @@ S: Maintained F: block/partitions/efi.* HABANALABS PCI DRIVER -M: Ofir Bitton <obitton@habana.ai> +M: Yaron Avizrat <yaron.avizrat@intel.com> L: dri-devel@lists.freedesktop.org S: Supported C: irc://irc.oftc.net/dri-devel @@ -11155,7 +11157,8 @@ F: include/linux/platform_data/huawei-gaokun-ec.h HUGETLB SUBSYSTEM M: Muchun Song <muchun.song@linux.dev> -R: Oscar Salvador <osalvador@suse.de> +M: Oscar Salvador <osalvador@suse.de> +R: David Hildenbrand <david@redhat.com> L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages @@ -11166,6 +11169,7 @@ F: fs/hugetlbfs/ F: include/linux/hugetlb.h F: include/trace/events/hugetlbfs.h F: mm/hugetlb.c +F: mm/hugetlb_cgroup.c F: mm/hugetlb_cma.c F: mm/hugetlb_cma.h F: mm/hugetlb_vmemmap.c @@ -13345,6 +13349,7 @@ M: Alexander Graf <graf@amazon.com> M: Mike Rapoport <rppt@kernel.org> M: Changyuan Lyu <changyuanl@google.com> L: kexec@lists.infradead.org +L: linux-mm@kvack.org S: Maintained F: Documentation/admin-guide/mm/kho.rst F: Documentation/core-api/kho/* @@ -15547,6 +15552,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_* MELLANOX ETHERNET DRIVER (mlx5e) M: Saeed Mahameed <saeedm@nvidia.com> M: Tariq Toukan <tariqt@nvidia.com> +M: Mark Bloch <mbloch@nvidia.com> L: netdev@vger.kernel.org S: Maintained W: https://www.nvidia.com/networking/ @@ -15616,6 +15622,7 @@ MELLANOX MLX5 core VPI driver M: Saeed Mahameed <saeedm@nvidia.com> M: Leon Romanovsky <leonro@nvidia.com> M: Tariq Toukan <tariqt@nvidia.com> +M: Mark Bloch <mbloch@nvidia.com> L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Maintained @@ -15673,11 +15680,16 @@ MEMBLOCK AND MEMORY MANAGEMENT INITIALIZATION M: Mike Rapoport <rppt@kernel.org> L: linux-mm@kvack.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git for-next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git fixes F: Documentation/core-api/boot-time-mm.rst F: Documentation/core-api/kho/bindings/memblock/* F: include/linux/memblock.h +F: mm/bootmem_info.c F: mm/memblock.c +F: mm/memtest.c F: mm/mm_init.c +F: mm/rodata_test.c F: tools/testing/memblock/ MEMORY ALLOCATION PROFILING @@ -15732,7 +15744,6 @@ F: Documentation/admin-guide/mm/ F: Documentation/mm/ F: include/linux/gfp.h F: include/linux/gfp_types.h -F: include/linux/memfd.h F: include/linux/memory_hotplug.h F: include/linux/memory-tiers.h F: include/linux/mempolicy.h @@ -15792,6 +15803,10 @@ S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: mm/gup.c +F: mm/gup_test.c +F: mm/gup_test.h +F: tools/testing/selftests/mm/gup_longterm.c +F: tools/testing/selftests/mm/gup_test.c MEMORY MANAGEMENT - KSM (Kernel Samepage Merging) M: Andrew Morton <akpm@linux-foundation.org> @@ -15839,6 +15854,17 @@ F: mm/numa.c F: mm/numa_emulation.c F: mm/numa_memblks.c +MEMORY MANAGEMENT - OOM KILLER +M: Michal Hocko <mhocko@suse.com> +R: David Rientjes <rientjes@google.com> +R: Shakeel Butt <shakeel.butt@linux.dev> +L: linux-mm@kvack.org +S: Maintained +F: include/linux/oom.h +F: include/trace/events/oom.h +F: include/uapi/linux/oom.h +F: mm/oom_kill.c + MEMORY MANAGEMENT - PAGE ALLOCATOR M: Andrew Morton <akpm@linux-foundation.org> M: Vlastimil Babka <vbabka@suse.cz> @@ -15853,8 +15879,17 @@ F: include/linux/compaction.h F: include/linux/gfp.h F: include/linux/page-isolation.h F: mm/compaction.c +F: mm/debug_page_alloc.c +F: mm/fail_page_alloc.c F: mm/page_alloc.c +F: mm/page_ext.c +F: mm/page_frag_cache.c F: mm/page_isolation.c +F: mm/page_owner.c +F: mm/page_poison.c +F: mm/page_reporting.c +F: mm/show_mem.c +F: mm/shuffle.c MEMORY MANAGEMENT - RECLAIM M: Andrew Morton <akpm@linux-foundation.org> @@ -15868,6 +15903,7 @@ L: linux-mm@kvack.org S: Maintained F: mm/pt_reclaim.c F: mm/vmscan.c +F: mm/workingset.c MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) M: Andrew Morton <akpm@linux-foundation.org> @@ -15880,6 +15916,7 @@ R: Harry Yoo <harry.yoo@oracle.com> L: linux-mm@kvack.org S: Maintained F: include/linux/rmap.h +F: mm/page_vma_mapped.c F: mm/rmap.c MEMORY MANAGEMENT - SECRETMEM @@ -15912,9 +15949,9 @@ F: mm/swapfile.c MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) M: Andrew Morton <akpm@linux-foundation.org> M: David Hildenbrand <david@redhat.com> +M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> R: Zi Yan <ziy@nvidia.com> R: Baolin Wang <baolin.wang@linux.alibaba.com> -R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> R: Nico Pache <npache@redhat.com> R: Ryan Roberts <ryan.roberts@arm.com> @@ -15972,11 +16009,14 @@ S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: include/trace/events/mmap.h +F: mm/mincore.c F: mm/mlock.c F: mm/mmap.c F: mm/mprotect.c F: mm/mremap.c F: mm/mseal.c +F: mm/msync.c +F: mm/nommu.c F: mm/vma.c F: mm/vma.h F: mm/vma_exec.c @@ -16784,8 +16824,8 @@ F: include/dt-bindings/clock/mobileye,eyeq5-clk.h MODULE SUPPORT M: Luis Chamberlain <mcgrof@kernel.org> M: Petr Pavlu <petr.pavlu@suse.com> +M: Daniel Gomez <da.gomez@kernel.org> R: Sami Tolvanen <samitolvanen@google.com> -R: Daniel Gomez <da.gomez@samsung.com> L: linux-modules@vger.kernel.org L: linux-kernel@vger.kernel.org S: Maintained @@ -17184,10 +17224,10 @@ F: drivers/rtc/rtc-ntxec.c F: include/linux/mfd/ntxec.h NETRONOME ETHERNET DRIVERS -M: Louis Peens <louis.peens@corigine.com> R: Jakub Kicinski <kuba@kernel.org> +R: Simon Horman <horms@kernel.org> L: oss-drivers@corigine.com -S: Maintained +S: Odd Fixes F: drivers/net/ethernet/netronome/ NETWORK BLOCK DEVICE (NBD) @@ -19563,8 +19603,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git F: drivers/pinctrl/intel/ PIN CONTROLLER - KEEMBAY -M: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com> -S: Supported +S: Orphan F: drivers/pinctrl/pinctrl-keembay* PIN CONTROLLER - MEDIATEK @@ -20117,21 +20156,15 @@ S: Supported F: Documentation/devicetree/bindings/soc/qcom/qcom,apr* F: Documentation/devicetree/bindings/sound/qcom,* F: drivers/soc/qcom/apr.c -F: include/dt-bindings/sound/qcom,wcd9335.h -F: include/dt-bindings/sound/qcom,wcd934x.h -F: sound/soc/codecs/lpass-rx-macro.* -F: sound/soc/codecs/lpass-tx-macro.* -F: sound/soc/codecs/lpass-va-macro.c -F: sound/soc/codecs/lpass-wsa-macro.* +F: drivers/soundwire/qcom.c +F: include/dt-bindings/sound/qcom,wcd93* +F: sound/soc/codecs/lpass-*.* F: sound/soc/codecs/msm8916-wcd-analog.c F: sound/soc/codecs/msm8916-wcd-digital.c F: sound/soc/codecs/wcd-clsh-v2.* F: sound/soc/codecs/wcd-mbhc-v2.* -F: sound/soc/codecs/wcd9335.* -F: sound/soc/codecs/wcd934x.c -F: sound/soc/codecs/wsa881x.c -F: sound/soc/codecs/wsa883x.c -F: sound/soc/codecs/wsa884x.c +F: sound/soc/codecs/wcd93*.* +F: sound/soc/codecs/wsa88*.* F: sound/soc/qcom/ QCOM EMBEDDED USB DEBUGGER (EUD) @@ -21162,7 +21195,7 @@ M: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml +F: Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c RENESAS RZ/V2H(P) USB2PHY PORT RESET DRIVER @@ -21374,7 +21407,7 @@ N: spacemit K: spacemit RISC-V THEAD SoC SUPPORT -M: Drew Fustini <drew@pdp7.com> +M: Drew Fustini <fustini@kernel.org> M: Guo Ren <guoren@kernel.org> M: Fu Wei <wefu@redhat.com> L: linux-riscv@lists.infradead.org @@ -22550,9 +22583,11 @@ S: Maintained F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS +M: D. Wythe <alibuda@linux.alibaba.com> +M: Dust Li <dust.li@linux.alibaba.com> +M: Sidraya Jayagond <sidraya@linux.ibm.com> M: Wenjia Zhang <wenjia@linux.ibm.com> -M: Jan Karcher <jaka@linux.ibm.com> -R: D. Wythe <alibuda@linux.alibaba.com> +R: Mahanta Jambigi <mjambigi@linux.ibm.com> R: Tony Lu <tonylu@linux.alibaba.com> R: Wen Gu <guwen@linux.alibaba.com> L: linux-rdma@vger.kernel.org @@ -24063,6 +24098,7 @@ M: Bin Du <bin.du@amd.com> L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-designware-amdisp.c +F: include/linux/soc/amd/isp4_misc.h SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER M: Jaehoon Chung <jh80.chung@samsung.com> @@ -25027,8 +25063,11 @@ M: Hugh Dickins <hughd@google.com> R: Baolin Wang <baolin.wang@linux.alibaba.com> L: linux-mm@kvack.org S: Maintained +F: include/linux/memfd.h F: include/linux/shmem_fs.h +F: mm/memfd.c F: mm/shmem.c +F: mm/shmem_quota.c TOMOYO SECURITY MODULE M: Kentaro Takeda <takedakn@nttdata.co.jp> @@ -26900,7 +26939,7 @@ F: arch/x86/kernel/stacktrace.c F: arch/x86/kernel/unwind_*.c X86 TRUST DOMAIN EXTENSIONS (TDX) -M: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> +M: Kirill A. Shutemov <kas@kernel.org> R: Dave Hansen <dave.hansen@linux.intel.com> L: x86@kernel.org L: linux-coco@lists.linux.dev @@ -27269,13 +27308,6 @@ S: Supported W: http://www.marvell.com F: drivers/i2c/busses/i2c-xlp9xx.c -XRA1403 GPIO EXPANDER -M: Nandor Han <nandor.han@ge.com> -L: linux-gpio@vger.kernel.org -S: Maintained -F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt -F: drivers/gpio/gpio-xra1403.c - XTENSA XTFPGA PLATFORM SUPPORT M: Max Filippov <jcmvbkbc@gmail.com> S: Maintained @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..393d71124f5d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -256,6 +256,7 @@ config ARM64 select HOTPLUG_SMT if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING + select JUMP_LABEL select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MODULES_USE_ELF_RELA diff --git a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi index 3df2fd3993b5..9740fbf200f0 100644 --- a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi +++ b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi @@ -20,8 +20,6 @@ compatible = "jedec,spi-nor"; reg = <0x0>; spi-max-frequency = <25000000>; - #address-cells = <1>; - #size-cells = <1>; partitions { compatible = "fixed-partitions"; diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts index e2d9439397f7..5b3c42e9f0e6 100644 --- a/arch/arm64/boot/dts/apple/t8103-j293.dts +++ b/arch/arm64/boot/dts/apple/t8103-j293.dts @@ -100,6 +100,8 @@ &displaydfr_mipi { status = "okay"; + #address-cells = <1>; + #size-cells = <0>; dfr_panel: panel@0 { compatible = "apple,j293-summit", "apple,summit"; diff --git a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi index 8e82231acab5..0c8206156bfe 100644 --- a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi +++ b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi @@ -71,7 +71,7 @@ */ &port00 { bus-range = <1 1>; - wifi0: network@0,0 { + wifi0: wifi@0,0 { compatible = "pci14e4,4425"; reg = <0x10000 0x0 0x0 0x0 0x0>; /* To be filled by the loader */ diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index 20faf0c0d809..3a204845b85b 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -405,8 +405,6 @@ compatible = "apple,t8103-display-pipe-mipi", "apple,h7-display-pipe-mipi"; reg = <0x2 0x28600000 0x0 0x100000>; power-domains = <&ps_mipi_dsi>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts index be86d34c6696..fb8ad7d4c65a 100644 --- a/arch/arm64/boot/dts/apple/t8112-j493.dts +++ b/arch/arm64/boot/dts/apple/t8112-j493.dts @@ -63,6 +63,8 @@ &displaydfr_mipi { status = "okay"; + #address-cells = <1>; + #size-cells = <0>; dfr_panel: panel@0 { compatible = "apple,j493-summit", "apple,summit"; diff --git a/arch/arm64/boot/dts/apple/t8112.dtsi b/arch/arm64/boot/dts/apple/t8112.dtsi index e95711d8337f..f68354194355 100644 --- a/arch/arm64/boot/dts/apple/t8112.dtsi +++ b/arch/arm64/boot/dts/apple/t8112.dtsi @@ -420,8 +420,6 @@ compatible = "apple,t8112-display-pipe-mipi", "apple,h7-display-pipe-mipi"; reg = <0x2 0x28600000 0x0 0x100000>; power-domains = <&ps_mipi_dsi>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 897fc686e6a9..7e04a2905ce4 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1573,6 +1573,7 @@ CONFIG_RESET_QCOM_AOSS=y CONFIG_RESET_QCOM_PDC=m CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_RESET_TI_SCI=y +CONFIG_PHY_SNPS_EUSB2=m CONFIG_PHY_XGENE=y CONFIG_PHY_CAN_TRANSCEIVER=m CONFIG_PHY_NXP_PTN3222=m @@ -1597,7 +1598,6 @@ CONFIG_PHY_QCOM_EDP=m CONFIG_PHY_QCOM_PCIE2=m CONFIG_PHY_QCOM_QMP=m CONFIG_PHY_QCOM_QUSB2=m -CONFIG_PHY_QCOM_SNPS_EUSB2=m CONFIG_PHY_QCOM_EUSB2_REPEATER=m CONFIG_PHY_QCOM_M31_USB=m CONFIG_PHY_QCOM_USB_HS=m diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ba5df0df02a4..9f38340d24c2 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -287,17 +287,6 @@ .Lskip_fgt2_\@: .endm -.macro __init_el2_gcs - mrs_s x1, SYS_ID_AA64PFR1_EL1 - ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 - cbz x1, .Lskip_gcs_\@ - - /* Ensure GCS is not enabled when we start trying to do BLs */ - msr_s SYS_GCSCR_EL1, xzr - msr_s SYS_GCSCRE0_EL1, xzr -.Lskip_gcs_\@: -.endm - /** * Initialize EL2 registers to sane values. This should be called early on all * cores that were booted in EL2. Note that everything gets initialised as @@ -319,7 +308,6 @@ __init_el2_cptr __init_el2_fgt __init_el2_fgt2 - __init_el2_gcs .endm #ifndef __KVM_NVHE_HYPERVISOR__ @@ -371,6 +359,13 @@ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 .Lskip_mpam_\@: + check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2 + +.Linit_gcs_\@: + msr_s SYS_GCSCR_EL1, xzr + msr_s SYS_GCSCRE0_EL1, xzr + +.Lskip_gcs_\@: check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 .Linit_sve_\@: /* SVE register access */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d27079968341..3e41a880b062 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1480,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range); /* Guest/host FPSIMD coordination helpers */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2920b0a51403..a2faf0049dab 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ syscall.o proton-pack.o idle.o patching.o pi/ \ - rsi.o + rsi.o jump_label.o obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o @@ -47,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o -obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b34044e20128..e151585c6cca 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3135,6 +3135,13 @@ static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope) } #endif +#ifdef CONFIG_ARM64_SME +static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope) +{ + return system_supports_sme() && has_user_cpuid_feature(cap, scope); +} +#endif + static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL), HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES), @@ -3223,31 +3230,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), - HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), - HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), - HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), - HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), - HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), - HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), #endif /* CONFIG_ARM64_SME */ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3857fd7ee8d4..62230d6dd919 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -15,6 +15,7 @@ #include <asm/efi.h> #include <asm/stacktrace.h> +#include <asm/vmap_stack.h> static bool region_is_misaligned(const efi_memory_desc_t *md) { @@ -214,9 +215,13 @@ static int __init arm64_efi_rt_init(void) if (!efi_enabled(EFI_RUNTIME_SERVICES)) return 0; - p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, - NUMA_NO_NODE, &&l); -l: if (!p) { + if (!IS_ENABLED(CONFIG_VMAP_STACK)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return -ENOMEM; + } + + p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE); + if (!p) { pr_warn("Failed to allocate EFI runtime stack\n"); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return -ENOMEM; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 5954cec19660..08b7042a2e2d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -673,6 +673,11 @@ static void permission_overlay_switch(struct task_struct *next) current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); if (current->thread.por_el0 != next->thread.por_el0) { write_sysreg_s(next->thread.por_el0, SYS_POR_EL0); + /* + * No ISB required as we can tolerate spurious Overlay faults - + * the fault handler will check again based on the new value + * of POR_EL0. + */ } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b3f6b56e733..21a795303568 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1143,7 +1143,7 @@ static inline unsigned int num_other_online_cpus(void) void smp_send_stop(void) { static unsigned long stop_in_progress; - cpumask_t mask; + static cpumask_t mask; unsigned long timeout; /* diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 38a91bb5d4c7..23dd3f3fc3eb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -825,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (!kvm_arm_vcpu_is_finalized(vcpu)) return -EPERM; - ret = kvm_arch_vcpu_run_map_fp(vcpu); - if (ret) - return ret; - if (likely(vcpu_has_run_once(vcpu))) return 0; @@ -2129,7 +2125,7 @@ static void cpu_hyp_init(void *discard) static void cpu_hyp_uninit(void *discard) { - if (__this_cpu_read(kvm_hyp_initialized)) { + if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) { cpu_hyp_reset(); __this_cpu_write(kvm_hyp_initialized, 0); } @@ -2345,8 +2341,13 @@ static void __init teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { + if (per_cpu(kvm_hyp_initialized, cpu)) + continue; + free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT); - free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + + if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]) + continue; if (free_sve) { struct cpu_sve_state *sve_state; @@ -2354,6 +2355,9 @@ static void __init teardown_hyp_mode(void) sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state; free_pages((unsigned long) sve_state, pkvm_host_sve_state_order()); } + + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + } } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 8f6c8f57c6b9..15e17aca1dec 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -15,32 +15,6 @@ #include <asm/sysreg.h> /* - * Called on entry to KVM_RUN unless this vcpu previously ran at least - * once and the most recent prior KVM_RUN for this vcpu was called from - * the same task as current (highly likely). - * - * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu), - * such that on entering hyp the relevant parts of current are already - * mapped. - */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) -{ - struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; - int ret; - - /* pKVM has its own tracking of the host fpsimd state. */ - if (is_protected_kvm_enabled()) - return 0; - - /* Make sure the host task fpsimd state is visible to hyp: */ - ret = kvm_share_hyp(fpsimd, fpsimd + 1); - if (ret) - return ret; - - return 0; -} - -/* * Prepare vcpu for saving the host's FPSIMD state and loading the guest's. * The actual loading is done by the FPSIMD access trap taken to hyp. * diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 95d7534c9679..8957734d6183 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; + u64 granule; s8 level; int ret; @@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return -EPERM; } - do { - u64 granule = kvm_granule_size(level); + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) { + if (!kvm_level_supports_block_mapping(level)) + continue; + granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - level++; - } while ((level <= KVM_PGTABLE_LAST_LEVEL) && - !(kvm_level_supports_block_mapping(level) && - range_included(&cur, range))); + if (!range_included(&cur, range)) + continue; + *range = cur; + return 0; + } - *range = cur; + WARN_ON(1); - return 0; + return -EINVAL; } int host_stage2_idmap_locked(phys_addr_t addr, u64 size, diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 5b191f4dc566..dc1d26559bfa 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) } } +#define has_tgran_2(__r, __sz) \ + ({ \ + u64 _s1, _s2, _mmfr0 = __r; \ + \ + _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz##_2, _mmfr0); \ + \ + _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz, _mmfr0); \ + \ + ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \ + _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \ + (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \ + _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \ + }) /* * Our emulated CPU doesn't support all the possible features. For the * sake of simplicity (and probably mental sanity), wipe out a number @@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) */ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) { + u64 orig_val = val; + switch (reg) { case SYS_ID_AA64ISAR0_EL1: /* Support everything but TME */ @@ -1480,13 +1497,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) */ switch (PAGE_SIZE) { case SZ_4K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); + if (has_tgran_2(orig_val, 4)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); fallthrough; case SZ_16K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); + if (has_tgran_2(orig_val, 16)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); fallthrough; case SZ_64K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); + if (has_tgran_2(orig_val, 64)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); break; } diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index a50fb7e6841f..679aafe77de2 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -401,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu) { bool level; - level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En; - if (level) - level &= vgic_v3_get_misr(vcpu); + level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu); kvm_vgic_inject_irq(vcpu->kvm, vcpu, vcpu->kvm->arch.vgic.mi_intid, level, vcpu); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ec0a337891dd..11eb8d1adc84 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr, } } -static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, - unsigned int mm_flags) +static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags) { - unsigned long iss2 = ESR_ELx_ISS2(esr); - if (!system_supports_poe()) return false; - if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay)) - return true; - + /* + * We do not check whether an Overlay fault has occurred because we + * cannot make a decision based solely on its value: + * + * - If Overlay is set, a fault did occur due to POE, but it may be + * spurious in those cases where we update POR_EL0 without ISB (e.g. + * on context-switch). We would then need to manually check POR_EL0 + * against vma_pkey(vma), which is exactly what + * arch_vma_access_permitted() does. + * + * - If Overlay is not set, we may still need to report a pkey fault. + * This is the case if an access was made within a mapping but with no + * page mapped, and POR_EL0 forbids the access (according to + * vma_pkey()). Such access will result in a SIGSEGV regardless + * because core code checks arch_vma_access_permitted(), but in order + * to report the correct error code - SEGV_PKUERR - we must handle + * that case here. + */ return !arch_vma_access_permitted(vma, mm_flags & FAULT_FLAG_WRITE, mm_flags & FAULT_FLAG_INSTRUCTION, @@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); vma_end_read(vma); fault = 0; @@ -679,7 +691,7 @@ retry: goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); mmap_read_unlock(mm); fault = 0; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 80d470aa469d..54dccfd6aa11 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -518,7 +518,6 @@ alternative_else_nop_endif msr REG_PIR_EL1, x0 orr tcr2, tcr2, TCR2_EL1_PIE - msr REG_TCR2_EL1, x0 .Lskip_indirection: diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index fe198b473f84..e739dbc6329d 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -18,12 +18,12 @@ /* * This gives the physical RAM offset. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef PHYS_OFFSET #define PHYS_OFFSET _UL(0) #endif extern unsigned long vm_map_base; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifndef IO_BASE #define IO_BASE CSR_DMW0_BASE @@ -66,7 +66,7 @@ extern unsigned long vm_map_base; #define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ATYPE_ #define _ATYPE32_ #define _ATYPE64_ @@ -85,7 +85,7 @@ extern unsigned long vm_map_base; /* * 32/64-bit LoongArch address spaces */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ACAST32_ #define _ACAST64_ #else diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h index ff3d10ac393f..7dc29bd9b2f0 100644 --- a/arch/loongarch/include/asm/alternative-asm.h +++ b/arch/loongarch/include/asm/alternative-asm.h @@ -2,7 +2,7 @@ #ifndef _ASM_ALTERNATIVE_ASM_H #define _ASM_ALTERNATIVE_ASM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/asm.h> @@ -77,6 +77,6 @@ .previous .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h index cee7b29785ab..b5bae21fb3c8 100644 --- a/arch/loongarch/include/asm/alternative.h +++ b/arch/loongarch/include/asm/alternative.h @@ -2,7 +2,7 @@ #ifndef _ASM_ALTERNATIVE_H #define _ASM_ALTERNATIVE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/stddef.h> @@ -106,6 +106,6 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ (asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ALTERNATIVE_H */ diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index df05005f2b80..d60bdf2e6377 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -7,7 +7,7 @@ #define EX_TYPE_UACCESS_ERR_ZERO 2 #define EX_TYPE_BPF 3 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ @@ -22,7 +22,7 @@ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) .endm -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #include <linux/bits.h> #include <linux/stringify.h> @@ -60,6 +60,6 @@ #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/asm.h b/arch/loongarch/include/asm/asm.h index f591b3245def..f018d26fc995 100644 --- a/arch/loongarch/include/asm/asm.h +++ b/arch/loongarch/include/asm/asm.h @@ -110,7 +110,7 @@ #define LONG_SRA srai.w #define LONG_SRAV sra.w -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .word #endif #define LONGSIZE 4 @@ -131,7 +131,7 @@ #define LONG_SRA srai.d #define LONG_SRAV sra.d -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .dword #endif #define LONGSIZE 8 @@ -158,7 +158,7 @@ #define PTR_SCALESHIFT 2 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define PTR .word #endif #define PTRSIZE 4 @@ -181,7 +181,7 @@ #define PTR_SCALESHIFT 3 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define PTR .dword #endif #define PTRSIZE 8 diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 98cf4d7b4b0a..dfb982fe8701 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -46,7 +46,7 @@ #define PRID_PRODUCT_MASK 0x0fff -#if !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLER__) enum cpu_type_enum { CPU_UNKNOWN, @@ -55,7 +55,7 @@ enum cpu_type_enum { CPU_LAST }; -#endif /* !__ASSEMBLY */ +#endif /* !__ASSEMBLER__ */ /* * ISA Level encodings diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 6e0a99763a9a..f4caaf764f9e 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -14,7 +14,7 @@ #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef CONFIG_DYNAMIC_FTRACE @@ -84,7 +84,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h index 996038da806d..af95b941f48b 100644 --- a/arch/loongarch/include/asm/gpr-num.h +++ b/arch/loongarch/include/asm/gpr-num.h @@ -2,7 +2,7 @@ #ifndef __ASM_GPR_NUM_H #define __ASM_GPR_NUM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .equ .L__gpr_num_zero, 0 .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 @@ -25,7 +25,7 @@ .equ .L__gpr_num_$s\num, 23 + \num .endr -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __DEFINE_ASM_GPR_NUMS \ " .equ .L__gpr_num_zero, 0\n" \ @@ -47,6 +47,6 @@ " .equ .L__gpr_num_$s\\num, 23 + \\num\n" \ " .endr\n" \ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_GPR_NUM_H */ diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h index 003172b8406b..620163628a7f 100644 --- a/arch/loongarch/include/asm/irqflags.h +++ b/arch/loongarch/include/asm/irqflags.h @@ -5,7 +5,7 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/stringify.h> @@ -80,6 +80,6 @@ static inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } -#endif /* #ifndef __ASSEMBLY__ */ +#endif /* #ifndef __ASSEMBLER__ */ #endif /* _ASM_IRQFLAGS_H */ diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h index 8a924bd69d19..4000c7603d8e 100644 --- a/arch/loongarch/include/asm/jump_label.h +++ b/arch/loongarch/include/asm/jump_label.h @@ -7,7 +7,7 @@ #ifndef __ASM_JUMP_LABEL_H #define __ASM_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -50,5 +50,5 @@ l_yes: return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index 7f52bd31b9d4..62f139a9c87d 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/linkage.h> #include <linux/mmzone.h> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index d84dac88a584..a0994d226eff 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -9,15 +9,15 @@ #include <linux/linkage.h> #include <linux/types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <larchintrin.h> /* CPUCFG */ #define read_cpucfg(reg) __cpucfg(reg) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* LoongArch Registers */ #define REG_ZERO 0x0 @@ -53,7 +53,7 @@ #define REG_S7 0x1e #define REG_S8 0x1f -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Bit fields for CPUCFG registers */ #define LOONGARCH_CPUCFG0 0x0 @@ -171,7 +171,7 @@ * SW emulation for KVM hypervirsor, see arch/loongarch/include/uapi/asm/kvm_para.h */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* CSR */ #define csr_read32(reg) __csrrd_w(reg) @@ -187,7 +187,7 @@ #define iocsr_write32(val, reg) __iocsrwr_w(val, reg) #define iocsr_write64(val, reg) __iocsrwr_d(val, reg) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* CSR register number */ @@ -1195,7 +1195,7 @@ #define LOONGARCH_IOCSR_EXTIOI_ROUTE_BASE 0x1c00 #define IOCSR_EXTIOI_VECTOR_NUM 256 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static __always_inline u64 drdtime(void) { @@ -1357,7 +1357,7 @@ __BUILD_CSR_OP(tlbidx) #define clear_csr_estat(val) \ csr_xchg32(~(val), val, LOONGARCH_CSR_ESTAT) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Generic EntryLo bit definitions */ #define ENTRYLO_V (_ULCAST_(1) << 0) diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h index caf1f71a1057..d5fa98d1d177 100644 --- a/arch/loongarch/include/asm/orc_types.h +++ b/arch/loongarch/include/asm/orc_types.h @@ -34,7 +34,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -53,6 +53,6 @@ struct orc_entry { unsigned int type:3; unsigned int signal:1; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 7368f12b7cb1..a3aaf34fba16 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -15,7 +15,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/kernel.h> #include <linux/pfn.h> @@ -110,6 +110,6 @@ extern int __virt_addr_valid(volatile void *kaddr); #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PAGE_H */ diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 45bfc65a0c9f..7bbfb04a54cc 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -92,7 +92,7 @@ #define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ _PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC) @@ -127,6 +127,6 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PGTABLE_BITS_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index b30185302c07..f2aeff544cee 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -55,7 +55,7 @@ #define USER_PTRS_PER_PGD ((TASK_SIZE64 / PGDIR_SIZE)?(TASK_SIZE64 / PGDIR_SIZE):1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm_types.h> #include <linux/mmzone.h> @@ -618,6 +618,6 @@ static inline long pmd_protnone(pmd_t pmd) #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PGTABLE_H */ diff --git a/arch/loongarch/include/asm/prefetch.h b/arch/loongarch/include/asm/prefetch.h index 1672262a5e2e..0b168cdaae9a 100644 --- a/arch/loongarch/include/asm/prefetch.h +++ b/arch/loongarch/include/asm/prefetch.h @@ -8,7 +8,7 @@ #define Pref_Load 0 #define Pref_Store 8 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro __pref hint addr #ifdef CONFIG_CPU_HAS_PREFETCH diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index ad0bd234a0f1..3a47f52959a8 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -39,7 +39,7 @@ int loongson_cpu_disable(void); void loongson_cpu_die(unsigned int cpu); #endif -static inline void plat_smp_setup(void) +static inline void __init plat_smp_setup(void) { loongson_smp_setup(); } diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 4f5a9441754e..9dfa2ef00816 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -10,7 +10,7 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> @@ -53,7 +53,7 @@ static inline struct thread_info *current_thread_info(void) register unsigned long current_stack_pointer __asm__("$sp"); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* thread information allocation */ #define THREAD_SIZE SZ_16K diff --git a/arch/loongarch/include/asm/types.h b/arch/loongarch/include/asm/types.h index baf15a0dcf8b..0edd731f3d6a 100644 --- a/arch/loongarch/include/asm/types.h +++ b/arch/loongarch/include/asm/types.h @@ -8,7 +8,7 @@ #include <asm-generic/int-ll64.h> #include <uapi/asm/types.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ULCAST_ #define _U64CAST_ #else diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h index 2c68bc72736c..16c7f7e465a0 100644 --- a/arch/loongarch/include/asm/unwind_hints.h +++ b/arch/loongarch/include/asm/unwind_hints.h @@ -5,7 +5,7 @@ #include <linux/objtool.h> #include <asm/orc_types.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro UNWIND_HINT_UNDEFINED UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED @@ -23,7 +23,7 @@ UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL .endm -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #define UNWIND_HINT_SAVE \ UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0) @@ -31,6 +31,6 @@ #define UNWIND_HINT_RESTORE \ UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */ diff --git a/arch/loongarch/include/asm/vdso/arch_data.h b/arch/loongarch/include/asm/vdso/arch_data.h index 322d0a5f1c84..395ec223bcbe 100644 --- a/arch/loongarch/include/asm/vdso/arch_data.h +++ b/arch/loongarch/include/asm/vdso/arch_data.h @@ -7,7 +7,7 @@ #ifndef _VDSO_ARCH_DATA_H #define _VDSO_ARCH_DATA_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/vdso.h> @@ -20,6 +20,6 @@ struct vdso_arch_data { struct vdso_pcpu_data pdata[NR_CPUS]; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/loongarch/include/asm/vdso/getrandom.h b/arch/loongarch/include/asm/vdso/getrandom.h index a81724b69f29..2ff05003c6e7 100644 --- a/arch/loongarch/include/asm/vdso/getrandom.h +++ b/arch/loongarch/include/asm/vdso/getrandom.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_GETRANDOM_H #define __ASM_VDSO_GETRANDOM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> #include <asm/vdso/vdso.h> @@ -28,6 +28,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns return ret; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h index f15503e3336c..dcafabca9bb6 100644 --- a/arch/loongarch/include/asm/vdso/gettimeofday.h +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -7,7 +7,7 @@ #ifndef __ASM_VDSO_GETTIMEOFDAY_H #define __ASM_VDSO_GETTIMEOFDAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> #include <asm/vdso/vdso.h> @@ -89,6 +89,6 @@ static inline bool loongarch_vdso_hres_capable(void) } #define __arch_vdso_hres_capable loongarch_vdso_hres_capable -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/loongarch/include/asm/vdso/processor.h b/arch/loongarch/include/asm/vdso/processor.h index ef5770b343a0..1e255373b0b8 100644 --- a/arch/loongarch/include/asm/vdso/processor.h +++ b/arch/loongarch/include/asm/vdso/processor.h @@ -5,10 +5,10 @@ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define cpu_relax() barrier() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h index 50c65fb29daf..04bd2d452876 100644 --- a/arch/loongarch/include/asm/vdso/vdso.h +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -7,7 +7,7 @@ #ifndef _ASM_VDSO_VDSO_H #define _ASM_VDSO_VDSO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/page.h> @@ -16,6 +16,6 @@ #define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h index 1140b54b4bc8..558eb9dfda52 100644 --- a/arch/loongarch/include/asm/vdso/vsyscall.h +++ b/arch/loongarch/include/asm/vdso/vsyscall.h @@ -2,13 +2,13 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <vdso/datapage.h> /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index a54cd6fd3796..1367ca759468 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/acpi.h> #include <linux/efi-bgrt.h> +#include <linux/export.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/memblock.h> diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c index 4ad13847e962..0e0c766df1e3 100644 --- a/arch/loongarch/kernel/alternative.c +++ b/arch/loongarch/kernel/alternative.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/export.h> #include <linux/mm.h> #include <linux/module.h> #include <asm/alternative.h> diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index de21e72759ee..860a3bc030e0 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -144,6 +144,18 @@ void __init efi_init(void) if (efi_memmap_init_early(&data) < 0) panic("Unable to map EFI memory map.\n"); + /* + * Reserve the physical memory region occupied by the EFI + * memory map table (header + descriptors). This is crucial + * for kdump, as the kdump kernel relies on this original + * memmap passed by the bootloader. Without reservation, + * this region could be overwritten by the primary kernel. + * Also, set the EFI_PRESERVE_BS_REGIONS flag to indicate that + * critical boot services code/data regions like this are preserved. + */ + memblock_reserve((phys_addr_t)boot_memmap, sizeof(*tbl) + data.size); + set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags); + early_memunmap(tbl, sizeof(*tbl)); } diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c index 0fa81ced28dc..3d98c6aa00db 100644 --- a/arch/loongarch/kernel/elf.c +++ b/arch/loongarch/kernel/elf.c @@ -6,7 +6,6 @@ #include <linux/binfmts.h> #include <linux/elf.h> -#include <linux/export.h> #include <linux/sched.h> #include <asm/cpu-features.h> diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index 4c476904227f..141b49bd989c 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -4,6 +4,7 @@ */ #include <linux/cpu.h> +#include <linux/export.h> #include <linux/init.h> #include <asm/fpu.h> #include <asm/smp.h> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index e5a39bbad078..b1b51f920b23 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/export.h> #include <linux/types.h> #include <linux/interrupt.h> #include <linux/irq_work.h> diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index bc75a3a69fc8..367906b10f81 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -102,7 +102,7 @@ static int constant_timer_next_event(unsigned long delta, struct clock_event_dev return 0; } -static unsigned long __init get_loops_per_jiffy(void) +static unsigned long get_loops_per_jiffy(void) { unsigned long lpj = (unsigned long)const_clock_freq; diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 47fc2de6d150..3d9be6ca7ec5 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/kexec.h> #include <linux/module.h> +#include <linux/export.h> #include <linux/extable.h> #include <linux/mm.h> #include <linux/sched/mm.h> diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c index 98379b7d4147..08d7951b2f60 100644 --- a/arch/loongarch/kernel/unwind_guess.c +++ b/arch/loongarch/kernel/unwind_guess.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include <asm/unwind.h> +#include <linux/export.h> unsigned long unwind_get_return_address(struct unwind_state *state) { diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index d623935a7547..0005be49b056 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/objtool.h> +#include <linux/export.h> #include <linux/module.h> +#include <linux/objtool.h> #include <linux/sort.h> #include <asm/exception.h> #include <asm/orc_header.h> diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 929ae240280a..729e775bd40d 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include <linux/cpumask.h> +#include <linux/export.h> #include <linux/ftrace.h> #include <linux/kallsyms.h> diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index f39929d7bf8a..a75f865d6fb9 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -9,7 +9,8 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s) { - int ipnum, cpu, irq_index, irq_mask, irq; + int ipnum, cpu, cpuid, irq_index, irq_mask, irq; + struct kvm_vcpu *vcpu; for (irq = 0; irq < EIOINTC_IRQS; irq++) { ipnum = s->ipmap.reg_u8[irq / 32]; @@ -20,7 +21,12 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s) irq_index = irq / 32; irq_mask = BIT(irq & 0x1f); - cpu = s->coremap.reg_u8[irq]; + cpuid = s->coremap.reg_u8[irq]; + vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); + if (!vcpu) + continue; + + cpu = vcpu->vcpu_id; if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask)) set_bit(irq, s->sw_coreisr[cpu][ipnum]); else @@ -66,20 +72,25 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level) } static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s, - int irq, void *pvalue, u32 len, bool notify) + int irq, u64 val, u32 len, bool notify) { - int i, cpu; - u64 val = *(u64 *)pvalue; + int i, cpu, cpuid; + struct kvm_vcpu *vcpu; for (i = 0; i < len; i++) { - cpu = val & 0xff; + cpuid = val & 0xff; val = val >> 8; if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) { - cpu = ffs(cpu) - 1; - cpu = (cpu >= 4) ? 0 : cpu; + cpuid = ffs(cpuid) - 1; + cpuid = (cpuid >= 4) ? 0 : cpuid; } + vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); + if (!vcpu) + continue; + + cpu = vcpu->vcpu_id; if (s->sw_coremap[irq + i] == cpu) continue; @@ -305,6 +316,11 @@ static int kvm_eiointc_read(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + vcpu->kvm->stat.eiointc_read_exits++; spin_lock_irqsave(&eiointc->lock, flags); switch (len) { @@ -398,7 +414,7 @@ static int loongarch_eiointc_writeb(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq; s->coremap.reg_u8[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -436,17 +452,16 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, break; case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: index = (offset - EIOINTC_ENABLE_START) >> 1; - old_data = s->enable.reg_u32[index]; + old_data = s->enable.reg_u16[index]; s->enable.reg_u16[index] = data; /* * 1: enable irq. * update irq when isr is set. */ data = s->enable.reg_u16[index] & ~old_data & s->isr.reg_u16[index]; - index = index << 1; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 1); } /* * 0: disable irq. @@ -455,7 +470,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u16[index] & old_data & s->isr.reg_u16[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -484,7 +499,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 1; s->coremap.reg_u16[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -529,10 +544,9 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, * update irq when isr is set. */ data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index]; - index = index << 2; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 1); } /* * 0: disable irq. @@ -541,7 +555,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -570,7 +584,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 2; s->coremap.reg_u32[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -615,10 +629,9 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, * update irq when isr is set. */ data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index]; - index = index << 3; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 1); } /* * 0: disable irq. @@ -627,7 +640,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -656,7 +669,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 3; s->coremap.reg_u64[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -679,6 +692,11 @@ static int kvm_eiointc_write(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + vcpu->kvm->stat.eiointc_write_exits++; spin_lock_irqsave(&eiointc->lock, flags); switch (len) { @@ -787,7 +805,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, int ret = 0; unsigned long flags; unsigned long type = (unsigned long)attr->attr; - u32 i, start_irq; + u32 i, start_irq, val; void __user *data; struct loongarch_eiointc *s = dev->kvm->arch.eiointc; @@ -795,8 +813,14 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, spin_lock_irqsave(&s->lock, flags); switch (type) { case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: - if (copy_from_user(&s->num_cpu, data, 4)) + if (copy_from_user(&val, data, 4)) ret = -EFAULT; + else { + if (val >= EIOINTC_ROUTE_MAX_VCPUS) + ret = -EINVAL; + else + s->num_cpu = val; + } break; case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE: if (copy_from_user(&s->features, data, 4)) @@ -809,7 +833,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, for (i = 0; i < (EIOINTC_IRQS / 4); i++) { start_irq = i * 4; eiointc_update_sw_coremap(s, start_irq, - (void *)&s->coremap.reg_u32[i], sizeof(u32), false); + s->coremap.reg_u32[i], sizeof(u32), false); } break; default: @@ -824,7 +848,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, bool is_write) { - int addr, cpuid, offset, ret = 0; + int addr, cpu, offset, ret = 0; unsigned long flags; void *p = NULL; void __user *data; @@ -832,7 +856,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, s = dev->kvm->arch.eiointc; addr = attr->attr; - cpuid = addr >> 16; + cpu = addr >> 16; addr &= 0xffff; data = (void __user *)attr->addr; switch (addr) { @@ -857,8 +881,11 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, p = &s->isr.reg_u32[offset]; break; case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + if (cpu >= s->num_cpu) + return -EINVAL; + offset = (addr - EIOINTC_COREISR_START) / 4; - p = &s->coreisr.reg_u32[cpuid][offset]; + p = &s->coreisr.reg_u32[cpu][offset]; break; case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: offset = (addr - EIOINTC_COREMAP_START) / 4; @@ -899,9 +926,15 @@ static int kvm_eiointc_sw_status_access(struct kvm_device *dev, data = (void __user *)attr->addr; switch (addr) { case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU: + if (is_write) + return ret; + p = &s->num_cpu; break; case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE: + if (is_write) + return ret; + p = &s->features; break; case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE: diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c index b37cd8537b45..db22c2ec55e2 100644 --- a/arch/loongarch/lib/crc32-loongarch.c +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -11,6 +11,7 @@ #include <asm/cpu-features.h> #include <linux/crc32.h> +#include <linux/export.h> #include <linux/module.h> #include <linux/unaligned.h> diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c index df309ae4045d..bcc9d01d8c41 100644 --- a/arch/loongarch/lib/csum.c +++ b/arch/loongarch/lib/csum.c @@ -2,6 +2,7 @@ // Copyright (C) 2019-2020 Arm Ltd. #include <linux/compiler.h> +#include <linux/export.h> #include <linux/kasan-checks.h> #include <linux/kernel.h> diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index 70ca73019811..df949a3d0f34 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -16,12 +16,12 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } -void *early_memremap_ro(resource_size_t phys_addr, unsigned long size) +void * __init early_memremap_ro(resource_size_t phys_addr, unsigned long size) { return early_memremap(phys_addr, size); } -void *early_memremap_prot(resource_size_t phys_addr, unsigned long size, +void * __init early_memremap_prot(resource_size_t phys_addr, unsigned long size, unsigned long prot_val) { return early_memremap(phys_addr, size); diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index 2726639150bc..5bc9627a6cf9 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -3,7 +3,6 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include <linux/kernel.h> -#include <linux/export.h> #include <linux/init.h> #include <linux/acpi.h> #include <linux/types.h> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 36061f4732b7..d71ea0f4466f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -63,7 +63,8 @@ config RISCV select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_CFI_CLANG + # clang >= 17: https://github.com/llvm/llvm-project/commit/62fa708ceb027713b386c7e0efda994f8bdc27e2 + select ARCH_SUPPORTS_CFI_CLANG if CLANG_VERSION >= 170000 select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS if MMU diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 438ce7df24c3..5bd5aae60d53 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -1075,7 +1075,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) */ #ifdef CONFIG_64BIT #define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) -#define TASK_SIZE_MAX LONG_MAX #ifdef CONFIG_COMPAT #define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h index 451fd76b8811..d766e2b9e6df 100644 --- a/arch/riscv/include/asm/runtime-const.h +++ b/arch/riscv/include/asm/runtime-const.h @@ -206,7 +206,7 @@ static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, u addi_insn_mask &= 0x07fff; } - if (lower_immediate & 0x00000fff) { + if (lower_immediate & 0x00000fff || lui_insn == RISCV_INSN_NOP4) { /* replace upper 12 bits of addi with lower 12 bits of val */ addi_insn &= addi_insn_mask; addi_insn |= (lower_immediate & 0x00000fff) << 20; diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index d472da4450e6..525e50db24f7 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -127,6 +127,7 @@ do { \ #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __get_user_8(x, ptr, label) \ +do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u32 __lo, __hi; \ asm_goto_output( \ @@ -141,7 +142,7 @@ do { \ : : label); \ (x) = (__typeof__(x))((__typeof__((x) - (x)))( \ (((u64)__hi << 32) | __lo))); \ - +} while (0) #else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ #define __get_user_8(x, ptr, label) \ do { \ diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h index 8dc92441702a..c6d66895c1f5 100644 --- a/arch/riscv/include/asm/vdso/getrandom.h +++ b/arch/riscv/include/asm/vdso/getrandom.h @@ -18,7 +18,7 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns register unsigned int flags asm("a2") = _flags; asm volatile ("ecall\n" - : "+r" (ret) + : "=r" (ret) : "r" (nr), "r" (buffer), "r" (len), "r" (flags) : "memory"); diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 45c9b426fcc5..b61786d43c20 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -205,11 +205,11 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, THEAD_VSETVLI_T4X0E8M8D1 THEAD_VSB_V_V0T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V8T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V16T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( @@ -241,11 +241,11 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ THEAD_VSETVLI_T4X0E8M8D1 THEAD_VLB_V_V0T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V8T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V16T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index e6fbaaf54956..87d655944803 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -18,10 +18,10 @@ const struct cpu_operations cpu_ops_sbi; /* * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can - * be invoked from multiple threads in parallel. Define a per cpu data + * be invoked from multiple threads in parallel. Define an array of boot data * to handle that. */ -static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); +static struct sbi_hart_boot_data boot_data[NR_CPUS]; static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, unsigned long priv) @@ -67,7 +67,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) unsigned long boot_addr = __pa_symbol(secondary_start_sbi); unsigned long hartid = cpuid_to_hartid_map(cpuid); unsigned long hsm_data; - struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid); + struct sbi_hart_boot_data *bdata = &boot_data[cpuid]; /* Make sure tidle is updated */ smp_mb(); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index f7c9a1caa83e..14888e5ea19a 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -50,6 +50,7 @@ atomic_t hart_lottery __section(".sdata") #endif ; unsigned long boot_cpu_hartid; +EXPORT_SYMBOL_GPL(boot_cpu_hartid); /* * Place kernel memory regions on the resource tree so that diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index dd8e4af6583f..93043924fe6c 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -454,7 +454,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) val.data_u64 = 0; if (user_mode(regs)) { - if (copy_from_user_nofault(&val, (u8 __user *)addr, len)) + if (copy_from_user(&val, (u8 __user *)addr, len)) return -1; } else { memcpy(&val, (u8 *)addr, len); @@ -555,7 +555,7 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) return -EOPNOTSUPP; if (user_mode(regs)) { - if (copy_to_user_nofault((u8 __user *)addr, &val, len)) + if (copy_to_user((u8 __user *)addr, &val, len)) return -1; } else { memcpy((u8 *)addr, &val, len); diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 7c15b0f4ee3b..c29ef12a63bb 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -30,7 +30,7 @@ SECTIONS *(.data .data.* .gnu.linkonce.d.*) *(.dynbss) *(.bss .bss.* .gnu.linkonce.b.*) - } + } :text .note : { *(.note.*) } :text :note diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c index 1411337dc1e6..8fcf67e8c07f 100644 --- a/arch/riscv/kernel/vendor_extensions/sifive.c +++ b/arch/riscv/kernel/vendor_extensions/sifive.c @@ -8,7 +8,7 @@ #include <linux/types.h> /* All SiFive vendor extensions supported in Linux */ -const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF), __RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ), __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD), diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index d229cbd2ba22..9b0d55be1239 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc) sctx->state[4] = SHA1_H4; sctx->count = 0; sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } @@ -60,6 +61,7 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 33711a29618c..6cbbf5e8555f 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -32,6 +32,7 @@ static int sha512_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } @@ -57,6 +58,7 @@ static int sha512_import(struct shash_desc *desc, const void *in) memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_512; + sctx->first_message_part = 0; return 0; } @@ -97,6 +99,7 @@ static int sha384_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62c0ab4a4b9d..0905fa99a31e 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -265,7 +265,7 @@ static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *r addr = kernel_stack_pointer(regs) + n * sizeof(long); if (!regs_within_kernel_stack(regs, addr)) return 0; - return READ_ONCE_NOCHECK(addr); + return READ_ONCE_NOCHECK(*(unsigned long *)addr); } /** diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 2fbee3887d13..d930416d4c90 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) case PCI_ERS_RESULT_CAN_RECOVER: case PCI_ERS_RESULT_RECOVERED: case PCI_ERS_RESULT_NEED_RESET: + case PCI_ERS_RESULT_NONE: return false; default: return true; @@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver) return false; if (!driver->err_handler->error_detected) return false; - if (!driver->err_handler->slot_reset) - return false; - if (!driver->err_handler->resume) - return false; return true; } @@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, struct zpci_dev *zdev = to_zpci(pdev); int rc; + /* The underlying device may have been disabled by the event */ + if (!zdev_enabled(zdev)) + return PCI_ERS_RESULT_NEED_RESET; + pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); rc = zpci_reset_load_store_blocked(zdev); if (rc) { @@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } - if (driver->err_handler->mmio_enabled) { + if (driver->err_handler->mmio_enabled) ers_res = driver->err_handler->mmio_enabled(pdev); - if (ers_result_indicates_abort(ers_res)) { - pr_info("%s: Automatic recovery failed after MMIO re-enable\n", - pci_name(pdev)); - return ers_res; - } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { - pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); - return ers_res; - } + else + ers_res = PCI_ERS_RESULT_NONE; + + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after MMIO re-enable\n", + pci_name(pdev)); + return ers_res; + } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { + pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); + return ers_res; } pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); @@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, return ers_res; } pdev->error_state = pci_channel_io_normal; - ers_res = driver->err_handler->slot_reset(pdev); + + if (driver->err_handler->slot_reset) + ers_res = driver->err_handler->slot_reset(pdev); + else + ers_res = PCI_ERS_RESULT_NONE; + if (ers_result_indicates_abort(ers_res)) { pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); return ers_res; @@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) goto out_unlock; } - if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { + if (ers_res != PCI_ERS_RESULT_NEED_RESET) { ers_res = zpci_event_do_error_state_clear(pdev, driver); if (ers_result_indicates_abort(ers_res)) { status_str = "failed (abort on MMIO enable)"; @@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (ers_res == PCI_ERS_RESULT_NEED_RESET) ers_res = zpci_event_do_reset(pdev, driver); + /* + * ers_res can be PCI_ERS_RESULT_NONE either because the driver + * decided to return it, indicating that it abstains from voting + * on how to recover, or because it didn't implement the callback. + * Both cases assume, that if there is nothing else causing a + * disconnect, we recovered successfully. + */ + if (ers_res == PCI_ERS_RESULT_NONE) + ers_res = PCI_ERS_RESULT_RECOVERED; + if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); @@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; pci_ers_result_t ers_res; + u32 fh = 0; + int rc; zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); @@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) if (zdev) { mutex_lock(&zdev->state_lock); + rc = clp_refresh_fh(zdev->fid, &fh); + if (rc) + goto no_pdev; + if (!fh || ccdf->fh != fh) { + /* Ignore events with stale handles */ + zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n", + ccdf->fid, fh, ccdf->fh); + goto no_pdev; + } zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index c5e6545f6fcf..8e8a8bf518b6 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -41,7 +41,7 @@ int start_io_thread(struct os_helper_thread **td_out, int *fd_out) *fd_out = fds[1]; err = os_set_fd_block(*fd_out, 0); - err = os_set_fd_block(kernel_fd, 0); + err |= os_set_fd_block(kernel_fd, 0); if (err) { printk("start_io_thread - failed to set nonblocking I/O.\n"); goto out_close; diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index f292e0b4ff8b..9bbbddfe866b 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1625,35 +1625,19 @@ static void vector_eth_configure( device->dev = dev; - *vp = ((struct vector_private) - { - .list = LIST_HEAD_INIT(vp->list), - .dev = dev, - .unit = n, - .options = get_transport_options(def), - .rx_irq = 0, - .tx_irq = 0, - .parsed = def, - .max_packet = get_mtu(def) + ETH_HEADER_OTHER, - /* TODO - we need to calculate headroom so that ip header - * is 16 byte aligned all the time - */ - .headroom = get_headroom(def), - .form_header = NULL, - .verify_header = NULL, - .header_rxbuffer = NULL, - .header_txbuffer = NULL, - .header_size = 0, - .rx_header_size = 0, - .rexmit_scheduled = false, - .opened = false, - .transport_data = NULL, - .in_write_poll = false, - .coalesce = 2, - .req_size = get_req_size(def), - .in_error = false, - .bpf = NULL - }); + INIT_LIST_HEAD(&vp->list); + vp->dev = dev; + vp->unit = n; + vp->options = get_transport_options(def); + vp->parsed = def; + vp->max_packet = get_mtu(def) + ETH_HEADER_OTHER; + /* + * TODO - we need to calculate headroom so that ip header + * is 16 byte aligned all the time + */ + vp->headroom = get_headroom(def); + vp->coalesce = 2; + vp->req_size = get_req_size(def); dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST); INIT_WORK(&vp->reset_tx, vector_reset_tx); diff --git a/arch/um/drivers/vfio_kern.c b/arch/um/drivers/vfio_kern.c index b51fc9888ae1..13b971a2bd43 100644 --- a/arch/um/drivers/vfio_kern.c +++ b/arch/um/drivers/vfio_kern.c @@ -570,6 +570,17 @@ static void uml_vfio_release_device(struct uml_vfio_device *dev) kfree(dev); } +static struct uml_vfio_device *uml_vfio_find_device(const char *device) +{ + struct uml_vfio_device *dev; + + list_for_each_entry(dev, ¨_vfio_devices, list) { + if (!strcmp(dev->name, device)) + return dev; + } + return NULL; +} + static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp) { struct uml_vfio_device *dev; @@ -582,6 +593,9 @@ static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *k uml_vfio_container.fd = fd; } + if (uml_vfio_find_device(device)) + return -EEXIST; + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 71019b3b54ea..8bed9030ad47 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -147,7 +147,7 @@ config X86 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANTS_NO_INSTR select ARCH_WANT_GENERAL_HUGETLB - select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_HUGE_PMD_SHARE if X86_64 select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64 select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64 @@ -2695,6 +2695,15 @@ config MITIGATION_ITS disabled, mitigation cannot be enabled via cmdline. See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst> +config MITIGATION_TSA + bool "Mitigate Transient Scheduler Attacks" + depends on CPU_SUP_AMD + default y + help + Enable mitigation for Transient Scheduler Attacks. TSA is a hardware + security vulnerability on AMD CPUs which can lead to forwarding of + invalid info to subsequent instructions and thus can affect their + timing and thereby cause a leakage. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index b6db4e0b936b..7543a8b52c67 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = { */ static u64 snp_tsc_scale __ro_after_init; static u64 snp_tsc_offset __ro_after_init; -static u64 snp_tsc_freq_khz __ro_after_init; +static unsigned long snp_tsc_freq_khz __ro_after_init; DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); @@ -2167,15 +2167,31 @@ static unsigned long securetsc_get_tsc_khz(void) void __init snp_secure_tsc_init(void) { - unsigned long long tsc_freq_mhz; + struct snp_secrets_page *secrets; + unsigned long tsc_freq_mhz; + void *mem; if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) return; + mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); + if (!mem) { + pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); + } + + secrets = (__force struct snp_secrets_page *)mem; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); - snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); + + /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */ + tsc_freq_mhz &= GENMASK_ULL(17, 0); + + snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor); x86_platform.calibrate_cpu = securetsc_get_tsc_khz; x86_platform.calibrate_tsc = securetsc_get_tsc_khz; + + early_memunmap(mem, PAGE_SIZE); } diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 175958b02f2b..8e9a0cc20a4a 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -36,20 +36,20 @@ EXPORT_SYMBOL_GPL(write_ibpb); /* * Define the VERW operand that is disguised as entry code so that - * it can be referenced with KPTI enabled. This ensure VERW can be + * it can be referenced with KPTI enabled. This ensures VERW can be * used late in exit-to-user path after page tables are switched. */ .pushsection .entry.text, "ax" .align L1_CACHE_BYTES, 0xcc -SYM_CODE_START_NOALIGN(mds_verw_sel) +SYM_CODE_START_NOALIGN(x86_verw_sel) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR .word __KERNEL_DS .align L1_CACHE_BYTES, 0xcc -SYM_CODE_END(mds_verw_sel); +SYM_CODE_END(x86_verw_sel); /* For KVM */ -EXPORT_SYMBOL_GPL(mds_verw_sel); +EXPORT_SYMBOL_GPL(x86_verw_sel); .popsection diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ee176236c2be..286d509f9363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -456,6 +456,7 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ @@ -487,6 +488,9 @@ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +546,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 363110e6b2e3..a2c1f2d24b64 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -9,6 +9,14 @@ #include <asm/cpufeature.h> #include <asm/msr.h> +/* + * Define bits that are always set to 1 in DR7, only bit 10 is + * architecturally reserved to '1'. + * + * This is also the init/reset value for DR7. + */ +#define DR7_FIXED_1 0x00000400 + DECLARE_PER_CPU(unsigned long, cpu_dr7); #ifndef CONFIG_PARAVIRT_XXL @@ -100,8 +108,8 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value) static inline void hw_breakpoint_disable(void) { - /* Zero the control register for HW Breakpoint */ - set_debugreg(0UL, 7); + /* Reset the control register for HW Breakpoint */ + set_debugreg(DR7_FIXED_1, 7); /* Zero-out the individual HW breakpoint address registers */ set_debugreg(0UL, 0); @@ -125,9 +133,12 @@ static __always_inline unsigned long local_db_save(void) return 0; get_debugreg(dr7, 7); - dr7 &= ~0x400; /* architecturally set bit */ + + /* Architecturally set bit */ + dr7 &= ~DR7_FIXED_1; if (dr7) - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); + /* * Ensure the compiler doesn't lower the above statements into * the critical section; disabling breakpoints late would not diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 9a9b21b78905..b30e5474c18e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void) static __always_inline void native_safe_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static __always_inline void native_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b4a391929cdb..f7af967aa16f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -31,6 +31,7 @@ #include <asm/apic.h> #include <asm/pvclock-abi.h> +#include <asm/debugreg.h> #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/msr-index.h> @@ -249,7 +250,6 @@ enum x86_intercept_stage; #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) #define DR7_GD (1 << 13) -#define DR7_FIXED_1 0x00000400 #define DR7_VOLATILE 0xffff2bff #define KVM_GUESTDBG_VALID_MASK \ @@ -700,8 +700,13 @@ struct kvm_vcpu_hv { struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; - /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). + */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct hv_vp_assist_page vp_assist_page; @@ -764,6 +769,7 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0022_EAX, CPUID_7_2_EDX, CPUID_24_0_EBX, + CPUID_8000_0021_ECX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b7dded3c8113..5cfb5d74dd5f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -628,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index dd2b129b0418..6ca6516c7492 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -43,8 +43,6 @@ static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) static __always_inline void __mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); - /* * Use the instruction mnemonic with implicit operands, as the LLVM * assembler fails to assemble the mnemonic with explicit operands: @@ -80,7 +78,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx) */ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* No MDS buffer clear as this is AMD/HYGON only */ + /* No need for TSA buffer clearing on AMD */ /* "mwaitx %eax, %ebx, %ecx" */ asm volatile(".byte 0x0f, 0x01, 0xfb" @@ -98,7 +96,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) */ static __always_inline void __sti_mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } @@ -115,21 +112,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx) */ static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - if (ecx & 1) { - __mwait(eax, ecx); - } else { - __sti_mwait(eax, ecx); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + if (ecx & 1) { + __mwait(eax, ecx); + } else { + __sti_mwait(eax, ecx); + raw_local_irq_disable(); } } + +out: current_clr_polling(); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 20d754b98f3f..10f261678749 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -302,25 +302,31 @@ .endm /* - * Macro to execute VERW instruction that mitigate transient data sampling - * attacks such as MDS. On affected systems a microcode update overloaded VERW - * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. - * + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. */ -.macro CLEAR_CPU_BUFFERS +.macro __CLEAR_CPU_BUFFERS feature #ifdef CONFIG_X86_64 - ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature #else /* * In 32bit mode, the memory operand must be a %cs reference. The data * segments may not be usable (vm86 mode), and the stack segment may not * be flat (ESPFIX32). */ - ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature #endif .endm +#define CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF + +#define VM_CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM + #ifdef CONFIG_X86_64 .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP @@ -567,24 +573,24 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear); -extern u16 mds_verw_sel; +extern u16 x86_verw_sel; #include <asm/segment.h> /** - * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ -static __always_inline void mds_clear_cpu_buffers(void) +static __always_inline void x86_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -601,14 +607,15 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * and TSA vulnerabilities. * * Clear CPU buffers if the corresponding static key is enabled */ -static __always_inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void x86_idle_clear_cpu_buffers(void) { - if (static_branch_likely(&mds_idle_clear)) - mds_clear_cpu_buffers(); + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); } #endif /* __ASSEMBLER__ */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 58e028d42e41..a631f7d7c0c0 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -223,6 +223,18 @@ struct snp_tsc_info_resp { u8 rsvd2[100]; } __packed; +/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + struct snp_guest_req { void *req_buf; size_t req_sz; @@ -282,8 +294,11 @@ struct snp_secrets_page { u8 svsm_guest_vmpl; u8 rsvd3[3]; + /* The percentage decrease from nominal to mean TSC frequency. */ + u32 tsc_factor; + /* Remainder of page */ - u8 rsvd4[3744]; + u8 rsvd4[3740]; } __packed; struct snp_msg_desc { diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index d8525e6ef50a..8bc074c8d7c6 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -72,6 +72,7 @@ #define TDVMCALL_MAP_GPA 0x10001 #define TDVMCALL_GET_QUOTE 0x10002 #define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL /* * TDG.VP.VMCALL Status Codes (returned in R10) diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h index 0007ba077c0c..41da492dfb01 100644 --- a/arch/x86/include/uapi/asm/debugreg.h +++ b/arch/x86/include/uapi/asm/debugreg.h @@ -15,7 +15,26 @@ which debugging register was responsible for the trap. The other bits are either reserved or not of interest to us. */ -/* Define reserved bits in DR6 which are always set to 1 */ +/* + * Define bits in DR6 which are set to 1 by default. + * + * This is also the DR6 architectural value following Power-up, Reset or INIT. + * + * Note, with the introduction of Bus Lock Detection (BLD) and Restricted + * Transactional Memory (RTM), the DR6 register has been modified: + * + * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports + * Bus Lock Detection. The assertion of a bus lock could clear it. + * + * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports + * restricted transactional memory. #DB occurred inside an RTM region + * could clear it. + * + * Apparently, DR6.BLD and DR6.RTM are active low bits. + * + * As a result, DR6_RESERVED is an incorrect name now, but it is kept for + * compatibility. + */ #define DR6_RESERVED (0xFFFF0FF0) #define DR_TRAP0 (0x1) /* db0 */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b2ad8d13211a..329ee185d8cc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,7 @@ #include <linux/sched/clock.h> #include <linux/random.h> #include <linux/topology.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cacheinfo.h> @@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) #endif } +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -489,6 +530,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } bsp_determine_snp(c); + + tsa_init(c); + return; warn: @@ -930,6 +974,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) init_spectral_chicken(c); fix_erratum_1386(c); zen2_zenbleed_check(c); + + /* Disable RDSEED on AMD Cyan Skillfish because of an error. */ + if (c->x86_model == 0x47 && c->x86_stepping == 0x0) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg("RDSEED is not reliable on this platform; disabling.\n"); + } + + /* Correct misconfigured CPUID on some clients. */ + clear_cpu_cap(c, X86_FEATURE_INVLPGB); } static void init_amd_zen3(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f94e6a5497d..f4d3abb12317 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void); static void __init its_select_mitigation(void); static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); +static void __init tsa_select_mitigation(void); +static void __init tsa_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -169,9 +171,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* * Controls whether l1d flush based mitigations are enabled, @@ -225,6 +227,7 @@ void __init cpu_select_mitigations(void) gds_select_mitigation(); its_select_mitigation(); bhi_select_mitigation(); + tsa_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -272,6 +275,7 @@ void __init cpu_select_mitigations(void) gds_apply_mitigation(); its_apply_mitigation(); bhi_apply_mitigation(); + tsa_apply_mitigation(); } /* @@ -637,7 +641,7 @@ static void __init mmio_apply_mitigation(void) * is required irrespective of SMT state. */ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); if (mmio_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -1488,6 +1492,94 @@ static void __init its_apply_mitigation(void) } #undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_AUTO, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; + return; + } + + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) { + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + goto out; + } + + if (tsa_mitigation == TSA_MITIGATION_AUTO) + tsa_mitigation = TSA_MITIGATION_FULL; + + /* + * No need to set verw_clear_cpu_buf_mitigation_selected - it + * doesn't fit all cases here and it is not needed because this + * is the only VERW-based mitigation on AMD. + */ +out: + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + +static void __init tsa_apply_mitigation(void) +{ + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } +} + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = @@ -2249,10 +2341,10 @@ static void update_mds_branch_idle(void) return; if (sched_smt_active()) { - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { - static_branch_disable(&mds_idle_clear); + static_branch_disable(&cpu_buf_idle_clear); } } @@ -2316,6 +2408,25 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. + */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -3265,6 +3376,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3328,6 +3444,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITS: return its_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -3414,6 +3533,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att { return cpu_show_common(dev, attr, buf, X86_BUG_ITS); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8feb8fd2957a..fb50c1dd53ef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1233,6 +1233,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS BIT(8) /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ #define ITS_NATIVE_ONLY BIT(9) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1280,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), VULNBL_AMD(0x1a, SRSO), {} }; @@ -1530,6 +1532,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -2243,20 +2255,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); #endif #endif -/* - * Clear all 6 debug registers: - */ -static void clear_all_debug_regs(void) +static void initialize_debug_regs(void) { - int i; - - for (i = 0; i < 8; i++) { - /* Ignore db4, db5 */ - if ((i == 4) || (i == 5)) - continue; - - set_debugreg(0, i); - } + /* Control register first -- to make sure everything is disabled. */ + set_debugreg(DR7_FIXED_1, 7); + set_debugreg(DR6_RESERVED, 6); + /* dr5 and dr4 don't exist */ + set_debugreg(0, 3); + set_debugreg(0, 2); + set_debugreg(0, 1); + set_debugreg(0, 0); } #ifdef CONFIG_KGDB @@ -2417,7 +2425,7 @@ void cpu_init(void) load_mm_ldt(&init_mm); - clear_all_debug_regs(); + initialize_debug_regs(); dbg_restore_debug_regs(); doublefault_init_cpu_tss(); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9d852c3b2cb5..5c4eb28c3ac9 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) struct thresh_restart { struct threshold_block *b; - int reset; int set_lvt_off; int lvt_off; u16 old_limit; @@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr) rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ - - if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); + /* + * Reset error count and overflow bit. + * This is done during init or after handling an interrupt. + */ + if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) { + hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI); + hi |= THRESHOLD_MAX - tr->b->threshold_limit; } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); @@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol } bank_type = smca_get_bank_type(cpu, bank); - if (bank_type >= N_SMCA_BANK_TYPES) - return NULL; if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; - return NULL; + } + + if (b && b->block) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block); + return buf_mcatype; + } + + if (bank_type >= N_SMCA_BANK_TYPES) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank); + return buf_mcatype; } if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e9b3c5d4a52e..4da4eab56c81 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void) void (*mc_poll_banks)(void) = mc_poll_banks_default; +static bool should_enable_timer(unsigned long iv) +{ + return !mca_cfg.ignore_ce && iv; +} + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t) if (mce_get_storm_mode()) { __start_timer(t, HZ); - } else { + } else if (should_enable_timer(iv)) { __this_cpu_write(mce_next_interval, iv); __start_timer(t, iv); } @@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; - if (mca_cfg.ignore_ce || !iv) - return; - - this_cpu_write(mce_next_interval, iv); - __start_timer(t, iv); + if (should_enable_timer(iv)) { + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); + } } static void __mcheck_cpu_setup_timer(void) @@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { struct timer_list *t = this_cpu_ptr(&mce_timer); - int ret; mce_device_create(cpu); - - ret = mce_threshold_create_device(cpu); - if (ret) { - mce_device_remove(cpu); - return ret; - } + mce_threshold_create_device(cpu); mce_reenable_cpu(); mce_start_timer(t); return 0; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index efcf21e9552e..9b149b9c4109 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) void mce_intel_feature_clear(struct cpuinfo_x86 *c) { intel_clear_lmce(); + cmci_clear(); } bool intel_filter_mce(struct mce *m) diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c index 2a1655b1fdd8..1fd349cfc802 100644 --- a/arch/x86/kernel/cpu/microcode/amd_shas.c +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = { 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, } }, + { 0xa0011d7, { + 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b, + 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12, + 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2, + 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36, + } + }, { 0xa001223, { 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, @@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = { 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, } }, + { 0xa00123b, { + 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2, + 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3, + 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28, + 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72, + } + }, { 0xa00820c, { 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, @@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = { 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, } }, + { 0xa00820d, { + 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4, + 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca, + 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1, + 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7, + } + }, { 0xa10113e, { 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, @@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = { 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, } }, + { 0xa10114c, { + 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64, + 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74, + 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a, + 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0, + } + }, { 0xa10123e, { 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, @@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = { 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, } }, + { 0xa10124c, { + 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90, + 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46, + 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc, + 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2, + } + }, { 0xa108108, { 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, @@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = { 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, } }, + { 0xa108109, { + 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa, + 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b, + 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35, + 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59, + } + }, { 0xa20102d, { 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, @@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = { 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, } }, + { 0xa20102e, { + 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd, + 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6, + 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5, + 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe, + } + }, { 0xa201210, { 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, @@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = { 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, } }, + { 0xa201211, { + 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95, + 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27, + 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff, + 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27, + } + }, { 0xa404107, { 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, @@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = { 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, } }, + { 0xa404108, { + 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc, + 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb, + 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19, + 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00, + } + }, { 0xa500011, { 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, @@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = { 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, } }, + { 0xa500012, { + 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4, + 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16, + 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f, + 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63, + } + }, { 0xa601209, { 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, @@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = { 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, } }, + { 0xa60120a, { + 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d, + 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b, + 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d, + 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c, + } + }, { 0xa704107, { 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, @@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = { 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, } }, + { 0xa704108, { + 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93, + 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98, + 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49, + 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a, + } + }, { 0xa705206, { 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, @@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = { 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, } }, + { 0xa705208, { + 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19, + 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2, + 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11, + 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff, + } + }, { 0xa708007, { 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, @@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = { 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, } }, + { 0xa708008, { + 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46, + 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab, + 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16, + 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b, + } + }, { 0xa70c005, { 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, @@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = { 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, } }, + { 0xa70c008, { + 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21, + 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e, + 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66, + 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0, + } + }, { 0xaa00116, { 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, @@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = { 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, } }, + { 0xaa00216, { + 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5, + 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08, + 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2, + 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1, + } + }, }; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbf6d71bdf18..b4a1f6732a3a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 102641fd2172..8b1a9733d13e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -385,7 +385,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) struct perf_event *bp; /* Disable hardware debugging while we are in kgdb: */ - set_debugreg(0UL, 7); + set_debugreg(DR7_FIXED_1, 7); for (i = 0; i < HBP_NUM; i++) { if (!breakinfo[i].enabled) continue; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 704883c21f3a..a838be04f874 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -907,16 +907,24 @@ static __init bool prefer_mwait_c1_over_halt(void) */ static __cpuidle void mwait_idle(void) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (!current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - __sti_mwait(0, 0); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + __sti_mwait(0, 0); + raw_local_irq_disable(); } + +out: __current_clr_polling(); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a10e180cbf23..3ef15c2f152f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. */ if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400)) + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1)) return; printk("%sDR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8d6cf25127aa..b972bf72fb8b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -133,7 +133,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. */ if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400))) { + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1))) { printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n", log_lvl, d0, d1, d2); printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n", diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c5c897a86418..36354b470590 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1022,24 +1022,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) #endif } -static __always_inline unsigned long debug_read_clear_dr6(void) +static __always_inline unsigned long debug_read_reset_dr6(void) { unsigned long dr6; + get_debugreg(dr6, 6); + dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ + /* * The Intel SDM says: * - * Certain debug exceptions may clear bits 0-3. The remaining - * contents of the DR6 register are never cleared by the - * processor. To avoid confusion in identifying debug - * exceptions, debug handlers should clear the register before - * returning to the interrupted task. + * Certain debug exceptions may clear bits 0-3 of DR6. + * + * BLD induced #DB clears DR6.BLD and any other debug + * exception doesn't modify DR6.BLD. * - * Keep it simple: clear DR6 immediately. + * RTM induced #DB clears DR6.RTM and any other debug + * exception sets DR6.RTM. + * + * To avoid confusion in identifying debug exceptions, + * debug handlers should set DR6.BLD and DR6.RTM, and + * clear other DR6 bits before returning. + * + * Keep it simple: write DR6 with its architectural reset + * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately. */ - get_debugreg(dr6, 6); set_debugreg(DR6_RESERVED, 6); - dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ return dr6; } @@ -1239,13 +1247,13 @@ out: /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { - exc_debug_kernel(regs, debug_read_clear_dr6()); + exc_debug_kernel(regs, debug_read_reset_dr6()); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { - exc_debug_user(regs, debug_read_clear_dr6()); + exc_debug_user(regs, debug_read_reset_dr6()); } #ifdef CONFIG_X86_FRED @@ -1264,7 +1272,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) { /* * FRED #DB stores DR6 on the stack in the format which - * debug_read_clear_dr6() returns for the IDT entry points. + * debug_read_reset_dr6() returns for the IDT entry points. */ unsigned long dr6 = fred_event_data(regs); @@ -1279,7 +1287,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) /* 32 bit does not have separate entry points. */ DEFINE_IDTENTRY_RAW(exc_debug) { - unsigned long dr6 = debug_read_clear_dr6(); + unsigned long dr6 = debug_read_reset_dr6(); if (user_mode(regs)) exc_debug_user(regs, dr6); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b2d006756e02..f84bc0569c9c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1165,6 +1165,8 @@ void kvm_set_cpu_caps(void) */ SYNTHESIZED_F(LFENCE_RDTSC), /* SmmPgCfgLock */ + /* 4: Resv */ + SYNTHESIZED_F(VERW_CLEAR), F(NULL_SEL_CLR_BASE), /* UpperAddressIgnore */ F(AUTOIBRS), @@ -1179,6 +1181,11 @@ void kvm_set_cpu_caps(void) F(SRSO_USER_KERNEL_NO), ); + kvm_cpu_cap_init(CPUID_8000_0021_ECX, + SYNTHESIZED_F(TSA_SQ_NO), + SYNTHESIZED_F(TSA_L1_NO), + ); + kvm_cpu_cap_init(CPUID_8000_0022_EAX, F(PERFMON_V2), ); @@ -1748,8 +1755,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; case 0x80000021: - entry->ebx = entry->ecx = entry->edx = 0; + entry->ebx = entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0021_EAX); + cpuid_entry_override(entry, CPUID_8000_0021_ECX); break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 24f0318c50d7..ee27064dd72f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; + if (is_noncanonical_invlpg_address(entries[i], vcpu)) + continue; + /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. @@ -2001,11 +2004,11 @@ out_flush_all: static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + unsigned long *vcpu_mask = hv_vcpu->vcpu_mask; u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; /* * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index fde0ae986003..c53b92379e6e 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,6 +52,10 @@ /* CPUID level 0x80000022 (EAX) */ #define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0) +/* CPUID level 0x80000021 (ECX) */ +#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1) +#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2) + struct cpuid_reg { u32 function; u32 index; @@ -82,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, + [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; /* @@ -121,6 +126,8 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO); + KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 459c3b791fd4..b201f77fcd49 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) struct kvm_vcpu *src_vcpu; unsigned long i; + if (src->created_vcpus != atomic_read(&src->online_vcpus) || + dst->created_vcpus != atomic_read(&dst->online_vcpus)) + return -EBUSY; + if (!sev_es_guest(src)) return 0; @@ -4445,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) * the VMSA will be NULL if this vCPU is the destination for intrahost * migration, and will be copied later. */ - if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa) - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + if (!svm->sev_es.snp_has_guest_vmsa) { + if (svm->sev_es.vmsa) + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + else + svm->vmcb->control.vmsa_pa = INVALID_PAGE; + } if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES)) svm->vmcb->control.allowed_sev_features = sev->vmsa_features | diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0c61153b275f..235c4af6b692 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run) #endif mov VCPU_RDI(%_ASM_DI), %_ASM_DI + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 3: vmrun %_ASM_AX 4: @@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov SVM_current_vmcb(%rdi), %rax mov KVM_VMCB_pa(%rax), %rax + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 1: vmrun %rax 2: diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 1ad20c273f3b..f31ccdeb905b 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -173,6 +173,9 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i tdx_clear_unsupported_cpuid(entry); } +#define TDVMCALLINFO_GET_QUOTE BIT(0) +#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1) + static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, struct kvm_tdx_capabilities *caps) { @@ -188,6 +191,10 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, caps->cpuid.nent = td_conf->num_cpuid_config; + caps->user_tdvmcallinfo_1_r11 = + TDVMCALLINFO_GET_QUOTE | + TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT; + for (i = 0; i < td_conf->num_cpuid_config; i++) td_init_cpuid_entry2(&caps->cpuid.entries[i], i); @@ -1530,6 +1537,27 @@ static int tdx_get_quote(struct kvm_vcpu *vcpu) return 0; } +static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 vector = tdx->vp_enter_args.r12; + + if (vector < 32 || vector > 255) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT; + vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.setup_event_notify.vector = vector; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1541,6 +1569,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_get_td_vm_call_info(vcpu); case TDVMCALL_GET_QUOTE: return tdx_get_quote(vcpu); + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + return tdx_setup_event_notify_interrupt(vcpu); default: break; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4953846cb30d..191a9ed0da22 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7291,7 +7291,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&cpu_buf_vm_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) - mds_clear_cpu_buffers(); + x86_clear_cpu_buffers(); vmx_disable_fb_clear(vmx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b58a74c1722d..357b9e3a6cef 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ - if (kvm_caps.has_tsc_control) + if (kvm_caps.has_tsc_control) { tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz, v->arch.l1_tsc_scaling_ratio); + tgt_tsc_khz = tgt_tsc_khz ? : 1; + } if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, @@ -11035,7 +11037,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(vcpu->arch.switch_db_regs && !(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH))) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); set_debugreg(vcpu->arch.eff_db[0], 0); set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); @@ -11044,7 +11046,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6); } else if (unlikely(hw_breakpoint_active())) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); } vcpu->arch.host_debugctl = get_debugctlmsr(); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9b029bb29a16..5fa2cca43653 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1971,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) - return -EINVAL; + /* + * Don't check for the port being within range of max_evtchn_port(). + * Userspace can configure what ever targets it likes; events just won't + * be delivered if/while the target is invalid, just like userspace can + * configure MSIs which target non-existent APICs. + * + * This allow on Live Migration and Live Update, the IRQ routing table + * can be restored *independently* of other things like creating vCPUs, + * without imposing an ordering dependency on userspace. In this + * particular case, the problematic ordering would be with setting the + * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096 + * instead of 1024 event channels. + */ /* We only support 2 level event channels for now */ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c index 3275870330fe..fae8aabad10f 100644 --- a/arch/x86/um/ptrace.c +++ b/arch/x86/um/ptrace.c @@ -161,7 +161,7 @@ static int fpregs_legacy_set(struct task_struct *target, from = kbuf; } - return um_fxsr_from_i387(fxsave, &buf); + return um_fxsr_from_i387(fxsave, from); } #endif diff --git a/block/genhd.c b/block/genhd.c index 8171a6bc3210..c26733f6324b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -128,23 +128,27 @@ static void part_stat_read_all(struct block_device *part, static void bdev_count_inflight_rw(struct block_device *part, unsigned int inflight[2], bool mq_driver) { + int write = 0; + int read = 0; int cpu; if (mq_driver) { blk_mq_in_driver_rw(part, inflight); - } else { - for_each_possible_cpu(cpu) { - inflight[READ] += part_stat_local_read_cpu( - part, in_flight[READ], cpu); - inflight[WRITE] += part_stat_local_read_cpu( - part, in_flight[WRITE], cpu); - } + return; + } + + for_each_possible_cpu(cpu) { + read += part_stat_local_read_cpu(part, in_flight[READ], cpu); + write += part_stat_local_read_cpu(part, in_flight[WRITE], cpu); } - if (WARN_ON_ONCE((int)inflight[READ] < 0)) - inflight[READ] = 0; - if (WARN_ON_ONCE((int)inflight[WRITE] < 0)) - inflight[WRITE] = 0; + /* + * While iterating all CPUs, some IOs may be issued from a CPU already + * traversed and complete on a CPU that has not yet been traversed, + * causing the inflight number to be negative. + */ + inflight[READ] = read > 0 ? read : 0; + inflight[WRITE] = write > 0 ? write : 0; } /** diff --git a/crypto/wp512.c b/crypto/wp512.c index 41f13d490333..229b189a7988 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -21,10 +21,10 @@ */ #include <crypto/internal/hash.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/module.h> -#include <linux/mm.h> -#include <asm/byteorder.h> -#include <linux/types.h> +#include <linux/string.h> +#include <linux/unaligned.h> #define WP512_DIGEST_SIZE 64 #define WP384_DIGEST_SIZE 48 @@ -37,9 +37,6 @@ struct wp512_ctx { u8 bitLength[WP512_LENGTHBYTES]; - u8 buffer[WP512_BLOCK_SIZE]; - int bufferBits; - int bufferPos; u64 hash[WP512_DIGEST_SIZE/8]; }; @@ -779,16 +776,16 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = { * The core Whirlpool transform. */ -static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx) { +static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx, + const u8 *buffer) { int i, r; u64 K[8]; /* the round key */ u64 block[8]; /* mu(buffer) */ u64 state[8]; /* the cipher state */ u64 L[8]; - const __be64 *buffer = (const __be64 *)wctx->buffer; for (i = 0; i < 8; i++) - block[i] = be64_to_cpu(buffer[i]); + block[i] = get_unaligned_be64(buffer + i * 8); state[0] = block[0] ^ (K[0] = wctx->hash[0]); state[1] = block[1] ^ (K[1] = wctx->hash[1]); @@ -991,8 +988,6 @@ static int wp512_init(struct shash_desc *desc) { int i; memset(wctx->bitLength, 0, 32); - wctx->bufferBits = wctx->bufferPos = 0; - wctx->buffer[0] = 0; for (i = 0; i < 8; i++) { wctx->hash[i] = 0L; } @@ -1000,84 +995,54 @@ static int wp512_init(struct shash_desc *desc) { return 0; } -static int wp512_update(struct shash_desc *desc, const u8 *source, - unsigned int len) +static void wp512_add_length(u8 *bitLength, u64 value) { - struct wp512_ctx *wctx = shash_desc_ctx(desc); - int sourcePos = 0; - unsigned int bits_len = len * 8; // convert to number of bits - int sourceGap = (8 - ((int)bits_len & 7)) & 7; - int bufferRem = wctx->bufferBits & 7; + u32 carry; int i; - u32 b, carry; - u8 *buffer = wctx->buffer; - u8 *bitLength = wctx->bitLength; - int bufferBits = wctx->bufferBits; - int bufferPos = wctx->bufferPos; - u64 value = bits_len; for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != 0ULL); i--) { carry += bitLength[i] + ((u32)value & 0xff); bitLength[i] = (u8)carry; carry >>= 8; value >>= 8; } - while (bits_len > 8) { - b = ((source[sourcePos] << sourceGap) & 0xff) | - ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap)); - buffer[bufferPos++] |= (u8)(b >> bufferRem); - bufferBits += 8 - bufferRem; - if (bufferBits == WP512_BLOCK_SIZE * 8) { - wp512_process_buffer(wctx); - bufferBits = bufferPos = 0; - } - buffer[bufferPos] = b << (8 - bufferRem); - bufferBits += bufferRem; - bits_len -= 8; - sourcePos++; - } - if (bits_len > 0) { - b = (source[sourcePos] << sourceGap) & 0xff; - buffer[bufferPos] |= b >> bufferRem; - } else { - b = 0; - } - if (bufferRem + bits_len < 8) { - bufferBits += bits_len; - } else { - bufferPos++; - bufferBits += 8 - bufferRem; - bits_len -= 8 - bufferRem; - if (bufferBits == WP512_BLOCK_SIZE * 8) { - wp512_process_buffer(wctx); - bufferBits = bufferPos = 0; - } - buffer[bufferPos] = b << (8 - bufferRem); - bufferBits += (int)bits_len; - } +} - wctx->bufferBits = bufferBits; - wctx->bufferPos = bufferPos; +static int wp512_update(struct shash_desc *desc, const u8 *source, + unsigned int len) +{ + struct wp512_ctx *wctx = shash_desc_ctx(desc); + unsigned int remain = len % WP512_BLOCK_SIZE; + u64 bits_len = (len - remain) * 8ull; + u8 *bitLength = wctx->bitLength; - return 0; + wp512_add_length(bitLength, bits_len); + do { + wp512_process_buffer(wctx, source); + source += WP512_BLOCK_SIZE; + bits_len -= WP512_BLOCK_SIZE * 8; + } while (bits_len); + + return remain; } -static int wp512_final(struct shash_desc *desc, u8 *out) +static int wp512_finup(struct shash_desc *desc, const u8 *src, + unsigned int bufferPos, u8 *out) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int i; - u8 *buffer = wctx->buffer; u8 *bitLength = wctx->bitLength; - int bufferBits = wctx->bufferBits; - int bufferPos = wctx->bufferPos; __be64 *digest = (__be64 *)out; + u8 buffer[WP512_BLOCK_SIZE]; - buffer[bufferPos] |= 0x80U >> (bufferBits & 7); + wp512_add_length(bitLength, bufferPos * 8); + memcpy(buffer, src, bufferPos); + buffer[bufferPos] = 0x80U; bufferPos++; if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { if (bufferPos < WP512_BLOCK_SIZE) memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos); - wp512_process_buffer(wctx); + wp512_process_buffer(wctx, buffer); bufferPos = 0; } if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) @@ -1086,31 +1051,32 @@ static int wp512_final(struct shash_desc *desc, u8 *out) bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES; memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES], bitLength, WP512_LENGTHBYTES); - wp512_process_buffer(wctx); + wp512_process_buffer(wctx, buffer); + memzero_explicit(buffer, sizeof(buffer)); for (i = 0; i < WP512_DIGEST_SIZE/8; i++) digest[i] = cpu_to_be64(wctx->hash[i]); - wctx->bufferBits = bufferBits; - wctx->bufferPos = bufferPos; return 0; } -static int wp384_final(struct shash_desc *desc, u8 *out) +static int wp384_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { u8 D[64]; - wp512_final(desc, D); + wp512_finup(desc, src, len, D); memcpy(out, D, WP384_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); return 0; } -static int wp256_final(struct shash_desc *desc, u8 *out) +static int wp256_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { u8 D[64]; - wp512_final(desc, D); + wp512_finup(desc, src, len, D); memcpy(out, D, WP256_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); @@ -1121,11 +1087,12 @@ static struct shash_alg wp_algs[3] = { { .digestsize = WP512_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, - .final = wp512_final, + .finup = wp512_finup, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp512", .cra_driver_name = "wp512-generic", + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -1133,11 +1100,12 @@ static struct shash_alg wp_algs[3] = { { .digestsize = WP384_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, - .final = wp384_final, + .finup = wp384_finup, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp384", .cra_driver_name = "wp384-generic", + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -1145,11 +1113,12 @@ static struct shash_alg wp_algs[3] = { { .digestsize = WP256_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, - .final = wp256_final, + .finup = wp256_finup, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp256", .cra_driver_name = "wp256-generic", + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 45593612a4db..6905b56bf3e4 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -243,23 +243,10 @@ static int acpi_battery_get_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_CURRENT_NOW: case POWER_SUPPLY_PROP_POWER_NOW: - if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN) { + if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN) ret = -ENODEV; - break; - } - - val->intval = battery->rate_now * 1000; - /* - * When discharging, the current should be reported as a - * negative number as per the power supply class interface - * definition. - */ - if (psp == POWER_SUPPLY_PROP_CURRENT_NOW && - (battery->state & ACPI_BATTERY_STATE_DISCHARGING) && - acpi_battery_handle_discharging(battery) - == POWER_SUPPLY_STATUS_DISCHARGING) - val->intval = -val->intval; - + else + val->intval = battery->rate_now * 1000; break; case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN: diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e5e5c2e81d09..aa93b0ecbbc6 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1450,7 +1450,7 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_VERSION, "ASUSPRO D840MB_M840SA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUSPRO D840MB_M840SA"), }, /* 320 is broken, there is no known good version. */ }, diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 1206ab764ba9..f2e91b7d79f0 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -852,6 +852,8 @@ queue_skb(struct idt77252_dev *card, struct vc_map *vc, IDT77252_PRV_PADDR(skb) = dma_map_single(&card->pcidev->dev, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&card->pcidev->dev, IDT77252_PRV_PADDR(skb))) + return -ENOMEM; error = -EINVAL; @@ -1857,6 +1859,8 @@ add_rx_skb(struct idt77252_dev *card, int queue, paddr = dma_map_single(&card->pcidev->dev, skb->data, skb_end_pointer(skb) - skb->data, DMA_FROM_DEVICE); + if (dma_mapping_error(&card->pcidev->dev, paddr)) + goto outpoolrm; IDT77252_PRV_PADDR(skb) = paddr; if (push_rx_skb(card, skb, queue)) { @@ -1871,6 +1875,7 @@ outunmap: dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb), skb_end_pointer(skb) - skb->data, DMA_FROM_DEVICE); +outpoolrm: handle = IDT77252_PRV_POOL(skb); card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 7779ab0ca7ce..efc575a00edd 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -602,6 +602,7 @@ CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); CPU_SHOW_VULN_FALLBACK(ghostwrite); CPU_SHOW_VULN_FALLBACK(old_microcode); CPU_SHOW_VULN_FALLBACK(indirect_target_selection); +CPU_SHOW_VULN_FALLBACK(tsa); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -620,6 +621,7 @@ static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); +static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -639,6 +641,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_ghostwrite.attr, &dev_attr_old_microcode.attr, &dev_attr_indirect_target_selection.attr, + &dev_attr_tsa.attr, NULL }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index eebe699fdf4f..a6ab666ef48a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1237,6 +1237,7 @@ void dpm_complete(pm_message_t state) void dpm_resume_end(pm_message_t state) { dpm_resume(state); + pm_restore_gfp_mask(); dpm_complete(state); } EXPORT_SYMBOL_GPL(dpm_resume_end); @@ -2176,8 +2177,10 @@ int dpm_suspend_start(pm_message_t state) error = dpm_prepare(state); if (error) dpm_save_failed_step(SUSPEND_PREPARE); - else + else { + pm_restrict_gfp_mask(); error = dpm_suspend(state); + } dpm_show_time(starttime, state, error, "start"); return error; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index b1be6c510372..0c2eabe14af3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -64,13 +64,15 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, rcu_read_unlock(); page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); - rcu_read_lock(); - if (!page) + if (!page) { + rcu_read_lock(); return ERR_PTR(-ENOMEM); + } xa_lock(&brd->brd_pages); ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL, page, gfp); + rcu_read_lock(); if (ret) { xa_unlock(&brd->brd_pages); __free_page(page); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7bdc7eb808ea..2592bd19ebc1 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2198,9 +2198,7 @@ again: goto out; } } - ret = nbd_start_device(nbd); - if (ret) - goto out; + if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) { nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER], GFP_KERNEL); @@ -2216,6 +2214,8 @@ again: goto out; } set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags); + + ret = nbd_start_device(nbd); out: mutex_unlock(&nbd->config_lock); if (!ret) { diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d36f44f5ee80..9fd284fa76dc 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1148,8 +1148,8 @@ exit: blk_mq_end_request(req, res); } -static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, - int res, unsigned issue_flags) +static struct io_uring_cmd *__ublk_prep_compl_io_cmd(struct ublk_io *io, + struct request *req) { /* read cmd first because req will overwrite it */ struct io_uring_cmd *cmd = io->cmd; @@ -1164,6 +1164,13 @@ static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, io->flags &= ~UBLK_IO_FLAG_ACTIVE; io->req = req; + return cmd; +} + +static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, + int res, unsigned issue_flags) +{ + struct io_uring_cmd *cmd = __ublk_prep_compl_io_cmd(io, req); /* tell ublksrv one io request is coming */ io_uring_cmd_done(cmd, res, 0, issue_flags); @@ -1416,6 +1423,14 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } +static inline bool ublk_belong_to_same_batch(const struct ublk_io *io, + const struct ublk_io *io2) +{ + return (io_uring_cmd_ctx_handle(io->cmd) == + io_uring_cmd_ctx_handle(io2->cmd)) && + (io->task == io2->task); +} + static void ublk_queue_rqs(struct rq_list *rqlist) { struct rq_list requeue_list = { }; @@ -1427,14 +1442,16 @@ static void ublk_queue_rqs(struct rq_list *rqlist) struct ublk_queue *this_q = req->mq_hctx->driver_data; struct ublk_io *this_io = &this_q->ios[req->tag]; - if (io && io->task != this_io->task && !rq_list_empty(&submit_list)) + if (ublk_prep_req(this_q, req, true) != BLK_STS_OK) { + rq_list_add_tail(&requeue_list, req); + continue; + } + + if (io && !ublk_belong_to_same_batch(io, this_io) && + !rq_list_empty(&submit_list)) ublk_queue_cmd_list(io, &submit_list); io = this_io; - - if (ublk_prep_req(this_q, req, true) == BLK_STS_OK) - rq_list_add_tail(&submit_list, req); - else - rq_list_add_tail(&requeue_list, req); + rq_list_add_tail(&submit_list, req); } if (!rq_list_empty(&submit_list)) @@ -2148,10 +2165,9 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq, return 0; } -static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io) +static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io, + struct request *req) { - struct request *req = io->req; - /* * We have handled UBLK_IO_NEED_GET_DATA command, * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just @@ -2178,6 +2194,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, u32 cmd_op = cmd->cmd_op; unsigned tag = ub_cmd->tag; int ret = -EINVAL; + struct request *req; pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", __func__, cmd->cmd_op, ub_cmd->q_id, tag, @@ -2236,11 +2253,19 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, goto out; break; case UBLK_IO_NEED_GET_DATA: - io->addr = ub_cmd->addr; - if (!ublk_get_data(ubq, io)) - return -EIOCBQUEUED; - - return UBLK_IO_RES_OK; + /* + * ublk_get_data() may fail and fallback to requeue, so keep + * uring_cmd active first and prepare for handling new requeued + * request + */ + req = io->req; + ublk_fill_io_cmd(io, cmd, ub_cmd->addr); + io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV; + if (likely(ublk_get_data(ubq, io, req))) { + __ublk_prep_compl_io_cmd(io, req); + return UBLK_IO_RES_OK; + } + break; default: goto out; } @@ -2825,7 +2850,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) if (copy_from_user(&info, argp, sizeof(info))) return -EFAULT; - if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || info.nr_hw_queues > UBLK_MAX_NR_QUEUES) + if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || !info.queue_depth || + info.nr_hw_queues > UBLK_MAX_NR_QUEUES || !info.nr_hw_queues) return -EINVAL; if (capable(CAP_SYS_ADMIN)) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 563165c5efae..e1c688dd2d45 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -2033,6 +2033,28 @@ static void btintel_pcie_release_hdev(struct btintel_pcie_data *data) data->hdev = NULL; } +static void btintel_pcie_disable_interrupts(struct btintel_pcie_data *data) +{ + spin_lock(&data->irq_lock); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, data->fh_init_mask); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, data->hw_init_mask); + spin_unlock(&data->irq_lock); +} + +static void btintel_pcie_enable_interrupts(struct btintel_pcie_data *data) +{ + spin_lock(&data->irq_lock); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, ~data->fh_init_mask); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, ~data->hw_init_mask); + spin_unlock(&data->irq_lock); +} + +static void btintel_pcie_synchronize_irqs(struct btintel_pcie_data *data) +{ + for (int i = 0; i < data->alloc_vecs; i++) + synchronize_irq(data->msix_entries[i].vector); +} + static int btintel_pcie_setup_internal(struct hci_dev *hdev) { struct btintel_pcie_data *data = hci_get_drvdata(hdev); @@ -2152,6 +2174,8 @@ static int btintel_pcie_setup(struct hci_dev *hdev) bt_dev_err(hdev, "Firmware download retry count: %d", fw_dl_retry); btintel_pcie_dump_debug_registers(hdev); + btintel_pcie_disable_interrupts(data); + btintel_pcie_synchronize_irqs(data); err = btintel_pcie_reset_bt(data); if (err) { bt_dev_err(hdev, "Failed to do shr reset: %d", err); @@ -2159,6 +2183,7 @@ static int btintel_pcie_setup(struct hci_dev *hdev) } usleep_range(10000, 12000); btintel_pcie_reset_ia(data); + btintel_pcie_enable_interrupts(data); btintel_pcie_config_msix(data); err = btintel_pcie_enable_bt(data); if (err) { @@ -2291,6 +2316,12 @@ static void btintel_pcie_remove(struct pci_dev *pdev) data = pci_get_drvdata(pdev); + btintel_pcie_disable_interrupts(data); + + btintel_pcie_synchronize_irqs(data); + + flush_work(&data->rx_work); + btintel_pcie_reset_bt(data); for (int i = 0; i < data->alloc_vecs; i++) { struct msix_entry *msix_entry; @@ -2303,8 +2334,6 @@ static void btintel_pcie_remove(struct pci_dev *pdev) btintel_pcie_release_hdev(data); - flush_work(&data->rx_work); - destroy_workqueue(data->workqueue); btintel_pcie_free(data); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 5fe5879881f5..3ec0be496820 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2392,10 +2392,17 @@ static int qca_serdev_probe(struct serdev_device *serdev) */ qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, "bluetooth"); - if (IS_ERR(qcadev->bt_power->pwrseq)) - return PTR_ERR(qcadev->bt_power->pwrseq); - break; + /* + * Some modules have BT_EN enabled via a hardware pull-up, + * meaning it is not defined in the DTS and is not controlled + * through the power sequence. In such cases, fall through + * to follow the legacy flow. + */ + if (IS_ERR(qcadev->bt_power->pwrseq)) + qcadev->bt_power->pwrseq = NULL; + else + break; } fallthrough; case QCA_WCN3950: diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index bf490967241a..2505df1f4e69 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -720,11 +720,6 @@ static const struct pci_device_id agp_amd64_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_amd64_pci_table); -static const struct pci_device_id agp_amd64_pci_promisc_table[] = { - { PCI_DEVICE_CLASS(0, 0) }, - { } -}; - static DEFINE_SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, NULL, agp_amd64_resume); static struct pci_driver agp_amd64_pci_driver = { @@ -739,6 +734,7 @@ static struct pci_driver agp_amd64_pci_driver = { /* Not static due to IOMMU code calling it early. */ int __init agp_amd64_init(void) { + struct pci_dev *pdev = NULL; int err = 0; if (agp_off) @@ -767,9 +763,13 @@ int __init agp_amd64_init(void) } /* Look for any AGP bridge */ - agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table; - err = driver_attach(&agp_amd64_pci_driver.driver); - if (err == 0 && agp_bridges_found == 0) { + for_each_pci_dev(pdev) + if (pci_find_capability(pdev, PCI_CAP_ID_AGP)) + pci_add_dynid(&agp_amd64_pci_driver, + pdev->vendor, pdev->device, + pdev->subsystem_vendor, + pdev->subsystem_device, 0, 0, 0); + if (agp_bridges_found == 0) { pci_unregister_driver(&agp_amd64_pci_driver); err = -ENODEV; } diff --git a/drivers/clk/clk-scmi.c b/drivers/clk/clk-scmi.c index 15510c2ff21c..1b1561c84127 100644 --- a/drivers/clk/clk-scmi.c +++ b/drivers/clk/clk-scmi.c @@ -404,6 +404,7 @@ static int scmi_clocks_probe(struct scmi_device *sdev) const struct scmi_handle *handle = sdev->handle; struct scmi_protocol_handle *ph; const struct clk_ops *scmi_clk_ops_db[SCMI_MAX_CLK_OPS] = {}; + struct scmi_clk *sclks; if (!handle) return -ENODEV; @@ -430,18 +431,21 @@ static int scmi_clocks_probe(struct scmi_device *sdev) transport_is_atomic = handle->is_transport_atomic(handle, &atomic_threshold_us); + sclks = devm_kcalloc(dev, count, sizeof(*sclks), GFP_KERNEL); + if (!sclks) + return -ENOMEM; + + for (idx = 0; idx < count; idx++) + hws[idx] = &sclks[idx].hw; + for (idx = 0; idx < count; idx++) { - struct scmi_clk *sclk; + struct scmi_clk *sclk = &sclks[idx]; const struct clk_ops *scmi_ops; - sclk = devm_kzalloc(dev, sizeof(*sclk), GFP_KERNEL); - if (!sclk) - return -ENOMEM; - sclk->info = scmi_proto_clk_ops->info_get(ph, idx); if (!sclk->info) { dev_dbg(dev, "invalid clock info for idx %d\n", idx); - devm_kfree(dev, sclk); + hws[idx] = NULL; continue; } @@ -479,13 +483,11 @@ static int scmi_clocks_probe(struct scmi_device *sdev) if (err) { dev_err(dev, "failed to register clock %d\n", idx); devm_kfree(dev, sclk->parent_data); - devm_kfree(dev, sclk); hws[idx] = NULL; } else { dev_dbg(dev, "Registered clock:%s%s\n", sclk->info->name, scmi_ops->enable ? " (atomic ops)" : ""); - hws[idx] = &sclk->hw; } } diff --git a/drivers/clk/imx/clk-imx95-blk-ctl.c b/drivers/clk/imx/clk-imx95-blk-ctl.c index 25974947ad0c..cc2ee2be1819 100644 --- a/drivers/clk/imx/clk-imx95-blk-ctl.c +++ b/drivers/clk/imx/clk-imx95-blk-ctl.c @@ -219,11 +219,15 @@ static const struct imx95_blk_ctl_dev_data lvds_csr_dev_data = { .clk_reg_offset = 0, }; +static const char * const disp_engine_parents[] = { + "videopll1", "dsi_pll", "ldb_pll_div7" +}; + static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = { [IMX95_CLK_DISPMIX_ENG0_SEL] = { .name = "disp_engine0_sel", - .parent_names = (const char *[]){"videopll1", "dsi_pll", "ldb_pll_div7", }, - .num_parents = 4, + .parent_names = disp_engine_parents, + .num_parents = ARRAY_SIZE(disp_engine_parents), .reg = 0, .bit_idx = 0, .bit_width = 2, @@ -232,8 +236,8 @@ static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = { }, [IMX95_CLK_DISPMIX_ENG1_SEL] = { .name = "disp_engine1_sel", - .parent_names = (const char *[]){"videopll1", "dsi_pll", "ldb_pll_div7", }, - .num_parents = 4, + .parent_names = disp_engine_parents, + .num_parents = ARRAY_SIZE(disp_engine_parents), .reg = 0, .bit_idx = 2, .bit_width = 2, diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index 2cbc664e5d62..623aaa4439c4 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -103,10 +103,10 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle) { struct cxl_mailbox *cxl_mbox; - u8 min_scrub_cycle = U8_MAX; struct cxl_region_params *p; struct cxl_memdev *cxlmd; struct cxl_region *cxlr; + u8 min_scrub_cycle = 0; int i, ret; if (!cxl_ps_ctx->cxlr) { @@ -133,8 +133,12 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, if (ret) return ret; + /* + * The min_scrub_cycle of a region is the max of minimum scrub + * cycles supported by memdevs that back the region. + */ if (min_cycle) - min_scrub_cycle = min(*min_cycle, min_scrub_cycle); + min_scrub_cycle = max(*min_cycle, min_scrub_cycle); } if (min_cycle) @@ -1099,8 +1103,10 @@ int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt) old_rec = xa_store(&array_rec->rec_gen_media, le64_to_cpu(rec->media_hdr.phys_addr), rec, GFP_KERNEL); - if (xa_is_err(old_rec)) + if (xa_is_err(old_rec)) { + kfree(rec); return xa_err(old_rec); + } kfree(old_rec); @@ -1127,8 +1133,10 @@ int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt) old_rec = xa_store(&array_rec->rec_dram, le64_to_cpu(rec->media_hdr.phys_addr), rec, GFP_KERNEL); - if (xa_is_err(old_rec)) + if (xa_is_err(old_rec)) { + kfree(rec); return xa_err(old_rec); + } kfree(old_rec); @@ -1315,7 +1323,7 @@ cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd, attrbs.bank = ctx->bank; break; case EDAC_REPAIR_RANK_SPARING: - attrbs.repair_type = CXL_BANK_SPARING; + attrbs.repair_type = CXL_RANK_SPARING; break; default: return NULL; diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index 6f2eae1eb126..7c750599ea69 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -544,7 +544,7 @@ static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs, u32 flags; if (rpc_in->op_size < sizeof(uuid_t)) - return ERR_PTR(-EINVAL); + return false; feat = cxl_feature_info(cxlfs, &rpc_in->set_feat_in.uuid); if (IS_ERR(feat)) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 485a831695c7..2731ba3a0799 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -31,40 +31,38 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, ras_cap.header_log); } -static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, - struct cxl_ras_capability_regs ras_cap) +static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd, + struct cxl_ras_capability_regs ras_cap) { u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; - struct cxl_dev_state *cxlds; - cxlds = pci_get_drvdata(pdev); - if (!cxlds) - return; - - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); + trace_cxl_aer_correctable_error(cxlmd, status); } -static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, - struct cxl_ras_capability_regs ras_cap) +static void +cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd, + struct cxl_ras_capability_regs ras_cap) { u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; - struct cxl_dev_state *cxlds; u32 fe; - cxlds = pci_get_drvdata(pdev); - if (!cxlds) - return; - if (hweight32(status) > 1) fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, ras_cap.cap_control)); else fe = status; - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, + trace_cxl_aer_uncorrectable_error(cxlmd, status, fe, ras_cap.header_log); } +static int match_memdev_by_parent(struct device *dev, const void *uport) +{ + if (is_cxl_memdev(dev) && dev->parent == uport) + return 1; + return 0; +} + static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) { unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device, @@ -73,13 +71,12 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment, data->prot_err.agent_addr.bus, devfn); + struct cxl_memdev *cxlmd; int port_type; if (!pdev) return; - guard(device)(&pdev->dev); - port_type = pci_pcie_type(pdev); if (port_type == PCI_EXP_TYPE_ROOT_PORT || port_type == PCI_EXP_TYPE_DOWNSTREAM || @@ -92,10 +89,20 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) return; } + guard(device)(&pdev->dev); + if (!pdev->dev.driver) + return; + + struct device *mem_dev __free(put_device) = bus_find_device( + &cxl_bus_type, NULL, pdev, match_memdev_by_parent); + if (!mem_dev) + return; + + cxlmd = to_cxl_memdev(mem_dev); if (data->severity == AER_CORRECTABLE) - cxl_cper_trace_corr_prot_err(pdev, data->ras_cap); + cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap); else - cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap); + cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap); } static void cxl_cper_prot_err_work_fn(struct work_struct *work) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index b1ef4546346d..bea3e9858aca 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -685,11 +685,13 @@ long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { - ret = dma_fence_wait_timeout(fence, intr, ret); - if (ret <= 0) { - dma_resv_iter_end(&cursor); - return ret; - } + ret = dma_fence_wait_timeout(fence, intr, timeout); + if (ret <= 0) + break; + + /* Even for zero timeout the return value is 1 */ + if (timeout) + timeout = ret; } dma_resv_iter_end(&cursor); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index b681c0663203..07f1e9dc1ca7 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1209,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_PRIMARY; - /* Asymmetric dual-rank DIMM support. */ + if (csrow_sec_enabled(2 * dimm, ctrl, pvt)) + cs_mode |= CS_EVEN_SECONDARY; + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_SECONDARY; @@ -1230,12 +1232,13 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) return cs_mode; } -static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, - int csrow_nr, int dimm) +static int calculate_cs_size(u32 mask, unsigned int cs_mode) { - u32 msb, weight, num_zero_bits; - u32 addr_mask_deinterleaved; - int size = 0; + int msb, weight, num_zero_bits; + u32 deinterleaved_mask; + + if (!mask) + return 0; /* * The number of zero bits in the mask is equal to the number of bits @@ -1248,19 +1251,30 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, * without swapping with the most significant bit. This can be handled * by keeping the MSB where it is and ignoring the single zero bit. */ - msb = fls(addr_mask_orig) - 1; - weight = hweight_long(addr_mask_orig); + msb = fls(mask) - 1; + weight = hweight_long(mask); num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); /* Take the number of zero bits off from the top of the mask. */ - addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); + deinterleaved_mask = GENMASK(msb - num_zero_bits, 1); + edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask); + + return (deinterleaved_mask >> 2) + 1; +} + +static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec, + unsigned int cs_mode, int csrow_nr, int dimm) +{ + int size; edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); - edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); - edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); + edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask); /* Register [31:1] = Address [39:9]. Size is in kBs here. */ - size = (addr_mask_deinterleaved >> 2) + 1; + size = calculate_cs_size(addr_mask, cs_mode); + + edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec); + size += calculate_cs_size(addr_mask_sec, cs_mode); /* Return size in MBs. */ return size >> 10; @@ -1269,8 +1283,8 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsigned int cs_mode, int csrow_nr) { + u32 addr_mask = 0, addr_mask_sec = 0; int cs_mask_nr = csrow_nr; - u32 addr_mask_orig; int dimm, size = 0; /* No Chip Selects are enabled. */ @@ -1308,13 +1322,13 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, if (!pvt->flags.zn_regs_v2) cs_mask_nr >>= 1; - /* Asymmetric dual-rank DIMM support. */ - if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) - addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; - else - addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; + if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY)) + addr_mask = pvt->csels[umc].csmasks[cs_mask_nr]; + + if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY)) + addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr]; - return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm); + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm); } static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) @@ -3512,9 +3526,10 @@ static void gpu_get_err_info(struct mce *m, struct err_info *err) static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsigned int cs_mode, int csrow_nr) { - u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr]; + u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr]; + u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr]; - return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1); + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1); } static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c index 1d51838a60c1..51c451c7f0f0 100755 --- a/drivers/edac/ecs.c +++ b/drivers/edac/ecs.c @@ -170,8 +170,10 @@ static int ecs_create_desc(struct device *ecs_dev, const struct attribute_group fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru); fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru); - for (i = 0; i < ECS_MAX_ATTRS; i++) + for (i = 0; i < ECS_MAX_ATTRS; i++) { + sysfs_attr_init(&fru_ctx->dev_attr[i].dev_attr.attr); fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr; + } sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru); group->name = fru_ctx->name; diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c index d1a8caa85369..70a033a76233 100755 --- a/drivers/edac/mem_repair.c +++ b/drivers/edac/mem_repair.c @@ -333,6 +333,7 @@ static int mem_repair_create_desc(struct device *dev, for (i = 0; i < MR_MAX_ATTRS; i++) { memcpy(&ctx->mem_repair_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + sysfs_attr_init(&ctx->mem_repair_dev_attr[i].dev_attr.attr); ctx->mem_repair_attrs[i] = &ctx->mem_repair_dev_attr[i].dev_attr.attr; } diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c index e421d3ebd959..f9d02af2fc3a 100755 --- a/drivers/edac/scrub.c +++ b/drivers/edac/scrub.c @@ -176,6 +176,7 @@ static int scrub_create_desc(struct device *scrub_dev, group = &scrub_ctx->group; for (i = 0; i < SCRUB_MAX_ATTRS; i++) { memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + sysfs_attr_init(&scrub_ctx->scrub_dev_attr[i].dev_attr.attr); scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr; } sprintf(scrub_ctx->name, "%s%d", "scrub", instance); diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index fe55613a8ea9..37eb2e6c2f9f 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -110,7 +110,7 @@ struct ffa_drv_info { struct work_struct sched_recv_irq_work; struct xarray partition_info; DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS)); - struct mutex notify_lock; /* lock to protect notifier hashtable */ + rwlock_t notify_lock; /* lock to protect notifier hashtable */ }; static struct ffa_drv_info *drv_info; @@ -1250,13 +1250,12 @@ notifier_hnode_get_by_type(u16 notify_id, enum notify_type type) return NULL; } -static int -update_notifier_cb(struct ffa_device *dev, int notify_id, void *cb, - void *cb_data, bool is_registration, bool is_framework) +static int update_notifier_cb(struct ffa_device *dev, int notify_id, + struct notifier_cb_info *cb, bool is_framework) { struct notifier_cb_info *cb_info = NULL; enum notify_type type = ffa_notify_type_get(dev->vm_id); - bool cb_found; + bool cb_found, is_registration = !!cb; if (is_framework) cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, dev->vm_id, @@ -1270,20 +1269,10 @@ update_notifier_cb(struct ffa_device *dev, int notify_id, void *cb, return -EINVAL; if (is_registration) { - cb_info = kzalloc(sizeof(*cb_info), GFP_KERNEL); - if (!cb_info) - return -ENOMEM; - - cb_info->dev = dev; - cb_info->cb_data = cb_data; - if (is_framework) - cb_info->fwk_cb = cb; - else - cb_info->cb = cb; - - hash_add(drv_info->notifier_hash, &cb_info->hnode, notify_id); + hash_add(drv_info->notifier_hash, &cb->hnode, notify_id); } else { hash_del(&cb_info->hnode); + kfree(cb_info); } return 0; @@ -1300,20 +1289,19 @@ static int __ffa_notify_relinquish(struct ffa_device *dev, int notify_id, if (notify_id >= FFA_MAX_NOTIFICATIONS) return -EINVAL; - mutex_lock(&drv_info->notify_lock); + write_lock(&drv_info->notify_lock); - rc = update_notifier_cb(dev, notify_id, NULL, NULL, false, - is_framework); + rc = update_notifier_cb(dev, notify_id, NULL, is_framework); if (rc) { pr_err("Could not unregister notification callback\n"); - mutex_unlock(&drv_info->notify_lock); + write_unlock(&drv_info->notify_lock); return rc; } if (!is_framework) rc = ffa_notification_unbind(dev->vm_id, BIT(notify_id)); - mutex_unlock(&drv_info->notify_lock); + write_unlock(&drv_info->notify_lock); return rc; } @@ -1334,6 +1322,7 @@ static int __ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu, { int rc; u32 flags = 0; + struct notifier_cb_info *cb_info = NULL; if (ffa_notifications_disabled()) return -EOPNOTSUPP; @@ -1341,28 +1330,40 @@ static int __ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu, if (notify_id >= FFA_MAX_NOTIFICATIONS) return -EINVAL; - mutex_lock(&drv_info->notify_lock); + cb_info = kzalloc(sizeof(*cb_info), GFP_KERNEL); + if (!cb_info) + return -ENOMEM; + + cb_info->dev = dev; + cb_info->cb_data = cb_data; + if (is_framework) + cb_info->fwk_cb = cb; + else + cb_info->cb = cb; + + write_lock(&drv_info->notify_lock); if (!is_framework) { if (is_per_vcpu) flags = PER_VCPU_NOTIFICATION_FLAG; rc = ffa_notification_bind(dev->vm_id, BIT(notify_id), flags); - if (rc) { - mutex_unlock(&drv_info->notify_lock); - return rc; - } + if (rc) + goto out_unlock_free; } - rc = update_notifier_cb(dev, notify_id, cb, cb_data, true, - is_framework); + rc = update_notifier_cb(dev, notify_id, cb_info, is_framework); if (rc) { pr_err("Failed to register callback for %d - %d\n", notify_id, rc); if (!is_framework) ffa_notification_unbind(dev->vm_id, BIT(notify_id)); } - mutex_unlock(&drv_info->notify_lock); + +out_unlock_free: + write_unlock(&drv_info->notify_lock); + if (rc) + kfree(cb_info); return rc; } @@ -1406,9 +1407,9 @@ static void handle_notif_callbacks(u64 bitmap, enum notify_type type) if (!(bitmap & 1)) continue; - mutex_lock(&drv_info->notify_lock); + read_lock(&drv_info->notify_lock); cb_info = notifier_hnode_get_by_type(notify_id, type); - mutex_unlock(&drv_info->notify_lock); + read_unlock(&drv_info->notify_lock); if (cb_info && cb_info->cb) cb_info->cb(notify_id, cb_info->cb_data); @@ -1446,9 +1447,9 @@ static void handle_fwk_notif_callbacks(u32 bitmap) ffa_rx_release(); - mutex_lock(&drv_info->notify_lock); + read_lock(&drv_info->notify_lock); cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, &uuid); - mutex_unlock(&drv_info->notify_lock); + read_unlock(&drv_info->notify_lock); if (cb_info && cb_info->fwk_cb) cb_info->fwk_cb(notify_id, cb_info->cb_data, buf); @@ -1973,7 +1974,7 @@ static void ffa_notifications_setup(void) goto cleanup; hash_init(drv_info->notifier_hash); - mutex_init(&drv_info->notify_lock); + rwlock_init(&drv_info->notify_lock); drv_info->notif_enabled = true; return; diff --git a/drivers/firmware/efi/libstub/zboot.lds b/drivers/firmware/efi/libstub/zboot.lds index c3a166675450..367907eb7d86 100644 --- a/drivers/firmware/efi/libstub/zboot.lds +++ b/drivers/firmware/efi/libstub/zboot.lds @@ -29,14 +29,12 @@ SECTIONS . = _etext; } -#ifdef CONFIG_EFI_SBAT .sbat : ALIGN(4096) { _sbat = .; *(.sbat) _esbat = ALIGN(4096); . = _esbat; } -#endif .data : ALIGN(4096) { _data = .; @@ -60,6 +58,6 @@ SECTIONS PROVIDE(__efistub__gzdata_size = ABSOLUTE(__efistub__gzdata_end - __efistub__gzdata_start)); -PROVIDE(__data_rawsize = ABSOLUTE(_edata - _etext)); -PROVIDE(__data_size = ABSOLUTE(_end - _etext)); +PROVIDE(__data_rawsize = ABSOLUTE(_edata - _data)); +PROVIDE(__data_size = ABSOLUTE(_end - _data)); PROVIDE(__sbat_size = ABSOLUTE(_esbat - _sbat)); diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c index e02f14f4bd7c..3a69fe3234c7 100644 --- a/drivers/firmware/samsung/exynos-acpm.c +++ b/drivers/firmware/samsung/exynos-acpm.c @@ -430,6 +430,9 @@ int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer) return -EOPNOTSUPP; } + msg.chan_id = xfer->acpm_chan_id; + msg.chan_type = EXYNOS_MBOX_CHAN_TYPE_DOORBELL; + scoped_guard(mutex, &achan->tx_lock) { tx_front = readl(achan->tx.front); idx = (tx_front + 1) % achan->qlen; @@ -446,25 +449,15 @@ int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer) /* Advance TX front. */ writel(idx, achan->tx.front); - } - msg.chan_id = xfer->acpm_chan_id; - msg.chan_type = EXYNOS_MBOX_CHAN_TYPE_DOORBELL; - ret = mbox_send_message(achan->chan, (void *)&msg); - if (ret < 0) - return ret; - - ret = acpm_wait_for_message_response(achan, xfer); + ret = mbox_send_message(achan->chan, (void *)&msg); + if (ret < 0) + return ret; - /* - * NOTE: we might prefer not to need the mailbox ticker to manage the - * transfer queueing since the protocol layer queues things by itself. - * Unfortunately, we have to kick the mailbox framework after we have - * received our message. - */ - mbox_client_txdone(achan->chan, ret); + mbox_client_txdone(achan->chan, 0); + } - return ret; + return acpm_wait_for_message_response(achan, xfer); } /** diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 73ba73b31cb1..37ab78243fab 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -708,7 +708,7 @@ struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, unsigned int idx, unsigned long *flags) { char propname[32]; /* 32 is max size of property name */ - enum of_gpio_flags of_flags; + enum of_gpio_flags of_flags = 0; const of_find_gpio_quirk *q; struct gpio_desc *desc; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index fdafa0df1b43..3a3eca5b4c40 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3297,14 +3297,15 @@ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc) static int gpio_chip_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { - int ret; - lockdep_assert_held(&gc->gpiodev->srcu); if (gc->get_multiple) { + int ret; + ret = gc->get_multiple(gc, mask, bits); if (ret > 0) return -EBADE; + return ret; } if (gc->get) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ca4a6b82817f..df77558e03ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -561,6 +561,13 @@ static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -578,4 +585,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 0f3e2944edd7..e68c0fa8d751 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -582,6 +582,13 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev, lower_32_bits(page_table_base)); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -599,4 +606,5 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 85567d0d9545..f5d5c45ddc0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -944,6 +944,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) drm_sched_entity_fini(entity); } } + kref_put(&ctx->refcount, amdgpu_ctx_fini); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a0e9bf9b2710..81b3443c8d7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -321,10 +321,12 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, const struct firmware *fw; int r; - r = request_firmware(&fw, fw_name, adev->dev); + r = firmware_request_nowarn(&fw, fw_name, adev->dev); if (r) { - dev_err(adev->dev, "can't load firmware \"%s\"\n", - fw_name); + if (amdgpu_discovery == 2) + dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name); + else + drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name); return r; } @@ -459,16 +461,12 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) /* Read from file if it is the preferred option */ fw_name = amdgpu_discovery_get_fw_name(adev); if (fw_name != NULL) { - dev_info(adev->dev, "use ip discovery information from file"); + drm_dbg(&adev->ddev, "use ip discovery information from file"); r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); - - if (r) { - dev_err(adev->dev, "failed to read ip discovery binary from file\n"); - r = -EINVAL; + if (r) goto out; - } - } else { + drm_dbg(&adev->ddev, "use ip discovery information from memory"); r = amdgpu_discovery_read_binary_from_mem( adev, adev->mman.discovery_bin); if (r) @@ -1338,10 +1336,8 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) int r; r = amdgpu_discovery_init(adev); - if (r) { - DRM_ERROR("amdgpu_discovery_init failed\n"); + if (r) return r; - } wafl_ver = 0; adev->gfx.xcc_mask = 0; @@ -2579,8 +2575,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; default: r = amdgpu_discovery_reg_base_init(adev); - if (r) - return -EINVAL; + if (r) { + drm_err(&adev->ddev, "discovery failed: %d\n", r); + return r; + } amdgpu_discovery_harvest_ip(adev); amdgpu_discovery_get_gfx_info(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d377a7c57d5e..ad9be3656653 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2235,6 +2235,25 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 2, 1): + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 3, 0): + adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 167 && + adev->gfx.pfp_fw_version >= 196 && + adev->gfx.mec_fw_version >= 474) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(9, 4, 2): adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index c9eba537de09..28eb846280dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1630,10 +1630,12 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - r = mes_v11_0_set_hw_resources_1(&adev->mes); - if (r) { - DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); - goto failure; + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) { + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } } r = mes_v11_0_query_sched_status(&adev->mes); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index b4f17332d466..6b222630f3fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1742,7 +1742,8 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b) + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index cef68df4c663..bb82c652e4c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -45,6 +45,7 @@ #include "amdgpu_ras.h" MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 1813c3ed0aa6..37f4b5b4a098 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1543,8 +1543,13 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 inst_id = ring->me; + int r; + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_resume(adev, true); - return amdgpu_sdma_reset_engine(adev, inst_id); + return r; } static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 23f97da62808..0b40411b92a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1456,8 +1456,13 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 inst_id = ring->me; + int r; + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_resume(adev, true); - return amdgpu_sdma_reset_engine(adev, inst_id); + return r; } static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 5a70ae17be04..a9bdf8d61d6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1374,9 +1374,22 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); - /* add firmware version checks here */ - if (0 && !adev->sdma.disable_uq) - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(6, 0, 0): + if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 2): + if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 3): + if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index ad47d0bdf777..86903eccbd4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1349,9 +1349,15 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); - /* add firmware version checks here */ - if (0 && !adev->sdma.disable_uq) - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 0, 1): + if ((adev->sdma.instance[0].fw_version >= 7836028) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 865dca2547de..a0f22ea6d15a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1171,13 +1171,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start, } static void -svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, - struct svm_range *pchild, enum svm_work_list_ops op) +svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op) { pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", pchild, pchild->start, pchild->last, prange, op); - pchild->work_item.mm = mm; + pchild->work_item.mm = NULL; pchild->work_item.op = op; list_add_tail(&pchild->child_list, &prange->child_list); } @@ -1278,7 +1277,7 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { - if (gc_ip_version < IP_VERSION(9, 5, 0)) + if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent) mapping_flags |= AMDGPU_VM_MTYPE_UC; else mapping_flags |= AMDGPU_VM_MTYPE_NC; @@ -2394,15 +2393,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, prange->work_item.op != SVM_OP_UNMAP_RANGE) prange->work_item.op = op; } else { - prange->work_item.op = op; - - /* Pairs with mmput in deferred_list_work */ - mmget(mm); - prange->work_item.mm = mm; - list_add_tail(&prange->deferred_list, - &prange->svms->deferred_range_list); - pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", - prange, prange->start, prange->last, op); + /* Pairs with mmput in deferred_list_work. + * If process is exiting and mm is gone, don't update mmu notifier. + */ + if (mmget_not_zero(mm)) { + prange->work_item.mm = mm; + prange->work_item.op = op; + list_add_tail(&prange->deferred_list, + &prange->svms->deferred_range_list); + pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", + prange, prange->start, prange->last, op); + } } spin_unlock(&svms->deferred_list_lock); } @@ -2416,8 +2417,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms) } static void -svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, - struct svm_range *prange, unsigned long start, +svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start, unsigned long last) { struct svm_range *head; @@ -2438,12 +2438,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, svm_range_split(tail, last + 1, tail->last, &head); if (head != prange && tail != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); - svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE); } else if (tail != prange) { - svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE); } else if (head != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); } else if (parent != prange) { prange->work_item.op = SVM_OP_UNMAP_RANGE; } @@ -2520,14 +2520,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, l = min(last, pchild->last); if (l >= s) svm_range_unmap_from_gpus(pchild, s, l, trigger); - svm_range_unmap_split(mm, prange, pchild, start, last); + svm_range_unmap_split(prange, pchild, start, last); mutex_unlock(&pchild->lock); } s = max(start, prange->start); l = min(last, prange->last); if (l >= s) svm_range_unmap_from_gpus(prange, s, l, trigger); - svm_range_unmap_split(mm, prange, prange, start, last); + svm_range_unmap_split(prange, prange, start, last); if (unmap_parent) svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE); @@ -2570,8 +2570,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, if (range->event == MMU_NOTIFY_RELEASE) return true; - if (!mmget_not_zero(mni->mm)) - return true; start = mni->interval_tree.start; last = mni->interval_tree.last; @@ -2598,7 +2596,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, } svm_range_unlock(prange); - mmput(mni->mm); return true; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bc4cd11bfc79..f58fa5da7fe5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3610,13 +3610,15 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) luminance_range = &conn_base->display_info.luminance_range; - if (luminance_range->max_luminance) { - caps->aux_min_input_signal = luminance_range->min_luminance; + if (luminance_range->max_luminance) caps->aux_max_input_signal = luminance_range->max_luminance; - } else { - caps->aux_min_input_signal = 0; + else caps->aux_max_input_signal = 512; - } + + if (luminance_range->min_luminance) + caps->aux_min_input_signal = luminance_range->min_luminance; + else + caps->aux_min_input_signal = 1; min_input_signal_override = drm_get_panel_min_brightness_quirk(aconnector->drm_edid); if (min_input_signal_override >= 0) @@ -4718,16 +4720,16 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } -/* Rescale from [min..max] to [0..AMDGPU_MAX_BL_LEVEL] */ +/* Rescale from [min..max] to [0..MAX_BACKLIGHT_LEVEL] */ static inline u32 scale_input_to_fw(int min, int max, u64 input) { - return DIV_ROUND_CLOSEST_ULL(input * AMDGPU_MAX_BL_LEVEL, max - min); + return DIV_ROUND_CLOSEST_ULL(input * MAX_BACKLIGHT_LEVEL, max - min); } -/* Rescale from [0..AMDGPU_MAX_BL_LEVEL] to [min..max] */ +/* Rescale from [0..MAX_BACKLIGHT_LEVEL] to [min..max] */ static inline u32 scale_fw_to_input(int min, int max, u64 input) { - return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), AMDGPU_MAX_BL_LEVEL); + return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), MAX_BACKLIGHT_LEVEL); } static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, @@ -4947,7 +4949,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, caps->ac_level, caps->dc_level); } else - props.brightness = props.max_brightness = AMDGPU_MAX_BL_LEVEL; + props.brightness = props.max_brightness = MAX_BACKLIGHT_LEVEL; if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) drm_info(drm, "Using custom brightness curve\n"); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index d4395b92fb85..9e3e51a2dc49 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1029,6 +1029,10 @@ enum dc_edid_status dm_helpers_read_local_edid( return EDID_NO_RESPONSE; edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw() + if (!edid || + edid->extensions >= sizeof(sink->dc_edid.raw_edid) / EDID_LENGTH) + return EDID_BAD_INPUT; + sink->dc_edid.length = EDID_LENGTH * (edid->extensions + 1); memmove(sink->dc_edid.raw_edid, (uint8_t *)edid, sink->dc_edid.length); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index d562ddeca512..c9f6c6275ca1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -974,6 +974,7 @@ struct dc_crtc_timing { uint32_t pix_clk_100hz; uint32_t min_refresh_in_uhz; + uint32_t max_refresh_in_uhz; uint32_t vic; uint32_t hdmi_vic; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 3ba9b62ba70b..250f09922d2f 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -155,6 +155,14 @@ unsigned int mod_freesync_calc_v_total_from_refresh( v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000000); + } else if (refresh_in_uhz >= stream->timing.max_refresh_in_uhz) { + /* When the target refresh rate is the maximum panel refresh rate + * round up the vtotal value to prevent off-by-one error causing + * v_total_min to be below the panel's lower bound + */ + v_total = div64_u64(div64_u64(((unsigned long long)( + frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), + stream->timing.h_total) + (1000000 - 1), 1000000); } else { v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index b3f588b71a7d..af6f79793407 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -64,10 +64,11 @@ struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, str adev->id = ret; adev->name = "dp_hpd_bridge"; adev->dev.parent = parent; - adev->dev.of_node = of_node_get(parent->of_node); adev->dev.release = drm_aux_hpd_bridge_release; adev->dev.platform_data = of_node_get(np); + device_set_of_node_from_dev(&adev->dev, parent); + ret = auxiliary_device_init(adev); if (ret) { of_node_put(adev->dev.platform_data); diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 79b009ab9396..29b0358a7b6d 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -299,6 +299,7 @@ struct drm_bridge *drm_panel_bridge_add_typed(struct drm_panel *panel, panel_bridge->bridge.of_node = panel->dev->of_node; panel_bridge->bridge.ops = DRM_BRIDGE_OP_MODES; panel_bridge->bridge.type = connector_type; + panel_bridge->bridge.pre_enable_prev_first = panel->prepare_prev_first; drm_bridge_add(&panel_bridge->bridge); @@ -413,8 +414,6 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, return bridge; } - bridge->pre_enable_prev_first = panel->prepare_prev_first; - *ptr = bridge; devres_add(dev, ptr); @@ -456,8 +455,6 @@ struct drm_bridge *drmm_panel_bridge_add(struct drm_device *drm, if (ret) return ERR_PTR(ret); - bridge->pre_enable_prev_first = panel->prepare_prev_first; - return bridge; } EXPORT_SYMBOL(drmm_panel_bridge_add); diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index 0014c497e3fe..bccc88d25948 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -1095,7 +1095,7 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, bool first = !xfer->tx_done; u32 reg; - dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n", + dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n", xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done); if (length > DSI_TX_FIFO_SIZE) @@ -1293,7 +1293,7 @@ static bool samsung_dsim_transfer_finish(struct samsung_dsim *dsi) spin_unlock_irqrestore(&dsi->transfer_lock, flags); dev_dbg(dsi->dev, - "> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", + "> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len, xfer->rx_done); diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 60224f476e1d..de9c23537465 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -348,12 +348,18 @@ static void ti_sn65dsi86_enable_comms(struct ti_sn65dsi86 *pdata, * 200 ms. We'll assume that the panel driver will have the hardcoded * delay in its prepare and always disable HPD. * - * If HPD somehow makes sense on some future panel we'll have to - * change this to be conditional on someone specifying that HPD should - * be used. + * For DisplayPort bridge type, we need HPD. So we use the bridge type + * to conditionally disable HPD. + * NOTE: The bridge type is set in ti_sn_bridge_probe() but enable_comms() + * can be called before. So for DisplayPort, HPD will be enabled once + * bridge type is set. We are using bridge type instead of "no-hpd" + * property because it is not used properly in devicetree description + * and hence is unreliable. */ - regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, - HPD_DISABLE); + + if (pdata->bridge.type != DRM_MODE_CONNECTOR_DisplayPort) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, + HPD_DISABLE); pdata->comms_enabled = true; @@ -1195,9 +1201,14 @@ static enum drm_connector_status ti_sn_bridge_detect(struct drm_bridge *bridge) struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); int val = 0; - pm_runtime_get_sync(pdata->dev); + /* + * Runtime reference is grabbed in ti_sn_bridge_hpd_enable() + * as the chip won't report HPD just after being powered on. + * HPD_DEBOUNCED_STATE reflects correct state only after the + * debounce time (~100-400 ms). + */ + regmap_read(pdata->regmap, SN_HPD_DISABLE_REG, &val); - pm_runtime_put_autosuspend(pdata->dev); return val & HPD_DEBOUNCED_STATE ? connector_status_connected : connector_status_disconnected; @@ -1220,6 +1231,26 @@ static void ti_sn65dsi86_debugfs_init(struct drm_bridge *bridge, struct dentry * debugfs_create_file("status", 0600, debugfs, pdata, &status_fops); } +static void ti_sn_bridge_hpd_enable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + /* + * Device needs to be powered on before reading the HPD state + * for reliable hpd detection in ti_sn_bridge_detect() due to + * the high debounce time. + */ + + pm_runtime_get_sync(pdata->dev); +} + +static void ti_sn_bridge_hpd_disable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + pm_runtime_put_autosuspend(pdata->dev); +} + static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .attach = ti_sn_bridge_attach, .detach = ti_sn_bridge_detach, @@ -1234,6 +1265,8 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .debugfs_init = ti_sn65dsi86_debugfs_init, + .hpd_enable = ti_sn_bridge_hpd_enable, + .hpd_disable = ti_sn_bridge_hpd_disable, }; static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata, @@ -1321,8 +1354,26 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, pdata->bridge.type = pdata->next_bridge->type == DRM_MODE_CONNECTOR_DisplayPort ? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP; - if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) - pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT; + if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) { + pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT | + DRM_BRIDGE_OP_HPD; + /* + * If comms were already enabled they would have been enabled + * with the wrong value of HPD_DISABLE. Update it now. Comms + * could be enabled if anyone is holding a pm_runtime reference + * (like if a GPIO is in use). Note that in most cases nobody + * is doing AUX channel xfers before the bridge is added so + * HPD doesn't _really_ matter then. The only exception is in + * the eDP case where the panel wants to read the EDID before + * the bridge is added. We always consistently have HPD disabled + * for eDP. + */ + mutex_lock(&pdata->comms_mutex); + if (pdata->comms_enabled) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, + HPD_DISABLE, 0); + mutex_unlock(&pdata->comms_mutex); + }; drm_bridge_add(&pdata->bridge); diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 7d2e499ea5de..262e93e07a28 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -708,11 +708,14 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (bridge_connector->bridge_hdmi_audio || bridge_connector->bridge_dp_audio) { struct device *dev; + struct drm_bridge *bridge; if (bridge_connector->bridge_hdmi_audio) - dev = bridge_connector->bridge_hdmi_audio->hdmi_audio_dev; + bridge = bridge_connector->bridge_hdmi_audio; else - dev = bridge_connector->bridge_dp_audio->hdmi_audio_dev; + bridge = bridge_connector->bridge_dp_audio; + + dev = bridge->hdmi_audio_dev; ret = drm_connector_hdmi_audio_init(connector, dev, &drm_bridge_connector_hdmi_audio_funcs, diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index f2a6559a2710..dc622c78db9d 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -725,7 +725,7 @@ ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset, * monitor doesn't power down exactly after the throw away read. */ if (!aux->is_remote) { - ret = drm_dp_dpcd_probe(aux, DP_DPCD_REV); + ret = drm_dp_dpcd_probe(aux, DP_LANE0_1_STATUS); if (ret < 0) return ret; } diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index b781601946db..63a70f285cce 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -862,11 +862,23 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_framebuffer_free); int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs) { + unsigned int i; int ret; + bool exists; if (WARN_ON_ONCE(fb->dev != dev || !fb->format)) return -EINVAL; + for (i = 0; i < fb->format->num_planes; i++) { + if (drm_WARN_ON_ONCE(dev, fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i))) + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + if (fb->obj[i]) { + exists = drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]); + if (exists) + fb->internal_flags |= DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } + INIT_LIST_HEAD(&fb->filp_head); fb->funcs = funcs; @@ -875,7 +887,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB, false, drm_framebuffer_free); if (ret) - goto out; + goto err; mutex_lock(&dev->mode_config.fb_lock); dev->mode_config.num_fb++; @@ -883,7 +895,16 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, mutex_unlock(&dev->mode_config.fb_lock); drm_mode_object_register(dev, &fb->base); -out: + + return 0; + +err: + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) { + drm_gem_object_handle_put_unlocked(fb->obj[i]); + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } return ret; } EXPORT_SYMBOL(drm_framebuffer_init); @@ -960,6 +981,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private); void drm_framebuffer_cleanup(struct drm_framebuffer *fb) { struct drm_device *dev = fb->dev; + unsigned int i; + + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) + drm_gem_object_handle_put_unlocked(fb->obj[i]); + } mutex_lock(&dev->mode_config.fb_lock); list_del(&fb->head); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 1e659d2660f7..ac0524595bd6 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -212,6 +212,46 @@ void drm_gem_private_object_fini(struct drm_gem_object *obj) } EXPORT_SYMBOL(drm_gem_private_object_fini); +static void drm_gem_object_handle_get(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + drm_WARN_ON(dev, !mutex_is_locked(&dev->object_name_lock)); + + if (obj->handle_count++ == 0) + drm_gem_object_get(obj); +} + +/** + * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any + * @obj: GEM object + * + * Acquires a reference on the GEM buffer object's handle. Required to keep + * the GEM object alive. Call drm_gem_object_handle_put_if_exists_unlocked() + * to release the reference. Does nothing if the buffer object has no handle. + * + * Returns: + * True if a handle exists, or false otherwise + */ +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + guard(mutex)(&dev->object_name_lock); + + /* + * First ref taken during GEM object creation, if any. Some + * drivers set up internal framebuffers with GEM objects that + * do not have a GEM handle. Hence, this counter can be zero. + */ + if (!obj->handle_count) + return false; + + drm_gem_object_handle_get(obj); + + return true; +} + /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. @@ -242,20 +282,26 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) } } -static void -drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) +/** + * drm_gem_object_handle_put_unlocked - releases reference on user-space handle + * @obj: GEM object + * + * Releases a reference on the GEM buffer object's handle. Possibly releases + * the GEM buffer object and associated dma-buf objects. + */ +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; bool final = false; - if (WARN_ON(READ_ONCE(obj->handle_count) == 0)) + if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0)) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ + * Must bump handle count first as this may be the last + * ref, in which case the object would disappear before + * we checked for a name. + */ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { @@ -279,6 +325,9 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) struct drm_file *file_priv = data; struct drm_gem_object *obj = ptr; + if (drm_WARN_ON(obj->dev, !data)) + return 0; + if (obj->funcs->close) obj->funcs->close(obj, file_priv); @@ -389,8 +438,8 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); - if (obj->handle_count++ == 0) - drm_gem_object_get(obj); + + drm_gem_object_handle_get(obj); /* * Get the user-visible handle using idr. Preload and perform @@ -399,7 +448,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); + ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT); spin_unlock(&file_priv->table_lock); idr_preload_end(); @@ -420,6 +469,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, goto err_revoke; } + /* mirrors drm_gem_handle_delete to avoid races */ + spin_lock(&file_priv->table_lock); + obj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(obj != NULL); + spin_unlock(&file_priv->table_lock); *handlep = handle; return 0; diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index e44f28fd81d3..60c282881958 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -161,6 +161,8 @@ void drm_sysfs_lease_event(struct drm_device *dev); /* drm_gem.c */ int drm_gem_init(struct drm_device *dev); +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj); +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj); int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep); diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index e5184a0c2465..21fd647f8ce1 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -91,12 +91,13 @@ static const struct dev_pm_ops mipi_dsi_device_pm_ops = { .restore = pm_generic_restore, }; -static const struct bus_type mipi_dsi_bus_type = { +const struct bus_type mipi_dsi_bus_type = { .name = "mipi-dsi", .match = mipi_dsi_device_match, .uevent = mipi_dsi_uevent, .pm = &mipi_dsi_device_pm_ops, }; +EXPORT_SYMBOL_GPL(mipi_dsi_bus_type); /** * of_find_mipi_dsi_device_by_node() - find the MIPI DSI device matching a diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index dd55b1cb764d..18492daae4b3 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -27,7 +27,7 @@ //! * <https://github.com/erwanvivien/fast_qr> //! * <https://github.com/bjguillot/qr> -use kernel::{prelude::*, str::CStr}; +use kernel::prelude::*; #[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] struct Version(usize); diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index edbeab88ff2b..d983ee85cf13 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -343,17 +343,18 @@ EXPORT_SYMBOL(drm_writeback_connector_init_with_encoder); /** * drm_writeback_connector_cleanup - Cleanup the writeback connector * @dev: DRM device - * @wb_connector: Pointer to the writeback connector to clean up + * @data: Pointer to the writeback connector to clean up * * This will decrement the reference counter of blobs and destroy properties. It * will also clean the remaining jobs in this writeback connector. Caution: This helper will not * clean up the attached encoder and the drm_connector. */ static void drm_writeback_connector_cleanup(struct drm_device *dev, - struct drm_writeback_connector *wb_connector) + void *data) { unsigned long flags; struct drm_writeback_job *pos, *n; + struct drm_writeback_connector *wb_connector = data; delete_writeback_properties(dev); drm_property_blob_put(wb_connector->pixel_formats_blob_ptr); @@ -405,7 +406,7 @@ int drmm_writeback_connector_init(struct drm_device *dev, if (ret) return ret; - ret = drmm_add_action_or_reset(dev, (void *)drm_writeback_connector_cleanup, + ret = drmm_add_action_or_reset(dev, drm_writeback_connector_cleanup, wb_connector); if (ret) return ret; diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f91daefa9d2b..805aa28c1723 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -636,6 +636,10 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id) if (!ctx->drm_dev) goto out; + /* check if crtc and vblank have been initialized properly */ + if (!drm_dev_has_vblank(ctx->drm_dev)) + goto out; + if (!ctx->i80_if) { drm_crtc_handle_vblank(&ctx->crtc->base); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index c394cc702d7d..205c238cc73a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -187,6 +187,7 @@ struct fimd_context { u32 i80ifcon; bool i80_if; bool suspended; + bool dp_clk_enabled; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; atomic_t win_updated; @@ -1047,7 +1048,18 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable) struct fimd_context *ctx = container_of(clk, struct fimd_context, dp_clk); u32 val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE; + + if (enable == ctx->dp_clk_enabled) + return; + + if (enable) + pm_runtime_resume_and_get(ctx->dev); + + ctx->dp_clk_enabled = enable; writel(val, ctx->regs + DP_MIE_CLKCON); + + if (!enable) + pm_runtime_put(ctx->dev); } static const struct exynos_drm_crtc_ops fimd_crtc_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 4787fee4696f..d44401a695e2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -174,7 +174,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev, return ERR_PTR(ret); } - DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %pK\n", obj->filp); + DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %p\n", obj->filp); return exynos_gem; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index ea9f66037600..03c8490af4f4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -271,7 +271,7 @@ static inline struct exynos_drm_ipp_task * task->src.rect.h = task->dst.rect.h = UINT_MAX; task->transform.rotation = DRM_MODE_ROTATE_0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %p\n", task); return task; } @@ -339,7 +339,7 @@ static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task, } DRM_DEV_DEBUG_DRIVER(task->dev, - "Got task %pK configuration from userspace\n", + "Got task %p configuration from userspace\n", task); return 0; } @@ -394,7 +394,7 @@ static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf) static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp, struct exynos_drm_ipp_task *task) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %p\n", task); exynos_drm_ipp_task_release_buf(&task->src); exynos_drm_ipp_task_release_buf(&task->dst); @@ -559,7 +559,7 @@ static int exynos_drm_ipp_check_format(struct exynos_drm_ipp_task *task, DRM_EXYNOS_IPP_FORMAT_DESTINATION); if (!fmt) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: %s format not supported\n", + "Task %p: %s format not supported\n", task, buf == src ? "src" : "dst"); return -EINVAL; } @@ -609,7 +609,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) bool rotate = (rotation != DRM_MODE_ROTATE_0); bool scale = false; - DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %p\n", task); if (src->rect.w == UINT_MAX) src->rect.w = src->buf.width; @@ -625,7 +625,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) dst->rect.x + dst->rect.w > (dst->buf.width) || dst->rect.y + dst->rect.h > (dst->buf.height)) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: defined area is outside provided buffers\n", + "Task %p: defined area is outside provided buffers\n", task); return -EINVAL; } @@ -642,7 +642,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) || (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) && src->buf.fourcc != dst->buf.fourcc)) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: hw capabilities exceeded\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: hw capabilities exceeded\n", task); return -EINVAL; } @@ -655,7 +655,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) if (ret) return ret; - DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %pK: all checks done.\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %p: all checks done.\n", task); return ret; @@ -667,25 +667,25 @@ static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task, struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; int ret = 0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %pK\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %p\n", task); ret = exynos_drm_ipp_task_setup_buffer(src, filp); if (ret) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: src buffer setup failed\n", + "Task %p: src buffer setup failed\n", task); return ret; } ret = exynos_drm_ipp_task_setup_buffer(dst, filp); if (ret) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: dst buffer setup failed\n", + "Task %p: dst buffer setup failed\n", task); return ret; } - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: buffers prepared.\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: buffers prepared.\n", task); return ret; @@ -764,7 +764,7 @@ void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret) struct exynos_drm_ipp *ipp = task->ipp; unsigned long flags; - DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %pK done: %d\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %p done: %d\n", ipp->id, task, ret); spin_lock_irqsave(&ipp->lock, flags); @@ -807,7 +807,7 @@ static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp) spin_unlock_irqrestore(&ipp->lock, flags); DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, selected task %pK to run\n", ipp->id, + "ipp: %d, selected task %p to run\n", ipp->id, task); ret = ipp->funcs->commit(ipp, task); @@ -917,14 +917,14 @@ int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data, */ if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) { DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, nonblocking processing task %pK\n", + "ipp: %d, nonblocking processing task %p\n", ipp->id, task); task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; exynos_drm_ipp_schedule_task(task->ipp, task); ret = 0; } else { - DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %pK\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %p\n", ipp->id, task); exynos_drm_ipp_schedule_task(ipp, task); ret = wait_event_interruptible(ipp->done_wq, diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index ba7b8938b17c..166ee11831ab 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1938,7 +1938,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, int index, len; if (drm_WARN_ON(display->drm, - !data || panel->vbt.dsi.seq_version != 1)) + !data || panel->vbt.dsi.seq_version >= 3)) return 0; /* index = 1 to skip sequence byte */ @@ -1961,7 +1961,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, } /* - * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence. + * Some v1/v2 VBT MIPI sequences do the deassert in the init OTP sequence. * The deassert must be done before calling intel_dsi_device_ready, so for * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. @@ -1972,9 +1972,9 @@ static void vlv_fixup_mipi_sequences(struct intel_display *display, u8 *init_otp; int len; - /* Limit this to v1 vid-mode sequences */ + /* Limit this to v1/v2 vid-mode sequences */ if (panel->vbt.dsi.config->is_cmd_mode || - panel->vbt.dsi.seq_version != 1) + panel->vbt.dsi.seq_version >= 3) return; /* Only do this if there are otp and assert seqs and no deassert seq */ diff --git a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c index 74bb3bedf30f..5111bdc3075b 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c +++ b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c @@ -103,8 +103,8 @@ static void get_ana_cp_int_prop(u64 vco_clk, DIV_ROUND_DOWN_ULL(curve_1_interpolated, CURVE0_MULTIPLIER))); ana_cp_int_temp = - DIV_ROUND_CLOSEST_ULL(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1), - CURVE2_MULTIPLIER); + DIV64_U64_ROUND_CLOSEST(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1), + CURVE2_MULTIPLIER); *ana_cp_int = max(1, min(ana_cp_int_temp, 127)); diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 21c1e10caf68..2007bb9d974d 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1589,8 +1589,8 @@ static void vlv_dsi_add_properties(struct intel_connector *connector) static void vlv_dphy_param_init(struct intel_dsi *intel_dsi) { + struct intel_display *display = to_intel_display(&intel_dsi->base); struct intel_connector *connector = intel_dsi->attached_connector; - struct intel_display *display = to_intel_display(connector); struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; u32 tlpx_ns, extra_byte_count, tlpx_ui; u32 ui_num, ui_den; diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 1e925c75fb08..c43febc862dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -284,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) if (gt->gsc.intf[intf_id].irq < 0) return; - ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); + ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq); if (ret) gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a876a34455f1..2a6d79abf25b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -610,7 +610,6 @@ static int ring_context_alloc(struct intel_context *ce) /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); ce->ring = engine->legacy.ring; - ce->timeline = intel_timeline_get(engine->legacy.timeline); GEM_BUG_ON(ce->state); if (engine->context_size) { @@ -623,6 +622,8 @@ static int ring_context_alloc(struct intel_context *ce) ce->state = vma; } + ce->timeline = intel_timeline_get(engine->legacy.timeline); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 990bfaba3ce4..5bc696bfbb0f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -108,7 +108,7 @@ static unsigned int config_bit(const u64 config) return other_bit(config); } -static u32 config_mask(const u64 config) +static __always_inline u32 config_mask(const u64 config) { unsigned int bit = config_bit(config); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 88870844b5bd..2fb7a9e7efec 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -73,8 +73,8 @@ static int igt_add_request(void *arg) /* Basic preliminary test to create a request and let it loose! */ request = mock_request(rcs0(i915)->kernel_context, HZ / 10); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_add(request); @@ -91,8 +91,8 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_get(request); @@ -160,8 +160,8 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); @@ -219,8 +219,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); request = mock_request(ce, 2 * HZ); intel_context_put(ce); - if (!request) { - err = -ENOMEM; + if (IS_ERR(request)) { + err = PTR_ERR(request); goto err_context_0; } @@ -237,8 +237,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); vip = mock_request(ce, 0); intel_context_put(ce); - if (!vip) { - err = -ENOMEM; + if (IS_ERR(vip)) { + err = PTR_ERR(vip); goto err_context_1; } diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 09f747228dff..1b0cf073e964 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -35,7 +35,7 @@ mock_request(struct intel_context *ce, unsigned long delay) /* NB the i915->requests slab cache is enlarged to fit mock_request */ request = intel_context_create_request(ce); if (IS_ERR(request)) - return NULL; + return request; request->mock.delay = delay; return request; diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c index 41f5d89e78b8..3e349d039fc0 100644 --- a/drivers/gpu/drm/imagination/pvr_power.c +++ b/drivers/gpu/drm/imagination/pvr_power.c @@ -386,13 +386,13 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) if (!err) { if (hard_reset) { pvr_dev->fw_dev.booted = false; - WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev)); + WARN_ON(pvr_power_device_suspend(from_pvr_device(pvr_dev)->dev)); err = pvr_fw_hard_reset(pvr_dev); if (err) goto err_device_lost; - err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev); + err = pvr_power_device_resume(from_pvr_device(pvr_dev)->dev); pvr_dev->fw_dev.booted = true; if (err) goto err_device_lost; diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 200e65a7cefc..c7869a639bef 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -314,14 +314,10 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) drm->debugfs = NULL; } -int +void nouveau_module_debugfs_init(void) { nouveau_debugfs_root = debugfs_create_dir("nouveau", NULL); - if (IS_ERR(nouveau_debugfs_root)) - return PTR_ERR(nouveau_debugfs_root); - - return 0; } void diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.h b/drivers/gpu/drm/nouveau/nouveau_debugfs.h index b7617b344ee2..d05ed0e641c4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h @@ -24,7 +24,7 @@ extern void nouveau_debugfs_fini(struct nouveau_drm *); extern struct dentry *nouveau_debugfs_root; -int nouveau_module_debugfs_init(void); +void nouveau_module_debugfs_init(void); void nouveau_module_debugfs_fini(void); #else static inline void @@ -42,10 +42,9 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) { } -static inline int +static inline void nouveau_module_debugfs_init(void) { - return 0; } static inline void diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 0c82a63cd49d..1527b801f013 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1461,9 +1461,7 @@ nouveau_drm_init(void) if (!nouveau_modeset) return 0; - ret = nouveau_module_debugfs_init(); - if (ret) - return ret; + nouveau_module_debugfs_init(); #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER platform_driver_register(&nouveau_platform_driver); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c index baf42339f93e..588cb4ab85cb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c @@ -719,7 +719,6 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = 4, - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; caps->status = 0xffff; @@ -727,17 +726,22 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) if (!acpi_check_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, BIT_ULL(0x1a))) return; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, 0x1a, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; caps->status = 0; caps->optimusCaps = *(u32 *)obj->buffer.pointer; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); @@ -754,24 +758,28 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = sizeof(caps), - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; jt->status = 0xffff; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &JT_DSM_GUID, JT_DSM_REV, 0x1, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; jt->status = 0; jt->jtCaps = *(u32 *)obj->buffer.pointer; jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20; jt->bSBIOSCaps = 0; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); @@ -1744,6 +1752,13 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend) nvkm_gsp_sg_free(gsp->subdev.device, &gsp->sr.sgt); return ret; } + + /* + * TODO: Debug the GSP firmware / RPC handling to find out why + * without this Turing (but none of the other architectures) + * ends up resetting all channels after resume. + */ + msleep(50); } ret = r535_gsp_rpc_unloading_guest_driver(gsp, suspend); diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 0a3b26bb4d73..9f81fa960b46 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -26,6 +26,7 @@ #include <linux/i2c.h> #include <linux/media-bus-format.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -136,6 +137,14 @@ struct panel_desc { int connector_type; }; +struct panel_desc_dsi { + struct panel_desc desc; + + unsigned long flags; + enum mipi_dsi_pixel_format format; + unsigned int lanes; +}; + struct panel_simple { struct drm_panel base; @@ -430,10 +439,7 @@ static const struct drm_panel_funcs panel_simple_funcs = { .get_timings = panel_simple_get_timings, }; -static struct panel_desc panel_dpi; - -static int panel_dpi_probe(struct device *dev, - struct panel_simple *panel) +static struct panel_desc *panel_dpi_probe(struct device *dev) { struct display_timing *timing; const struct device_node *np; @@ -445,17 +451,17 @@ static int panel_dpi_probe(struct device *dev, np = dev->of_node; desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); if (!desc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); timing = devm_kzalloc(dev, sizeof(*timing), GFP_KERNEL); if (!timing) - return -ENOMEM; + return ERR_PTR(-ENOMEM); ret = of_get_display_timing(np, "panel-timing", timing); if (ret < 0) { dev_err(dev, "%pOF: no panel-timing node found for \"panel-dpi\" binding\n", np); - return ret; + return ERR_PTR(ret); } desc->timings = timing; @@ -473,9 +479,7 @@ static int panel_dpi_probe(struct device *dev, /* We do not know the connector for the DT node, so guess it */ desc->connector_type = DRM_MODE_CONNECTOR_DPI; - panel->desc = desc; - - return 0; + return desc; } #define PANEL_SIMPLE_BOUNDS_CHECK(to_check, bounds, field) \ @@ -570,8 +574,44 @@ static int panel_simple_override_nondefault_lvds_datamapping(struct device *dev, return 0; } -static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) +static const struct panel_desc *panel_simple_get_desc(struct device *dev) { + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI) && + dev_is_mipi_dsi(dev)) { + const struct panel_desc_dsi *dsi_desc; + + dsi_desc = of_device_get_match_data(dev); + if (!dsi_desc) + return ERR_PTR(-ENODEV); + + return &dsi_desc->desc; + } + + if (dev_is_platform(dev)) { + const struct panel_desc *desc; + + desc = of_device_get_match_data(dev); + if (!desc) { + /* + * panel-dpi probes without a descriptor and + * panel_dpi_probe() will initialize one for us + * based on the device tree. + */ + if (of_device_is_compatible(dev->of_node, "panel-dpi")) + return panel_dpi_probe(dev); + else + return ERR_PTR(-ENODEV); + } + + return desc; + } + + return ERR_PTR(-ENODEV); +} + +static struct panel_simple *panel_simple_probe(struct device *dev) +{ + const struct panel_desc *desc; struct panel_simple *panel; struct display_timing dt; struct device_node *ddc; @@ -579,27 +619,31 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) u32 bus_flags; int err; + desc = panel_simple_get_desc(dev); + if (IS_ERR(desc)) + return ERR_CAST(desc); + panel = devm_drm_panel_alloc(dev, struct panel_simple, base, &panel_simple_funcs, desc->connector_type); if (IS_ERR(panel)) - return PTR_ERR(panel); + return ERR_CAST(panel); panel->desc = desc; panel->supply = devm_regulator_get(dev, "power"); if (IS_ERR(panel->supply)) - return PTR_ERR(panel->supply); + return ERR_CAST(panel->supply); panel->enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(panel->enable_gpio)) - return dev_err_probe(dev, PTR_ERR(panel->enable_gpio), - "failed to request GPIO\n"); + return dev_err_cast_probe(dev, panel->enable_gpio, + "failed to request GPIO\n"); err = of_drm_get_panel_orientation(dev->of_node, &panel->orientation); if (err) { dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err); - return err; + return ERR_PTR(err); } ddc = of_parse_phandle(dev->of_node, "ddc-i2c-bus", 0); @@ -608,19 +652,12 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) of_node_put(ddc); if (!panel->ddc) - return -EPROBE_DEFER; + return ERR_PTR(-EPROBE_DEFER); } - if (desc == &panel_dpi) { - /* Handle the generic panel-dpi binding */ - err = panel_dpi_probe(dev, panel); - if (err) - goto free_ddc; - desc = panel->desc; - } else { - if (!of_get_display_timing(dev->of_node, "panel-timing", &dt)) - panel_simple_parse_panel_timing_node(dev, panel, &dt); - } + if (!of_device_is_compatible(dev->of_node, "panel-dpi") && + !of_get_display_timing(dev->of_node, "panel-timing", &dt)) + panel_simple_parse_panel_timing_node(dev, panel, &dt); if (desc->connector_type == DRM_MODE_CONNECTOR_LVDS) { /* Optional data-mapping property for overriding bus format */ @@ -703,7 +740,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) drm_panel_add(&panel->base); - return 0; + return panel; disable_pm_runtime: pm_runtime_dont_use_autosuspend(dev); @@ -712,7 +749,7 @@ free_ddc: if (panel->ddc) put_device(&panel->ddc->dev); - return err; + return ERR_PTR(err); } static void panel_simple_shutdown(struct device *dev) @@ -5367,7 +5404,12 @@ static const struct of_device_id platform_of_match[] = { }, { /* Must be the last entry */ .compatible = "panel-dpi", - .data = &panel_dpi, + + /* + * Explicitly NULL, the panel_desc structure will be + * allocated by panel_dpi_probe(). + */ + .data = NULL, }, { /* sentinel */ } @@ -5376,13 +5418,13 @@ MODULE_DEVICE_TABLE(of, platform_of_match); static int panel_simple_platform_probe(struct platform_device *pdev) { - const struct panel_desc *desc; + struct panel_simple *panel; - desc = of_device_get_match_data(&pdev->dev); - if (!desc) - return -ENODEV; + panel = panel_simple_probe(&pdev->dev); + if (IS_ERR(panel)) + return PTR_ERR(panel); - return panel_simple_probe(&pdev->dev, desc); + return 0; } static void panel_simple_platform_remove(struct platform_device *pdev) @@ -5412,14 +5454,6 @@ static struct platform_driver panel_simple_platform_driver = { .shutdown = panel_simple_platform_shutdown, }; -struct panel_desc_dsi { - struct panel_desc desc; - - unsigned long flags; - enum mipi_dsi_pixel_format format; - unsigned int lanes; -}; - static const struct drm_display_mode auo_b080uan01_mode = { .clock = 154500, .hdisplay = 1200, @@ -5653,16 +5687,14 @@ MODULE_DEVICE_TABLE(of, dsi_of_match); static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi) { const struct panel_desc_dsi *desc; + struct panel_simple *panel; int err; - desc = of_device_get_match_data(&dsi->dev); - if (!desc) - return -ENODEV; - - err = panel_simple_probe(&dsi->dev, &desc->desc); - if (err < 0) - return err; + panel = panel_simple_probe(&dsi->dev); + if (IS_ERR(panel)) + return PTR_ERR(panel); + desc = container_of(panel->desc, struct panel_desc_dsi, desc); dsi->mode_flags = desc->flags; dsi->format = desc->format; dsi->lanes = desc->lanes; diff --git a/drivers/gpu/drm/sysfb/vesadrm.c b/drivers/gpu/drm/sysfb/vesadrm.c index 4d62c78e7d1e..f7532db3831f 100644 --- a/drivers/gpu/drm/sysfb/vesadrm.c +++ b/drivers/gpu/drm/sysfb/vesadrm.c @@ -362,14 +362,19 @@ static struct vesadrm_device *vesadrm_device_create(struct drm_driver *drv, if (!__screen_info_vbe_mode_nonvga(si)) { vesa->cmap_write = vesadrm_vga_cmap_write; -#if defined(CONFIG_X86_32) } else { +#if defined(CONFIG_X86_32) phys_addr_t pmi_base = __screen_info_vesapm_info_base(si); - const u16 *pmi_addr = phys_to_virt(pmi_base); - vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; - vesa->cmap_write = vesadrm_pmi_cmap_write; + if (pmi_base) { + const u16 *pmi_addr = phys_to_virt(pmi_base); + + vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; + vesa->cmap_write = vesadrm_pmi_cmap_write; + } else #endif + if (format->is_color_indexed) + drm_warn(dev, "hardware palette is unchangeable, colors may be incorrect\n"); } #ifdef CONFIG_X86 diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c index 2d9a0a3f6c38..7a38664e890e 100644 --- a/drivers/gpu/drm/tegra/nvdec.c +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -261,10 +261,8 @@ static int nvdec_load_falcon_firmware(struct nvdec *nvdec) if (!client->group) { virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL); - - err = dma_mapping_error(nvdec->dev, iova); - if (err < 0) - return err; + if (!virt) + return -ENOMEM; } else { virt = tegra_drm_alloc(tegra, size, &iova); if (IS_ERR(virt)) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 15cab9bda17f..bd90404ea609 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -254,6 +254,13 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, ret = dma_resv_trylock(&fbo->base.base._resv); WARN_ON(!ret); + ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); + if (ret) { + dma_resv_unlock(&fbo->base.base._resv); + kfree(fbo); + return ret; + } + if (fbo->base.resource) { ttm_resource_set_bo(fbo->base.resource, &fbo->base); bo->resource = NULL; @@ -262,12 +269,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.bulk_move = NULL; } - ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); - if (ret) { - kfree(fbo); - return ret; - } - ttm_bo_get(bo); fbo->bo = bo; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index b51f0b648a08..411e47702f8a 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -101,6 +101,12 @@ enum v3d_gen { V3D_GEN_71 = 71, }; +enum v3d_irq { + V3D_CORE_IRQ, + V3D_HUB_IRQ, + V3D_MAX_IRQS, +}; + struct v3d_dev { struct drm_device drm; @@ -112,6 +118,8 @@ struct v3d_dev { bool single_irq_line; + int irq[V3D_MAX_IRQS]; + struct v3d_perfmon_info perfmon_info; void __iomem *hub_regs; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index d7d16da78db3..37bf5eecdd2c 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -134,6 +134,8 @@ v3d_reset(struct v3d_dev *v3d) if (false) v3d_idle_axi(v3d, 0); + v3d_irq_disable(v3d); + v3d_idle_gca(v3d); v3d_reset_sms(v3d); v3d_reset_v3d(v3d); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 2cca5d3a26a2..a515a301e480 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -260,7 +260,7 @@ v3d_hub_irq(int irq, void *arg) int v3d_irq_init(struct v3d_dev *v3d) { - int irq1, ret, core; + int irq, ret, core; INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); @@ -271,17 +271,24 @@ v3d_irq_init(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); - irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1); - if (irq1 == -EPROBE_DEFER) - return irq1; - if (irq1 > 0) { - ret = devm_request_irq(v3d->drm.dev, irq1, + irq = platform_get_irq_optional(v3d_to_pdev(v3d), 1); + if (irq == -EPROBE_DEFER) + return irq; + if (irq > 0) { + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d_core0", v3d); if (ret) goto fail; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_HUB_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_HUB_IRQ], v3d_hub_irq, IRQF_SHARED, "v3d_hub", v3d); if (ret) @@ -289,8 +296,12 @@ v3d_irq_init(struct v3d_dev *v3d) } else { v3d->single_irq_line = true; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d", v3d); if (ret) @@ -331,6 +342,12 @@ v3d_irq_disable(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0); V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0); + /* Finish any interrupt handler still in flight. */ + for (int i = 0; i < V3D_MAX_IRQS; i++) { + if (v3d->irq[i]) + synchronize_irq(v3d->irq[i]); + } + /* Clear any pending interrupts we might have left. */ for (core = 0; core < v3d->cores; core++) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 0695a342b1ef..5205552b1970 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -749,7 +749,7 @@ static int vmw_setup_pci_resources(struct vmw_private *dev, dev->fifo_mem = devm_memremap(dev->drm.dev, fifo_start, fifo_size, - MEMREMAP_WB); + MEMREMAP_WB | MEMREMAP_DEC); if (IS_ERR(dev->fifo_mem)) { drm_err(&dev->drm, diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index fcc2677a4229..99a91355842e 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE - tristate "Intel Xe Graphics" - depends on DRM && PCI && (m || (y && KUNIT=y)) + tristate "Intel Xe2 Graphics" + depends on DRM && PCI + depends on KUNIT || !KUNIT depends on INTEL_VSEC || !INTEL_VSEC depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) select INTERVAL_TREE @@ -46,7 +47,8 @@ config DRM_XE select AUXILIARY_BUS select HMM_MIRROR help - Experimental driver for Intel Xe series GPUs + Driver for Intel Xe2 series GPUs and later. Experimental support + for Xe series is also available. If "M" is selected, the module will be called xe. diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 68f064f33d4b..9f4ade25787a 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -104,6 +104,8 @@ int xe_display_create(struct xe_device *xe) spin_lock_init(&xe->display.fb_tracking.lock); xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); + if (!xe->display.hotplug.dp_wq) + return -ENOMEM; return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); } diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..9f941fc2e36b 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and discrete cards. */ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d918ae1c8061..55259969480b 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -164,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -333,8 +336,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index 5394a1373a6b..ef2bf984723f 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -40,6 +40,7 @@ #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) #define PWR_LIM_VAL REG_GENMASK(14, 0) #define PWR_LIM_EN REG_BIT(15) +#define PWR_LIM REG_GENMASK(15, 0) #define PWR_LIM_TIME REG_GENMASK(23, 17) #define PWR_LIM_TIME_X REG_GENMASK(23, 22) #define PWR_LIM_TIME_Y REG_GENMASK(21, 17) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 7a8af2311318..11e60d687572 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) #define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. + * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. + * + * Return: Number of bytes copied on success, or a negative error code on failure. + */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; u32 chunk_offset; ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) xe_pm_runtime_get(gt_to_xe(ss->gt)); mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; - } + if (offset >= ss->read.size) + goto unlock; new_chunk_position = div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX, @@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, ss->read.size - offset; memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + if (pm_needed) xe_pm_runtime_put(gt_to_xe(ss->gt)); - return byte_copied; + return byte_copied ? byte_copied : ret; } static void xe_devcoredump_free(void *data) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index c02c4c4e9412..e9f3c1a53db2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -40,6 +40,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" @@ -986,38 +987,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. - * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * usescases. For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. */ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1027,6 +1005,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -1057,15 +1036,52 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(>->global_invl_lock); - xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); + xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2. + * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * usescases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. + */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (XE_WA(root_gt, 16023588340)) { + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); + tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index d61650d4aa0b..95242a375e54 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -9,7 +9,7 @@ #include <drm/drm_drv.h> #define DRIVER_NAME "xe" -#define DRIVER_DESC "Intel Xe Graphics" +#define DRIVER_DESC "Intel Xe2 Graphics" /* Interface history: * diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7062115909f2..2c799958c1e4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -201,6 +201,13 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -257,6 +264,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0)); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 10622ca471a2..6717a636b1d9 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -444,6 +444,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) #define PF_MULTIPLIER 8 pf_queue->num_dw = (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); #undef PF_MULTIPLIER pf_queue->gt = gt; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d0ac48d8f4f7..bbcbb348256f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -34,6 +34,11 @@ #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -186,14 +191,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 3beaaa7b25c1..c0ca61695d76 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,8 +5,11 @@ #include "xe_guc_pc.h" +#include <linux/cleanup.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> @@ -51,9 +54,12 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -141,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return -ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -553,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -563,26 +618,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -596,24 +653,28 @@ out: */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -626,24 +687,28 @@ out: */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -657,24 +722,14 @@ out: */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; - - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; + if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; } - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; - - pc->user_requested_max = freq; + guard(mutex)(&pc->freq_lock); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** @@ -817,6 +872,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc) static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { + struct xe_tile *tile = gt_to_tile(pc_to_gt(pc)); int ret; lockdep_assert_held(&pc->freq_lock); @@ -843,6 +899,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); + if (XE_WA(tile->primary_gt, 14022085890)) + ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); + out: return ret; } @@ -868,30 +927,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { - int ret = 0; + struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + return XE_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. */ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; } + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(>->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret; + + if (!XE_WA(pc_to_gt(pc), 22019338487)) + return 0; + + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + */ + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + return ret; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0a2664d5c811..52ecdd5ddbff 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..c02053948a57 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpa_freq: HW RPa frequency - The Achievable one */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9567f6700cf2..2ac87ff4a057 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -891,12 +891,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -1070,7 +1071,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue @@ -1116,7 +1117,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts. */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 74f31639b37f..f008e8049700 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -159,8 +159,8 @@ static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 att return ret; } -static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, - u32 uval) +static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, + u32 clr, u32 set) { struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); u32 val0, val1; @@ -179,7 +179,7 @@ static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 at channel, val0, val1, ret); if (attr == PL1_HWMON_ATTR) - val0 = uval; + val0 = (val0 & ~clr) | set; else return -EIO; @@ -339,7 +339,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe if (hwmon->xe->info.has_mbx_power_limits) { drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", PWR_ATTR_TO_STR(attr), channel); - xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, 0); + xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0); xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); } else { reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); @@ -370,10 +370,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe } if (hwmon->xe->info.has_mbx_power_limits) - ret = xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, reg_val); + ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val); else - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN | PWR_LIM_VAL, - reg_val); + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; @@ -563,14 +562,11 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a mutex_lock(&hwmon->hwmon_lock); - if (hwmon->xe->info.has_mbx_power_limits) { - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); - r = (r & ~PWR_LIM_TIME) | rxy; - xe_hwmon_pcode_write_power_limit(hwmon, power_attr, channel, r); - } else { + if (hwmon->xe->info.has_mbx_power_limits) + xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy); + else r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), PWR_LIM_TIME, rxy); - } mutex_unlock(&hwmon->hwmon_lock); @@ -1138,12 +1134,12 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) } else { drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); /* Write default limits to read from pcode from now on. */ - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_CARD, - hwmon->pl1_on_boot[CHANNEL_CARD]); - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_PKG, - hwmon->pl1_on_boot[CHANNEL_PKG]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_PKG]); hwmon->scl_shift_power = PWR_UNIT; hwmon->scl_shift_energy = ENERGY_UNIT; hwmon->scl_shift_time = TIME_UNIT; diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 63db66df064b..023ed6a6b49d 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -78,6 +78,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, bo->size); pt->level = level; pt->bo = bo; @@ -91,6 +94,9 @@ out: static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -226,9 +232,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index bf7c3981897d..6e7b70532d11 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -40,6 +40,7 @@ #define LRC_PPHWSP_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +#define LRC_WA_BB_SIZE SZ_4K static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) @@ -910,7 +911,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); xe_bo_unpin_map_no_vm(lrc->bo); - xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); +} + +static size_t wa_bb_offset(struct xe_lrc *lrc) +{ + return lrc->bo->size - LRC_WA_BB_SIZE; } /* @@ -943,15 +948,16 @@ static void xe_lrc_finish(struct xe_lrc *lrc) #define CONTEXT_ACTIVE 1ULL static int xe_lrc_setup_utilization(struct xe_lrc *lrc) { + const size_t max_size = LRC_WA_BB_SIZE; u32 *cmd, *buf = NULL; - if (lrc->bb_per_ctx_bo->vmap.is_iomem) { - buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); + if (lrc->bo->vmap.is_iomem) { + buf = kmalloc(max_size, GFP_KERNEL); if (!buf) return -ENOMEM; cmd = buf; } else { - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc); } *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; @@ -974,13 +980,14 @@ static int xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = MI_BATCH_BUFFER_END; if (buf) { - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, - buf, (cmd - buf) * sizeof(*cmd)); + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, + wa_bb_offset(lrc), buf, + (cmd - buf) * sizeof(*cmd)); kfree(buf); } - xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, - xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + + wa_bb_offset(lrc) + 1); return 0; } @@ -1018,20 +1025,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. */ - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size, + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, + lrc_size + LRC_WA_BB_SIZE, ttm_bo_type_kernel, bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, - ttm_bo_type_kernel, - bo_flags); - if (IS_ERR(lrc->bb_per_ctx_bo)) { - err = PTR_ERR(lrc->bb_per_ctx_bo); - goto err_lrc_finish; - } - lrc->size = lrc_size; lrc->ring.size = ring_size; lrc->ring.tail = 0; @@ -1819,7 +1819,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; + snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset - + LRC_WA_BB_SIZE; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index ae24cf6f8dd9..883e550a9423 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -53,9 +53,6 @@ struct xe_lrc { /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ u64 ctx_timestamp; - - /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ - struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8f8e9fdfb2a8..66bc02302c55 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -82,7 +82,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE +#define MAX_PTE_PER_SDI 0x1FEU /** * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. @@ -863,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) @@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e4742e27e2cd..da6793c2f991 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -20,7 +20,7 @@ struct xe_modparam xe_modparam = { .probe_display = true, - .guc_log_level = 3, + .guc_log_level = IS_ENABLED(CONFIG_DRM_XE_DEBUG) ? 3 : 1, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, .svm_notifier_size = 512, diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ac4beaed58ff..278af53c74dc 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -140,7 +140,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ff749edc005b..ad263de44111 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -134,7 +134,7 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) - goto err_pxp; + goto err_display; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); @@ -151,7 +151,6 @@ int xe_pm_suspend(struct xe_device *xe) err_display: xe_display_pm_resume(xe); -err_pxp: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); @@ -753,11 +752,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. + * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2741849bbf4d..a6612105201a 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -114,10 +114,10 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ - fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ - fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 9efc5accd43d..6d70109fcc43 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -21,7 +21,8 @@ GRAPHICS_VERSION_RANGE(1270, 1274) MEDIA_VERSION(1300) PLATFORM(DG2) -14018094691 GRAPHICS_VERSION(2004) +14018094691 GRAPHICS_VERSION_RANGE(2001, 2002) + GRAPHICS_VERSION(2004) 14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) 18024947630 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) @@ -37,10 +38,10 @@ GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) 22019338487 MEDIA_VERSION(2000) - GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) 22019338487_display PLATFORM(LUNARLAKE) -16023588340 GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) no_media_l3 MEDIA_VERSION(3000) @@ -59,3 +60,7 @@ no_media_l3 MEDIA_VERSION(3000) MEDIA_VERSION_RANGE(1301, 3000) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + +# SoC workaround - currently applies to all platforms with the following +# primary GT GMDID +14022085890 GRAPHICS_VERSION(2001) diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c index 6f251b284018..b00687e67ce8 100644 --- a/drivers/hid/hid-appletb-kbd.c +++ b/drivers/hid/hid-appletb-kbd.c @@ -430,14 +430,20 @@ static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id ret = appletb_kbd_set_mode(kbd, appletb_tb_def_mode); if (ret) { dev_err_probe(dev, ret, "Failed to set touchbar mode\n"); - goto close_hw; + goto unregister_handler; } hid_set_drvdata(hdev, kbd); return 0; +unregister_handler: + input_unregister_handler(&kbd->inp_handler); close_hw: + if (kbd->backlight_dev) { + put_device(&kbd->backlight_dev->dev); + timer_delete_sync(&kbd->inactivity_timer); + } hid_hw_close(hdev); stop_hw: hid_hw_stop(hdev); @@ -451,7 +457,10 @@ static void appletb_kbd_remove(struct hid_device *hdev) appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); input_unregister_handler(&kbd->inp_handler); - timer_delete_sync(&kbd->inactivity_timer); + if (kbd->backlight_dev) { + put_device(&kbd->backlight_dev->dev); + timer_delete_sync(&kbd->inactivity_timer); + } hid_hw_close(hdev); hid_hw_stop(hdev); diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 8433306148d5..c6b6b1029540 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3298,8 +3298,8 @@ static const char *keys[KEY_MAX + 1] = { [BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus", [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap", [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap", - [BTN_GEAR_DOWN] = "WheelBtn", - [BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok", + [BTN_GEAR_DOWN] = "BtnGearDown", [BTN_GEAR_UP] = "BtnGearUp", + [BTN_WHEEL] = "BtnWheel", [KEY_OK] = "Ok", [KEY_SELECT] = "Select", [KEY_GOTO] = "Goto", [KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2", [KEY_OPTION] = "Option", [KEY_INFO] = "Info", diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index defcf91fdd14..0ad7d25d9864 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -89,7 +89,8 @@ static const __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, break; case USB_DEVICE_ID_ELECOM_M_DT1URBK: case USB_DEVICE_ID_ELECOM_M_DT1DRBK: - case USB_DEVICE_ID_ELECOM_M_HT1URBK: + case USB_DEVICE_ID_ELECOM_M_HT1URBK_010C: + case USB_DEVICE_ID_ELECOM_M_HT1URBK_019B: case USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D: /* * Report descriptor format: @@ -122,7 +123,8 @@ static const struct hid_device_id elecom_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) }, { } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e3fb4e2fe911..33cc5820f2be 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -312,6 +312,8 @@ #define USB_DEVICE_ID_ASUS_AK1D 0x1125 #define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408 #define USB_DEVICE_ID_CHICONY_ACER_SWITCH12 0x1421 +#define USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA 0xb824 +#define USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA2 0xb82c #define USB_VENDOR_ID_CHUNGHWAT 0x2247 #define USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH 0x0001 @@ -446,7 +448,8 @@ #define USB_DEVICE_ID_ELECOM_M_XT4DRBK 0x00fd #define USB_DEVICE_ID_ELECOM_M_DT1URBK 0x00fe #define USB_DEVICE_ID_ELECOM_M_DT1DRBK 0x00ff -#define USB_DEVICE_ID_ELECOM_M_HT1URBK 0x010c +#define USB_DEVICE_ID_ELECOM_M_HT1URBK_010C 0x010c +#define USB_DEVICE_ID_ELECOM_M_HT1URBK_019B 0x019b #define USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D 0x010d #define USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C 0x011c @@ -819,6 +822,7 @@ #define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067 #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 +#define USB_DEVICE_ID_LENOVO_X1_TAB2 0x60a4 #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5 #define USB_DEVICE_ID_LENOVO_X12_TAB 0x60fe #define USB_DEVICE_ID_LENOVO_X12_TAB2 0x61ae @@ -1525,4 +1529,7 @@ #define USB_VENDOR_ID_SIGNOTEC 0x2133 #define USB_DEVICE_ID_SIGNOTEC_VIEWSONIC_PD1011 0x0018 +#define USB_VENDOR_ID_SMARTLINKTECHNOLOGY 0x4c4a +#define USB_DEVICE_ID_SMARTLINKTECHNOLOGY_4155 0x4155 + #endif diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 9d80635a91eb..ff1784b5c2a4 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -2343,7 +2343,7 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } if (list_empty(&hid->inputs)) { - hid_err(hid, "No inputs registered, leaving\n"); + hid_dbg(hid, "No inputs registered, leaving\n"); goto out_unwind; } diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index af29ba840522..b3121fa7a72d 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -492,6 +492,7 @@ static int lenovo_input_mapping(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_X12_TAB: case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: return lenovo_input_mapping_x1_tab_kbd(hdev, hi, field, usage, bit, max); default: @@ -548,11 +549,14 @@ static void lenovo_features_set_cptkbd(struct hid_device *hdev) /* * Tell the keyboard a driver understands it, and turn F7, F9, F11 into - * regular keys + * regular keys (Compact only) */ - ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); - if (ret) - hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); + if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD || + hdev->product == USB_DEVICE_ID_LENOVO_CBTKBD) { + ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); + if (ret) + hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); + } /* Switch middle button to native mode */ ret = lenovo_send_cmd_cptkbd(hdev, 0x09, 0x01); @@ -605,6 +609,7 @@ static ssize_t attr_fn_lock_store(struct device *dev, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_led_set_tp10ubkbd(hdev, TP10UBKBD_FN_LOCK_LED, value); if (ret) @@ -861,6 +866,7 @@ static int lenovo_event(struct hid_device *hdev, struct hid_field *field, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: return lenovo_event_tp10ubkbd(hdev, field, usage, value); default: @@ -1144,6 +1150,7 @@ static int lenovo_led_brightness_set(struct led_classdev *led_cdev, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_led_set_tp10ubkbd(hdev, tp10ubkbd_led[led_nr], value); break; @@ -1384,6 +1391,7 @@ static int lenovo_probe(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_probe_tp10ubkbd(hdev); break; @@ -1473,6 +1481,7 @@ static void lenovo_remove(struct hid_device *hdev) case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: lenovo_remove_tp10ubkbd(hdev); break; @@ -1524,6 +1533,8 @@ static const struct hid_device_id lenovo_devices[] = { { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB2) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X12_TAB) }, diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index b41001e02da7..a1c54ffe02b4 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2132,12 +2132,18 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_GENERIC, USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_7010) }, - /* Lenovo X1 TAB Gen 2 */ + /* Lenovo X1 TAB Gen 1 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, + /* Lenovo X1 TAB Gen 2 */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_LENOVO, + USB_DEVICE_ID_LENOVO_X1_TAB2) }, + /* Lenovo X1 TAB Gen 3 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 839d5bcd72b1..fb4985988615 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -308,6 +308,7 @@ enum joycon_ctlr_state { JOYCON_CTLR_STATE_INIT, JOYCON_CTLR_STATE_READ, JOYCON_CTLR_STATE_REMOVED, + JOYCON_CTLR_STATE_SUSPENDED, }; /* Controller type received as part of device info */ @@ -2750,14 +2751,46 @@ static void nintendo_hid_remove(struct hid_device *hdev) static int nintendo_hid_resume(struct hid_device *hdev) { - int ret = joycon_init(hdev); + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + int ret; + + hid_dbg(hdev, "resume\n"); + if (!joycon_using_usb(ctlr)) { + hid_dbg(hdev, "no-op resume for bt ctlr\n"); + ctlr->ctlr_state = JOYCON_CTLR_STATE_READ; + return 0; + } + ret = joycon_init(hdev); if (ret) - hid_err(hdev, "Failed to restore controller after resume"); + hid_err(hdev, + "Failed to restore controller after resume: %d\n", + ret); + else + ctlr->ctlr_state = JOYCON_CTLR_STATE_READ; return ret; } +static int nintendo_hid_suspend(struct hid_device *hdev, pm_message_t message) +{ + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + + hid_dbg(hdev, "suspend: %d\n", message.event); + /* + * Avoid any blocking loops in suspend/resume transitions. + * + * joycon_enforce_subcmd_rate() can result in repeated retries if for + * whatever reason the controller stops providing input reports. + * + * This has been observed with bluetooth controllers which lose + * connectivity prior to suspend (but not long enough to result in + * complete disconnection). + */ + ctlr->ctlr_state = JOYCON_CTLR_STATE_SUSPENDED; + return 0; +} + #endif static const struct hid_device_id nintendo_hid_devices[] = { @@ -2796,6 +2829,7 @@ static struct hid_driver nintendo_hid_driver = { #ifdef CONFIG_PM .resume = nintendo_hid_resume, + .suspend = nintendo_hid_suspend, #endif }; static int __init nintendo_init(void) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 7fefeb413ec3..9bf9ce8dc803 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -410,7 +410,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) }, #endif @@ -757,6 +758,8 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AVERMEDIA, USB_DEVICE_ID_AVER_FM_MR800) }, { HID_USB_DEVICE(USB_VENDOR_ID_AXENTIA, USB_DEVICE_ID_AXENTIA_FM_RADIO) }, { HID_USB_DEVICE(USB_VENDOR_ID_BERKSHIRE, USB_DEVICE_ID_BERKSHIRE_PCWD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CIDC, 0x0103) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI470X) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI4713) }, @@ -904,6 +907,7 @@ static const struct hid_device_id hid_ignore_list[] = { #endif { HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) }, { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473) }, + { HID_USB_DEVICE(USB_VENDOR_ID_SMARTLINKTECHNOLOGY, USB_DEVICE_ID_SMARTLINKTECHNOLOGY_4155) }, { } }; diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index 07e90d51f073..fa5d68c36313 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -38,6 +38,7 @@ #define PCI_DEVICE_ID_INTEL_ISH_LNL_M 0xA845 #define PCI_DEVICE_ID_INTEL_ISH_PTL_H 0xE345 #define PCI_DEVICE_ID_INTEL_ISH_PTL_P 0xE445 +#define PCI_DEVICE_ID_INTEL_ISH_WCL 0x4D45 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index ff0fc8010072..c57483224db6 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -27,10 +27,12 @@ enum ishtp_driver_data_index { ISHTP_DRIVER_DATA_NONE, ISHTP_DRIVER_DATA_LNL_M, ISHTP_DRIVER_DATA_PTL, + ISHTP_DRIVER_DATA_WCL, }; #define ISH_FW_GEN_LNL_M "lnlm" #define ISH_FW_GEN_PTL "ptl" +#define ISH_FW_GEN_WCL "wcl" #define ISH_FIRMWARE_PATH(gen) "intel/ish/ish_" gen ".bin" #define ISH_FIRMWARE_PATH_ALL "intel/ish/ish_*.bin" @@ -42,6 +44,9 @@ static struct ishtp_driver_data ishtp_driver_data[] = { [ISHTP_DRIVER_DATA_PTL] = { .fw_generation = ISH_FW_GEN_PTL, }, + [ISHTP_DRIVER_DATA_WCL] = { + .fw_generation = ISH_FW_GEN_WCL, + }, }; static const struct pci_device_id ish_pci_tbl[] = { @@ -67,9 +72,10 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_MTL_P)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_H)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_S)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_LNL_M), .driver_data = ISHTP_DRIVER_DATA_LNL_M}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_H), .driver_data = ISHTP_DRIVER_DATA_PTL}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_P), .driver_data = ISHTP_DRIVER_DATA_PTL}, + {PCI_DEVICE_DATA(INTEL, ISH_LNL_M, ISHTP_DRIVER_DATA_LNL_M)}, + {PCI_DEVICE_DATA(INTEL, ISH_PTL_H, ISHTP_DRIVER_DATA_PTL)}, + {PCI_DEVICE_DATA(INTEL, ISH_PTL_P, ISHTP_DRIVER_DATA_PTL)}, + {PCI_DEVICE_DATA(INTEL, ISH_WCL, ISHTP_DRIVER_DATA_WCL)}, {} }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c index f493df0d5dc4..a63f8c833252 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c @@ -4,6 +4,7 @@ #include <linux/bitfield.h> #include <linux/hid.h> #include <linux/hid-over-i2c.h> +#include <linux/unaligned.h> #include "intel-thc-dev.h" #include "intel-thc-dma.h" @@ -200,6 +201,9 @@ int quicki2c_set_report(struct quicki2c_device *qcdev, u8 report_type, int quicki2c_reset(struct quicki2c_device *qcdev) { + u16 input_reg = le16_to_cpu(qcdev->dev_desc.input_reg); + size_t read_len = HIDI2C_LENGTH_LEN; + u32 prd_len = read_len; int ret; qcdev->reset_ack = false; @@ -213,12 +217,32 @@ int quicki2c_reset(struct quicki2c_device *qcdev) ret = wait_event_interruptible_timeout(qcdev->reset_ack_wq, qcdev->reset_ack, HIDI2C_RESET_TIMEOUT * HZ); - if (ret <= 0 || !qcdev->reset_ack) { + if (qcdev->reset_ack) + return 0; + + /* + * Manually read reset response if it wasn't received, in case reset interrupt + * was missed by touch device or THC hardware. + */ + ret = thc_tic_pio_read(qcdev->thc_hw, input_reg, read_len, &prd_len, + (u32 *)qcdev->input_buf); + if (ret) { + dev_err_once(qcdev->dev, "Read Reset Response failed, ret %d\n", ret); + return ret; + } + + /* + * Check response packet length, it's first 16 bits of packet. + * If response packet length is zero, it's reset response, otherwise not. + */ + if (get_unaligned_le16(qcdev->input_buf)) { dev_err_once(qcdev->dev, "Wait reset response timed out ret:%d timeout:%ds\n", ret, HIDI2C_RESET_TIMEOUT); return -ETIMEDOUT; } + qcdev->reset_ack = true; + return 0; } diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index eaf099b2efdb..9a57504e51a1 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2048,14 +2048,18 @@ static int wacom_initialize_remotes(struct wacom *wacom) remote->remote_dir = kobject_create_and_add("wacom_remote", &wacom->hdev->dev.kobj); - if (!remote->remote_dir) + if (!remote->remote_dir) { + kfifo_free(&remote->remote_fifo); return -ENOMEM; + } error = sysfs_create_files(remote->remote_dir, remote_unpair_attrs); if (error) { hid_err(wacom->hdev, "cannot create sysfs group err: %d\n", error); + kfifo_free(&remote->remote_fifo); + kobject_put(remote->remote_dir); return error; } @@ -2901,6 +2905,7 @@ static void wacom_remove(struct hid_device *hdev) hid_hw_stop(hdev); cancel_delayed_work_sync(&wacom->init_work); + cancel_delayed_work_sync(&wacom->aes_battery_work); cancel_work_sync(&wacom->wireless_work); cancel_work_sync(&wacom->battery_work); cancel_work_sync(&wacom->remote_work); diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 48c5ab832009..c8d115b58e44 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -200,7 +200,7 @@ config I2C_ISMT config I2C_PIIX4 tristate "Intel PIIX4 and compatible (ATI/AMD/Serverworks/Broadcom/SMSC)" - depends on PCI && HAS_IOPORT && X86 + depends on PCI && HAS_IOPORT select I2C_SMBUS help If you say yes to this option, support will be included for the Intel @@ -1530,7 +1530,7 @@ config I2C_XGENE_SLIMPRO config SCx200_ACB tristate "Geode ACCESS.bus support" - depends on X86_32 && PCI + depends on X86_32 && PCI && HAS_IOPORT help Enable the use of the ACCESS.bus controllers on the Geode SCx200 and SC1100 processors and the CS5535 and CS5536 Geode companion devices. diff --git a/drivers/i2c/busses/i2c-designware-amdisp.c b/drivers/i2c/busses/i2c-designware-amdisp.c index ad6f08338124..450793d5f839 100644 --- a/drivers/i2c/busses/i2c-designware-amdisp.c +++ b/drivers/i2c/busses/i2c-designware-amdisp.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/soc/amd/isp4_misc.h> #include "i2c-designware-core.h" @@ -62,6 +63,7 @@ static int amd_isp_dw_i2c_plat_probe(struct platform_device *pdev) adap = &isp_i2c_dev->adapter; adap->owner = THIS_MODULE; + scnprintf(adap->name, sizeof(adap->name), AMDISP_I2C_ADAP_NAME); ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); adap->dev.of_node = pdev->dev.of_node; /* use dynamically allocated adapter id */ diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index c5394229b77f..cbd88ffa5610 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -363,6 +363,7 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs, dev->msgs = msgs; dev->msgs_num = num_msgs; + dev->msg_write_idx = 0; i2c_dw_xfer_init(dev); /* Initiate messages read/write transaction */ @@ -1042,8 +1043,9 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev) if (ret) return ret; - snprintf(adap->name, sizeof(adap->name), - "Synopsys DesignWare I2C adapter"); + if (!adap->name[0]) + scnprintf(adap->name, sizeof(adap->name), + "Synopsys DesignWare I2C adapter"); adap->retries = 3; adap->algo = &i2c_dw_algo; adap->quirks = &i2c_dw_quirks; diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index e5732b0557fb..205cc132fdec 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1008,7 +1008,7 @@ static inline int i2c_imx_isr_read(struct imx_i2c_struct *i2c_imx) /* setup bus to read data */ temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); temp &= ~I2CR_MTX; - if (i2c_imx->msg->len - 1) + if ((i2c_imx->msg->len - 1) || (i2c_imx->msg->flags & I2C_M_RECV_LEN)) temp &= ~I2CR_TXAK; imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); @@ -1063,6 +1063,7 @@ static inline void i2c_imx_isr_read_block_data_len(struct imx_i2c_struct *i2c_im wake_up(&i2c_imx->queue); } i2c_imx->msg->len += len; + i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = len; } static irqreturn_t i2c_imx_master_isr(struct imx_i2c_struct *i2c_imx, unsigned int status) diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c index f173bda1c98c..c8599733633e 100644 --- a/drivers/i2c/busses/i2c-microchip-corei2c.c +++ b/drivers/i2c/busses/i2c-microchip-corei2c.c @@ -435,6 +435,7 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned u8 tx_buf[I2C_SMBUS_BLOCK_MAX + 2]; u8 rx_buf[I2C_SMBUS_BLOCK_MAX + 1]; int num_msgs = 1; + int ret; msgs[CORE_I2C_SMBUS_MSG_WR].addr = addr; msgs[CORE_I2C_SMBUS_MSG_WR].flags = 0; @@ -505,7 +506,10 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned return -EOPNOTSUPP; } - mchp_corei2c_xfer(&idev->adapter, msgs, num_msgs); + ret = mchp_corei2c_xfer(&idev->adapter, msgs, num_msgs); + if (ret < 0) + return ret; + if (read_write == I2C_SMBUS_WRITE || size <= I2C_SMBUS_BYTE_DATA) return 0; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index f1cc26ac5b80..8b01df3cc8e9 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1461,13 +1461,13 @@ omap_i2c_probe(struct platform_device *pdev) if (IS_ERR(mux_state)) { r = PTR_ERR(mux_state); dev_dbg(&pdev->dev, "failed to get I2C mux: %d\n", r); - goto err_disable_pm; + goto err_put_pm; } omap->mux_state = mux_state; r = mux_state_select(omap->mux_state); if (r) { dev_err(&pdev->dev, "failed to select I2C mux: %d\n", r); - goto err_disable_pm; + goto err_put_pm; } } @@ -1515,6 +1515,9 @@ omap_i2c_probe(struct platform_device *pdev) err_unuse_clocks: omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0); + if (omap->mux_state) + mux_state_deselect(omap->mux_state); +err_put_pm: pm_runtime_dont_use_autosuspend(omap->dev); pm_runtime_put_sync(omap->dev); err_disable_pm: diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 9d3a4dc2bd60..ac3bb550303f 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -34,7 +34,7 @@ #include <linux/dmi.h> #include <linux/acpi.h> #include <linux/io.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include "i2c-piix4.h" diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c index 80d45079b763..e0a76fb5bc31 100644 --- a/drivers/i2c/busses/i2c-robotfuzz-osif.c +++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c @@ -111,6 +111,11 @@ static u32 osif_func(struct i2c_adapter *adapter) return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } +/* prevent invalid 0-length usb_control_msg */ +static const struct i2c_adapter_quirks osif_quirks = { + .flags = I2C_AQ_NO_ZERO_LEN_READ, +}; + static const struct i2c_algorithm osif_algorithm = { .xfer = osif_xfer, .functionality = osif_func, @@ -143,6 +148,7 @@ static int osif_probe(struct usb_interface *interface, priv->adapter.owner = THIS_MODULE; priv->adapter.class = I2C_CLASS_HWMON; + priv->adapter.quirks = &osif_quirks; priv->adapter.algo = &osif_algorithm; priv->adapter.algo_data = priv; snprintf(priv->adapter.name, sizeof(priv->adapter.name), diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index a18eab0992a1..57dfe5f1a7d9 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -139,6 +139,11 @@ out: return ret; } +/* prevent invalid 0-length usb_control_msg */ +static const struct i2c_adapter_quirks usb_quirks = { + .flags = I2C_AQ_NO_ZERO_LEN_READ, +}; + /* This is the actual algorithm we define */ static const struct i2c_algorithm usb_algorithm = { .xfer = usb_xfer, @@ -247,6 +252,7 @@ static int i2c_tiny_usb_probe(struct usb_interface *interface, /* setup i2c adapter description */ dev->adapter.owner = THIS_MODULE; dev->adapter.class = I2C_CLASS_HWMON; + dev->adapter.quirks = &usb_quirks; dev->adapter.algo = &usb_algorithm; dev->adapter.algo_data = dev; snprintf(dev->adapter.name, sizeof(dev->adapter.name), diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 9979a351577f..81cf3c902e81 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -582,8 +582,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port, out_unlock: mutex_unlock(&table->lock); if (ret) - pr_warn("%s: unable to add gid %pI6 error=%d\n", - __func__, gid->raw, ret); + pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n", + __func__, gid->raw, ret); return ret; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index c752ae9fad6c..b1c44ec1a3f3 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -76,6 +76,17 @@ static int ib_init_umem_odp(struct ib_umem_odp *umem_odp, end = ALIGN(end, page_size); if (unlikely(end < page_size)) return -EOVERFLOW; + /* + * The mmu notifier can be called within reclaim contexts and takes the + * umem_mutex. This is rare to trigger in testing, teach lockdep about + * it. + */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + mutex_lock(&umem_odp->umem_mutex); + mutex_unlock(&umem_odp->umem_mutex); + fs_reclaim_release(GFP_KERNEL); + } nr_entries = (end - start) >> PAGE_SHIFT; if (!(nr_entries * PAGE_SIZE / page_size)) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index b847084dcd99..a506fafd2b15 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -398,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, return ret; /* We don't expose device counters over Vports */ - if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0) + if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0) goto done; if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { @@ -418,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, */ goto done; } - ret = mlx5_lag_query_cong_counters(dev->mdev, + ret = mlx5_lag_query_cong_counters(mdev, stats->value + cnts->num_q_counters, cnts->num_cong_counters, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2479da8620ca..843dcd312242 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1958,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, /* Level1 is valid for future use, no need to free */ return -ENOMEM; + INIT_LIST_HEAD(&obj_event->obj_sub_list); err = xa_insert(&event->object_ids, key_level2, obj_event, @@ -1966,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, kfree(obj_event); return err; } - INIT_LIST_HEAD(&obj_event->obj_sub_list); } return 0; @@ -2669,7 +2669,7 @@ static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd) void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) { - struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS]; + struct mlx5_async_cmd *async_cmd; struct ib_ucontext *ucontext = ufile->ucontext; struct ib_device *device = ucontext->device; struct mlx5_ib_dev *dev = to_mdev(device); @@ -2678,6 +2678,10 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) int head = 0; int tail = 0; + async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL); + if (!async_cmd) + return; + list_for_each_entry(uobject, &ufile->uobjects, list) { WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE)); @@ -2713,6 +2717,8 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]); head++; } + + kfree(async_cmd); } static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ce7610740412..df6557ddbdfc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1791,6 +1791,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, context->devx_uid); } +static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + int err; + + err = mlx5_nic_vport_update_local_lb(master, true); + if (err) + return err; + + err = mlx5_nic_vport_update_local_lb(slave, true); + if (err) + goto out; + + return 0; + +out: + mlx5_nic_vport_update_local_lb(master, false); + return err; +} + +static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + mlx5_nic_vport_update_local_lb(slave, false); + mlx5_nic_vport_update_local_lb(master, false); +} + int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) { int err = 0; @@ -3495,6 +3522,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev); + mlx5_core_mp_event_replay(ibdev->mdev, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, NULL); @@ -3590,6 +3619,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, MLX5_DRIVER_EVENT_AFFILIATION_DONE, &key); + err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev); + if (err) + goto unbind; + return true; unbind: diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 57f9bc2a4a3a..bd35e75d9ce5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -2027,23 +2027,50 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev) } } -static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) +static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr) { - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; - bool is_odp = is_odp_mr(mr); bool is_odp_dma_buf = is_dmabuf_mr(mr) && - !to_ib_umem_dmabuf(mr->umem)->pinned; - bool from_cache = !!ent; - int ret = 0; + !to_ib_umem_dmabuf(mr->umem)->pinned; + bool is_odp = is_odp_mr(mr); + int ret; if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); if (is_odp_dma_buf) - dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL); + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); + + ret = mlx5r_umr_revoke_mr(mr); + + if (is_odp) { + if (!ret) + to_ib_umem_odp(mr->umem)->private = NULL; + mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); + } + + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = NULL; + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } - if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { + return ret; +} + +static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) +{ + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; + bool is_odp = is_odp_mr(mr); + bool from_cache = !!ent; + int ret; + + if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && + !cache_ent_find_and_store(dev, mr)) { ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ spin_lock_irq(&ent->mkeys_queue.lock); @@ -2055,7 +2082,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) ent->tmp_cleanup_scheduled = true; } spin_unlock_irq(&ent->mkeys_queue.lock); - goto out; + return 0; } if (ent) { @@ -2064,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) mr->mmkey.cache_ent = NULL; spin_unlock_irq(&ent->mkeys_queue.lock); } + + if (is_odp) + mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); ret = destroy_mkey(dev, mr); -out: if (is_odp) { if (!ret) to_ib_umem_odp(mr->umem)->private = NULL; @@ -2075,9 +2108,9 @@ out: if (is_odp_dma_buf) { if (!ret) to_ib_umem_dmabuf(mr->umem)->private = NULL; - dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); } - return ret; } @@ -2126,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) } /* Stop DMA */ - rc = mlx5_revoke_mr(mr); + rc = mlx5r_handle_mkey_cleanup(mr); if (rc) return rc; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index eaa2f9f5f3a9..f6abd64f07f7 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -259,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) } if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault)) - __xa_erase(&mr_to_mdev(mr)->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key)); xa_unlock(&imr->implicit_children); /* Freeing a MR is a sleeping operation, so bounce to a work queue */ @@ -532,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, } if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) { - ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL); + ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL); if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); __xa_erase(&imr->implicit_children, idx); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1378651735f6..23ed2fc688f0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3705,9 +3705,10 @@ static ssize_t add_target_store(struct device *dev, target_host->max_id = 1; target_host->max_lun = -1LL; target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; - target_host->max_segment_size = ib_dma_max_seg_size(ibdev); - if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) + if (ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) + target_host->max_segment_size = ib_dma_max_seg_size(ibdev); + else target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; target = host_to_target(target_host); diff --git a/drivers/input/joystick/fsia6b.c b/drivers/input/joystick/fsia6b.c index 76ffdec5c183..7e3bc99d766f 100644 --- a/drivers/input/joystick/fsia6b.c +++ b/drivers/input/joystick/fsia6b.c @@ -149,7 +149,7 @@ static int fsia6b_serio_connect(struct serio *serio, struct serio_driver *drv) } fsia6b->dev = input_dev; - snprintf(fsia6b->phys, sizeof(fsia6b->phys), "%s/input0", serio->phys); + scnprintf(fsia6b->phys, sizeof(fsia6b->phys), "%s/input0", serio->phys); input_dev->name = DRIVER_DESC; input_dev->phys = fsia6b->phys; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index c066a4da7c14..5d9b7007a730 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -177,6 +177,7 @@ static const struct xpad_device { { 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX }, { 0x05fe, 0x3030, "Chic Controller", 0, XTYPE_XBOX }, { 0x05fe, 0x3031, "Chic Controller", 0, XTYPE_XBOX }, + { 0x0502, 0x1305, "Acer NGR200", 0, XTYPE_XBOX }, { 0x062a, 0x0020, "Logic3 Xbox GamePad", 0, XTYPE_XBOX }, { 0x062a, 0x0033, "Competition Pro Steering Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, @@ -524,6 +525,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */ + XPAD_XBOX360_VENDOR(0x0502), /* Acer Inc. Xbox 360 style controllers */ XPAD_XBOX360_VENDOR(0x056e), /* Elecom JC-U3613M */ XPAD_XBOX360_VENDOR(0x06a3), /* Saitek P3600 */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz Xbox 360 controllers */ @@ -1344,11 +1346,12 @@ static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad) usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor); error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC); if (error) { - dev_err(&xpad->intf->dev, - "%s - usb_submit_urb failed with result %d\n", - __func__, error); + if (error != -ENODEV) + dev_err(&xpad->intf->dev, + "%s - usb_submit_urb failed with result %d\n", + __func__, error); usb_unanchor_urb(xpad->irq_out); - return -EIO; + return error; } xpad->irq_out_active = true; diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 3ff2fcf05ad5..c9e1127578b9 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -1191,8 +1191,8 @@ static void atkbd_set_device_attrs(struct atkbd *atkbd) "AT %s Set %d keyboard", atkbd->translated ? "Translated" : "Raw", atkbd->set); - snprintf(atkbd->phys, sizeof(atkbd->phys), - "%s/input0", atkbd->ps2dev.serio->phys); + scnprintf(atkbd->phys, sizeof(atkbd->phys), + "%s/input0", atkbd->ps2dev.serio->phys); input_dev->name = atkbd->name; input_dev->phys = atkbd->phys; diff --git a/drivers/input/misc/cs40l50-vibra.c b/drivers/input/misc/cs40l50-vibra.c index dce3b0ec8cf3..330f09123631 100644 --- a/drivers/input/misc/cs40l50-vibra.c +++ b/drivers/input/misc/cs40l50-vibra.c @@ -238,6 +238,8 @@ static int cs40l50_upload_owt(struct cs40l50_work *work_data) header.data_words = len / sizeof(u32); new_owt_effect_data = kmalloc(sizeof(header) + len, GFP_KERNEL); + if (!new_owt_effect_data) + return -ENOMEM; memcpy(new_owt_effect_data, &header, sizeof(header)); memcpy(new_owt_effect_data + sizeof(header), work_data->custom_data, len); diff --git a/drivers/input/misc/gpio-beeper.c b/drivers/input/misc/gpio-beeper.c index d2d2954e2f79..3d65cb4f4ef3 100644 --- a/drivers/input/misc/gpio-beeper.c +++ b/drivers/input/misc/gpio-beeper.c @@ -94,7 +94,7 @@ static int gpio_beeper_probe(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id gpio_beeper_of_match[] = { - { .compatible = BEEPER_MODNAME, }, + { .compatible = "gpio-beeper", }, { } }; MODULE_DEVICE_TABLE(of, gpio_beeper_of_match); diff --git a/drivers/input/misc/iqs626a.c b/drivers/input/misc/iqs626a.c index 7a6e6927f331..7fba4a8edceb 100644 --- a/drivers/input/misc/iqs626a.c +++ b/drivers/input/misc/iqs626a.c @@ -771,7 +771,7 @@ static int iqs626_parse_trackpad(struct iqs626_private *iqs626, u8 *thresh = &sys_reg->tp_grp_reg.ch_reg_tp[i].thresh; char tc_name[10]; - snprintf(tc_name, sizeof(tc_name), "channel-%d", i); + scnprintf(tc_name, sizeof(tc_name), "channel-%d", i); struct fwnode_handle *tc_node __free(fwnode_handle) = fwnode_get_named_child_node(ch_node, tc_name); diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c index 80b917944b51..6fac31c0d99f 100644 --- a/drivers/input/misc/iqs7222.c +++ b/drivers/input/misc/iqs7222.c @@ -301,6 +301,7 @@ struct iqs7222_dev_desc { int allow_offset; int event_offset; int comms_offset; + int ext_chan; bool legacy_gesture; struct iqs7222_reg_grp_desc reg_grps[IQS7222_NUM_REG_GRPS]; }; @@ -315,6 +316,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { .allow_offset = 9, .event_offset = 10, .comms_offset = 12, + .ext_chan = 10, .reg_grps = { [IQS7222_REG_GRP_STAT] = { .base = IQS7222_SYS_STATUS, @@ -373,6 +375,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { .allow_offset = 9, .event_offset = 10, .comms_offset = 12, + .ext_chan = 10, .legacy_gesture = true, .reg_grps = { [IQS7222_REG_GRP_STAT] = { @@ -2244,7 +2247,7 @@ static int iqs7222_parse_chan(struct iqs7222_private *iqs7222, const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc; struct i2c_client *client = iqs7222->client; int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row; - int ext_chan = rounddown(num_chan, 10); + int ext_chan = dev_desc->ext_chan ? : num_chan; int error, i; u16 *chan_setup = iqs7222->chan_setup[chan_index]; u16 *sys_setup = iqs7222->sys_setup; @@ -2445,7 +2448,7 @@ static int iqs7222_parse_sldr(struct iqs7222_private *iqs7222, const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc; struct i2c_client *client = iqs7222->client; int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row; - int ext_chan = rounddown(num_chan, 10); + int ext_chan = dev_desc->ext_chan ? : num_chan; int count, error, reg_offset, i; u16 *event_mask = &iqs7222->sys_setup[dev_desc->event_offset]; u16 *sldr_setup = iqs7222->sldr_setup[sldr_index]; diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index be734d65ea72..d0cb9fb94821 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1408,9 +1408,9 @@ static int alps_do_register_bare_ps2_mouse(struct alps_data *priv) return -ENOMEM; } - snprintf(priv->phys3, sizeof(priv->phys3), "%s/%s", - psmouse->ps2dev.serio->phys, - (priv->dev2 ? "input2" : "input1")); + scnprintf(priv->phys3, sizeof(priv->phys3), "%s/%s", + psmouse->ps2dev.serio->phys, + (priv->dev2 ? "input2" : "input1")); dev3->phys = priv->phys3; /* @@ -3103,8 +3103,8 @@ int alps_init(struct psmouse *psmouse) goto init_fail; } - snprintf(priv->phys2, sizeof(priv->phys2), "%s/input1", - psmouse->ps2dev.serio->phys); + scnprintf(priv->phys2, sizeof(priv->phys2), "%s/input1", + psmouse->ps2dev.serio->phys); dev2->phys = priv->phys2; /* diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c index 7147dacc404f..283ef46f039f 100644 --- a/drivers/input/mouse/lifebook.c +++ b/drivers/input/mouse/lifebook.c @@ -279,8 +279,8 @@ static int lifebook_create_relative_device(struct psmouse *psmouse) goto err_out; priv->dev2 = dev2; - snprintf(priv->phys, sizeof(priv->phys), - "%s/input1", psmouse->ps2dev.serio->phys); + scnprintf(priv->phys, sizeof(priv->phys), + "%s/input1", psmouse->ps2dev.serio->phys); dev2->phys = priv->phys; dev2->name = "LBPS/2 Fujitsu Lifebook Touchpad"; diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index a2c9f7144864..77ea7da3b1c5 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -1600,7 +1600,7 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv) psmouse_pre_receive_byte, psmouse_receive_byte); INIT_DELAYED_WORK(&psmouse->resync_work, psmouse_resync); psmouse->dev = input_dev; - snprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys); + scnprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys); psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 91a2b584dab1..196905162945 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -105,7 +105,6 @@ config TOUCHSCREEN_ADC config TOUCHSCREEN_APPLE_Z2 tristate "Apple Z2 touchscreens" - default ARCH_APPLE depends on SPI && (ARCH_APPLE || COMPILE_TEST) help Say Y here if you have an ARM Apple device with diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c index a6946e3d8376..869884219908 100644 --- a/drivers/input/touchscreen/melfas_mip4.c +++ b/drivers/input/touchscreen/melfas_mip4.c @@ -1554,7 +1554,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(mip4_pm_ops, mip4_suspend, mip4_resume); #ifdef CONFIG_OF static const struct of_device_id mip4_of_match[] = { - { .compatible = "melfas,"MIP4_DEVICE_NAME, }, + { .compatible = "melfas,mip4_ts", }, { }, }; MODULE_DEVICE_TABLE(of, mip4_of_match); diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index fc35cba59145..47692cbfaabd 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -40,9 +40,8 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, } /* Assign a cache tag with specified type to domain. */ -static int cache_tag_assign(struct dmar_domain *domain, u16 did, - struct device *dev, ioasid_t pasid, - enum cache_tag_type type) +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7aa3932251b2..148b944143b8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3780,8 +3780,17 @@ static void intel_iommu_probe_finalize(struct device *dev) !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { iommu_enable_pci_ats(info); + /* Assign a DEVTLB cache tag to the default domain. */ + if (info->ats_enabled && info->domain) { + u16 did = domain_id_iommu(info->domain, iommu); + + if (cache_tag_assign(info->domain, did, dev, + IOMMU_NO_PASID, CACHE_TAG_DEVTLB)) + iommu_disable_pci_ats(info); + } + } iommu_enable_pci_pri(info); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 3ddbcc603de2..2d1afab5eedc 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1289,6 +1289,8 @@ struct cache_tag { unsigned int users; }; +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type); int cache_tag_assign_domain(struct dmar_domain *domain, struct device *dev, ioasid_t pasid); void cache_tag_unassign_domain(struct dmar_domain *domain, diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 22f74ba33a0e..e6bb3c784017 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1157,7 +1157,6 @@ static int rk_iommu_of_xlate(struct device *dev, return -ENOMEM; data->iommu = platform_get_drvdata(iommu_dev); - data->iommu->domain = &rk_identity_domain; dev_iommu_priv_set(dev, data); platform_device_put(iommu_dev); @@ -1195,6 +1194,8 @@ static int rk_iommu_probe(struct platform_device *pdev) if (!iommu) return -ENOMEM; + iommu->domain = &rk_identity_domain; + platform_set_drvdata(pdev, iommu); iommu->dev = dev; iommu->num_mmu = 0; diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 0d196e447142..c3928ef79344 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -74,6 +74,7 @@ config ARM_VIC_NR config IRQ_MSI_LIB bool + select GENERIC_MSI_IRQ config ARMADA_370_XP_IRQ bool diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index d4697e79d5a3..b2d10063d35f 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -5,7 +5,6 @@ config BCACHE select BLOCK_HOLDER_DEPRECATED if SYSFS select CRC64 select CLOSURES - select MIN_HEAP help Allows a block device to be used as cache for other devices; uses a btree for indexing and the layout is optimized for SSDs. diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 8998e61efa40..48ce750bf70a 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -164,61 +164,40 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) * prio is worth 1/8th of what INITIAL_PRIO is worth. */ -static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b) -{ - unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; - - return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); -} - -static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args) -{ - struct bucket **lhs = (struct bucket **)l; - struct bucket **rhs = (struct bucket **)r; - struct cache *ca = args; - - return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs); -} - -static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args) -{ - struct bucket **lhs = (struct bucket **)l; - struct bucket **rhs = (struct bucket **)r; - struct cache *ca = args; +#define bucket_prio(b) \ +({ \ + unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \ + \ + (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \ +}) - return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs); -} +#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) +#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) static void invalidate_buckets_lru(struct cache *ca) { struct bucket *b; - const struct min_heap_callbacks bucket_max_cmp_callback = { - .less = new_bucket_max_cmp, - .swp = NULL, - }; - const struct min_heap_callbacks bucket_min_cmp_callback = { - .less = new_bucket_min_cmp, - .swp = NULL, - }; + ssize_t i; - ca->heap.nr = 0; + ca->heap.used = 0; for_each_bucket(b, ca) { if (!bch_can_invalidate_bucket(ca, b)) continue; - if (!min_heap_full(&ca->heap)) - min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); - else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { + if (!heap_full(&ca->heap)) + heap_add(&ca->heap, b, bucket_max_cmp); + else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { ca->heap.data[0] = b; - min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); + heap_sift(&ca->heap, 0, bucket_max_cmp); } } - min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); + for (i = ca->heap.used / 2 - 1; i >= 0; --i) + heap_sift(&ca->heap, i, bucket_min_cmp); while (!fifo_full(&ca->free_inc)) { - if (!ca->heap.nr) { + if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { /* * We don't want to be calling invalidate_buckets() * multiple times when it can't do anything @@ -227,8 +206,6 @@ static void invalidate_buckets_lru(struct cache *ca) wake_up_gc(ca->set); return; } - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); bch_invalidate_one_bucket(ca, b); } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 785b0d9008fa..1d33e40d26ea 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -458,7 +458,7 @@ struct cache { /* Allocation stuff: */ struct bucket *buckets; - DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap; + DECLARE_HEAP(struct bucket *, heap); /* * If nonzero, we know we aren't going to find any buckets to invalidate diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 68258a16e125..463eb13bd0b2 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -54,11 +54,9 @@ void bch_dump_bucket(struct btree_keys *b) int __bch_count_data(struct btree_keys *b) { unsigned int ret = 0; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey *k; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - if (b->ops->is_extents) for_each_key(b, k, &iter) ret += KEY_SIZE(k); @@ -69,11 +67,9 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) { va_list args; struct bkey *k, *p = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; const char *err; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - for_each_key(b, k, &iter) { if (b->ops->is_extents) { err = "Keys out of order"; @@ -114,9 +110,9 @@ bug: static void bch_btree_iter_next_check(struct btree_iter *iter) { - struct bkey *k = iter->heap.data->k, *next = bkey_next(k); + struct bkey *k = iter->data->k, *next = bkey_next(k); - if (next < iter->heap.data->end && + if (next < iter->data->end && bkey_cmp(k, iter->b->ops->is_extents ? &START_KEY(next) : next) > 0) { bch_dump_bucket(iter->b); @@ -883,14 +879,12 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, unsigned int status = BTREE_INSERT_STATUS_NO_INSERT; struct bset *i = bset_tree_last(b)->data; struct bkey *m, *prev = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey preceding_key_on_stack = ZERO_KEY; struct bkey *preceding_key_p = &preceding_key_on_stack; BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); - min_heap_init(&iter.heap, NULL, MAX_BSETS); - /* * If k has preceding key, preceding_key_p will be set to address * of k's preceding key; otherwise preceding_key_p will be set @@ -901,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, else preceding_key(k, &preceding_key_p); - m = bch_btree_iter_init(b, &iter, preceding_key_p); + m = bch_btree_iter_stack_init(b, &iter, preceding_key_p); - if (b->ops->insert_fixup(b, k, &iter, replace_key)) + if (b->ops->insert_fixup(b, k, &iter.iter, replace_key)) return status; status = BTREE_INSERT_STATUS_INSERT; @@ -1083,94 +1077,79 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, /* Btree iterator */ -typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *); +typedef bool (btree_iter_cmp_fn)(struct btree_iter_set, + struct btree_iter_set); -static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args) +static inline bool btree_iter_cmp(struct btree_iter_set l, + struct btree_iter_set r) { - const struct btree_iter_set *_l = l; - const struct btree_iter_set *_r = r; - - return bkey_cmp(_l->k, _r->k) <= 0; + return bkey_cmp(l.k, r.k) > 0; } static inline bool btree_iter_end(struct btree_iter *iter) { - return !iter->heap.nr; + return !iter->used; } void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end) { - const struct min_heap_callbacks callbacks = { - .less = new_btree_iter_cmp, - .swp = NULL, - }; - if (k != end) - BUG_ON(!min_heap_push(&iter->heap, - &((struct btree_iter_set) { k, end }), - &callbacks, - NULL)); + BUG_ON(!heap_add(iter, + ((struct btree_iter_set) { k, end }), + btree_iter_cmp)); } -static struct bkey *__bch_btree_iter_init(struct btree_keys *b, - struct btree_iter *iter, - struct bkey *search, - struct bset_tree *start) +static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b, + struct btree_iter_stack *iter, + struct bkey *search, + struct bset_tree *start) { struct bkey *ret = NULL; - iter->heap.size = ARRAY_SIZE(iter->heap.preallocated); - iter->heap.nr = 0; + iter->iter.size = ARRAY_SIZE(iter->stack_data); + iter->iter.used = 0; #ifdef CONFIG_BCACHE_DEBUG - iter->b = b; + iter->iter.b = b; #endif for (; start <= bset_tree_last(b); start++) { ret = bch_bset_search(b, start, search); - bch_btree_iter_push(iter, ret, bset_bkey_last(start->data)); + bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data)); } return ret; } -struct bkey *bch_btree_iter_init(struct btree_keys *b, - struct btree_iter *iter, +struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, + struct btree_iter_stack *iter, struct bkey *search) { - return __bch_btree_iter_init(b, iter, search, b->set); + return __bch_btree_iter_stack_init(b, iter, search, b->set); } static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, - new_btree_iter_cmp_fn *cmp) + btree_iter_cmp_fn *cmp) { struct btree_iter_set b __maybe_unused; struct bkey *ret = NULL; - const struct min_heap_callbacks callbacks = { - .less = cmp, - .swp = NULL, - }; if (!btree_iter_end(iter)) { bch_btree_iter_next_check(iter); - ret = iter->heap.data->k; - iter->heap.data->k = bkey_next(iter->heap.data->k); + ret = iter->data->k; + iter->data->k = bkey_next(iter->data->k); - if (iter->heap.data->k > iter->heap.data->end) { + if (iter->data->k > iter->data->end) { WARN_ONCE(1, "bset was corrupt!\n"); - iter->heap.data->k = iter->heap.data->end; + iter->data->k = iter->data->end; } - if (iter->heap.data->k == iter->heap.data->end) { - if (iter->heap.nr) { - b = min_heap_peek(&iter->heap)[0]; - min_heap_pop(&iter->heap, &callbacks, NULL); - } - } + if (iter->data->k == iter->data->end) + heap_pop(iter, b, cmp); else - min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); + heap_sift(iter, 0, cmp); } return ret; @@ -1178,7 +1157,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, struct bkey *bch_btree_iter_next(struct btree_iter *iter) { - return __bch_btree_iter_next(iter, new_btree_iter_cmp); + return __bch_btree_iter_next(iter, btree_iter_cmp); } @@ -1216,18 +1195,16 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, struct btree_iter *iter, bool fixup, bool remove_stale) { + int i; struct bkey *k, *last = NULL; BKEY_PADDED(k) tmp; bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale ? bch_ptr_bad : bch_ptr_invalid; - const struct min_heap_callbacks callbacks = { - .less = b->ops->sort_cmp, - .swp = NULL, - }; /* Heapify the iterator, using our comparison function */ - min_heapify_all(&iter->heap, &callbacks, NULL); + for (i = iter->used / 2 - 1; i >= 0; --i) + heap_sift(iter, i, b->ops->sort_cmp); while (!btree_iter_end(iter)) { if (b->ops->sort_fixup && fixup) @@ -1316,11 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, struct bset_sort_state *state) { size_t order = b->page_order, keys = 0; - struct btree_iter iter; + struct btree_iter_stack iter; int oldsize = bch_count_data(b); - min_heap_init(&iter.heap, NULL, MAX_BSETS); - __bch_btree_iter_init(b, &iter, NULL, &b->set[start]); + __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]); if (start) { unsigned int i; @@ -1331,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, order = get_order(__set_bytes(b->set->data, keys)); } - __btree_sort(b, &iter, start, order, false, state); + __btree_sort(b, &iter.iter, start, order, false, state); EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize); } @@ -1347,13 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new, struct bset_sort_state *state) { uint64_t start_time = local_clock(); - struct btree_iter iter; - - min_heap_init(&iter.heap, NULL, MAX_BSETS); + struct btree_iter_stack iter; - bch_btree_iter_init(b, &iter, NULL); + bch_btree_iter_stack_init(b, &iter, NULL); - btree_mergesort(b, new->set->data, &iter, false, true); + btree_mergesort(b, new->set->data, &iter.iter, false, true); bch_time_stats_update(&state->time, start_time); diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index f79441acd4c1..011f6062c4c0 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -187,9 +187,8 @@ struct bset_tree { }; struct btree_keys_ops { - bool (*sort_cmp)(const void *l, - const void *r, - void *args); + bool (*sort_cmp)(struct btree_iter_set l, + struct btree_iter_set r); struct bkey *(*sort_fixup)(struct btree_iter *iter, struct bkey *tmp); bool (*insert_fixup)(struct btree_keys *b, @@ -313,17 +312,23 @@ enum { BTREE_INSERT_STATUS_FRONT_MERGE, }; -struct btree_iter_set { - struct bkey *k, *end; -}; - /* Btree key iteration */ struct btree_iter { + size_t size, used; #ifdef CONFIG_BCACHE_DEBUG struct btree_keys *b; #endif - MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap; + struct btree_iter_set { + struct bkey *k, *end; + } data[]; +}; + +/* Fixed-size btree_iter that can be allocated on the stack */ + +struct btree_iter_stack { + struct btree_iter iter; + struct btree_iter_set stack_data[MAX_BSETS]; }; typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k); @@ -335,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end); -struct bkey *bch_btree_iter_init(struct btree_keys *b, - struct btree_iter *iter, - struct bkey *search); +struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, + struct btree_iter_stack *iter, + struct bkey *search); struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, const struct bkey *search); @@ -352,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b, return search ? __bch_bset_search(b, t, search) : t->data->start; } -#define for_each_key_filter(b, k, iter, filter) \ - for (bch_btree_iter_init((b), (iter), NULL); \ - ((k) = bch_btree_iter_next_filter((iter), (b), filter));) +#define for_each_key_filter(b, k, stack_iter, filter) \ + for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ + ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \ + filter));) -#define for_each_key(b, k, iter) \ - for (bch_btree_iter_init((b), (iter), NULL); \ - ((k) = bch_btree_iter_next(iter));) +#define for_each_key(b, k, stack_iter) \ + for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ + ((k) = bch_btree_iter_next(&((stack_iter)->iter)));) /* Sorting */ diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 1d0100677357..210b59007d98 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -148,19 +148,19 @@ void bch_btree_node_read_done(struct btree *b) { const char *err = "bad btree header"; struct bset *i = btree_bset_first(b); - struct btree_iter iter; + struct btree_iter *iter; /* * c->fill_iter can allocate an iterator with more memory space * than static MAX_BSETS. * See the comment arount cache_set->fill_iter. */ - iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO); - iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; - iter.heap.nr = 0; + iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); + iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; + iter->used = 0; #ifdef CONFIG_BCACHE_DEBUG - iter.b = &b->keys; + iter->b = &b->keys; #endif if (!i->seq) @@ -198,7 +198,7 @@ void bch_btree_node_read_done(struct btree *b) if (i != b->keys.set[0].data && !i->keys) goto err; - bch_btree_iter_push(&iter, i->start, bset_bkey_last(i)); + bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); b->written += set_blocks(i, block_bytes(b->c->cache)); } @@ -210,7 +210,7 @@ void bch_btree_node_read_done(struct btree *b) if (i->seq == b->keys.set[0].data->seq) goto err; - bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort); + bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort); i = b->keys.set[0].data; err = "short btree key"; @@ -222,7 +222,7 @@ void bch_btree_node_read_done(struct btree *b) bch_bset_init_next(&b->keys, write_block(b), bset_magic(&b->c->cache->sb)); out: - mempool_free(iter.heap.data, &b->c->fill_iter); + mempool_free(iter, &b->c->fill_iter); return; err: set_btree_node_io_error(b); @@ -1306,11 +1306,9 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) uint8_t stale = 0; unsigned int keys = 0, good_keys = 0; struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; struct bset_tree *t; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - gc->nodes++; for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { @@ -1569,11 +1567,9 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, static unsigned int btree_gc_count_keys(struct btree *b) { struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; unsigned int ret = 0; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) ret += bkey_u64s(k); @@ -1612,18 +1608,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, int ret = 0; bool should_rewrite; struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; struct gc_merge_info r[GC_MERGE_NODES]; struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done); + bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done); for (i = r; i < r + ARRAY_SIZE(r); i++) i->b = ERR_PTR(-EINTR); while (1) { - k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); + k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + bch_ptr_bad); if (k) { r->b = bch_btree_node_get(b->c, op, k, b->level - 1, true, b); @@ -1918,9 +1914,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) { int ret = 0; struct bkey *k, *p = NULL; - struct btree_iter iter; - - min_heap_init(&iter.heap, NULL, MAX_BSETS); + struct btree_iter_stack iter; for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(b->c, b->level, k); @@ -1928,10 +1922,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) bch_initial_mark_key(b->c, b->level + 1, &b->key); if (b->level) { - bch_btree_iter_init(&b->keys, &iter, NULL); + bch_btree_iter_stack_init(&b->keys, &iter, NULL); do { - k = bch_btree_iter_next_filter(&iter, &b->keys, + k = bch_btree_iter_next_filter(&iter.iter, &b->keys, bch_ptr_bad); if (k) { btree_node_prefetch(b, k); @@ -1959,7 +1953,7 @@ static int bch_btree_check_thread(void *arg) struct btree_check_info *info = arg; struct btree_check_state *check_state = info->state; struct cache_set *c = check_state->c; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; @@ -1967,11 +1961,9 @@ static int bch_btree_check_thread(void *arg) cur_idx = prev_idx = 0; ret = 0; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - /* root node keys are checked before thread created */ - bch_btree_iter_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -1989,7 +1981,7 @@ static int bch_btree_check_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter, + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); if (k) @@ -2062,11 +2054,9 @@ int bch_btree_check(struct cache_set *c) int ret = 0; int i; struct bkey *k = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; struct btree_check_state check_state; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - /* check and mark root node keys */ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(c, c->root->level, k); @@ -2560,12 +2550,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, if (b->level) { struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&b->keys, &iter, from); + bch_btree_iter_stack_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter, &b->keys, + while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, bch_ptr_bad))) { ret = bcache_btree(map_nodes_recurse, k, b, op, from, fn, flags); @@ -2594,12 +2583,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, { int ret = MAP_CONTINUE; struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&b->keys, &iter, from); + bch_btree_iter_stack_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { + while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + bch_ptr_bad))) { ret = !b->level ? fn(op, b, k) : bcache_btree(map_keys_recurse, k, diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 4b84fda1530a..d626ffcbecb9 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -33,16 +33,15 @@ static void sort_key_next(struct btree_iter *iter, i->k = bkey_next(i->k); if (i->k == i->end) - *i = iter->heap.data[--iter->heap.nr]; + *i = iter->data[--iter->used]; } -static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args) +static bool bch_key_sort_cmp(struct btree_iter_set l, + struct btree_iter_set r) { - struct btree_iter_set *_l = (struct btree_iter_set *)l; - struct btree_iter_set *_r = (struct btree_iter_set *)r; - int64_t c = bkey_cmp(_l->k, _r->k); + int64_t c = bkey_cmp(l.k, r.k); - return !(c ? c > 0 : _l->k < _r->k); + return c ? c > 0 : l.k < r.k; } static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) @@ -239,7 +238,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk, } const struct btree_keys_ops bch_btree_keys_ops = { - .sort_cmp = new_bch_key_sort_cmp, + .sort_cmp = bch_key_sort_cmp, .insert_fixup = bch_btree_ptr_insert_fixup, .key_invalid = bch_btree_ptr_invalid, .key_bad = bch_btree_ptr_bad, @@ -256,28 +255,22 @@ const struct btree_keys_ops bch_btree_keys_ops = { * Necessary for btree_sort_fixup() - if there are multiple keys that compare * equal in different sets, we have to process them newest to oldest. */ - -static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args) +static bool bch_extent_sort_cmp(struct btree_iter_set l, + struct btree_iter_set r) { - struct btree_iter_set *_l = (struct btree_iter_set *)l; - struct btree_iter_set *_r = (struct btree_iter_set *)r; - int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k)); + int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); - return !(c ? c > 0 : _l->k < _r->k); + return c ? c > 0 : l.k < r.k; } static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, struct bkey *tmp) { - const struct min_heap_callbacks callbacks = { - .less = new_bch_extent_sort_cmp, - .swp = NULL, - }; - while (iter->heap.nr > 1) { - struct btree_iter_set *top = iter->heap.data, *i = top + 1; - - if (iter->heap.nr > 2 && - !new_bch_extent_sort_cmp(&i[0], &i[1], NULL)) + while (iter->used > 1) { + struct btree_iter_set *top = iter->data, *i = top + 1; + + if (iter->used > 2 && + bch_extent_sort_cmp(i[0], i[1])) i++; if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) @@ -285,7 +278,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, if (!KEY_SIZE(i->k)) { sort_key_next(iter, i); - min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); + heap_sift(iter, i - top, bch_extent_sort_cmp); continue; } @@ -295,7 +288,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, else bch_cut_front(top->k, i->k); - min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); + heap_sift(iter, i - top, bch_extent_sort_cmp); } else { /* can't happen because of comparison func */ BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); @@ -305,7 +298,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, bch_cut_back(&START_KEY(i->k), tmp); bch_cut_front(i->k, top->k); - min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); + heap_sift(iter, 0, bch_extent_sort_cmp); return tmp; } else { @@ -625,7 +618,7 @@ static bool bch_extent_merge(struct btree_keys *bk, } const struct btree_keys_ops bch_extent_keys_ops = { - .sort_cmp = new_bch_extent_sort_cmp, + .sort_cmp = bch_extent_sort_cmp, .sort_fixup = bch_extent_sort_fixup, .insert_fixup = bch_extent_insert_fixup, .key_invalid = bch_extent_invalid, diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 45ca134cbf02..26a6a535ec32 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -182,19 +182,16 @@ err: if (!IS_ERR_OR_NULL(w->private)) closure_sync(&cl); } -static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args) +static bool bucket_cmp(struct bucket *l, struct bucket *r) { - struct bucket **_l = (struct bucket **)l; - struct bucket **_r = (struct bucket **)r; - - return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r); + return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); } static unsigned int bucket_heap_top(struct cache *ca) { struct bucket *b; - return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0; + return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0; } void bch_moving_gc(struct cache_set *c) @@ -202,10 +199,6 @@ void bch_moving_gc(struct cache_set *c) struct cache *ca = c->cache; struct bucket *b; unsigned long sectors_to_move, reserve_sectors; - const struct min_heap_callbacks callbacks = { - .less = new_bucket_cmp, - .swp = NULL, - }; if (!c->copy_gc_enabled) return; @@ -216,7 +209,7 @@ void bch_moving_gc(struct cache_set *c) reserve_sectors = ca->sb.bucket_size * fifo_used(&ca->free[RESERVE_MOVINGGC]); - ca->heap.nr = 0; + ca->heap.used = 0; for_each_bucket(b, ca) { if (GC_MARK(b) == GC_MARK_METADATA || @@ -225,31 +218,25 @@ void bch_moving_gc(struct cache_set *c) atomic_read(&b->pin)) continue; - if (!min_heap_full(&ca->heap)) { + if (!heap_full(&ca->heap)) { sectors_to_move += GC_SECTORS_USED(b); - min_heap_push(&ca->heap, &b, &callbacks, NULL); - } else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) { + heap_add(&ca->heap, b, bucket_cmp); + } else if (bucket_cmp(b, heap_peek(&ca->heap))) { sectors_to_move -= bucket_heap_top(ca); sectors_to_move += GC_SECTORS_USED(b); ca->heap.data[0] = b; - min_heap_sift_down(&ca->heap, 0, &callbacks, NULL); + heap_sift(&ca->heap, 0, bucket_cmp); } } while (sectors_to_move > reserve_sectors) { - if (ca->heap.nr) { - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &callbacks, NULL); - } + heap_pop(&ca->heap, b, bucket_cmp); sectors_to_move -= GC_SECTORS_USED(b); } - while (ca->heap.nr) { - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &callbacks, NULL); + while (heap_pop(&ca->heap, b, bucket_cmp)) SET_GC_MOVE(b, 1); - } mutex_unlock(&c->bucket_lock); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1efb768b2890..2ea490b9d370 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1912,7 +1912,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) INIT_LIST_HEAD(&c->btree_cache_freed); INIT_LIST_HEAD(&c->data_buckets); - iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * + iter_size = sizeof(struct btree_iter) + + ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * sizeof(struct btree_iter_set); c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index e8f696cb58c0..826b14cae4e5 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -660,9 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c) unsigned int bytes = 0; struct bkey *k; struct btree *b; - struct btree_iter iter; - - min_heap_init(&iter.heap, NULL, MAX_BSETS); + struct btree_iter_stack iter; goto lock_root; diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 539454d8e2d0..f61ab1bada6c 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include <linux/sched/clock.h> #include <linux/llist.h> -#include <linux/min_heap.h> #include <linux/ratelimit.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> @@ -31,10 +30,16 @@ struct closure; #endif +#define DECLARE_HEAP(type, name) \ + struct { \ + size_t size, used; \ + type *data; \ + } name + #define init_heap(heap, _size, gfp) \ ({ \ size_t _bytes; \ - (heap)->nr = 0; \ + (heap)->used = 0; \ (heap)->size = (_size); \ _bytes = (heap)->size * sizeof(*(heap)->data); \ (heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \ @@ -47,6 +52,64 @@ do { \ (heap)->data = NULL; \ } while (0) +#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j]) + +#define heap_sift(h, i, cmp) \ +do { \ + size_t _r, _j = i; \ + \ + for (; _j * 2 + 1 < (h)->used; _j = _r) { \ + _r = _j * 2 + 1; \ + if (_r + 1 < (h)->used && \ + cmp((h)->data[_r], (h)->data[_r + 1])) \ + _r++; \ + \ + if (cmp((h)->data[_r], (h)->data[_j])) \ + break; \ + heap_swap(h, _r, _j); \ + } \ +} while (0) + +#define heap_sift_down(h, i, cmp) \ +do { \ + while (i) { \ + size_t p = (i - 1) / 2; \ + if (cmp((h)->data[i], (h)->data[p])) \ + break; \ + heap_swap(h, i, p); \ + i = p; \ + } \ +} while (0) + +#define heap_add(h, d, cmp) \ +({ \ + bool _r = !heap_full(h); \ + if (_r) { \ + size_t _i = (h)->used++; \ + (h)->data[_i] = d; \ + \ + heap_sift_down(h, _i, cmp); \ + heap_sift(h, _i, cmp); \ + } \ + _r; \ +}) + +#define heap_pop(h, d, cmp) \ +({ \ + bool _r = (h)->used; \ + if (_r) { \ + (d) = (h)->data[0]; \ + (h)->used--; \ + heap_swap(h, 0, (h)->used); \ + heap_sift(h, 0, cmp); \ + } \ + _r; \ +}) + +#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL) + +#define heap_full(h) ((h)->used == (h)->size) + #define DECLARE_FIFO(type, name) \ struct { \ size_t front, back, size, mask; \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 453efbbdc8ee..302e75f1fc4b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -908,16 +908,15 @@ static int bch_dirty_init_thread(void *arg) struct dirty_init_thrd_info *info = arg; struct bch_dirty_init_state *state = info->state; struct cache_set *c = state->c; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; k = p = NULL; prev_idx = 0; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -931,7 +930,7 @@ static int bch_dirty_init_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter, + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); if (k) @@ -980,13 +979,11 @@ void bch_sectors_dirty_init(struct bcache_device *d) int i; struct btree *b = NULL; struct bkey *k = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; struct sectors_dirty_init op; struct cache_set *c = d->c; struct bch_dirty_init_state state; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - retry_lock: b = c->root; rw_lock(0, b, b->level); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index bd694910b01b..7f524a26cebc 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2366,8 +2366,7 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) if (!bitmap) return -ENOENT; - if (!bitmap->mddev->bitmap_info.external && - !bitmap->storage.sb_page) + if (!bitmap->storage.sb_page) return -EINVAL; sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 19c5a0ce5a40..64b8176907a9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1399,7 +1399,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, } read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp, &mddev->bio_set); - + read_bio->bi_opf &= ~REQ_NOWAIT; r1_bio->bios[rdisk] = read_bio; read_bio->bi_iter.bi_sector = r1_bio->sector + @@ -1649,6 +1649,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, wait_for_serialization(rdev, r1_bio); } + mbio->bi_opf &= ~REQ_NOWAIT; r1_bio->bios[i] = mbio; mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset); @@ -3428,6 +3429,7 @@ static int raid1_reshape(struct mddev *mddev) /* ok, everything is stopped */ oldpool = conf->r1bio_pool; conf->r1bio_pool = newpool; + init_waitqueue_head(&conf->r1bio_pool.wait); for (d = d2 = 0; d < conf->raid_disks; d++) { struct md_rdev *rdev = conf->mirrors[d].rdev; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index b74780af4c22..c9bd2005bfd0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1182,8 +1182,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, } } - if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) + if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) { + raid_end_bio_io(r10_bio); return; + } + rdev = read_balance(conf, r10_bio, &max_sectors); if (!rdev) { if (err_rdev) { @@ -1221,6 +1224,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, r10_bio->master_bio = bio; } read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set); + read_bio->bi_opf &= ~REQ_NOWAIT; r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].rdev = rdev; @@ -1256,6 +1260,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, conf->mirrors[devnum].rdev; mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set); + mbio->bi_opf &= ~REQ_NOWAIT; if (replacement) r10_bio->devs[n_copy].repl_bio = mbio; else @@ -1370,8 +1375,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, } sectors = r10_bio->sectors; - if (!regular_request_wait(mddev, conf, bio, sectors)) + if (!regular_request_wait(mddev, conf, bio, sectors)) { + raid_end_bio_io(r10_bio); return; + } + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && (mddev->reshape_backwards ? (bio->bi_iter.bi_sector < conf->reshape_safe && diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 488e346047c1..77230fbe07be 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -573,7 +573,6 @@ static int device_irq_init(struct pm860x_chip *chip, unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; int data, mask, ret = -EINVAL; int nr_irqs, irq_base = -1; - struct device_node *node = i2c->dev.of_node; mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR | PM8607_B0_MISC1_INT_MASK; @@ -624,7 +623,7 @@ static int device_irq_init(struct pm860x_chip *chip, ret = -EBUSY; goto out; } - irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, chip->irq_base, 0, + irq_domain_create_legacy(dev_fwnode(&i2c->dev), nr_irqs, chip->irq_base, 0, &pm860x_irq_domain_ops, chip); chip->core_irq = i2c->irq; if (!chip->core_irq) diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c index 78b16c67a5fc..25377dcce60e 100644 --- a/drivers/mfd/max8925-core.c +++ b/drivers/mfd/max8925-core.c @@ -656,7 +656,6 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq, { unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; int ret; - struct device_node *node = chip->dev->of_node; /* clear all interrupts */ max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ1); @@ -682,8 +681,9 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq, return -EBUSY; } - irq_domain_create_legacy(of_fwnode_handle(node), MAX8925_NR_IRQS, chip->irq_base, 0, - &max8925_irq_domain_ops, chip); + irq_domain_create_legacy(dev_fwnode(chip->dev), MAX8925_NR_IRQS, + chip->irq_base, 0, &max8925_irq_domain_ops, + chip); /* request irq handler for pmic main irq*/ chip->core_irq = irq; diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index 232c2bfe8c18..d3ab40651307 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -676,7 +676,6 @@ int twl4030_init_irq(struct device *dev, int irq_num) static struct irq_chip twl4030_irq_chip; int status, i; int irq_base, irq_end, nr_irqs; - struct device_node *node = dev->of_node; /* * TWL core and pwr interrupts must be contiguous because @@ -691,7 +690,7 @@ int twl4030_init_irq(struct device *dev, int irq_num) return irq_base; } - irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, irq_base, 0, + irq_domain_create_legacy(dev_fwnode(dev), nr_irqs, irq_base, 0, &irq_domain_simple_ops, NULL); irq_end = irq_base + TWL4030_CORE_NR_IRQS; diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 7f893bafaa60..c417ed34c057 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -44,6 +44,12 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = { 0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd, MMC_QUIRK_NO_UHS_DDR50_TUNING, EXT_CSD_REV_ANY), + /* + * Some SD cards reports discard support while they don't + */ + MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, + MMC_QUIRK_BROKEN_SD_DISCARD), + END_FIXUP }; @@ -147,12 +153,6 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc, MMC_QUIRK_TRIM_BROKEN), - /* - * Some SD cards reports discard support while they don't - */ - MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, - MMC_QUIRK_BROKEN_SD_DISCARD), - END_FIXUP }; diff --git a/drivers/mmc/core/sd_uhs2.c b/drivers/mmc/core/sd_uhs2.c index 1c31d0dfa961..de17d1611290 100644 --- a/drivers/mmc/core/sd_uhs2.c +++ b/drivers/mmc/core/sd_uhs2.c @@ -91,8 +91,8 @@ static int sd_uhs2_phy_init(struct mmc_host *host) err = host->ops->uhs2_control(host, UHS2_PHY_INIT); if (err) { - pr_err("%s: failed to initial phy for UHS-II!\n", - mmc_hostname(host)); + pr_debug("%s: failed to initial phy for UHS-II!\n", + mmc_hostname(host)); } return err; diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 31eb90536bce..d7020e06dd55 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -846,12 +846,18 @@ static inline void msdc_dma_setup(struct msdc_host *host, struct msdc_dma *dma, static void msdc_prepare_data(struct msdc_host *host, struct mmc_data *data) { if (!(data->host_cookie & MSDC_PREPARE_FLAG)) { - data->host_cookie |= MSDC_PREPARE_FLAG; data->sg_count = dma_map_sg(host->dev, data->sg, data->sg_len, mmc_get_dma_dir(data)); + if (data->sg_count) + data->host_cookie |= MSDC_PREPARE_FLAG; } } +static bool msdc_data_prepared(struct mmc_data *data) +{ + return data->host_cookie & MSDC_PREPARE_FLAG; +} + static void msdc_unprepare_data(struct msdc_host *host, struct mmc_data *data) { if (data->host_cookie & MSDC_ASYNC_FLAG) @@ -1483,8 +1489,19 @@ static void msdc_ops_request(struct mmc_host *mmc, struct mmc_request *mrq) WARN_ON(!host->hsq_en && host->mrq); host->mrq = mrq; - if (mrq->data) + if (mrq->data) { msdc_prepare_data(host, mrq->data); + if (!msdc_data_prepared(mrq->data)) { + host->mrq = NULL; + /* + * Failed to prepare DMA area, fail fast before + * starting any commands. + */ + mrq->cmd->error = -ENOSPC; + mmc_request_done(mmc_from_priv(host), mrq); + return; + } + } /* if SBC is required, we have HW option and SW option. * if HW option is enabled, and SBC does not have "special" flags, diff --git a/drivers/mmc/host/sdhci-of-k1.c b/drivers/mmc/host/sdhci-of-k1.c index 6880d3e9ab62..2e5da7c5834c 100644 --- a/drivers/mmc/host/sdhci-of-k1.c +++ b/drivers/mmc/host/sdhci-of-k1.c @@ -276,7 +276,8 @@ static int spacemit_sdhci_probe(struct platform_device *pdev) host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY; - if (spacemit_sdhci_get_clocks(dev, pltfm_host)) + ret = spacemit_sdhci_get_clocks(dev, pltfm_host); + if (ret) goto err_pltfm; ret = sdhci_add_host(host); diff --git a/drivers/mmc/host/sdhci-uhs2.c b/drivers/mmc/host/sdhci-uhs2.c index c53b64d50c0d..0efeb9d0c376 100644 --- a/drivers/mmc/host/sdhci-uhs2.c +++ b/drivers/mmc/host/sdhci-uhs2.c @@ -99,8 +99,8 @@ void sdhci_uhs2_reset(struct sdhci_host *host, u16 mask) /* hw clears the bit when it's done */ if (read_poll_timeout_atomic(sdhci_readw, val, !(val & mask), 10, UHS2_RESET_TIMEOUT_100MS, true, host, SDHCI_UHS2_SW_RESET)) { - pr_warn("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__, - mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc)); + pr_debug("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__, + mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc)); sdhci_writeb(host, 0, SDHCI_UHS2_SW_RESET); return; } @@ -335,8 +335,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IF_DETECT), 100, UHS2_INTERFACE_DETECT_TIMEOUT_100MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 interface detect timeout in 100ms"); return -EIO; } @@ -345,8 +345,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_LANE_SYNC), 100, UHS2_LANE_SYNC_TIMEOUT_150MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 Lane sync fail in 150ms"); return -EIO; } @@ -417,12 +417,12 @@ static int sdhci_uhs2_do_detect_init(struct mmc_host *mmc) host->ops->uhs2_pre_detect_init(host); if (sdhci_uhs2_interface_detect(host)) { - pr_warn("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc)); + pr_debug("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc)); return -EIO; } if (sdhci_uhs2_init(host)) { - pr_warn("%s: UHS2 init fail.\n", mmc_hostname(host->mmc)); + pr_debug("%s: UHS2 init fail.\n", mmc_hostname(host->mmc)); return -EIO; } @@ -504,8 +504,8 @@ static int sdhci_uhs2_check_dormant(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IN_DORMANT_STATE), 100, UHS2_CHECK_DORMANT_TIMEOUT_100MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 IN_DORMANT fail in 100ms"); return -EIO; } return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index f008167d1863..e116f2db34d5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2065,15 +2065,10 @@ void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) host->mmc->actual_clock = 0; - clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); - if (clk & SDHCI_CLOCK_CARD_EN) - sdhci_writew(host, clk & ~SDHCI_CLOCK_CARD_EN, - SDHCI_CLOCK_CONTROL); + sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); - if (clock == 0) { - sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); + if (clock == 0) return; - } clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock); sdhci_enable_clk(host, clk); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index f9d65dd0f2b2..70ada1857a4c 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -900,4 +900,20 @@ void sdhci_switch_external_dma(struct sdhci_host *host, bool en); void sdhci_set_data_timeout_irq(struct sdhci_host *host, bool enable); void __sdhci_set_timeout(struct sdhci_host *host, struct mmc_command *cmd); +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define SDHCI_DBG_ANYWAY 0 +#elif defined(DEBUG) +#define SDHCI_DBG_ANYWAY 1 +#else +#define SDHCI_DBG_ANYWAY 0 +#endif + +#define sdhci_dbg_dumpregs(host, fmt) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(descriptor) || SDHCI_DBG_ANYWAY) \ + sdhci_dumpregs(host); \ +} while (0) + #endif /* __SDHCI_HW_H */ diff --git a/drivers/mtd/nand/qpic_common.c b/drivers/mtd/nand/qpic_common.c index 4dc4d65e7d32..8e604cc22ca3 100644 --- a/drivers/mtd/nand/qpic_common.c +++ b/drivers/mtd/nand/qpic_common.c @@ -57,14 +57,15 @@ qcom_alloc_bam_transaction(struct qcom_nand_controller *nandc) bam_txn_buf += sizeof(*bam_txn); bam_txn->bam_ce = bam_txn_buf; - bam_txn_buf += - sizeof(*bam_txn->bam_ce) * QPIC_PER_CW_CMD_ELEMENTS * num_cw; + bam_txn->bam_ce_nitems = QPIC_PER_CW_CMD_ELEMENTS * num_cw; + bam_txn_buf += sizeof(*bam_txn->bam_ce) * bam_txn->bam_ce_nitems; bam_txn->cmd_sgl = bam_txn_buf; - bam_txn_buf += - sizeof(*bam_txn->cmd_sgl) * QPIC_PER_CW_CMD_SGL * num_cw; + bam_txn->cmd_sgl_nitems = QPIC_PER_CW_CMD_SGL * num_cw; + bam_txn_buf += sizeof(*bam_txn->cmd_sgl) * bam_txn->cmd_sgl_nitems; bam_txn->data_sgl = bam_txn_buf; + bam_txn->data_sgl_nitems = QPIC_PER_CW_DATA_SGL * num_cw; init_completion(&bam_txn->txn_done); @@ -238,6 +239,11 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, struct bam_transaction *bam_txn = nandc->bam_txn; u32 offset; + if (bam_txn->bam_ce_pos + size > bam_txn->bam_ce_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "CE"); + return -EINVAL; + } + bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_pos]; /* fill the command desc */ @@ -258,6 +264,12 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, /* use the separate sgl after this command */ if (flags & NAND_BAM_NEXT_SGL) { + if (bam_txn->cmd_sgl_pos >= bam_txn->cmd_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", + "CMD sgl"); + return -EINVAL; + } + bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_start]; bam_ce_size = (bam_txn->bam_ce_pos - bam_txn->bam_ce_start) * @@ -297,10 +309,20 @@ int qcom_prep_bam_dma_desc_data(struct qcom_nand_controller *nandc, bool read, struct bam_transaction *bam_txn = nandc->bam_txn; if (read) { + if (bam_txn->rx_sgl_pos >= bam_txn->data_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "RX sgl"); + return -EINVAL; + } + sg_set_buf(&bam_txn->data_sgl[bam_txn->rx_sgl_pos], vaddr, size); bam_txn->rx_sgl_pos++; } else { + if (bam_txn->tx_sgl_pos >= bam_txn->data_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "TX sgl"); + return -EINVAL; + } + sg_set_buf(&bam_txn->data_sgl[bam_txn->tx_sgl_pos], vaddr, size); bam_txn->tx_sgl_pos++; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 6c656bfdb323..fe74dbd2c966 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -665,7 +665,7 @@ static int m_can_handle_lost_msg(struct net_device *dev) struct can_frame *frame; u32 timestamp = 0; - netdev_err(dev, "msg lost in rxf0\n"); + netdev_dbg(dev, "msg lost in rxf0\n"); stats->rx_errors++; stats->rx_over_errors++; diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 06dea3a13e77..9057180051df 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2984,6 +2984,7 @@ static int airoha_probe(struct platform_device *pdev) error_napi_stop: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_qdma_stop_napi(ð->qdma[i]); + airoha_ppe_deinit(eth); error_hw_cleanup: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_hw_cleanup(ð->qdma[i]); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index e1296cbf4ff3..9316de4126cf 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1269,6 +1269,8 @@ #define MDIO_VEND2_CTRL1_SS13 BIT(13) #endif +#define XGBE_VEND2_MAC_AUTO_SW BIT(9) + /* MDIO mask values */ #define XGBE_AN_CL73_INT_CMPLT BIT(0) #define XGBE_AN_CL73_INC_LINK BIT(1) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 71449edbb76d..1a37ec45e650 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -266,6 +266,10 @@ static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable, reg |= MDIO_VEND2_CTRL1_AN_RESTART; XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL); + reg |= XGBE_VEND2_MAC_AUTO_SW; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL, reg); } static void xgbe_an37_restart(struct xgbe_prv_data *pdata) @@ -894,6 +898,11 @@ static void xgbe_an37_init(struct xgbe_prv_data *pdata) netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n", (pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII"); + + reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1); + reg &= ~MDIO_AN_CTRL1_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg); + } static void xgbe_an73_init(struct xgbe_prv_data *pdata) @@ -1295,6 +1304,10 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata, &an_restart); + /* bail out if the link status register read fails */ + if (pdata->phy.link < 0) + return; + if (an_restart) { xgbe_phy_config_aneg(pdata); goto adjust_link; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 7a4dfa4e19c7..23c39e92e783 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -2746,8 +2746,7 @@ static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int reg; - int ret; + int reg, ret; *an_restart = 0; @@ -2781,11 +2780,20 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) return 0; } - /* Link status is latched low, so read once to clear - * and then read again to get current state - */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + return reg; + + /* Link status is latched low so that momentary link drops + * can be detected. If link was already down read again + * to get the latest state. + */ + + if (!pdata->phy.link && !(reg & MDIO_STAT1_LSTATUS)) { + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + return reg; + } if (pdata->en_rx_adap) { /* if the link is available and adaptation is done, @@ -2804,9 +2812,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) xgbe_phy_set_mode(pdata, phy_data->cur_mode); } - /* check again for the link and adaptation status */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); - if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done) + if (pdata->rx_adapt_done) return 1; } else if (reg & MDIO_STAT1_LSTATUS) return 1; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 6359bb87dc13..057379cd43ba 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -183,12 +183,12 @@ #define XGBE_LINK_TIMEOUT 5 #define XGBE_KR_TRAINING_WAIT_ITER 50 -#define XGBE_SGMII_AN_LINK_STATUS BIT(1) +#define XGBE_SGMII_AN_LINK_DUPLEX BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) #define XGBE_SGMII_AN_LINK_SPEED_10 0x00 #define XGBE_SGMII_AN_LINK_SPEED_100 0x04 #define XGBE_SGMII_AN_LINK_SPEED_1000 0x08 -#define XGBE_SGMII_AN_LINK_DUPLEX BIT(4) +#define XGBE_SGMII_AN_LINK_STATUS BIT(4) /* ECC correctable error notification window (seconds) */ #define XGBE_ECC_LIMIT 60 diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index cfdb546a09e7..98a4d089270e 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -1861,14 +1861,21 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter) break; } - buffer_info->alloced = 1; - buffer_info->skb = skb; - buffer_info->length = (u16) adapter->rx_buffer_len; page = virt_to_page(skb->data); offset = offset_in_page(skb->data); buffer_info->dma = dma_map_page(&pdev->dev, page, offset, adapter->rx_buffer_len, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { + kfree_skb(skb); + adapter->soft_stats.rx_dropped++; + break; + } + + buffer_info->alloced = 1; + buffer_info->skb = skb; + buffer_info->length = (u16)adapter->rx_buffer_len; + rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma); rfd_desc->buf_len = cpu_to_le16(adapter->rx_buffer_len); rfd_desc->coalese = 0; @@ -2183,8 +2190,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb, return 0; } -static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, - struct tx_packet_desc *ptpd) +static bool atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, + struct tx_packet_desc *ptpd) { struct atl1_tpd_ring *tpd_ring = &adapter->tpd_ring; struct atl1_buffer *buffer_info; @@ -2194,6 +2201,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, unsigned int nr_frags; unsigned int f; int retval; + u16 first_mapped; u16 next_to_use; u16 data_len; u8 hdr_len; @@ -2201,6 +2209,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buf_len -= skb->data_len; nr_frags = skb_shinfo(skb)->nr_frags; next_to_use = atomic_read(&tpd_ring->next_to_use); + first_mapped = next_to_use; buffer_info = &tpd_ring->buffer_info[next_to_use]; BUG_ON(buffer_info->skb); /* put skb in last TPD */ @@ -2216,6 +2225,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, offset, hdr_len, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; @@ -2242,6 +2253,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, page, offset, buffer_info->length, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2254,6 +2268,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, offset, buf_len, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2277,6 +2293,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = skb_frag_dma_map(&adapter->pdev->dev, frag, i * ATL1_MAX_TX_BUF_LEN, buffer_info->length, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; @@ -2285,6 +2304,22 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, /* last tpd's buffer-info */ buffer_info->skb = skb; + + return true; + + dma_err: + while (first_mapped != next_to_use) { + buffer_info = &tpd_ring->buffer_info[first_mapped]; + dma_unmap_page(&adapter->pdev->dev, + buffer_info->dma, + buffer_info->length, + DMA_TO_DEVICE); + buffer_info->dma = 0; + + if (++first_mapped == tpd_ring->count) + first_mapped = 0; + } + return false; } static void atl1_tx_queue(struct atl1_adapter *adapter, u16 count, @@ -2355,10 +2390,8 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, len = skb_headlen(skb); - if (unlikely(skb->len <= 0)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (unlikely(skb->len <= 0)) + goto drop_packet; nr_frags = skb_shinfo(skb)->nr_frags; for (f = 0; f < nr_frags; f++) { @@ -2371,10 +2404,9 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, if (mss) { if (skb->protocol == htons(ETH_P_IP)) { proto_hdr_len = skb_tcp_all_headers(skb); - if (unlikely(proto_hdr_len > len)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (unlikely(proto_hdr_len > len)) + goto drop_packet; + /* need additional TPD ? */ if (proto_hdr_len != len) count += (len - proto_hdr_len + @@ -2406,23 +2438,26 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, } tso = atl1_tso(adapter, skb, ptpd); - if (tso < 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (tso < 0) + goto drop_packet; if (!tso) { ret_val = atl1_tx_csum(adapter, skb, ptpd); - if (ret_val < 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (ret_val < 0) + goto drop_packet; } - atl1_tx_map(adapter, skb, ptpd); + if (!atl1_tx_map(adapter, skb, ptpd)) + goto drop_packet; + atl1_tx_queue(adapter, count, ptpd); atl1_update_mailbox(adapter); return NETDEV_TX_OK; + +drop_packet: + adapter->soft_stats.tx_errors++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } static int atl1_rings_clean(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2cb3185c442c..243cb13cb01c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2989,6 +2989,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, { struct bnxt_napi *bnapi = cpr->bnapi; u32 raw_cons = cpr->cp_raw_cons; + bool flush_xdp = false; u32 cons; int rx_pkts = 0; u8 event = 0; @@ -3042,6 +3043,8 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, else rc = bnxt_force_rx_discard(bp, cpr, &raw_cons, &event); + if (event & BNXT_REDIRECT_EVENT) + flush_xdp = true; if (likely(rc >= 0)) rx_pkts += rc; /* Increment rx_pkts when rc is -ENOMEM to count towards @@ -3066,7 +3069,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } - if (event & BNXT_REDIRECT_EVENT) { + if (flush_xdp) { xdp_do_flush(); event &= ~BNXT_REDIRECT_EVENT; } @@ -11604,11 +11607,9 @@ static void bnxt_free_irq(struct bnxt *bp) static int bnxt_request_irq(struct bnxt *bp) { + struct cpu_rmap *rmap = NULL; int i, j, rc = 0; unsigned long flags = 0; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif rc = bnxt_setup_int_mode(bp); if (rc) { @@ -11629,15 +11630,15 @@ static int bnxt_request_irq(struct bnxt *bp) int map_idx = bnxt_cp_num_to_irq_num(bp, i); struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; -#ifdef CONFIG_RFS_ACCEL - if (rmap && bp->bnapi[i]->rx_ring) { + if (IS_ENABLED(CONFIG_RFS_ACCEL) && + rmap && bp->bnapi[i]->rx_ring) { rc = irq_cpu_rmap_add(rmap, irq->vector); if (rc) netdev_warn(bp->dev, "failed adding irq rmap for ring %d\n", j); j++; } -#endif + rc = request_irq(irq->vector, irq->handler, flags, irq->name, bp->bnapi[i]); if (rc) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index ce97befd3cb3..67e70d3d0980 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -368,23 +368,27 @@ static u32 bnxt_get_ctx_coredump(struct bnxt *bp, void *buf, u32 offset, if (!ctxm->mem_valid || !seg_id) continue; - if (trace) + if (trace) { extra_hlen = BNXT_SEG_RCD_LEN; + if (buf) { + u16 trace_type = bnxt_bstore_to_trace[type]; + + bnxt_fill_drv_seg_record(bp, &record, ctxm, + trace_type); + } + } + if (buf) data = buf + BNXT_SEG_HDR_LEN + extra_hlen; + seg_len = bnxt_copy_ctx_mem(bp, ctxm, data, 0) + extra_hlen; if (buf) { bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, seg_len, 0, 0, 0, comp_id, seg_id); memcpy(buf, &seg_hdr, BNXT_SEG_HDR_LEN); buf += BNXT_SEG_HDR_LEN; - if (trace) { - u16 trace_type = bnxt_bstore_to_trace[type]; - - bnxt_fill_drv_seg_record(bp, &record, ctxm, - trace_type); + if (trace) memcpy(buf, &record, BNXT_SEG_RCD_LEN); - } buf += seg_len; } len += BNXT_SEG_HDR_LEN + seg_len; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 0dbb880a7aa0..71e14be2507e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -487,7 +487,9 @@ static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) if ((ets->tc_tx_bw[i] || ets->tc_tsa[i]) && i > bp->max_tc) return -EINVAL; + } + for (i = 0; i < max_tc; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: break; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 4a6d8cb9f970..09e7e8efa6fa 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -115,7 +115,7 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, tx_buf->action = XDP_REDIRECT; tx_buf->xdpf = xdpf; dma_unmap_addr_set(tx_buf, mapping, mapping); - dma_unmap_len_set(tx_buf, len, 0); + dma_unmap_len_set(tx_buf, len, len); } void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index fa0077bc67b7..97585c160de3 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -4092,6 +4092,12 @@ static int bcmgenet_probe(struct platform_device *pdev) for (i = 0; i <= priv->hw_params->rx_queues; i++) priv->rx_rings[i].rx_max_coalesced_frames = 1; + /* Initialize u64 stats seq counter for 32bit machines */ + for (i = 0; i <= priv->hw_params->rx_queues; i++) + u64_stats_init(&priv->rx_rings[i].stats64.syncp); + for (i = 0; i <= priv->hw_params->tx_queues; i++) + u64_stats_init(&priv->tx_rings[i].stats64.syncp); + /* libphy will determine the link state */ netif_carrier_off(dev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index aebb9fef3f6e..1be2dc40a1a6 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1578,7 +1578,6 @@ napi_del: static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) { struct nicvf *nic = netdev_priv(netdev); - int orig_mtu = netdev->mtu; /* For now just support only the usual MTU sized frames, * plus some headroom for VLAN, QinQ. @@ -1589,15 +1588,10 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } - WRITE_ONCE(netdev->mtu, new_mtu); - - if (!netif_running(netdev)) - return 0; - - if (nicvf_update_hw_max_frs(nic, new_mtu)) { - netdev->mtu = orig_mtu; + if (netif_running(netdev) && nicvf_update_hw_max_frs(nic, new_mtu)) return -EINVAL; - } + + WRITE_ONCE(netdev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 773f5ad972a2..6bc8dfdb3d4b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1864,10 +1864,10 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu) if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; - if (netdev->mtu > enic->port_mtu) + if (new_mtu > enic->port_mtu) netdev_warn(netdev, "interface MTU (%d) set higher than port MTU (%d)\n", - netdev->mtu, enic->port_mtu); + new_mtu, enic->port_mtu); return _enic_change_mtu(netdev, new_mtu); } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 2ec2c3dab250..b82f121cadad 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -3939,6 +3939,7 @@ static int dpaa2_eth_setup_rx_flow(struct dpaa2_eth_priv *priv, MEM_TYPE_PAGE_ORDER0, NULL); if (err) { dev_err(dev, "xdp_rxq_info_reg_mem_model failed\n"); + xdp_rxq_info_unreg(&fq->channel->xdp_rxq); return err; } @@ -4432,17 +4433,25 @@ static int dpaa2_eth_bind_dpni(struct dpaa2_eth_priv *priv) return -EINVAL; } if (err) - return err; + goto out; } err = dpni_get_qdid(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_TX, &priv->tx_qdid); if (err) { dev_err(dev, "dpni_get_qdid() failed\n"); - return err; + goto out; } return 0; + +out: + while (i--) { + if (priv->fq[i].type == DPAA2_RX_FQ && + xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq)) + xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq); + } + return err; } /* Allocate rings for storing incoming frame descriptors */ @@ -4825,6 +4834,17 @@ static void dpaa2_eth_del_ch_napi(struct dpaa2_eth_priv *priv) } } +static void dpaa2_eth_free_rx_xdp_rxq(struct dpaa2_eth_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_fqs; i++) { + if (priv->fq[i].type == DPAA2_RX_FQ && + xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq)) + xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq); + } +} + static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) { struct device *dev; @@ -5028,6 +5048,7 @@ err_alloc_percpu_extras: free_percpu(priv->percpu_stats); err_alloc_percpu_stats: dpaa2_eth_del_ch_napi(priv); + dpaa2_eth_free_rx_xdp_rxq(priv); err_bind: dpaa2_eth_free_dpbps(priv); err_dpbp_setup: @@ -5080,6 +5101,7 @@ static void dpaa2_eth_remove(struct fsl_mc_device *ls_dev) free_percpu(priv->percpu_extras); dpaa2_eth_del_ch_napi(priv); + dpaa2_eth_free_rx_xdp_rxq(priv); dpaa2_eth_free_dpbps(priv); dpaa2_eth_free_dpio(priv); dpaa2_eth_free_dpni(priv); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 4098f01479bc..53e8d18c7a34 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -507,7 +507,7 @@ static inline u64 _enetc_rd_reg64(void __iomem *reg) tmp = ioread32(reg + 4); } while (high != tmp); - return le64_to_cpu((__le64)high << 32 | low); + return (u64)high << 32 | low; } #endif diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index a189038d88df..246ddce753f9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -211,7 +211,6 @@ struct ibmvnic_statistics { u8 reserved[72]; } __packed __aligned(8); -#define NUM_TX_STATS 3 struct ibmvnic_tx_queue_stats { u64 batched_packets; u64 direct_packets; @@ -219,13 +218,18 @@ struct ibmvnic_tx_queue_stats { u64 dropped_packets; }; -#define NUM_RX_STATS 3 +#define NUM_TX_STATS \ + (sizeof(struct ibmvnic_tx_queue_stats) / sizeof(u64)) + struct ibmvnic_rx_queue_stats { u64 packets; u64 bytes; u64 interrupts; }; +#define NUM_RX_STATS \ + (sizeof(struct ibmvnic_rx_queue_stats) / sizeof(u64)) + struct ibmvnic_acl_buffer { __be32 len; __be32 version; diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c index b28991dd1870..48b8e184f3db 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -96,7 +96,7 @@ static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq) */ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) { - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* free ring buffers and the ring itself */ idpf_ctlq_dealloc_ring_res(hw, cq); @@ -104,8 +104,7 @@ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) /* Set ring_size to 0 to indicate uninitialized queue */ cq->ring_size = 0; - mutex_unlock(&cq->cq_lock); - mutex_destroy(&cq->cq_lock); + spin_unlock(&cq->cq_lock); } /** @@ -173,7 +172,7 @@ int idpf_ctlq_add(struct idpf_hw *hw, idpf_ctlq_init_regs(hw, cq, is_rxq); - mutex_init(&cq->cq_lock); + spin_lock_init(&cq->cq_lock); list_add(&cq->cq_list, &hw->cq_list_head); @@ -272,7 +271,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, int err = 0; int i; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* Ensure there are enough descriptors to send all messages */ num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq); @@ -332,7 +331,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, wr32(hw, cq->reg.tail, cq->next_to_use); err_unlock: - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); return err; } @@ -364,7 +363,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, if (*clean_count > cq->ring_size) return -EBADR; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -397,7 +396,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* Return number of descriptors actually cleaned */ *clean_count = i; @@ -435,7 +434,7 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, if (*buff_count > 0) buffs_avail = true; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); if (tbp >= cq->ring_size) tbp = 0; @@ -524,7 +523,7 @@ post_buffs_out: wr32(hw, cq->reg.tail, cq->next_to_post); } - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* return the number of buffers that were not posted */ *buff_count = *buff_count - i; @@ -552,7 +551,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, u16 i; /* take the lock before we start messing with the ring */ - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -614,7 +613,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); *num_q_msg = i; if (*num_q_msg == 0) diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h index 9642494a67d8..3414c5f9a831 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h @@ -99,7 +99,7 @@ struct idpf_ctlq_info { enum idpf_ctlq_type cq_type; int q_id; - struct mutex cq_lock; /* control queue lock */ + spinlock_t cq_lock; /* control queue lock */ /* used for interrupt processing */ u16 next_to_use; u16 next_to_clean; diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 9bdb309b668e..eaf7a2606faa 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -47,7 +47,7 @@ static u32 idpf_get_rxfh_key_size(struct net_device *netdev) struct idpf_vport_user_config_data *user_config; if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) - return -EOPNOTSUPP; + return 0; user_config = &np->adapter->vport_config[np->vport_idx]->user_config; @@ -66,7 +66,7 @@ static u32 idpf_get_rxfh_indir_size(struct net_device *netdev) struct idpf_vport_user_config_data *user_config; if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) - return -EOPNOTSUPP; + return 0; user_config = &np->adapter->vport_config[np->vport_idx]->user_config; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 4eb20ec2accb..80382ff4a5fa 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2314,8 +2314,12 @@ void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size) struct idpf_adapter *adapter = hw->back; size_t sz = ALIGN(size, 4096); - mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz, - &mem->pa, GFP_KERNEL); + /* The control queue resources are freed under a spinlock, contiguous + * pages will avoid IOMMU remapping and the use vmap (and vunmap in + * dma_free_*() path. + */ + mem->va = dma_alloc_attrs(&adapter->pdev->dev, sz, &mem->pa, + GFP_KERNEL, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = sz; return mem->va; @@ -2330,8 +2334,8 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) { struct idpf_adapter *adapter = hw->back; - dma_free_coherent(&adapter->pdev->dev, mem->size, - mem->va, mem->pa); + dma_free_attrs(&adapter->pdev->dev, mem->size, + mem->va, mem->pa, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = 0; mem->va = NULL; mem->pa = 0; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 686793c539f2..031c332f66c4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7115,6 +7115,10 @@ static int igc_probe(struct pci_dev *pdev, adapter->port_num = hw->bus.func; adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + /* Disable ASPM L1.2 on I226 devices to avoid packet loss */ + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + err = pci_save_state(pdev); if (err) goto err_ioremap; @@ -7500,6 +7504,9 @@ static int __igc_resume(struct device *dev, bool rpm) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + if (igc_init_interrupt_scheme(adapter, true)) { netdev_err(netdev, "Unable to allocate memory for queues\n"); return -ENOMEM; @@ -7625,6 +7632,9 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + if (igc_is_device_id_i226(hw)) + pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L1_2); + /* In case of PCI error, adapter loses its HW address * so we should re-assign it here. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index b5c3a2a9d2a5..9560fcba643f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -18,7 +18,8 @@ enum { enum { MLX5E_TC_PRIO = 0, - MLX5E_NIC_PRIO + MLX5E_PROMISC_PRIO, + MLX5E_NIC_PRIO, }; struct mlx5e_flow_table { @@ -68,9 +69,13 @@ struct mlx5e_l2_table { MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) -/* NIC prio FTS */ +/* NIC promisc FT level */ enum { MLX5E_PROMISC_FT_LEVEL, +}; + +/* NIC prio FTS */ +enum { MLX5E_VLAN_FT_LEVEL, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 298bb74ec5e9..d1d629697e28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -113,7 +113,7 @@ int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enable) __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); } else { __clear_bit(MLX5E_RQ_STATE_DIM, &rq->state); - + synchronize_net(); mlx5e_dim_disable(rq->dim); rq->dim = NULL; } @@ -140,7 +140,7 @@ int mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enable) __set_bit(MLX5E_SQ_STATE_DIM, &sq->state); } else { __clear_bit(MLX5E_SQ_STATE_DIM, &sq->state); - + synchronize_net(); mlx5e_dim_disable(sq->dim); sq->dim = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 04a969128161..265c4ca85f7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -780,7 +780,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs) ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE; ft_attr.autogroup.max_num_groups = 1; ft_attr.level = MLX5E_PROMISC_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.prio = MLX5E_PROMISC_PRIO; ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr); if (IS_ERR(ft->t)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index b6ae384396b3..ad9f6fca9b6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1076,6 +1076,7 @@ static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node, return err; } esw_qos_node_set_parent(node, parent); + node->bw_share = 0; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a8046200d376..3dd9a6f40709 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -113,13 +113,16 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, +/* Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, * {IPsec RoCE MPV,Alias table},IPsec RoCE policy */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 11 +#define KERNEL_NIC_PRIO_NUM_LEVELS 10 #define KERNEL_NIC_NUM_PRIOS 1 -/* One more level for tc */ -#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) +/* One more level for tc, and one more for promisc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 2) + +#define KERNEL_NIC_PROMISC_NUM_PRIOS 1 +#define KERNEL_NIC_PROMISC_NUM_LEVELS 1 #define KERNEL_NIC_TC_NUM_PRIOS 1 #define KERNEL_NIC_TC_NUM_LEVELS 3 @@ -187,6 +190,8 @@ static struct init_tree_node { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_PROMISC_NUM_PRIOS, + KERNEL_NIC_PROMISC_NUM_LEVELS), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 3504507477c6..52cf7112762c 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -31,6 +31,9 @@ static void mana_gd_init_pf_regs(struct pci_dev *pdev) gc->db_page_base = gc->bar0_va + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + gc->phys_db_page_base = gc->bar0_pa + + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); sriov_base_va = gc->bar0_va + sriov_base_off; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 2ac59564ded1..d10b58ebf603 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -321,7 +321,7 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, len, DMA_TO_DEVICE); } else /* XDP_REDIRECT */ { dma_addr = ionic_tx_map_single(q, frame->data, len); - if (!dma_addr) + if (dma_addr == DMA_MAPPING_ERROR) return -EIO; } @@ -357,7 +357,7 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, } else { dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); - if (dma_mapping_error(q->dev, dma_addr)) { + if (dma_addr == DMA_MAPPING_ERROR) { ionic_tx_desc_unmap_bufs(q, desc_info); return -EIO; } @@ -1083,7 +1083,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, net_warn_ratelimited("%s: DMA single map failed on %s!\n", dev_name(dev), q->name); q_to_tx_stats(q)->dma_map_err++; - return 0; + return DMA_MAPPING_ERROR; } return dma_addr; } @@ -1100,7 +1100,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, net_warn_ratelimited("%s: DMA frag map failed on %s!\n", dev_name(dev), q->name); q_to_tx_stats(q)->dma_map_err++; - return 0; + return DMA_MAPPING_ERROR; } return dma_addr; } @@ -1116,7 +1116,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, int frag_idx; dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (!dma_addr) + if (dma_addr == DMA_MAPPING_ERROR) return -EIO; buf_info->dma_addr = dma_addr; buf_info->len = skb_headlen(skb); @@ -1126,7 +1126,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, nfrags = skb_shinfo(skb)->nr_frags; for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) { dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); - if (!dma_addr) + if (dma_addr == DMA_MAPPING_ERROR) goto dma_fail; buf_info->dma_addr = dma_addr; buf_info->len = skb_frag_size(frag); diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c index f55eed092f25..7d78f072b0a1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c @@ -242,7 +242,7 @@ static int qed_mfw_get_tlv_group(u8 tlv_type, u8 *tlv_group) } /* Returns size of the data buffer or, -1 in case TLV data is not available. */ -static int +static noinline_for_stack int qed_mfw_get_gen_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_generic *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) @@ -304,7 +304,7 @@ qed_mfw_get_gen_tlv_value(struct qed_drv_tlv_hdr *p_tlv, return -1; } -static int +static noinline_for_stack int qed_mfw_get_eth_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_eth *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) @@ -438,7 +438,7 @@ qed_mfw_get_tlv_time_value(struct qed_mfw_tlv_time *p_time, return QED_MFW_TLV_TIME_SIZE; } -static int +static noinline_for_stack int qed_mfw_get_fcoe_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_fcoe *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) @@ -1073,7 +1073,7 @@ qed_mfw_get_fcoe_tlv_value(struct qed_drv_tlv_hdr *p_tlv, return -1; } -static int +static noinline_for_stack int qed_mfw_get_iscsi_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_iscsi *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 6b3f7fca8d15..05c4b6c8c9c3 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -1259,7 +1259,12 @@ static int rtsn_probe(struct platform_device *pdev) priv = netdev_priv(ndev); priv->pdev = pdev; priv->ndev = ndev; + priv->ptp_priv = rcar_gen4_ptp_alloc(pdev); + if (!priv->ptp_priv) { + ret = -ENOMEM; + goto error_free; + } spin_lock_init(&priv->lock); platform_set_drvdata(pdev, priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 7840bc403788..5dcc95bc0ad2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -364,19 +364,17 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, } /* TX/RX NORMAL interrupts */ - if (likely(intr_status & XGMAC_NIS)) { - if (likely(intr_status & XGMAC_RI)) { - u64_stats_update_begin(&stats->syncp); - u64_stats_inc(&stats->rx_normal_irq_n[chan]); - u64_stats_update_end(&stats->syncp); - ret |= handle_rx; - } - if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { - u64_stats_update_begin(&stats->syncp); - u64_stats_inc(&stats->tx_normal_irq_n[chan]); - u64_stats_update_end(&stats->syncp); - ret |= handle_tx; - } + if (likely(intr_status & XGMAC_RI)) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_rx; + } + if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->tx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_tx; } /* Clear interrupts */ diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index ddca8fc7883e..26119d02a94d 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3336,7 +3336,7 @@ static int niu_rbr_add_page(struct niu *np, struct rx_ring_info *rp, addr = np->ops->map_page(np->device, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (!addr) { + if (np->ops->mapping_error(np->device, addr)) { __free_page(page); return -ENOMEM; } @@ -6676,6 +6676,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, len = skb_headlen(skb); mapping = np->ops->map_single(np->device, skb->data, len, DMA_TO_DEVICE); + if (np->ops->mapping_error(np->device, mapping)) + goto out_drop; prod = rp->prod; @@ -6717,6 +6719,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, mapping = np->ops->map_page(np->device, skb_frag_page(frag), skb_frag_off(frag), len, DMA_TO_DEVICE); + if (np->ops->mapping_error(np->device, mapping)) + goto out_unmap; rp->tx_buffs[prod].skb = NULL; rp->tx_buffs[prod].mapping = mapping; @@ -6741,6 +6745,19 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, out: return NETDEV_TX_OK; +out_unmap: + while (i--) { + const skb_frag_t *frag; + + prod = PREVIOUS_TX(rp, prod); + frag = &skb_shinfo(skb)->frags[i]; + np->ops->unmap_page(np->device, rp->tx_buffs[prod].mapping, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + np->ops->unmap_single(np->device, rp->tx_buffs[rp->prod].mapping, + skb_headlen(skb), DMA_TO_DEVICE); + out_drop: rp->tx_errors++; kfree_skb(skb); @@ -9644,6 +9661,11 @@ static void niu_pci_unmap_single(struct device *dev, u64 dma_address, dma_unmap_single(dev, dma_address, size, direction); } +static int niu_pci_mapping_error(struct device *dev, u64 addr) +{ + return dma_mapping_error(dev, addr); +} + static const struct niu_ops niu_pci_ops = { .alloc_coherent = niu_pci_alloc_coherent, .free_coherent = niu_pci_free_coherent, @@ -9651,6 +9673,7 @@ static const struct niu_ops niu_pci_ops = { .unmap_page = niu_pci_unmap_page, .map_single = niu_pci_map_single, .unmap_single = niu_pci_unmap_single, + .mapping_error = niu_pci_mapping_error, }; static void niu_driver_version(void) @@ -10019,6 +10042,11 @@ static void niu_phys_unmap_single(struct device *dev, u64 dma_address, /* Nothing to do. */ } +static int niu_phys_mapping_error(struct device *dev, u64 dma_address) +{ + return false; +} + static const struct niu_ops niu_phys_ops = { .alloc_coherent = niu_phys_alloc_coherent, .free_coherent = niu_phys_free_coherent, @@ -10026,6 +10054,7 @@ static const struct niu_ops niu_phys_ops = { .unmap_page = niu_phys_unmap_page, .map_single = niu_phys_map_single, .unmap_single = niu_phys_unmap_single, + .mapping_error = niu_phys_mapping_error, }; static int niu_of_probe(struct platform_device *op) diff --git a/drivers/net/ethernet/sun/niu.h b/drivers/net/ethernet/sun/niu.h index 04c215f91fc0..0b169c08b0f2 100644 --- a/drivers/net/ethernet/sun/niu.h +++ b/drivers/net/ethernet/sun/niu.h @@ -2879,6 +2879,9 @@ struct tx_ring_info { #define NEXT_TX(tp, index) \ (((index) + 1) < (tp)->pending ? ((index) + 1) : 0) +#define PREVIOUS_TX(tp, index) \ + (((index) - 1) >= 0 ? ((index) - 1) : (((tp)->pending) - 1)) + static inline u32 niu_tx_avail(struct tx_ring_info *tp) { return (tp->pending - @@ -3140,6 +3143,7 @@ struct niu_ops { enum dma_data_direction direction); void (*unmap_single)(struct device *dev, u64 dma_address, size_t size, enum dma_data_direction direction); + int (*mapping_error)(struct device *dev, u64 dma_address); }; struct niu_link_config { diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index f20d1ff192ef..231ca141331f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -856,8 +856,6 @@ static struct sk_buff *am65_cpsw_build_skb(void *page_addr, { struct sk_buff *skb; - len += AM65_CPSW_HEADROOM; - skb = build_skb(page_addr, len); if (unlikely(!skb)) return NULL; @@ -1344,7 +1342,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, } skb = am65_cpsw_build_skb(page_addr, ndev, - AM65_CPSW_MAX_PACKET_SIZE, headroom); + PAGE_SIZE, headroom); if (unlikely(!skb)) { new_page = page; goto requeue; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 7f2e6cddfeb1..55e252789db3 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1705,6 +1705,7 @@ static void wx_set_rss_queues(struct wx *wx) clear_bit(WX_FLAG_FDIR_HASH, wx->flags); + wx->ring_feature[RING_F_FDIR].indices = 1; /* Use Flow Director in addition to RSS to ensure the best * distribution of flows across cores, even when an FDIR flow * isn't matched. @@ -1746,7 +1747,7 @@ static void wx_set_num_queues(struct wx *wx) */ static int wx_acquire_msix_vectors(struct wx *wx) { - struct irq_affinity affd = { .pre_vectors = 1 }; + struct irq_affinity affd = { .post_vectors = 1 }; int nvecs, i; /* We start by asking for one vector per queue pair */ @@ -1783,16 +1784,24 @@ static int wx_acquire_msix_vectors(struct wx *wx) return nvecs; } - wx->msix_entry->entry = 0; - wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0); nvecs -= 1; for (i = 0; i < nvecs; i++) { wx->msix_q_entries[i].entry = i; - wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i + 1); + wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i); } wx->num_q_vectors = nvecs; + wx->msix_entry->entry = nvecs; + wx->msix_entry->vector = pci_irq_vector(wx->pdev, nvecs); + + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) { + wx->msix_entry->entry = 0; + wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0); + wx->msix_q_entries[0].entry = 0; + wx->msix_q_entries[0].vector = pci_irq_vector(wx->pdev, 1); + } + return 0; } @@ -2291,6 +2300,8 @@ static void wx_set_ivar(struct wx *wx, s8 direction, if (direction == -1) { /* other causes */ + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) + msix_vector = 0; msix_vector |= WX_PX_IVAR_ALLOC_VAL; index = 0; ivar = rd32(wx, WX_PX_MISC_IVAR); @@ -2299,8 +2310,6 @@ static void wx_set_ivar(struct wx *wx, s8 direction, wr32(wx, WX_PX_MISC_IVAR, ivar); } else { /* tx or rx causes */ - if (!(wx->mac.type == wx_mac_em && wx->num_vfs == 7)) - msix_vector += 1; /* offset for queue vectors */ msix_vector |= WX_PX_IVAR_ALLOC_VAL; index = ((16 * (queue & 1)) + (8 * direction)); ivar = rd32(wx, WX_PX_IVAR(queue >> 1)); @@ -2339,7 +2348,7 @@ void wx_write_eitr(struct wx_q_vector *q_vector) itr_reg |= WX_PX_ITR_CNT_WDIS; - wr32(wx, WX_PX_ITR(v_idx + 1), itr_reg); + wr32(wx, WX_PX_ITR(v_idx), itr_reg); } /** @@ -2392,9 +2401,9 @@ void wx_configure_vectors(struct wx *wx) wx_write_eitr(q_vector); } - wx_set_ivar(wx, -1, 0, 0); + wx_set_ivar(wx, -1, 0, v_idx); if (pdev->msix_enabled) - wr32(wx, WX_PX_ITR(0), 1950); + wr32(wx, WX_PX_ITR(v_idx), 1950); } EXPORT_SYMBOL(wx_configure_vectors); @@ -2623,7 +2632,7 @@ static int wx_alloc_page_pool(struct wx_ring *rx_ring) struct page_pool_params pp_params = { .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .order = 0, - .pool_size = rx_ring->size, + .pool_size = rx_ring->count, .nid = dev_to_node(rx_ring->dev), .dev = rx_ring->dev, .dma_dir = DMA_FROM_DEVICE, diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c index e8656d9d733b..c82ae137756c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c @@ -64,6 +64,7 @@ static void wx_sriov_clear_data(struct wx *wx) wr32m(wx, WX_PSR_VM_CTL, WX_PSR_VM_CTL_POOL_MASK, 0); wx->ring_feature[RING_F_VMDQ].offset = 0; + clear_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags); clear_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); /* Disable VMDq flag so device will be set in NM mode */ if (wx->ring_feature[RING_F_VMDQ].limit == 1) @@ -78,6 +79,9 @@ static int __wx_enable_sriov(struct wx *wx, u8 num_vfs) set_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); dev_info(&wx->pdev->dev, "SR-IOV enabled with %d VFs\n", num_vfs); + if (num_vfs == 7 && wx->mac.type == wx_mac_em) + set_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags); + /* Enable VMDq flag so device will be set in VM mode */ set_bit(WX_FLAG_VMDQ_ENABLED, wx->flags); if (!wx->ring_feature[RING_F_VMDQ].limit) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 7730c9fc3e02..c363379126c0 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1191,6 +1191,7 @@ enum wx_pf_flags { WX_FLAG_VMDQ_ENABLED, WX_FLAG_VLAN_PROMISC, WX_FLAG_SRIOV_ENABLED, + WX_FLAG_IRQ_VECTOR_SHARED, WX_FLAG_FDIR_CAPABLE, WX_FLAG_FDIR_HASH, WX_FLAG_FDIR_PERFECT, @@ -1343,7 +1344,7 @@ struct wx { }; #define WX_INTR_ALL (~0ULL) -#define WX_INTR_Q(i) BIT((i) + 1) +#define WX_INTR_Q(i) BIT((i)) /* register operations */ #define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index b5022c49dc5e..e0fc897b0a58 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -161,7 +161,7 @@ static void ngbe_irq_enable(struct wx *wx, bool queues) if (queues) wx_intr_enable(wx, NGBE_INTR_ALL); else - wx_intr_enable(wx, NGBE_INTR_MISC); + wx_intr_enable(wx, NGBE_INTR_MISC(wx)); } /** @@ -286,7 +286,7 @@ static int ngbe_request_msix_irqs(struct wx *wx) * for queue. But when num_vfs == 7, vector[1] is assigned to vf6. * Misc and queue should reuse interrupt vector[0]. */ - if (wx->num_vfs == 7) + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) err = request_irq(wx->msix_entry->vector, ngbe_misc_and_queue, 0, netdev->name, wx); else diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index bb74263f0498..3b2ca7f47e33 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -87,7 +87,7 @@ #define NGBE_PX_MISC_IC_TIMESYNC BIT(11) /* time sync */ #define NGBE_INTR_ALL 0x1FF -#define NGBE_INTR_MISC BIT(0) +#define NGBE_INTR_MISC(A) BIT((A)->msix_entry->entry) #define NGBE_PHY_CONFIG(reg_offset) (0x14000 + ((reg_offset) * 4)) #define NGBE_CFG_LAN_SPEED 0x14440 diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c index 7dbcf41750c1..dc87ccad9652 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c @@ -294,6 +294,7 @@ static void txgbe_mac_link_up_aml(struct phylink_config *config, wx_fc_enable(wx, tx_pause, rx_pause); txgbe_reconfig_mac(wx); + txgbe_enable_sec_tx_path(wx); txcfg = rd32(wx, TXGBE_AML_MAC_TX_CFG); txcfg &= ~TXGBE_AML_MAC_TX_CFG_SPEED_MASK; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c index 20b9a28bcb55..3885283681ec 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c @@ -31,7 +31,7 @@ void txgbe_irq_enable(struct wx *wx, bool queues) wr32(wx, WX_PX_MISC_IEN, misc_ien); /* unmask interrupt */ - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); if (queues) wx_intr_enable(wx, TXGBE_INTR_QALL(wx)); } @@ -78,7 +78,6 @@ free_queue_irqs: free_irq(wx->msix_q_entries[vector].vector, wx->q_vector[vector]); } - wx_reset_interrupt_capability(wx); return err; } @@ -132,7 +131,7 @@ static irqreturn_t txgbe_misc_irq_handle(int irq, void *data) txgbe->eicr = eicr; if (eicr & TXGBE_PX_MISC_IC_VF_MBOX) { wx_msg_task(txgbe->wx); - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); } return IRQ_WAKE_THREAD; } @@ -184,7 +183,7 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data) nhandled++; } - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE); } @@ -211,6 +210,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe) free_irq(txgbe->link_irq, txgbe); free_irq(txgbe->misc.irq, txgbe); txgbe_del_irq_domain(txgbe); + txgbe->wx->misc_irq_domain = false; } int txgbe_setup_misc_irq(struct txgbe *txgbe) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index f3d2778b8e35..a5867f3c93fc 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -458,10 +458,14 @@ static int txgbe_open(struct net_device *netdev) wx_configure(wx); - err = txgbe_request_queue_irqs(wx); + err = txgbe_setup_misc_irq(wx->priv); if (err) goto err_free_resources; + err = txgbe_request_queue_irqs(wx); + if (err) + goto err_free_misc_irq; + /* Notify the stack of the actual queue counts. */ err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues); if (err) @@ -479,6 +483,9 @@ static int txgbe_open(struct net_device *netdev) err_free_irq: wx_free_irq(wx); +err_free_misc_irq: + txgbe_free_misc_irq(wx->priv); + wx_reset_interrupt_capability(wx); err_free_resources: wx_free_resources(wx); err_reset: @@ -519,6 +526,7 @@ static int txgbe_close(struct net_device *netdev) wx_ptp_stop(wx); txgbe_down(wx); wx_free_irq(wx); + txgbe_free_misc_irq(wx->priv); wx_free_resources(wx); txgbe_fdir_filter_exit(wx); wx_control_hw(wx, false); @@ -564,7 +572,6 @@ static void txgbe_shutdown(struct pci_dev *pdev) int txgbe_setup_tc(struct net_device *dev, u8 tc) { struct wx *wx = netdev_priv(dev); - struct txgbe *txgbe = wx->priv; /* Hardware has to reinitialize queues and interrupts to * match packet buffer alignment. Unfortunately, the @@ -575,7 +582,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc) else txgbe_reset(wx); - txgbe_free_misc_irq(txgbe); wx_clear_interrupt_scheme(wx); if (tc) @@ -584,7 +590,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc) netdev_reset_tc(dev); wx_init_interrupt_scheme(wx); - txgbe_setup_misc_irq(txgbe); if (netif_running(dev)) txgbe_open(dev); @@ -882,13 +887,9 @@ static int txgbe_probe(struct pci_dev *pdev, txgbe_init_fdir(txgbe); - err = txgbe_setup_misc_irq(txgbe); - if (err) - goto err_release_hw; - err = txgbe_init_phy(txgbe); if (err) - goto err_free_misc_irq; + goto err_release_hw; err = register_netdev(netdev); if (err) @@ -916,8 +917,6 @@ static int txgbe_probe(struct pci_dev *pdev, err_remove_phy: txgbe_remove_phy(txgbe); -err_free_misc_irq: - txgbe_free_misc_irq(txgbe); err_release_hw: wx_clear_interrupt_scheme(wx); wx_control_hw(wx, false); @@ -957,7 +956,6 @@ static void txgbe_remove(struct pci_dev *pdev) unregister_netdev(netdev); txgbe_remove_phy(txgbe); - txgbe_free_misc_irq(txgbe); wx_free_isb_resources(wx); pci_release_selected_regions(pdev, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 42ec815159e8..41915d7dd372 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -302,8 +302,8 @@ struct txgbe_fdir_filter { #define TXGBE_DEFAULT_RX_WORK 128 #endif -#define TXGBE_INTR_MISC BIT(0) -#define TXGBE_INTR_QALL(A) GENMASK((A)->num_q_vectors, 1) +#define TXGBE_INTR_MISC(A) BIT((A)->num_q_vectors) +#define TXGBE_INTR_QALL(A) (TXGBE_INTR_MISC(A) - 1) #define TXGBE_MAX_EITR GENMASK(11, 3) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index edb36ff07a0c..6f82203a414c 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1309,7 +1309,7 @@ ll_temac_ethtools_set_ringparam(struct net_device *ndev, if (ering->rx_pending > RX_BD_NUM_MAX || ering->rx_mini_pending || ering->rx_jumbo_pending || - ering->rx_pending > TX_BD_NUM_MAX) + ering->tx_pending > TX_BD_NUM_MAX) return -EINVAL; if (netif_running(ndev)) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 13570f628aa5..dc8634e7bcbe 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -332,7 +332,7 @@ static void lan88xx_link_change_notify(struct phy_device *phydev) * As workaround, set to 10 before setting to 100 * at forced 100 F/H mode. */ - if (!phydev->autoneg && phydev->speed == 100) { + if (phydev->state == PHY_NOLINK && !phydev->autoneg && phydev->speed == 100) { /* disable phy interrupt */ temp = phy_read(phydev, LAN88XX_INT_MASK); temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; @@ -488,6 +488,7 @@ static struct phy_driver microchip_phy_driver[] = { .config_init = lan88xx_config_init, .config_aneg = lan88xx_config_aneg, .link_change_notify = lan88xx_link_change_notify, + .soft_reset = genphy_soft_reset, /* Interrupt handling is broken, do not define related * functions to force polling. diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c index 26350b962890..8f26e395e39f 100644 --- a/drivers/net/phy/qcom/at803x.c +++ b/drivers/net/phy/qcom/at803x.c @@ -26,9 +26,6 @@ #define AT803X_LED_CONTROL 0x18 -#define AT803X_PHY_MMD3_WOL_CTRL 0x8012 -#define AT803X_WOL_EN BIT(5) - #define AT803X_REG_CHIP_CONFIG 0x1f #define AT803X_BT_BX_REG_SEL 0x8000 @@ -866,30 +863,6 @@ static int at8031_config_init(struct phy_device *phydev) return at803x_config_init(phydev); } -static int at8031_set_wol(struct phy_device *phydev, - struct ethtool_wolinfo *wol) -{ - int ret; - - /* First setup MAC address and enable WOL interrupt */ - ret = at803x_set_wol(phydev, wol); - if (ret) - return ret; - - if (wol->wolopts & WAKE_MAGIC) - /* Enable WOL function for 1588 */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - AT803X_PHY_MMD3_WOL_CTRL, - 0, AT803X_WOL_EN); - else - /* Disable WoL function for 1588 */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - AT803X_PHY_MMD3_WOL_CTRL, - AT803X_WOL_EN, 0); - - return ret; -} - static int at8031_config_intr(struct phy_device *phydev) { struct at803x_priv *priv = phydev->priv; diff --git a/drivers/net/phy/qcom/qca808x.c b/drivers/net/phy/qcom/qca808x.c index 71498c518f0f..6de16c0eaa08 100644 --- a/drivers/net/phy/qcom/qca808x.c +++ b/drivers/net/phy/qcom/qca808x.c @@ -633,7 +633,7 @@ static struct phy_driver qca808x_driver[] = { .handle_interrupt = at803x_handle_interrupt, .get_tunable = at803x_get_tunable, .set_tunable = at803x_set_tunable, - .set_wol = at803x_set_wol, + .set_wol = at8031_set_wol, .get_wol = at803x_get_wol, .get_features = qca808x_get_features, .config_aneg = qca808x_config_aneg, diff --git a/drivers/net/phy/qcom/qcom-phy-lib.c b/drivers/net/phy/qcom/qcom-phy-lib.c index d28815ef56bb..af7d0d8e81be 100644 --- a/drivers/net/phy/qcom/qcom-phy-lib.c +++ b/drivers/net/phy/qcom/qcom-phy-lib.c @@ -115,6 +115,31 @@ int at803x_set_wol(struct phy_device *phydev, } EXPORT_SYMBOL_GPL(at803x_set_wol); +int at8031_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + /* First setup MAC address and enable WOL interrupt */ + ret = at803x_set_wol(phydev, wol); + if (ret) + return ret; + + if (wol->wolopts & WAKE_MAGIC) + /* Enable WOL function for 1588 */ + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, + AT803X_PHY_MMD3_WOL_CTRL, + 0, AT803X_WOL_EN); + else + /* Disable WoL function for 1588 */ + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, + AT803X_PHY_MMD3_WOL_CTRL, + AT803X_WOL_EN, 0); + + return ret; +} +EXPORT_SYMBOL_GPL(at8031_set_wol); + void at803x_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { diff --git a/drivers/net/phy/qcom/qcom.h b/drivers/net/phy/qcom/qcom.h index 4bb541728846..7f7151c8baca 100644 --- a/drivers/net/phy/qcom/qcom.h +++ b/drivers/net/phy/qcom/qcom.h @@ -172,6 +172,9 @@ #define AT803X_LOC_MAC_ADDR_16_31_OFFSET 0x804B #define AT803X_LOC_MAC_ADDR_32_47_OFFSET 0x804A +#define AT803X_PHY_MMD3_WOL_CTRL 0x8012 +#define AT803X_WOL_EN BIT(5) + #define AT803X_DEBUG_ADDR 0x1D #define AT803X_DEBUG_DATA 0x1E @@ -215,6 +218,8 @@ int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg, int at803x_debug_reg_write(struct phy_device *phydev, u16 reg, u16 data); int at803x_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); +int at8031_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol); void at803x_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); int at803x_ack_interrupt(struct phy_device *phydev); diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 31463b9e5697..b6489da5cfcd 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -155,10 +155,29 @@ static int smsc_phy_reset(struct phy_device *phydev) static int lan87xx_config_aneg(struct phy_device *phydev) { - int rc; + u8 mdix_ctrl; int val; + int rc; + + /* When auto-negotiation is disabled (forced mode), the PHY's + * Auto-MDIX will continue toggling the TX/RX pairs. + * + * To establish a stable link, we must select a fixed MDI mode. + * If the user has not specified a fixed MDI mode (i.e., mdix_ctrl is + * 'auto'), we default to ETH_TP_MDI. This choice of a ETH_TP_MDI mode + * mirrors the behavior the hardware would exhibit if the AUTOMDIX_EN + * strap were configured for a fixed MDI connection. + */ + if (phydev->autoneg == AUTONEG_DISABLE) { + if (phydev->mdix_ctrl == ETH_TP_MDI_AUTO) + mdix_ctrl = ETH_TP_MDI; + else + mdix_ctrl = phydev->mdix_ctrl; + } else { + mdix_ctrl = phydev->mdix_ctrl; + } - switch (phydev->mdix_ctrl) { + switch (mdix_ctrl) { case ETH_TP_MDI: val = SPECIAL_CTRL_STS_OVRRD_AMDIX_; break; @@ -167,7 +186,8 @@ static int lan87xx_config_aneg(struct phy_device *phydev) SPECIAL_CTRL_STS_AMDIX_STATE_; break; case ETH_TP_MDI_AUTO: - val = SPECIAL_CTRL_STS_AMDIX_ENABLE_; + val = SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_; break; default: return genphy_config_aneg(phydev); @@ -183,7 +203,7 @@ static int lan87xx_config_aneg(struct phy_device *phydev) rc |= val; phy_write(phydev, SPECIAL_CTRL_STS, rc); - phydev->mdix = phydev->mdix_ctrl; + phydev->mdix = mdix_ctrl; return genphy_config_aneg(phydev); } @@ -261,6 +281,33 @@ int lan87xx_read_status(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(lan87xx_read_status); +static int lan87xx_phy_config_init(struct phy_device *phydev) +{ + int rc; + + /* The LAN87xx PHY's initial MDI-X mode is determined by the AUTOMDIX_EN + * hardware strap, but the driver cannot read the strap's status. This + * creates an unpredictable initial state. + * + * To ensure consistent and reliable behavior across all boards, + * override the strap configuration on initialization and force the PHY + * into a known state with Auto-MDIX enabled, which is the expected + * default for modern hardware. + */ + rc = phy_modify(phydev, SPECIAL_CTRL_STS, + SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_, + SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_); + if (rc < 0) + return rc; + + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + + return smsc_phy_config_init(phydev); +} + static int lan874x_phy_config_init(struct phy_device *phydev) { u16 val; @@ -695,7 +742,7 @@ static struct phy_driver smsc_phy_driver[] = { /* basic functions */ .read_status = lan87xx_read_status, - .config_init = smsc_phy_config_init, + .config_init = lan87xx_phy_config_init, .soft_reset = smsc_phy_reset, .config_aneg = lan87xx_config_aneg, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f53e255116ea..e3ca6e91efe1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4567,8 +4567,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) if (!dev) return; - netif_napi_del(&dev->napi); - udev = interface_to_usbdev(intf); net = dev->net; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b586b1c13a47..f5647ee0adde 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1426,6 +1426,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */ + {QMI_QUIRK_SET_DTR(0x1e0e, 0x9071, 3)}, /* SIMCom 8230C ++ */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e53ba600605a..5d674eb9a0f2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -778,6 +778,26 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); } +static int check_mergeable_len(struct net_device *dev, void *mrg_ctx, + unsigned int len) +{ + unsigned int headroom, tailroom, room, truesize; + + truesize = mergeable_ctx_to_truesize(mrg_ctx); + headroom = mergeable_ctx_to_headroom(mrg_ctx); + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + room = SKB_DATA_ALIGN(headroom + tailroom); + + if (len > truesize - room) { + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", + dev->name, len, (unsigned long)(truesize - room)); + DEV_STATS_INC(dev, rx_length_errors); + return -1; + } + + return 0; +} + static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, unsigned int headroom, unsigned int len) @@ -1084,7 +1104,7 @@ static bool tx_may_stop(struct virtnet_info *vi, * Since most packets only take 1 or 2 ring slots, stopping the queue * early means 16 slots are typically wasted. */ - if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free < MAX_SKB_FRAGS + 2) { struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); netif_tx_stop_queue(txq); @@ -1116,7 +1136,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi, } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. */ free_old_xmit(sq, txq, false); - if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { netif_start_subqueue(dev, qnum); u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -1127,15 +1147,29 @@ static void check_sq_full_and_disable(struct virtnet_info *vi, } } +/* Note that @len is the length of received data without virtio header */ static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, - struct receive_queue *rq, void *buf, u32 len) + struct receive_queue *rq, void *buf, + u32 len, bool first_buf) { struct xdp_buff *xdp; u32 bufsize; xdp = (struct xdp_buff *)buf; - bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; + /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for + * virtio header and ask the vhost to fill data from + * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len + * The first buffer has virtio header so the remaining region for frame + * data is + * xsk_pool_get_rx_frame_size() + * While other buffers than the first one do not have virtio header, so + * the maximum frame data's length can be + * xsk_pool_get_rx_frame_size() + vi->hdr_len + */ + bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool); + if (!first_buf) + bufsize += vi->hdr_len; if (unlikely(len > bufsize)) { pr_debug("%s: rx error: len %u exceeds truesize %u\n", @@ -1260,7 +1294,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi, u64_stats_add(&stats->bytes, len); - xdp = buf_to_xdp(vi, rq, buf, len); + xdp = buf_to_xdp(vi, rq, buf, len, false); if (!xdp) goto err; @@ -1358,7 +1392,7 @@ static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queu u64_stats_add(&stats->bytes, len); - xdp = buf_to_xdp(vi, rq, buf, len); + xdp = buf_to_xdp(vi, rq, buf, len, true); if (!xdp) return; @@ -1797,7 +1831,8 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi) * across multiple buffers (num_buf > 1), and we make sure buffers * have enough headroom. */ -static struct page *xdp_linearize_page(struct receive_queue *rq, +static struct page *xdp_linearize_page(struct net_device *dev, + struct receive_queue *rq, int *num_buf, struct page *p, int offset, @@ -1817,18 +1852,27 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, memcpy(page_address(page) + page_off, page_address(p) + offset, *len); page_off += *len; + /* Only mergeable mode can go inside this while loop. In small mode, + * *num_buf == 1, so it cannot go inside. + */ while (--*num_buf) { unsigned int buflen; void *buf; + void *ctx; int off; - buf = virtnet_rq_get_buf(rq, &buflen, NULL); + buf = virtnet_rq_get_buf(rq, &buflen, &ctx); if (unlikely(!buf)) goto err_buf; p = virt_to_head_page(buf); off = buf - page_address(p); + if (check_mergeable_len(dev, ctx, buflen)) { + put_page(p); + goto err_buf; + } + /* guard against a misconfigured or uncooperative backend that * is sending packet larger than the MTU. */ @@ -1917,7 +1961,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, headroom = vi->hdr_len + header_offset; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - xdp_page = xdp_linearize_page(rq, &num_buf, page, + xdp_page = xdp_linearize_page(dev, rq, &num_buf, page, offset, header_offset, &tlen); if (!xdp_page) @@ -2126,10 +2170,9 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; - unsigned int headroom, tailroom, room; - unsigned int truesize, cur_frag_size; struct skb_shared_info *shinfo; unsigned int xdp_frags_truesz = 0; + unsigned int truesize; struct page *page; skb_frag_t *frag; int offset; @@ -2172,21 +2215,14 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, page = virt_to_head_page(buf); offset = buf - page_address(page); - truesize = mergeable_ctx_to_truesize(ctx); - headroom = mergeable_ctx_to_headroom(ctx); - tailroom = headroom ? sizeof(struct skb_shared_info) : 0; - room = SKB_DATA_ALIGN(headroom + tailroom); - - cur_frag_size = truesize; - xdp_frags_truesz += cur_frag_size; - if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { + if (check_mergeable_len(dev, ctx, len)) { put_page(page); - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); goto err; } + truesize = mergeable_ctx_to_truesize(ctx); + xdp_frags_truesz += truesize; + frag = &shinfo->frags[shinfo->nr_frags++]; skb_frag_fill_page_desc(frag, page, offset, len); if (page_is_pfmemalloc(page)) @@ -2252,7 +2288,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, */ if (!xdp_prog->aux->xdp_has_frags) { /* linearize data for XDP */ - xdp_page = xdp_linearize_page(rq, num_buf, + xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, *page, offset, XDP_PACKET_HEADROOM, len); @@ -2400,18 +2436,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct sk_buff *head_skb, *curr_skb; unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); - unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; - unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); head_skb = NULL; u64_stats_add(&stats->bytes, len - vi->hdr_len); - if (unlikely(len > truesize - room)) { - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); + if (check_mergeable_len(dev, ctx, len)) goto err_skb; - } if (unlikely(vi->xdp_enabled)) { struct bpf_prog *xdp_prog; @@ -2446,17 +2476,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); - truesize = mergeable_ctx_to_truesize(ctx); - headroom = mergeable_ctx_to_headroom(ctx); - tailroom = headroom ? sizeof(struct skb_shared_info) : 0; - room = SKB_DATA_ALIGN(headroom + tailroom); - if (unlikely(len > truesize - room)) { - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); + if (check_mergeable_len(dev, ctx, len)) goto err_skb; - } + truesize = mergeable_ctx_to_truesize(ctx); curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, buf, len, truesize); if (!curr_skb) @@ -2998,7 +3021,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) free_old_xmit(sq, txq, !!budget); } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -3195,7 +3218,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) else free_old_xmit(sq, txq, !!budget); - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -3481,6 +3504,12 @@ static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, { int qindex, err; + if (ring_num <= MAX_SKB_FRAGS + 2) { + netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n", + ring_num, MAX_SKB_FRAGS + 2); + return -EINVAL; + } + qindex = sq - vi->sq; virtnet_tx_pause(vi, sq); diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c index 0e5130d1fccd..031d88bf6393 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c @@ -203,7 +203,8 @@ il4965_rs_extract_rate(u32 rate_n_flags) return (u8) (rate_n_flags & 0xFF); } -static void +/* noinline works around https://github.com/llvm/llvm-project/issues/143908 */ +static noinline_for_stack void il4965_rs_rate_scale_clear_win(struct il_rate_scale_data *win) { win->data = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c index 3c255ae916c8..3f8b840871d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c @@ -32,9 +32,9 @@ static void iwl_mvm_mld_mac_ctxt_cmd_common(struct iwl_mvm *mvm, unsigned int link_id; int cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, WIDE_ID(MAC_CONF_GROUP, - MAC_CONFIG_CMD), 0); + MAC_CONFIG_CMD), 1); - if (WARN_ON(cmd_ver < 1 || cmd_ver > 3)) + if (WARN_ON(cmd_ver > 3)) return; cmd->id_and_color = cpu_to_le32(mvmvif->id); diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c index 4c5b1de0e936..6882e90e90b2 100644 --- a/drivers/net/wireless/marvell/mwifiex/util.c +++ b/drivers/net/wireless/marvell/mwifiex/util.c @@ -459,7 +459,9 @@ mwifiex_process_mgmt_packet(struct mwifiex_private *priv, "auth: receive authentication from %pM\n", ieee_hdr->addr3); } else { - if (!priv->wdev.connected) + if (!priv->wdev.connected || + !ether_addr_equal(ieee_hdr->addr3, + priv->curr_bss_params.bss_descriptor.mac_address)) return 0; if (ieee80211_is_deauth(ieee_hdr->frame_control)) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 5f8d81cda6cd..74b75035d361 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1224,6 +1224,16 @@ static inline int mt76_wed_dma_setup(struct mt76_dev *dev, struct mt76_queue *q, #define mt76_dereference(p, dev) \ rcu_dereference_protected(p, lockdep_is_held(&(dev)->mutex)) +static inline struct mt76_wcid * +__mt76_wcid_ptr(struct mt76_dev *dev, u16 idx) +{ + if (idx >= ARRAY_SIZE(dev->wcid)) + return NULL; + return rcu_dereference(dev->wcid[idx]); +} + +#define mt76_wcid_ptr(dev, idx) __mt76_wcid_ptr(&(dev)->mt76, idx) + struct mt76_dev *mt76_alloc_device(struct device *pdev, unsigned int size, const struct ieee80211_ops *ops, const struct mt76_driver_ops *drv_ops); diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c index 863e5770df51..e26cc78fff94 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c @@ -44,7 +44,7 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) if (idx >= MT7603_WTBL_STA - 1) goto free; - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (!wcid) goto free; diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index 413973d05b43..6387f9e61060 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -487,10 +487,7 @@ mt7603_rx_get_wcid(struct mt7603_dev *dev, u8 idx, bool unicast) struct mt7603_sta *sta; struct mt76_wcid *wcid; - if (idx >= MT7603_WTBL_SIZE) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; @@ -1266,12 +1263,9 @@ void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data) if (pid == MT_PACKET_ID_NO_ACK) return; - if (wcidx >= MT7603_WTBL_SIZE) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index 3ca4fae7c4b0..f8d2cc94b742 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -90,10 +90,7 @@ static struct mt76_wcid *mt7615_rx_get_wcid(struct mt7615_dev *dev, struct mt7615_sta *sta; struct mt76_wcid *wcid; - if (idx >= MT7615_WTBL_SIZE) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; @@ -1504,7 +1501,7 @@ static void mt7615_mac_add_txs(struct mt7615_dev *dev, void *data) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index e9ac8a7317a1..0db00efe88b0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -1172,7 +1172,7 @@ void mt76_connac2_txwi_free(struct mt76_dev *dev, struct mt76_txwi_cache *t, wcid_idx = wcid->idx; } else { wcid_idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX); - wcid = rcu_dereference(dev->wcid[wcid_idx]); + wcid = __mt76_wcid_ptr(dev, wcid_idx); if (wcid && wcid->sta) { sta = container_of((void *)wcid, struct ieee80211_sta, diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index cb13d0a76878..16db0f2082d1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -287,7 +287,7 @@ __mt76_connac_mcu_alloc_sta_req(struct mt76_dev *dev, struct mt76_vif_link *mvif mt76_connac_mcu_get_wlan_idx(dev, wcid, &hdr.wlan_idx_lo, &hdr.wlan_idx_hi); - skb = mt76_mcu_msg_alloc(dev, NULL, len); + skb = __mt76_mcu_msg_alloc(dev, NULL, len, len, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); @@ -1740,8 +1740,8 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, if (!sreq->ssids[i].ssid_len) continue; - req->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); - memcpy(req->ssids[i].ssid, sreq->ssids[i].ssid, + req->ssids[n_ssids].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); + memcpy(req->ssids[n_ssids].ssid, sreq->ssids[i].ssid, sreq->ssids[i].ssid_len); n_ssids++; } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index 4cd63bacd742..9d7ee09b6cc9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -262,10 +262,7 @@ mt76x02_rx_get_sta(struct mt76_dev *dev, u8 idx) { struct mt76_wcid *wcid; - if (idx >= MT76x02_N_WCIDS) - return NULL; - - wcid = rcu_dereference(dev->wcid[idx]); + wcid = __mt76_wcid_ptr(dev, idx); if (!wcid) return NULL; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index d5db6ffd6d36..83488b2d6efb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -564,9 +564,7 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, rcu_read_lock(); - if (stat->wcid < MT76x02_N_WCIDS) - wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]); - + wcid = mt76_wcid_ptr(dev, stat->wcid); if (wcid && wcid->sta) { void *priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 9400e4af2a04..6639976afcee 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -56,10 +56,7 @@ static struct mt76_wcid *mt7915_rx_get_wcid(struct mt7915_dev *dev, struct mt7915_sta *sta; struct mt76_wcid *wcid; - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; @@ -917,7 +914,7 @@ mt7915_mac_tx_free(struct mt7915_dev *dev, void *data, int len) u16 idx; idx = FIELD_GET(MT_TX_FREE_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) continue; @@ -1013,12 +1010,9 @@ static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data) if (pid < MT_PACKET_ID_WED) return; - if (wcidx >= mt7915_wtbl_size(dev)) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 427542777abc..c6584d2b7509 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -3986,7 +3986,7 @@ int mt7915_mcu_wed_wa_tx_stats(struct mt7915_dev *dev, u16 wlan_idx) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (wcid) wcid->stats.tx_packets += le32_to_cpu(res->tx_packets); else diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index 9c4d5cea0c42..4a82f8e4c118 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -587,12 +587,9 @@ static void mt7915_mmio_wed_update_rx_stats(struct mtk_wed_device *wed, dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed); - if (idx >= mt7915_wtbl_size(dev)) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (wcid) { wcid->stats.rx_bytes += le32_to_cpu(stats->rx_byte_cnt); wcid->stats.rx_packets += le32_to_cpu(stats->rx_pkt_cnt); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index 5dd57de59f27..f1f76506b0a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -465,7 +465,7 @@ void mt7921_mac_add_txs(struct mt792x_dev *dev, void *data) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; @@ -516,7 +516,7 @@ static void mt7921_mac_tx_free(struct mt792x_dev *dev, void *data, int len) count++; idx = FIELD_GET(MT_TX_FREE_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) continue; @@ -816,7 +816,7 @@ void mt7921_usb_sdio_tx_complete_skb(struct mt76_dev *mdev, u16 idx; idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX); - wcid = rcu_dereference(mdev->wcid[idx]); + wcid = __mt76_wcid_ptr(mdev, idx); sta = wcid_to_sta(wcid); if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE))) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 1fffa43379b2..77f73ae1d7ec 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -1180,6 +1180,9 @@ static void mt7921_sta_set_decap_offload(struct ieee80211_hw *hw, struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv; struct mt792x_dev *dev = mt792x_hw_dev(hw); + if (!msta->deflink.wcid.sta) + return; + mt792x_mutex_acquire(dev); if (enabled) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/init.c b/drivers/net/wireless/mediatek/mt76/mt7925/init.c index 2a83ff59a968..4249bad83c93 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/init.c @@ -52,6 +52,8 @@ static int mt7925_thermal_init(struct mt792x_phy *phy) name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7925_%s", wiphy_name(wiphy)); + if (!name) + return -ENOMEM; hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, phy, mt7925_hwmon_groups); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index c871d2f9688b..75823c9fd3a1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -1040,7 +1040,7 @@ void mt7925_mac_add_txs(struct mt792x_dev *dev, void *data) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; @@ -1122,7 +1122,7 @@ mt7925_mac_tx_free(struct mt792x_dev *dev, void *data, int len) u16 idx; idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) continue; @@ -1445,7 +1445,7 @@ void mt7925_usb_sdio_tx_complete_skb(struct mt76_dev *mdev, u16 idx; idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX); - wcid = rcu_dereference(mdev->wcid[idx]); + wcid = __mt76_wcid_ptr(mdev, idx); sta = wcid_to_sta(wcid); if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE))) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 94b0099dcd41..5b001548dffc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1481,7 +1481,7 @@ mt7925_start_sched_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, mt792x_mutex_acquire(dev); - err = mt7925_mcu_sched_scan_req(mphy, vif, req); + err = mt7925_mcu_sched_scan_req(mphy, vif, req, ies); if (err < 0) goto out; @@ -1603,6 +1603,9 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw, unsigned long valid = mvif->valid_links; u8 i; + if (!msta->vif) + return; + mt792x_mutex_acquire(dev); valid = ieee80211_vif_is_mld(vif) ? mvif->valid_links : BIT(0); @@ -1617,6 +1620,9 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw, else clear_bit(MT_WCID_FLAG_HDR_TRANS, &mlink->wcid.flags); + if (!mlink->wcid.sta) + continue; + mt7925_mcu_wtbl_update_hdr_trans(dev, vif, sta, i); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index b8542be0d945..8ac6fbb736ab 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -164,6 +164,7 @@ mt7925_connac_mcu_set_wow_ctrl(struct mt76_phy *phy, struct ieee80211_vif *vif, bool suspend, struct cfg80211_wowlan *wowlan) { struct mt76_vif_link *mvif = (struct mt76_vif_link *)vif->drv_priv; + struct ieee80211_scan_ies ies = {}; struct mt76_dev *dev = phy->dev; struct { struct { @@ -194,7 +195,7 @@ mt7925_connac_mcu_set_wow_ctrl(struct mt76_phy *phy, struct ieee80211_vif *vif, req.wow_ctrl_tlv.trigger |= (UNI_WOW_DETECT_TYPE_DISCONNECT | UNI_WOW_DETECT_TYPE_BCN_LOST); if (wowlan->nd_config) { - mt7925_mcu_sched_scan_req(phy, vif, wowlan->nd_config); + mt7925_mcu_sched_scan_req(phy, vif, wowlan->nd_config, &ies); req.wow_ctrl_tlv.trigger |= UNI_WOW_DETECT_TYPE_SCH_SCAN_HIT; mt7925_mcu_sched_scan_enable(phy, vif, suspend); } @@ -2818,6 +2819,54 @@ int mt7925_mcu_set_dbdc(struct mt76_phy *phy, bool enable) return err; } +static void +mt7925_mcu_build_scan_ie_tlv(struct mt76_dev *mdev, + struct sk_buff *skb, + struct ieee80211_scan_ies *scan_ies) +{ + u32 max_len = sizeof(struct scan_ie_tlv) + MT76_CONNAC_SCAN_IE_LEN; + struct scan_ie_tlv *ie; + enum nl80211_band i; + struct tlv *tlv; + const u8 *ies; + u16 ies_len; + + for (i = 0; i <= NL80211_BAND_6GHZ; i++) { + if (i == NL80211_BAND_60GHZ) + continue; + + ies = scan_ies->ies[i]; + ies_len = scan_ies->len[i]; + + if (!ies || !ies_len) + continue; + + if (ies_len > max_len) + return; + + tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, + sizeof(*ie) + ies_len); + ie = (struct scan_ie_tlv *)tlv; + + memcpy(ie->ies, ies, ies_len); + ie->ies_len = cpu_to_le16(ies_len); + + switch (i) { + case NL80211_BAND_2GHZ: + ie->band = 1; + break; + case NL80211_BAND_6GHZ: + ie->band = 3; + break; + default: + ie->band = 2; + break; + } + + max_len -= (sizeof(*ie) + ies_len); + } +} + int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, struct ieee80211_scan_request *scan_req) { @@ -2843,7 +2892,8 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, max_len = sizeof(*hdr) + sizeof(*req) + sizeof(*ssid) + sizeof(*bssid) * MT7925_RNR_SCAN_MAX_BSSIDS + - sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie); + sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie) + + MT76_CONNAC_SCAN_IE_LEN; skb = mt76_mcu_msg_alloc(mdev, NULL, max_len); if (!skb) @@ -2869,8 +2919,8 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, if (i > MT7925_RNR_SCAN_MAX_BSSIDS) break; - ssid->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); - memcpy(ssid->ssids[i].ssid, sreq->ssids[i].ssid, + ssid->ssids[n_ssids].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); + memcpy(ssid->ssids[n_ssids].ssid, sreq->ssids[i].ssid, sreq->ssids[i].ssid_len); n_ssids++; } @@ -2925,13 +2975,6 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, } chan_info->channel_type = sreq->n_channels ? 4 : 0; - tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, sizeof(*ie)); - ie = (struct scan_ie_tlv *)tlv; - if (sreq->ie_len > 0) { - memcpy(ie->ies, sreq->ie, sreq->ie_len); - ie->ies_len = cpu_to_le16(sreq->ie_len); - } - req->scan_func |= SCAN_FUNC_SPLIT_SCAN; tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_MISC, sizeof(*misc)); @@ -2942,6 +2985,9 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, req->scan_func |= SCAN_FUNC_RANDOM_MAC; } + /* Append scan probe IEs as the last tlv */ + mt7925_mcu_build_scan_ie_tlv(mdev, skb, &scan_req->ies); + err = mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), true); if (err < 0) @@ -2953,7 +2999,8 @@ EXPORT_SYMBOL_GPL(mt7925_mcu_hw_scan); int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, struct ieee80211_vif *vif, - struct cfg80211_sched_scan_request *sreq) + struct cfg80211_sched_scan_request *sreq, + struct ieee80211_scan_ies *ies) { struct mt76_vif_link *mvif = (struct mt76_vif_link *)vif->drv_priv; struct ieee80211_channel **scan_list = sreq->channels; @@ -3041,12 +3088,8 @@ int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, } chan_info->channel_type = sreq->n_channels ? 4 : 0; - tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, sizeof(*ie)); - ie = (struct scan_ie_tlv *)tlv; - if (sreq->ie_len > 0) { - memcpy(ie->ies, sreq->ie, sreq->ie_len); - ie->ies_len = cpu_to_le16(sreq->ie_len); - } + /* Append scan probe IEs as the last tlv */ + mt7925_mcu_build_scan_ie_tlv(mdev, skb, ies); return mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h index ee6fb16e83c5..a40764d89a1f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h @@ -269,7 +269,7 @@ struct scan_ie_tlv { __le16 ies_len; u8 band; u8 pad; - u8 ies[MT76_CONNAC_SCAN_IE_LEN]; + u8 ies[]; }; struct scan_misc_tlv { @@ -673,7 +673,8 @@ int mt7925_mcu_cancel_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif); int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, struct ieee80211_vif *vif, - struct cfg80211_sched_scan_request *sreq); + struct cfg80211_sched_scan_request *sreq, + struct ieee80211_scan_ies *ies); int mt7925_mcu_sched_scan_enable(struct mt76_phy *phy, struct ieee80211_vif *vif, bool enable); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/regs.h b/drivers/net/wireless/mediatek/mt76/mt7925/regs.h index 547489092c29..341987e47f67 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/regs.h @@ -58,7 +58,7 @@ #define MT_INT_TX_DONE_MCU (MT_INT_TX_DONE_MCU_WM | \ MT_INT_TX_DONE_FWDL) -#define MT_INT_TX_DONE_ALL (MT_INT_TX_DONE_MCU_WM | \ +#define MT_INT_TX_DONE_ALL (MT_INT_TX_DONE_MCU | \ MT_INT_TX_DONE_BAND0 | \ GENMASK(18, 4)) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index a50c1723ca29..05130ec1e5f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -28,7 +28,7 @@ static const struct ieee80211_iface_combination if_comb[] = { }, }; -static const struct ieee80211_iface_limit if_limits_chanctx[] = { +static const struct ieee80211_iface_limit if_limits_chanctx_mcc[] = { { .max = 2, .types = BIT(NL80211_IFTYPE_STATION) | @@ -36,8 +36,23 @@ static const struct ieee80211_iface_limit if_limits_chanctx[] = { }, { .max = 1, - .types = BIT(NL80211_IFTYPE_AP) | - BIT(NL80211_IFTYPE_P2P_GO) + .types = BIT(NL80211_IFTYPE_P2P_GO) + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_P2P_DEVICE) + } +}; + +static const struct ieee80211_iface_limit if_limits_chanctx_scc[] = { + { + .max = 2, + .types = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_P2P_CLIENT) + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_AP) }, { .max = 1, @@ -47,11 +62,18 @@ static const struct ieee80211_iface_limit if_limits_chanctx[] = { static const struct ieee80211_iface_combination if_comb_chanctx[] = { { - .limits = if_limits_chanctx, - .n_limits = ARRAY_SIZE(if_limits_chanctx), + .limits = if_limits_chanctx_mcc, + .n_limits = ARRAY_SIZE(if_limits_chanctx_mcc), .max_interfaces = 3, .num_different_channels = 2, .beacon_int_infra_match = false, + }, + { + .limits = if_limits_chanctx_scc, + .n_limits = ARRAY_SIZE(if_limits_chanctx_scc), + .max_interfaces = 3, + .num_different_channels = 1, + .beacon_int_infra_match = false, } }; diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c index 05978d9c7b91..3f1d9ba49076 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c @@ -142,10 +142,7 @@ struct mt76_wcid *mt792x_rx_get_wcid(struct mt792x_dev *dev, u16 idx, struct mt792x_sta *sta; struct mt76_wcid *wcid; - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 0dbd4662bc84..92148518f6a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -61,10 +61,7 @@ static struct mt76_wcid *mt7996_rx_get_wcid(struct mt7996_dev *dev, struct mt76_wcid *wcid; int i; - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (!wcid) return NULL; @@ -1249,7 +1246,7 @@ mt7996_mac_tx_free(struct mt7996_dev *dev, void *data, int len) u16 idx; idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) goto next; @@ -1471,12 +1468,9 @@ static void mt7996_mac_add_txs(struct mt7996_dev *dev, void *data) if (pid < MT_PACKET_ID_NO_SKB) return; - if (wcidx >= mt7996_wtbl_size(dev)) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; @@ -2353,20 +2347,12 @@ void mt7996_mac_update_stats(struct mt7996_phy *phy) void mt7996_mac_sta_rc_work(struct work_struct *work) { struct mt7996_dev *dev = container_of(work, struct mt7996_dev, rc_work); - struct ieee80211_bss_conf *link_conf; - struct ieee80211_link_sta *link_sta; struct mt7996_sta_link *msta_link; - struct mt7996_vif_link *link; - struct mt76_vif_link *mlink; - struct ieee80211_sta *sta; struct ieee80211_vif *vif; - struct mt7996_sta *msta; struct mt7996_vif *mvif; LIST_HEAD(list); u32 changed; - u8 link_id; - rcu_read_lock(); spin_lock_bh(&dev->mt76.sta_poll_lock); list_splice_init(&dev->sta_rc_list, &list); @@ -2377,46 +2363,28 @@ void mt7996_mac_sta_rc_work(struct work_struct *work) changed = msta_link->changed; msta_link->changed = 0; - - sta = wcid_to_sta(&msta_link->wcid); - link_id = msta_link->wcid.link_id; - msta = msta_link->sta; - mvif = msta->vif; - vif = container_of((void *)mvif, struct ieee80211_vif, drv_priv); - - mlink = rcu_dereference(mvif->mt76.link[link_id]); - if (!mlink) - continue; - - link_sta = rcu_dereference(sta->link[link_id]); - if (!link_sta) - continue; - - link_conf = rcu_dereference(vif->link_conf[link_id]); - if (!link_conf) - continue; + mvif = msta_link->sta->vif; + vif = container_of((void *)mvif, struct ieee80211_vif, + drv_priv); spin_unlock_bh(&dev->mt76.sta_poll_lock); - link = (struct mt7996_vif_link *)mlink; - if (changed & (IEEE80211_RC_SUPP_RATES_CHANGED | IEEE80211_RC_NSS_CHANGED | IEEE80211_RC_BW_CHANGED)) - mt7996_mcu_add_rate_ctrl(dev, vif, link_conf, - link_sta, link, msta_link, + mt7996_mcu_add_rate_ctrl(dev, msta_link->sta, vif, + msta_link->wcid.link_id, true); if (changed & IEEE80211_RC_SMPS_CHANGED) - mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, NULL, + mt7996_mcu_set_fixed_field(dev, msta_link->sta, NULL, + msta_link->wcid.link_id, RATE_PARAM_MMPS_UPDATE); spin_lock_bh(&dev->mt76.sta_poll_lock); } spin_unlock_bh(&dev->mt76.sta_poll_lock); - rcu_read_unlock(); } void mt7996_mac_work(struct work_struct *work) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 78ae9f5cb176..07dd75ce94a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1112,9 +1112,8 @@ mt7996_mac_sta_event(struct mt7996_dev *dev, struct ieee80211_vif *vif, if (err) return err; - err = mt7996_mcu_add_rate_ctrl(dev, vif, link_conf, - link_sta, link, - msta_link, false); + err = mt7996_mcu_add_rate_ctrl(dev, msta_link->sta, vif, + link_id, false); if (err) return err; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index f0adc0b4b8b6..994526c65bfc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -555,7 +555,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb) switch (le16_to_cpu(res->tag)) { case UNI_ALL_STA_TXRX_RATE: wlan_idx = le16_to_cpu(res->rate[i].wlan_idx); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (!wcid) break; @@ -565,7 +565,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb) break; case UNI_ALL_STA_TXRX_ADM_STAT: wlan_idx = le16_to_cpu(res->adm_stat[i].wlan_idx); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (!wcid) break; @@ -579,7 +579,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb) break; case UNI_ALL_STA_TXRX_MSDU_COUNT: wlan_idx = le16_to_cpu(res->msdu_cnt[i].wlan_idx); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (!wcid) break; @@ -676,10 +676,7 @@ mt7996_mcu_wed_rro_event(struct mt7996_dev *dev, struct sk_buff *skb) e = (void *)skb->data; idx = le16_to_cpu(e->wlan_id); - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - break; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (!wcid || !wcid->sta) break; @@ -1905,22 +1902,35 @@ int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev, MCU_WM_UNI_CMD(RA), true); } -int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, - void *data, u32 field) +int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, struct mt7996_sta *msta, + void *data, u8 link_id, u32 field) { - struct sta_phy_uni *phy = data; + struct mt7996_vif *mvif = msta->vif; + struct mt7996_sta_link *msta_link; struct sta_rec_ra_fixed_uni *ra; + struct sta_phy_uni *phy = data; + struct mt76_vif_link *mlink; struct sk_buff *skb; + int err = -ENODEV; struct tlv *tlv; - skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76, + rcu_read_lock(); + + mlink = rcu_dereference(mvif->mt76.link[link_id]); + if (!mlink) + goto error_unlock; + + msta_link = rcu_dereference(msta->link[link_id]); + if (!msta_link) + goto error_unlock; + + skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, mlink, &msta_link->wcid, MT7996_STA_UPDATE_MAX_SIZE); - if (IS_ERR(skb)) - return PTR_ERR(skb); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto error_unlock; + } tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_RA_UPDATE, sizeof(*ra)); ra = (struct sta_rec_ra_fixed_uni *)tlv; @@ -1935,106 +1945,149 @@ int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, if (phy) ra->phy = *phy; break; - case RATE_PARAM_MMPS_UPDATE: + case RATE_PARAM_MMPS_UPDATE: { + struct ieee80211_sta *sta = wcid_to_sta(&msta_link->wcid); + struct ieee80211_link_sta *link_sta; + + link_sta = rcu_dereference(sta->link[link_id]); + if (!link_sta) { + dev_kfree_skb(skb); + goto error_unlock; + } + ra->mmps_mode = mt7996_mcu_get_mmps_mode(link_sta->smps_mode); break; + } default: break; } ra->field = cpu_to_le32(field); + rcu_read_unlock(); + return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); +error_unlock: + rcu_read_unlock(); + + return err; } static int -mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link) +mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev, struct mt7996_sta *msta, + struct ieee80211_vif *vif, u8 link_id) { - struct cfg80211_chan_def *chandef = &link->phy->mt76->chandef; - struct cfg80211_bitrate_mask *mask = &link->bitrate_mask; - enum nl80211_band band = chandef->chan->band; + struct ieee80211_link_sta *link_sta; + struct cfg80211_bitrate_mask mask; + struct mt7996_sta_link *msta_link; + struct mt7996_vif_link *link; struct sta_phy_uni phy = {}; - int ret, nrates = 0; + struct ieee80211_sta *sta; + int ret, nrates = 0, idx; + enum nl80211_band band; + bool has_he; #define __sta_phy_bitrate_mask_check(_mcs, _gi, _ht, _he) \ do { \ - u8 i, gi = mask->control[band]._gi; \ + u8 i, gi = mask.control[band]._gi; \ gi = (_he) ? gi : gi == NL80211_TXRATE_FORCE_SGI; \ phy.sgi = gi; \ - phy.he_ltf = mask->control[band].he_ltf; \ - for (i = 0; i < ARRAY_SIZE(mask->control[band]._mcs); i++) { \ - if (!mask->control[band]._mcs[i]) \ + phy.he_ltf = mask.control[band].he_ltf; \ + for (i = 0; i < ARRAY_SIZE(mask.control[band]._mcs); i++) { \ + if (!mask.control[band]._mcs[i]) \ continue; \ - nrates += hweight16(mask->control[band]._mcs[i]); \ - phy.mcs = ffs(mask->control[band]._mcs[i]) - 1; \ + nrates += hweight16(mask.control[band]._mcs[i]); \ + phy.mcs = ffs(mask.control[band]._mcs[i]) - 1; \ if (_ht) \ phy.mcs += 8 * i; \ } \ } while (0) - if (link_sta->he_cap.has_he) { + rcu_read_lock(); + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + goto error_unlock; + + msta_link = rcu_dereference(msta->link[link_id]); + if (!msta_link) + goto error_unlock; + + sta = wcid_to_sta(&msta_link->wcid); + link_sta = rcu_dereference(sta->link[link_id]); + if (!link_sta) + goto error_unlock; + + band = link->phy->mt76->chandef.chan->band; + has_he = link_sta->he_cap.has_he; + mask = link->bitrate_mask; + idx = msta_link->wcid.idx; + + if (has_he) { __sta_phy_bitrate_mask_check(he_mcs, he_gi, 0, 1); } else if (link_sta->vht_cap.vht_supported) { __sta_phy_bitrate_mask_check(vht_mcs, gi, 0, 0); } else if (link_sta->ht_cap.ht_supported) { __sta_phy_bitrate_mask_check(ht_mcs, gi, 1, 0); } else { - nrates = hweight32(mask->control[band].legacy); - phy.mcs = ffs(mask->control[band].legacy) - 1; + nrates = hweight32(mask.control[band].legacy); + phy.mcs = ffs(mask.control[band].legacy) - 1; } + + rcu_read_unlock(); + #undef __sta_phy_bitrate_mask_check /* fall back to auto rate control */ - if (mask->control[band].gi == NL80211_TXRATE_DEFAULT_GI && - mask->control[band].he_gi == GENMASK(7, 0) && - mask->control[band].he_ltf == GENMASK(7, 0) && + if (mask.control[band].gi == NL80211_TXRATE_DEFAULT_GI && + mask.control[band].he_gi == GENMASK(7, 0) && + mask.control[band].he_ltf == GENMASK(7, 0) && nrates != 1) return 0; /* fixed single rate */ if (nrates == 1) { - ret = mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, &phy, + ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id, RATE_PARAM_FIXED_MCS); if (ret) return ret; } /* fixed GI */ - if (mask->control[band].gi != NL80211_TXRATE_DEFAULT_GI || - mask->control[band].he_gi != GENMASK(7, 0)) { + if (mask.control[band].gi != NL80211_TXRATE_DEFAULT_GI || + mask.control[band].he_gi != GENMASK(7, 0)) { u32 addr; /* firmware updates only TXCMD but doesn't take WTBL into * account, so driver should update here to reflect the * actual txrate hardware sends out. */ - addr = mt7996_mac_wtbl_lmac_addr(dev, msta_link->wcid.idx, 7); - if (link_sta->he_cap.has_he) + addr = mt7996_mac_wtbl_lmac_addr(dev, idx, 7); + if (has_he) mt76_rmw_field(dev, addr, GENMASK(31, 24), phy.sgi); else mt76_rmw_field(dev, addr, GENMASK(15, 12), phy.sgi); - ret = mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, &phy, + ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id, RATE_PARAM_FIXED_GI); if (ret) return ret; } /* fixed HE_LTF */ - if (mask->control[band].he_ltf != GENMASK(7, 0)) { - ret = mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, &phy, + if (mask.control[band].he_ltf != GENMASK(7, 0)) { + ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id, RATE_PARAM_FIXED_HE_LTF); if (ret) return ret; } return 0; + +error_unlock: + rcu_read_unlock(); + + return -ENODEV; } static void @@ -2145,21 +2198,44 @@ mt7996_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7996_dev *dev, memset(ra->rx_rcpi, INIT_RCPI, sizeof(ra->rx_rcpi)); } -int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, - struct ieee80211_vif *vif, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, bool changed) +int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, struct mt7996_sta *msta, + struct ieee80211_vif *vif, u8 link_id, + bool changed) { + struct ieee80211_bss_conf *link_conf; + struct ieee80211_link_sta *link_sta; + struct mt7996_sta_link *msta_link; + struct mt7996_vif_link *link; + struct ieee80211_sta *sta; struct sk_buff *skb; - int ret; + int ret = -ENODEV; + + rcu_read_lock(); + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + goto error_unlock; + + msta_link = rcu_dereference(msta->link[link_id]); + if (!msta_link) + goto error_unlock; + + sta = wcid_to_sta(&msta_link->wcid); + link_sta = rcu_dereference(sta->link[link_id]); + if (!link_sta) + goto error_unlock; + + link_conf = rcu_dereference(vif->link_conf[link_id]); + if (!link_conf) + goto error_unlock; skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76, &msta_link->wcid, MT7996_STA_UPDATE_MAX_SIZE); - if (IS_ERR(skb)) - return PTR_ERR(skb); + if (IS_ERR(skb)) { + ret = PTR_ERR(skb); + goto error_unlock; + } /* firmware rc algorithm refers to sta_rec_he for HE control. * once dev->rc_work changes the settings driver should also @@ -2173,12 +2249,19 @@ int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, */ mt7996_mcu_sta_rate_ctrl_tlv(skb, dev, vif, link_conf, link_sta, link); + rcu_read_unlock(); + ret = mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); if (ret) return ret; - return mt7996_mcu_add_rate_ctrl_fixed(dev, link_sta, link, msta_link); + return mt7996_mcu_add_rate_ctrl_fixed(dev, msta, vif, link_id); + +error_unlock: + rcu_read_unlock(); + + return ret; } static int diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 1ad6bc046f7c..33ac16b64ef1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -620,23 +620,17 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev, int mt7996_mcu_add_obss_spr(struct mt7996_phy *phy, struct mt7996_vif_link *link, struct ieee80211_he_obss_pd *he_obss_pd); -int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, - struct ieee80211_vif *vif, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, bool changed); +int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, struct mt7996_sta *msta, + struct ieee80211_vif *vif, u8 link_id, + bool changed); int mt7996_set_channel(struct mt76_phy *mphy); int mt7996_mcu_set_chan_info(struct mt7996_phy *phy, u16 tag); int mt7996_mcu_set_tx(struct mt7996_dev *dev, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev, void *data, u16 version); -int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, - void *data, u32 field); +int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, struct mt7996_sta *msta, + void *data, u8 link_id, u32 field); int mt7996_mcu_set_eeprom(struct mt7996_dev *dev); int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_len); int mt7996_mcu_get_eeprom_free_block(struct mt7996_dev *dev, u8 *block_num); diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 513916469ca2..e6cf16706667 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -64,7 +64,7 @@ mt76_tx_status_unlock(struct mt76_dev *dev, struct sk_buff_head *list) struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb); struct mt76_wcid *wcid; - wcid = rcu_dereference(dev->wcid[cb->wcid]); + wcid = __mt76_wcid_ptr(dev, cb->wcid); if (wcid) { status.sta = wcid_to_sta(wcid); if (status.sta && (wcid->rate.flags || wcid->rate.legacy)) { @@ -251,9 +251,7 @@ void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff * rcu_read_lock(); - if (wcid_idx < ARRAY_SIZE(dev->wcid)) - wcid = rcu_dereference(dev->wcid[wcid_idx]); - + wcid = __mt76_wcid_ptr(dev, wcid_idx); mt76_tx_check_non_aql(dev, wcid, skb); #ifdef CONFIG_NL80211_TESTMODE @@ -538,7 +536,7 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid) break; mtxq = (struct mt76_txq *)txq->drv_priv; - wcid = rcu_dereference(dev->wcid[mtxq->wcid]); + wcid = __mt76_wcid_ptr(dev, mtxq->wcid); if (!wcid || test_bit(MT_WCID_FLAG_PS, &wcid->flags)) continue; @@ -617,7 +615,8 @@ mt76_txq_schedule_pending_wcid(struct mt76_phy *phy, struct mt76_wcid *wcid, if ((dev->drv->drv_flags & MT_DRV_HW_MGMT_TXQ) && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) && !ieee80211_is_data(hdr->frame_control) && - !ieee80211_is_bufferable_mmpdu(skb)) + (!ieee80211_is_bufferable_mmpdu(skb) || + ieee80211_is_deauth(hdr->frame_control))) qid = MT_TXQ_PSD; q = phy->q_tx[qid]; diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c index 95b3dc96e4c4..97249ebb4bc8 100644 --- a/drivers/net/wireless/mediatek/mt76/util.c +++ b/drivers/net/wireless/mediatek/mt76/util.c @@ -83,7 +83,7 @@ int mt76_get_min_avg_rssi(struct mt76_dev *dev, u8 phy_idx) if (!(mask & 1)) continue; - wcid = rcu_dereference(dev->wcid[j]); + wcid = __mt76_wcid_ptr(dev, j); if (!wcid || wcid->phy_idx != phy_idx) continue; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c index eface610178d..f7f3a2340c39 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c @@ -108,7 +108,7 @@ exit_free_device: } EXPORT_SYMBOL_GPL(rt2x00soc_probe); -int rt2x00soc_remove(struct platform_device *pdev) +void rt2x00soc_remove(struct platform_device *pdev) { struct ieee80211_hw *hw = platform_get_drvdata(pdev); struct rt2x00_dev *rt2x00dev = hw->priv; @@ -119,8 +119,6 @@ int rt2x00soc_remove(struct platform_device *pdev) rt2x00lib_remove_dev(rt2x00dev); rt2x00soc_free_reg(rt2x00dev); ieee80211_free_hw(hw); - - return 0; } EXPORT_SYMBOL_GPL(rt2x00soc_remove); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h index 021fd06b3627..d6226b8a10e0 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h @@ -17,7 +17,7 @@ * SoC driver handlers. */ int rt2x00soc_probe(struct platform_device *pdev, const struct rt2x00_ops *ops); -int rt2x00soc_remove(struct platform_device *pdev); +void rt2x00soc_remove(struct platform_device *pdev); #ifdef CONFIG_PM int rt2x00soc_suspend(struct platform_device *pdev, pm_message_t state); int rt2x00soc_resume(struct platform_device *pdev); diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c index 9653dbaac3c0..781510a3ec6d 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c @@ -583,7 +583,11 @@ void zd_mac_tx_to_dev(struct sk_buff *skb, int error) skb_queue_tail(q, skb); while (skb_queue_len(q) > ZD_MAC_MAX_ACK_WAITERS) { - zd_mac_tx_status(hw, skb_dequeue(q), + skb = skb_dequeue(q); + if (!skb) + break; + + zd_mac_tx_status(hw, skb, mac->ack_pending ? mac->ack_signal : 0, NULL); mac->ack_pending = 0; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 92697f98c601..7493e5aa984c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -386,7 +386,7 @@ static void nvme_log_err_passthru(struct request *req) nr->cmd->common.cdw12, nr->cmd->common.cdw13, nr->cmd->common.cdw14, - nr->cmd->common.cdw14); + nr->cmd->common.cdw15); } enum nvme_disposition { @@ -2015,21 +2015,41 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, } -static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns, - struct nvme_id_ns *id, struct queue_limits *lim, - u32 bs, u32 atomic_bs) +static u32 nvme_configure_atomic_write(struct nvme_ns *ns, + struct nvme_id_ns *id, struct queue_limits *lim, u32 bs) { - unsigned int boundary = 0; + u32 atomic_bs, boundary = 0; - if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) { - if (le16_to_cpu(id->nabspf)) + /* + * We do not support an offset for the atomic boundaries. + */ + if (id->nabo) + return bs; + + if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) { + /* + * Use the per-namespace atomic write unit when available. + */ + atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; + if (id->nabspf) boundary = (le16_to_cpu(id->nabspf) + 1) * bs; + } else { + /* + * Use the controller wide atomic write unit. This sucks + * because the limit is defined in terms of logical blocks while + * namespaces can have different formats, and because there is + * no clear language in the specification prohibiting different + * values for different controllers in the subsystem. + */ + atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; } + lim->atomic_write_hw_max = atomic_bs; lim->atomic_write_hw_boundary = boundary; lim->atomic_write_hw_unit_min = bs; lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs); lim->features |= BLK_FEAT_ATOMIC_WRITES; + return atomic_bs; } static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) @@ -2067,34 +2087,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, valid = false; } - atomic_bs = phys_bs = bs; - if (id->nabo == 0) { - /* - * Bit 1 indicates whether NAWUPF is defined for this namespace - * and whether it should be used instead of AWUPF. If NAWUPF == - * 0 then AWUPF must be used instead. - */ - if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) - atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; - else - atomic_bs = (1 + ns->ctrl->awupf) * bs; - - /* - * Set subsystem atomic bs. - */ - if (ns->ctrl->subsys->atomic_bs) { - if (atomic_bs != ns->ctrl->subsys->atomic_bs) { - dev_err_ratelimited(ns->ctrl->device, - "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", - ns->disk ? ns->disk->disk_name : "?", - ns->ctrl->subsys->atomic_bs, - atomic_bs); - } - } else - ns->ctrl->subsys->atomic_bs = atomic_bs; - - nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs); - } + phys_bs = bs; + atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs); if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { /* NPWG = Namespace Preferred Write Granularity */ @@ -2382,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; - /* - * Validate the max atomic write size fits within the subsystem's - * atomic write capabilities. - */ - if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) { - blk_mq_unfreeze_queue(ns->disk->queue, memflags); - ret = -ENXIO; - goto out; - } - nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) @@ -3215,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->model, id->mn, sizeof(subsys->model)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; + subsys->awupf = le16_to_cpu(id->awupf); /* Versions prior to 1.4 don't necessarily report a valid type */ if (id->cntrltype == NVME_CTRL_DISC || @@ -3552,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret) goto out_free; } + + if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) { + dev_err_ratelimited(ctrl->device, + "inconsistent AWUPF, controller not added (%u/%u).\n", + le16_to_cpu(id->awupf), ctrl->subsys->awupf); + ret = -EINVAL; + goto out_free; + } + memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); @@ -3647,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); - ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; @@ -4036,6 +4029,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) list_add_tail_rcu(&ns->siblings, &head->list); ns->head = head; mutex_unlock(&ctrl->subsys->lock); + +#ifdef CONFIG_NVME_MULTIPATH + cancel_delayed_work(&head->remove_work); +#endif return 0; out_put_ns_head: @@ -4089,6 +4086,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) struct nvme_ns *ns; struct gendisk *disk; int node = ctrl->numa_node; + bool last_path = false; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) @@ -4181,9 +4179,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); - if (list_empty(&ns->head->list)) + if (list_empty(&ns->head->list)) { list_del_init(&ns->head->entry); + /* + * If multipath is not configured, we still create a namespace + * head (nshead), but head->disk is not initialized in that + * case. As a result, only a single reference to nshead is held + * (via kref_init()) when it is created. Therefore, ensure that + * we do not release the reference to nshead twice if head->disk + * is not present. + */ + if (ns->head->disk) + last_path = true; + } mutex_unlock(&ctrl->subsys->lock); + if (last_path) + nvme_put_ns_head(ns->head); nvme_put_ns_head(ns->head); out_cleanup_disk: put_disk(disk); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index e040e467f9fa..3da980dc60d9 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -690,8 +690,8 @@ static void nvme_remove_head(struct nvme_ns_head *head) nvme_cdev_del(&head->cdev, &head->cdev_device); synchronize_srcu(&head->srcu); del_gendisk(head->disk); - nvme_put_ns_head(head); } + nvme_put_ns_head(head); } static void nvme_remove_head_work(struct work_struct *work) @@ -1200,7 +1200,8 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) */ srcu_idx = srcu_read_lock(&head->srcu); - list_for_each_entry_rcu(ns, &head->list, siblings) { + list_for_each_entry_srcu(ns, &head->list, siblings, + srcu_read_lock_held(&head->srcu)) { /* * Ensure that ns path disk node is already added otherwise we * may get invalid kobj name for target @@ -1291,6 +1292,9 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { bool remove = false; + if (!head->disk) + return; + mutex_lock(&head->subsys->lock); /* * We are called when all paths have been removed, and at that point @@ -1311,7 +1315,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) */ if (!try_module_get(THIS_MODULE)) goto out; - queue_delayed_work(nvme_wq, &head->remove_work, + mod_delayed_work(nvme_wq, &head->remove_work, head->delayed_removal_secs * HZ); } else { list_del_init(&head->entry); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a468cdc5b5cb..7df2ea21851f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -410,7 +410,6 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; - u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -443,11 +442,11 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; + u16 awupf; /* 0's based value. */ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; #endif - u32 atomic_bs; }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8ff12e415cb5..320aaa41ec39 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2101,8 +2101,6 @@ static void nvme_map_cmb(struct nvme_dev *dev) if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) == (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) pci_p2pmem_publish(pdev, true); - - nvme_update_attrs(dev); } static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) @@ -3010,6 +3008,8 @@ static void nvme_reset_work(struct work_struct *work) if (result < 0) goto out; + nvme_update_attrs(dev); + result = nvme_setup_io_queues(dev); if (result) goto out; @@ -3343,6 +3343,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result < 0) goto out_disable; + nvme_update_attrs(dev); + result = nvme_setup_io_queues(dev); if (result) goto out_disable; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index df69a9dee71c..51df72f5e89b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -867,6 +867,8 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio) { if (bio != &req->b.inline_bio) bio_put(bio); + else + bio_uninit(bio); } #ifdef CONFIG_NVME_TARGET_TCP_TLS diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c index b0992325dd65..b37052863847 100644 --- a/drivers/pci/controller/pci-host-common.c +++ b/drivers/pci/controller/pci-host-common.c @@ -64,13 +64,13 @@ int pci_host_common_init(struct platform_device *pdev, of_pci_check_probe_only(); + platform_set_drvdata(pdev, bridge); + /* Parse and map our Configuration Space windows */ cfg = gen_pci_init(dev, bridge, ops); if (IS_ERR(cfg)) return PTR_ERR(cfg); - platform_set_drvdata(pdev, bridge); - bridge->sysdata = cfg; bridge->ops = (struct pci_ops *)&ops->pci_ops; bridge->enable_device = ops->enable_device; diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index 77fe73976654..0380d300adca 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -187,6 +187,7 @@ struct apple_pcie { const struct hw_info *hw; unsigned long *bitmap; struct list_head ports; + struct list_head entry; struct completion event; struct irq_fwspec fwspec; u32 nvecs; @@ -205,6 +206,9 @@ struct apple_pcie_port { int idx; }; +static LIST_HEAD(pcie_list); +static DEFINE_MUTEX(pcie_list_lock); + static void rmw_set(u32 set, void __iomem *addr) { writel_relaxed(readl_relaxed(addr) | set, addr); @@ -720,13 +724,45 @@ static int apple_msi_init(struct apple_pcie *pcie) return 0; } +static void apple_pcie_register(struct apple_pcie *pcie) +{ + guard(mutex)(&pcie_list_lock); + + list_add_tail(&pcie->entry, &pcie_list); +} + +static void apple_pcie_unregister(struct apple_pcie *pcie) +{ + guard(mutex)(&pcie_list_lock); + + list_del(&pcie->entry); +} + +static struct apple_pcie *apple_pcie_lookup(struct device *dev) +{ + struct apple_pcie *pcie; + + guard(mutex)(&pcie_list_lock); + + list_for_each_entry(pcie, &pcie_list, entry) { + if (pcie->dev == dev) + return pcie; + } + + return NULL; +} + static struct apple_pcie_port *apple_pcie_get_port(struct pci_dev *pdev) { struct pci_config_window *cfg = pdev->sysdata; - struct apple_pcie *pcie = cfg->priv; + struct apple_pcie *pcie; struct pci_dev *port_pdev; struct apple_pcie_port *port; + pcie = apple_pcie_lookup(cfg->parent); + if (WARN_ON(!pcie)) + return NULL; + /* Find the root port this device is on */ port_pdev = pcie_find_root_port(pdev); @@ -806,10 +842,14 @@ static void apple_pcie_disable_device(struct pci_host_bridge *bridge, struct pci static int apple_pcie_init(struct pci_config_window *cfg) { - struct apple_pcie *pcie = cfg->priv; struct device *dev = cfg->parent; + struct apple_pcie *pcie; int ret; + pcie = apple_pcie_lookup(dev); + if (WARN_ON(!pcie)) + return -ENOENT; + for_each_available_child_of_node_scoped(dev->of_node, of_port) { ret = apple_pcie_setup_port(pcie, of_port); if (ret) { @@ -852,13 +892,18 @@ static int apple_pcie_probe(struct platform_device *pdev) mutex_init(&pcie->lock); INIT_LIST_HEAD(&pcie->ports); - dev_set_drvdata(dev, pcie); ret = apple_msi_init(pcie); if (ret) return ret; - return pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops); + apple_pcie_register(pcie); + + ret = pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops); + if (ret) + apple_pcie_unregister(pcie); + + return ret; } static const struct of_device_id apple_pcie_of_match[] = { diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index 2c5e6446e00e..260b7de2dbd5 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -84,8 +84,6 @@ struct pci_config_window *pci_ecam_create(struct device *dev, goto err_exit_iomap; } - cfg->priv = dev_get_drvdata(dev); - if (ops->init) { err = ops->init(cfg); if (err) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 6ede55a7c5e6..d686488f4111 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -934,10 +934,12 @@ int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag) if (!pdev->msix_enabled) return -ENXIO; - guard(msi_descs_lock)(&pdev->dev); virq = msi_get_virq(&pdev->dev, index); if (!virq) return -ENXIO; + + guard(msi_descs_lock)(&pdev->dev); + /* * This is a horrible hack, but short of implementing a PCI * specific interrupt chip callback and a huge pile of diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index b78e0e417324..af370628e583 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1676,19 +1676,24 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); - if (!root_ops) - goto free_ri; + if (!root_ops) { + kfree(ri); + return NULL; + } ri->cfg = pci_acpi_setup_ecam_mapping(root); - if (!ri->cfg) - goto free_root_ops; + if (!ri->cfg) { + kfree(ri); + kfree(root_ops); + return NULL; + } root_ops->release_info = pci_acpi_generic_release_info; root_ops->prepare_resources = pci_acpi_root_prepare_resources; root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops; bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg); if (!bus) - goto free_cfg; + return NULL; /* If we must preserve the resource configuration, claim now */ host = pci_find_host_bridge(bus); @@ -1705,14 +1710,6 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) pcie_bus_configure_settings(child); return bus; - -free_cfg: - pci_ecam_free(ri->cfg); -free_root_ops: - kfree(root_ops); -free_ri: - kfree(ri); - return NULL; } void pcibios_add_bus(struct pci_bus *bus) diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c index ee5f615a9023..4bd73f038ffb 100644 --- a/drivers/pci/pcie/ptm.c +++ b/drivers/pci/pcie/ptm.c @@ -254,6 +254,7 @@ bool pcie_ptm_enabled(struct pci_dev *dev) } EXPORT_SYMBOL(pcie_ptm_enabled); +#if IS_ENABLED(CONFIG_DEBUG_FS) static ssize_t context_update_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { @@ -552,3 +553,4 @@ void pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs) debugfs_remove_recursive(ptm_debugfs->debugfs); } EXPORT_SYMBOL_GPL(pcie_ptm_destroy_debugfs); +#endif diff --git a/drivers/pinctrl/nuvoton/pinctrl-ma35.c b/drivers/pinctrl/nuvoton/pinctrl-ma35.c index 06ae1fe8b8c5..b51704bafd81 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-ma35.c +++ b/drivers/pinctrl/nuvoton/pinctrl-ma35.c @@ -1074,7 +1074,10 @@ static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct u32 idx = 0; int ret; - for_each_gpiochip_node(dev, child) { + device_for_each_child_node(dev, child) { + if (fwnode_property_present(child, "gpio-controller")) + continue; + npctl->nfunctions++; npctl->ngroups += of_get_child_count(to_of_node(child)); } @@ -1092,7 +1095,10 @@ static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct if (!npctl->groups) return -ENOMEM; - for_each_gpiochip_node(dev, child) { + device_for_each_child_node(dev, child) { + if (fwnode_property_present(child, "gpio-controller")) + continue; + ret = ma35_pinctrl_parse_functions(child, npctl, idx++); if (ret) { fwnode_handle_put(child); diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 5cf3db6d78b7..b3f0d02aeeb3 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -979,6 +979,17 @@ static int amd_gpio_suspend_hibernate_common(struct device *dev, bool is_suspend pin, is_suspend ? "suspend" : "hibernate"); } + /* + * debounce enabled over suspend has shown issues with a GPIO + * being unable to wake the system, as we're only interested in + * the actual wakeup event, clear it. + */ + if (gpio_dev->saved_regs[i] & (DB_CNTRl_MASK << DB_CNTRL_OFF)) { + amd_gpio_set_debounce(gpio_dev, pin, 0); + pm_pr_dbg("Clearing debounce for GPIO #%d during %s.\n", + pin, is_suspend ? "suspend" : "hibernate"); + } + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); } diff --git a/drivers/pinctrl/pinctrl-aw9523.c b/drivers/pinctrl/pinctrl-aw9523.c index 9bf53de20be8..04afb344e9e5 100644 --- a/drivers/pinctrl/pinctrl-aw9523.c +++ b/drivers/pinctrl/pinctrl-aw9523.c @@ -784,7 +784,7 @@ static int aw9523_init_gpiochip(struct aw9523 *awi, unsigned int npins) gc->set_config = gpiochip_generic_config; gc->parent = dev; gc->owner = THIS_MODULE; - gc->can_sleep = false; + gc->can_sleep = true; return 0; } diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 5c4687de1464..f713c80d7f3e 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -1038,6 +1038,25 @@ static bool msm_gpio_needs_dual_edge_parent_workaround(struct irq_data *d, test_bit(d->hwirq, pctrl->skip_wake_irqs); } +static void msm_gpio_irq_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, + unsigned int ngpios) +{ + struct msm_pinctrl *pctrl = gpiochip_get_data(gc); + const struct msm_pingroup *g; + int i; + + bitmap_fill(valid_mask, ngpios); + + for (i = 0; i < ngpios; i++) { + g = &pctrl->soc->groups[i]; + + if (g->intr_detection_width != 1 && + g->intr_detection_width != 2) + clear_bit(i, valid_mask); + } +} + static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1441,6 +1460,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; girq->parents[0] = pctrl->irq; + girq->init_valid_mask = msm_gpio_irq_init_valid_mask; ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 900069eb186e..a1c529f1ff1a 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -715,7 +715,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_llt_events[] = { {101, "GDC_BANK0_HIT_DCL_PARTIAL"}, {102, "GDC_BANK0_EVICT_DCL"}, {103, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA0"}, - {103, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA1"}, + {104, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA1"}, {105, "GDC_BANK0_ARB_STRB"}, {106, "GDC_BANK0_ARB_WAIT"}, {107, "GDC_BANK0_GGA_STRB"}, diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index aae99adb29eb..14aa87b39be5 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -281,7 +281,8 @@ static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo, vring->align = SMP_CACHE_BYTES; vring->index = i; vring->vdev_id = tm_vdev->vdev.id.device; - vring->drop_desc.len = VRING_DROP_DESC_MAX_LEN; + vring->drop_desc.len = cpu_to_virtio32(&tm_vdev->vdev, + VRING_DROP_DESC_MAX_LEN); dev = &tm_vdev->vdev.dev; size = vring_size(vring->num, vring->align); @@ -1287,7 +1288,7 @@ static void mlxbf_tmfifo_get_cfg_mac(u8 *mac) ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac); } -/* Set TmFifo thresolds which is used to trigger interrupts. */ +/* Set TmFifo thresholds which is used to trigger interrupts. */ static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo) { u64 ctl; diff --git a/drivers/platform/mellanox/mlxreg-dpu.c b/drivers/platform/mellanox/mlxreg-dpu.c index 52260106a9f1..39f89c47144a 100644 --- a/drivers/platform/mellanox/mlxreg-dpu.c +++ b/drivers/platform/mellanox/mlxreg-dpu.c @@ -483,7 +483,7 @@ static int mlxreg_dpu_config_init(struct mlxreg_dpu *mlxreg_dpu, void *regmap, mlxreg_dpu->io_data, sizeof(*mlxreg_dpu->io_data)); if (IS_ERR(mlxreg_dpu->io_regs)) { - dev_err(dev, "Failed to create regio for client %s at bus %d at addr 0x%02x\n", + dev_err(dev, "Failed to create region for client %s at bus %d at addr 0x%02x\n", data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); return PTR_ERR(mlxreg_dpu->io_regs); diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c index aee395bb48ae..d1518598dfed 100644 --- a/drivers/platform/mellanox/mlxreg-lc.c +++ b/drivers/platform/mellanox/mlxreg-lc.c @@ -57,9 +57,9 @@ enum mlxreg_lc_state { * @dev: platform device; * @lock: line card lock; * @par_regmap: parent device regmap handle; - * @data: pltaform core data; + * @data: platform core data; * @io_data: register access platform data; - * @led_data: LED platform data ; + * @led_data: LED platform data; * @mux_data: MUX platform data; * @led: LED device; * @io_regs: register access device; @@ -171,7 +171,7 @@ static int mlxreg_lc_chan[] = { 0x4e, 0x4f }; -/* Defaul mux configuration. */ +/* Default mux configuration. */ static struct mlxcpld_mux_plat_data mlxreg_lc_mux_data[] = { { .chan_ids = mlxreg_lc_chan, @@ -181,7 +181,7 @@ static struct mlxcpld_mux_plat_data mlxreg_lc_mux_data[] = { }, }; -/* Defaul mux board info. */ +/* Default mux board info. */ static struct i2c_board_info mlxreg_lc_mux_brdinfo = { I2C_BOARD_INFO("i2c-mux-mlxcpld", 0x32), }; @@ -688,7 +688,7 @@ static int mlxreg_lc_completion_notify(void *handle, struct i2c_adapter *parent, if (regval & mlxreg_lc->data->mask) { mlxreg_lc->state |= MLXREG_LC_SYNCED; mlxreg_lc_state_update_locked(mlxreg_lc, MLXREG_LC_SYNCED, 1); - if (mlxreg_lc->state & ~MLXREG_LC_POWERED) { + if (!(mlxreg_lc->state & MLXREG_LC_POWERED)) { err = mlxreg_lc_power_on_off(mlxreg_lc, 1); if (err) goto mlxreg_lc_regmap_power_on_off_fail; @@ -758,7 +758,7 @@ mlxreg_lc_config_init(struct mlxreg_lc *mlxreg_lc, void *regmap, platform_device_register_resndata(dev, "mlxreg-io", data->hpdev.nr, NULL, 0, mlxreg_lc->io_data, sizeof(*mlxreg_lc->io_data)); if (IS_ERR(mlxreg_lc->io_regs)) { - dev_err(dev, "Failed to create regio for client %s at bus %d at addr 0x%02x\n", + dev_err(dev, "Failed to create region for client %s at bus %d at addr 0x%02x\n", data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); err = PTR_ERR(mlxreg_lc->io_regs); diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c index db31c8bf2255..51504113c17e 100644 --- a/drivers/platform/mellanox/nvsw-sn2201.c +++ b/drivers/platform/mellanox/nvsw-sn2201.c @@ -1181,7 +1181,7 @@ static int nvsw_sn2201_i2c_completion_notify(void *handle, int id) if (!nvsw_sn2201->main_mux_devs->adapter) { err = -ENODEV; dev_err(nvsw_sn2201->dev, "Failed to get adapter for bus %d\n", - nvsw_sn2201->cpld_devs->nr); + nvsw_sn2201->main_mux_devs->nr); goto i2c_get_adapter_main_fail; } diff --git a/drivers/platform/x86/amd/amd_isp4.c b/drivers/platform/x86/amd/amd_isp4.c index 0cc01441bcbb..0d494899502c 100644 --- a/drivers/platform/x86/amd/amd_isp4.c +++ b/drivers/platform/x86/amd/amd_isp4.c @@ -11,6 +11,7 @@ #include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/property.h> +#include <linux/soc/amd/isp4_misc.h> #include <linux/string.h> #include <linux/types.h> #include <linux/units.h> @@ -20,6 +21,9 @@ #define AMDISP_OV05C10_REMOTE_EP_NAME "ov05c10_isp_4_1_1" #define AMD_ISP_PLAT_DRV_NAME "amd-isp4" +static const struct software_node isp4_mipi1_endpoint_node; +static const struct software_node ov05c10_endpoint_node; + /* * AMD ISP platform info definition to initialize sensor * specific platform configuration to prepare the amdisp @@ -42,55 +46,116 @@ struct amdisp_platform { struct mutex lock; /* protects i2c client creation */ }; -/* Top-level OV05C10 camera node property table */ +/* Root AMD CAMERA SWNODE */ + +/* Root amd camera node definition */ +static const struct software_node amd_camera_node = { + .name = "amd_camera", +}; + +/* ISP4 SWNODE */ + +/* ISP4 OV05C10 camera node definition */ +static const struct software_node isp4_node = { + .name = "isp4", + .parent = &amd_camera_node, +}; + +/* + * ISP4 Ports node definition. No properties defined for + * ports node. + */ +static const struct software_node isp4_ports = { + .name = "ports", + .parent = &isp4_node, +}; + +/* + * ISP4 Port node definition. No properties defined for + * port node. + */ +static const struct software_node isp4_port_node = { + .name = "port@0", + .parent = &isp4_ports, +}; + +/* + * ISP4 MIPI1 remote endpoint points to OV05C10 endpoint + * node. + */ +static const struct software_node_ref_args isp4_refs[] = { + SOFTWARE_NODE_REFERENCE(&ov05c10_endpoint_node), +}; + +/* ISP4 MIPI1 endpoint node properties table */ +static const struct property_entry isp4_mipi1_endpoint_props[] = { + PROPERTY_ENTRY_REF_ARRAY("remote-endpoint", isp4_refs), + { } +}; + +/* ISP4 MIPI1 endpoint node definition */ +static const struct software_node isp4_mipi1_endpoint_node = { + .name = "endpoint", + .parent = &isp4_port_node, + .properties = isp4_mipi1_endpoint_props, +}; + +/* I2C1 SWNODE */ + +/* I2C1 camera node property table */ +static const struct property_entry i2c1_camera_props[] = { + PROPERTY_ENTRY_U32("clock-frequency", 1 * HZ_PER_MHZ), + { } +}; + +/* I2C1 camera node definition */ +static const struct software_node i2c1_node = { + .name = "i2c1", + .parent = &amd_camera_node, + .properties = i2c1_camera_props, +}; + +/* I2C1 camera node property table */ static const struct property_entry ov05c10_camera_props[] = { PROPERTY_ENTRY_U32("clock-frequency", 24 * HZ_PER_MHZ), { } }; -/* Root AMD ISP OV05C10 camera node definition */ -static const struct software_node camera_node = { +/* OV05C10 camera node definition */ +static const struct software_node ov05c10_camera_node = { .name = AMDISP_OV05C10_HID, + .parent = &i2c1_node, .properties = ov05c10_camera_props, }; /* - * AMD ISP OV05C10 Ports node definition. No properties defined for + * OV05C10 Ports node definition. No properties defined for * ports node for OV05C10. */ -static const struct software_node ports = { +static const struct software_node ov05c10_ports = { .name = "ports", - .parent = &camera_node, -}; - -/* - * AMD ISP OV05C10 Port node definition. No properties defined for - * port node for OV05C10. - */ -static const struct software_node port_node = { - .name = "port@", - .parent = &ports, + .parent = &ov05c10_camera_node, }; /* - * Remote endpoint AMD ISP node definition. No properties defined for - * remote endpoint node for OV05C10. + * OV05C10 Port node definition. */ -static const struct software_node remote_ep_isp_node = { - .name = AMDISP_OV05C10_REMOTE_EP_NAME, +static const struct software_node ov05c10_port_node = { + .name = "port@0", + .parent = &ov05c10_ports, }; /* - * Remote endpoint reference for isp node included in the - * OV05C10 endpoint. + * OV05C10 remote endpoint points to ISP4 MIPI1 endpoint + * node. */ static const struct software_node_ref_args ov05c10_refs[] = { - SOFTWARE_NODE_REFERENCE(&remote_ep_isp_node), + SOFTWARE_NODE_REFERENCE(&isp4_mipi1_endpoint_node), }; /* OV05C10 supports one single link frequency */ static const u64 ov05c10_link_freqs[] = { - 925 * HZ_PER_MHZ, + 900 * HZ_PER_MHZ, }; /* OV05C10 supports only 2-lane configuration */ @@ -110,27 +175,64 @@ static const struct property_entry ov05c10_endpoint_props[] = { { } }; -/* AMD ISP endpoint node definition */ -static const struct software_node endpoint_node = { +/* OV05C10 endpoint node definition */ +static const struct software_node ov05c10_endpoint_node = { .name = "endpoint", - .parent = &port_node, + .parent = &ov05c10_port_node, .properties = ov05c10_endpoint_props, }; /* - * AMD ISP swnode graph uses 5 nodes and also its relationship is - * fixed to align with the structure that v4l2 expects for successful - * endpoint fwnode parsing. + * AMD Camera swnode graph uses 10 nodes and also its relationship is + * fixed to align with the structure that v4l2 and i2c frameworks expects + * for successful parsing of fwnodes and its properties with standard names. * * It is only the node property_entries that will vary for each platform * supporting different sensor modules. + * + * AMD ISP4 SWNODE GRAPH Structure + * + * amd_camera { + * isp4 { + * ports { + * port@0 { + * isp4_mipi1_ep: endpoint { + * remote-endpoint = &OMNI5C10_ep; + * }; + * }; + * }; + * }; + * + * i2c1 { + * clock-frequency = 1 MHz; + * OMNI5C10 { + * clock-frequency = 24MHz; + * ports { + * port@0 { + * OMNI5C10_ep: endpoint { + * bus-type = 4; + * data-lanes = <1 2>; + * link-frequencies = 900MHz; + * remote-endpoint = &isp4_mipi1; + * }; + * }; + * }; + * }; + * }; + * }; + * */ -static const struct software_node *ov05c10_nodes[] = { - &camera_node, - &ports, - &port_node, - &endpoint_node, - &remote_ep_isp_node, +static const struct software_node *amd_isp4_nodes[] = { + &amd_camera_node, + &isp4_node, + &isp4_ports, + &isp4_port_node, + &isp4_mipi1_endpoint_node, + &i2c1_node, + &ov05c10_camera_node, + &ov05c10_ports, + &ov05c10_port_node, + &ov05c10_endpoint_node, NULL }; @@ -140,7 +242,7 @@ static const struct amdisp_platform_info ov05c10_platform_config = { .dev_name = "ov05c10", I2C_BOARD_INFO("ov05c10", AMDISP_OV05C10_I2C_ADDR), }, - .swnodes = ov05c10_nodes, + .swnodes = amd_isp4_nodes, }; static const struct acpi_device_id amdisp_sensor_ids[] = { @@ -151,7 +253,7 @@ MODULE_DEVICE_TABLE(acpi, amdisp_sensor_ids); static inline bool is_isp_i2c_adapter(struct i2c_adapter *adap) { - return !strcmp(adap->owner->name, "i2c_designware_amdisp"); + return !strcmp(adap->name, AMDISP_I2C_ADAP_NAME); } static void instantiate_isp_i2c_client(struct amdisp_platform *isp4_platform, @@ -232,7 +334,8 @@ static struct amdisp_platform *prepare_amdisp_platform(struct device *dev, if (ret) return ERR_PTR(ret); - isp4_platform->board_info.swnode = src->swnodes[0]; + /* initialize ov05c10_camera_node */ + isp4_platform->board_info.swnode = src->swnodes[6]; return isp4_platform; } @@ -257,6 +360,7 @@ static int amd_isp_probe(struct platform_device *pdev) { const struct amdisp_platform_info *pinfo; struct amdisp_platform *isp4_platform; + struct acpi_device *adev; int ret; pinfo = device_get_match_data(&pdev->dev); @@ -274,6 +378,10 @@ static int amd_isp_probe(struct platform_device *pdev) if (ret) goto error_unregister_sw_node; + adev = ACPI_COMPANION(&pdev->dev); + /* initialize root amd_camera_node */ + adev->driver_data = (void *)pinfo->swnodes[0]; + /* check if adapter is already registered and create i2c client instance */ i2c_for_each_dev(isp4_platform, try_to_instantiate_i2c_client); diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index f292111bd065..131f10b68308 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -11,7 +11,7 @@ #include <linux/dmi.h> #include <linux/io.h> #include <linux/ioport.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include "pmc.h" diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 3f8b2a324efd..f84c3d03c1de 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -530,6 +530,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_zenbook_duo_kbd, }, + { + .callback = dmi_matched, + .ident = "ASUS Zenbook Duo UX8406CA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "UX8406CA"), + }, + .driver_data = &quirk_asus_zenbook_duo_kbd, + }, {}, }; diff --git a/drivers/platform/x86/dell/dell-lis3lv02d.c b/drivers/platform/x86/dell/dell-lis3lv02d.c index efe26d667973..0791118dd6b7 100644 --- a/drivers/platform/x86/dell/dell-lis3lv02d.c +++ b/drivers/platform/x86/dell/dell-lis3lv02d.c @@ -45,6 +45,7 @@ static const struct dmi_system_id lis3lv02d_devices[] __initconst = { * Additional individual entries were added after verification. */ DELL_LIS3LV02D_DMI_ENTRY("Latitude 5480", 0x29), + DELL_LIS3LV02D_DMI_ENTRY("Latitude 5500", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Latitude E6330", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Latitude E6430", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Precision 3540", 0x29), diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h index 3ad33a094588..817ee7ba07ca 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h +++ b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h @@ -89,6 +89,11 @@ extern struct wmi_sysman_priv wmi_priv; enum { ENUM, INT, STR, PO }; +#define ENUM_MIN_ELEMENTS 8 +#define INT_MIN_ELEMENTS 9 +#define STR_MIN_ELEMENTS 8 +#define PO_MIN_ELEMENTS 4 + enum { ATTR_NAME, DISPL_NAME_LANG_CODE, diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c index 8cc212c85266..fc2f58b4cbc6 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c @@ -23,9 +23,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_ENUMERATION_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < ENUM_MIN_ELEMENTS || + obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%s\n", obj->package.elements[CURRENT_VAL].string.pointer); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c index 951e75b538fa..735248064239 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c @@ -25,9 +25,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_INTEGER_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_INTEGER) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < INT_MIN_ELEMENTS || + obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_INTEGER) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%lld\n", obj->package.elements[CURRENT_VAL].integer.value); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c index d8f1bf5e58a0..3167e06d416e 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c @@ -26,9 +26,10 @@ static ssize_t is_enabled_show(struct kobject *kobj, struct kobj_attribute *attr obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_PASSOBJ_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[IS_PASS_SET].type != ACPI_TYPE_INTEGER) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < PO_MIN_ELEMENTS || + obj->package.elements[IS_PASS_SET].type != ACPI_TYPE_INTEGER) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%lld\n", obj->package.elements[IS_PASS_SET].integer.value); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c index c392f0ecf8b5..0d2c74f8d1aa 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c @@ -25,9 +25,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_STRING_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < STR_MIN_ELEMENTS || + obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%s\n", obj->package.elements[CURRENT_VAL].string.pointer); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c index d00389b860e4..f5402b714657 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c @@ -407,10 +407,10 @@ static int init_bios_attributes(int attr_type, const char *guid) return retval; switch (attr_type) { - case ENUM: min_elements = 8; break; - case INT: min_elements = 9; break; - case STR: min_elements = 8; break; - case PO: min_elements = 4; break; + case ENUM: min_elements = ENUM_MIN_ELEMENTS; break; + case INT: min_elements = INT_MIN_ELEMENTS; break; + case STR: min_elements = STR_MIN_ELEMENTS; break; + case PO: min_elements = PO_MIN_ELEMENTS; break; default: pr_err("Error: Unknown attr_type: %d\n", attr_type); return -EINVAL; @@ -597,7 +597,7 @@ err_release_attributes_data: release_attributes_data(); err_destroy_classdev: - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(wmi_priv.class_dev); err_exit_bios_attr_pass_interface: exit_bios_attr_pass_interface(); @@ -611,7 +611,7 @@ err_exit_bios_attr_set_interface: static void __exit sysman_exit(void) { release_attributes_data(); - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(wmi_priv.class_dev); exit_bios_attr_set_interface(); exit_bios_attr_pass_interface(); } diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 13237890fc92..5bfa7159f5bc 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -1034,7 +1034,7 @@ err_release_attributes_data: release_attributes_data(); err_destroy_classdev: - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(bioscfg_drv.class_dev); err_unregister_class: hp_exit_attr_set_interface(); @@ -1045,7 +1045,7 @@ err_unregister_class: static void __exit hp_exit(void) { release_attributes_data(); - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(bioscfg_drv.class_dev); hp_exit_attr_set_interface(); } diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 0b5e43444ed6..f25a427cccda 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -54,6 +54,7 @@ static const struct acpi_device_id intel_hid_ids[] = { { "INTC107B" }, { "INTC10CB" }, { "INTC10CC" }, + { "INTC10F1" }, { } }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); diff --git a/drivers/platform/x86/portwell-ec.c b/drivers/platform/x86/portwell-ec.c index 8b788822237b..3e019c51913e 100644 --- a/drivers/platform/x86/portwell-ec.c +++ b/drivers/platform/x86/portwell-ec.c @@ -236,6 +236,7 @@ static int pwec_probe(struct platform_device *pdev) return ret; } + ec_wdt_dev.parent = &pdev->dev; ret = devm_watchdog_register_device(&pdev->dev, &ec_wdt_dev); if (ret < 0) { dev_err(&pdev->dev, "failed to register Portwell EC Watchdog\n"); diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 00b1e7c79a3d..b73b84fdb15e 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -973,6 +973,7 @@ static const struct attribute_group auth_attr_group = { .is_visible = auth_attr_is_visible, .attrs = auth_attrs, }; +__ATTRIBUTE_GROUPS(auth_attr); /* ---- Attributes sysfs --------------------------------------------------------- */ static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -1188,6 +1189,7 @@ static const struct attribute_group tlmi_attr_group = { .is_visible = attr_is_visible, .attrs = tlmi_attrs, }; +__ATTRIBUTE_GROUPS(tlmi_attr); static void tlmi_attr_setting_release(struct kobject *kobj) { @@ -1207,11 +1209,13 @@ static void tlmi_pwd_setting_release(struct kobject *kobj) static const struct kobj_type tlmi_attr_setting_ktype = { .release = &tlmi_attr_setting_release, .sysfs_ops = &kobj_sysfs_ops, + .default_groups = tlmi_attr_groups, }; static const struct kobj_type tlmi_pwd_setting_ktype = { .release = &tlmi_pwd_setting_release, .sysfs_ops = &kobj_sysfs_ops, + .default_groups = auth_attr_groups, }; static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -1380,21 +1384,18 @@ static struct kobj_attribute debug_cmd = __ATTR_WO(debug_cmd); /* ---- Initialisation --------------------------------------------------------- */ static void tlmi_release_attr(void) { - int i; + struct kobject *pos, *n; /* Attribute structures */ - for (i = 0; i < TLMI_SETTINGS_COUNT; i++) { - if (tlmi_priv.setting[i]) { - sysfs_remove_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group); - kobject_put(&tlmi_priv.setting[i]->kobj); - } - } sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr); sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &save_settings.attr); if (tlmi_priv.can_debug_cmd && debug_support) sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &debug_cmd.attr); + list_for_each_entry_safe(pos, n, &tlmi_priv.attribute_kset->list, entry) + kobject_put(pos); + kset_unregister(tlmi_priv.attribute_kset); /* Free up any saved signatures */ @@ -1402,19 +1403,8 @@ static void tlmi_release_attr(void) kfree(tlmi_priv.pwd_admin->save_signature); /* Authentication structures */ - sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_admin->kobj); - sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_power->kobj); - - if (tlmi_priv.opcode_support) { - sysfs_remove_group(&tlmi_priv.pwd_system->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_system->kobj); - sysfs_remove_group(&tlmi_priv.pwd_hdd->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_hdd->kobj); - sysfs_remove_group(&tlmi_priv.pwd_nvme->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_nvme->kobj); - } + list_for_each_entry_safe(pos, n, &tlmi_priv.authentication_kset->list, entry) + kobject_put(pos); kset_unregister(tlmi_priv.authentication_kset); } @@ -1455,6 +1445,14 @@ static int tlmi_sysfs_init(void) goto fail_device_created; } + tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL, + &tlmi_priv.class_dev->kobj); + if (!tlmi_priv.authentication_kset) { + kset_unregister(tlmi_priv.attribute_kset); + ret = -ENOMEM; + goto fail_device_created; + } + for (i = 0; i < TLMI_SETTINGS_COUNT; i++) { /* Check if index is a valid setting - skip if it isn't */ if (!tlmi_priv.setting[i]) @@ -1471,12 +1469,8 @@ static int tlmi_sysfs_init(void) /* Build attribute */ tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset; - ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL, - "%s", tlmi_priv.setting[i]->display_name); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group); + ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype, + NULL, "%s", tlmi_priv.setting[i]->display_name); if (ret) goto fail_create_attr; } @@ -1496,55 +1490,34 @@ static int tlmi_sysfs_init(void) } /* Create authentication entries */ - tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL, - &tlmi_priv.class_dev->kobj); - if (!tlmi_priv.authentication_kset) { - ret = -ENOMEM; - goto fail_create_attr; - } tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "Admin"); if (ret) goto fail_create_attr; tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "Power-on"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "Power-on"); if (ret) goto fail_create_attr; if (tlmi_priv.opcode_support) { tlmi_priv.pwd_system->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_system->kobj, NULL, "%s", "System"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_system->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_system->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "System"); if (ret) goto fail_create_attr; tlmi_priv.pwd_hdd->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_hdd->kobj, NULL, "%s", "HDD"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_hdd->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_hdd->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "HDD"); if (ret) goto fail_create_attr; tlmi_priv.pwd_nvme->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_nvme->kobj, NULL, "%s", "NVMe"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_nvme->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_nvme->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "NVMe"); if (ret) goto fail_create_attr; } @@ -1554,7 +1527,7 @@ static int tlmi_sysfs_init(void) fail_create_attr: tlmi_release_attr(); fail_device_created: - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(tlmi_priv.class_dev); fail_class_created: return ret; } @@ -1577,8 +1550,6 @@ static struct tlmi_pwd_setting *tlmi_create_auth(const char *pwd_type, new_pwd->maxlen = tlmi_priv.pwdcfg.core.max_length; new_pwd->index = 0; - kobject_init(&new_pwd->kobj, &tlmi_pwd_setting_ktype); - return new_pwd; } @@ -1683,7 +1654,6 @@ static int tlmi_analyze(struct wmi_device *wdev) if (setting->possible_values) strreplace(setting->possible_values, ',', ';'); - kobject_init(&setting->kobj, &tlmi_attr_setting_ktype); tlmi_priv.setting[i] = setting; kfree(item); } @@ -1781,7 +1751,7 @@ fail_clear_attr: static void tlmi_remove(struct wmi_device *wdev) { tlmi_release_attr(); - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(tlmi_priv.class_dev); } static int tlmi_probe(struct wmi_device *wdev, const void *context) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e7350c9fa3aa..b59b4d90b0c7 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -3295,6 +3295,7 @@ static const struct key_entry keymap_lenovo[] __initconst = { */ { KE_KEY, 0x131d, { KEY_VENDOR } }, /* System debug info, similar to old ThinkPad key */ { KE_KEY, 0x1320, { KEY_LINK_PHONE } }, + { KE_KEY, 0x1402, { KEY_LINK_PHONE } }, { KE_KEY, TP_HKEY_EV_TRACK_DOUBLETAP /* 0x8036 */, { KEY_PROG4 } }, { KE_END } }; diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index e46453750d5f..03aecf8bb7f8 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -177,16 +177,22 @@ static int wmi_device_enable(struct wmi_device *wdev, bool enable) acpi_handle handle; acpi_status status; - if (!(wblock->gblock.flags & ACPI_WMI_EXPENSIVE)) - return 0; - if (wblock->dev.dev.type == &wmi_type_method) return 0; - if (wblock->dev.dev.type == &wmi_type_event) + if (wblock->dev.dev.type == &wmi_type_event) { + /* + * Windows always enables/disables WMI events, even when they are + * not marked as being expensive. We follow this behavior for + * compatibility reasons. + */ snprintf(method, sizeof(method), "WE%02X", wblock->gblock.notify_id); - else + } else { + if (!(wblock->gblock.flags & ACPI_WMI_EXPENSIVE)) + return 0; + get_acpi_method_name(wblock, 'C', method); + } /* * Not all WMI devices marked as expensive actually implement the diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index e3be40adc0d7..faa0b6bc5b53 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -341,12 +341,28 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode) { struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); struct rapl_defaults *defaults = get_defaults(rd->rp); + u64 val; int ret; cpus_read_lock(); ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode); - if (!ret && defaults->set_floor_freq) + if (ret) + goto end; + + ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, false, &val); + if (ret) + goto end; + + if (mode != val) { + pr_debug("%s cannot be %s\n", power_zone->name, + str_enabled_disabled(mode)); + goto end; + } + + if (defaults->set_floor_freq) defaults->set_floor_freq(rd, mode); + +end: cpus_read_unlock(); return ret; diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 4d842c692194..edf776b8ad53 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -596,7 +596,7 @@ static bool pwm_state_valid(const struct pwm_state *state) * and supposed to be ignored. So also ignore any strange values and * consider the state ok. */ - if (state->enabled) + if (!state->enabled) return true; if (!state->period) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 7eaab5831499..33d3554b9197 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -130,8 +130,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, return ret; clk_rate = clk_get_rate(pc->clk_pwms[pwm->hwpwm]); - if (!clk_rate) - return -EINVAL; + if (!clk_rate) { + ret = -EINVAL; + goto out; + } /* Make sure we use the bus clock and not the 26MHz clock */ if (pc->soc->has_ck_26m_sel) @@ -150,9 +152,9 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, } if (clkdiv > PWM_CLK_DIV_MAX) { - pwm_mediatek_clk_disable(chip, pwm); dev_err(pwmchip_parent(chip), "period of %d ns not supported\n", period_ns); - return -EINVAL; + ret = -EINVAL; + goto out; } if (pc->soc->pwm45_fixup && pwm->hwpwm > 2) { @@ -169,9 +171,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, pwm_mediatek_writel(pc, pwm->hwpwm, reg_width, cnt_period); pwm_mediatek_writel(pc, pwm->hwpwm, reg_thres, cnt_duty); +out: pwm_mediatek_clk_disable(chip, pwm); - return 0; + return ret; } static int pwm_mediatek_enable(struct pwm_chip *chip, struct pwm_device *pwm) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7a248dc8d2e2..cbd6d53ebfb5 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5639,6 +5639,7 @@ static void regulator_remove_coupling(struct regulator_dev *rdev) ERR_PTR(err)); } + rdev->coupling_desc.n_coupled = 0; kfree(rdev->coupling_desc.coupled_rdevs); rdev->coupling_desc.coupled_rdevs = NULL; } diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c index 75bd53445ba7..6351ceefdb3e 100644 --- a/drivers/regulator/gpio-regulator.c +++ b/drivers/regulator/gpio-regulator.c @@ -260,8 +260,10 @@ static int gpio_regulator_probe(struct platform_device *pdev) return -ENOMEM; } - drvdata->gpiods = devm_kzalloc(dev, sizeof(struct gpio_desc *), - GFP_KERNEL); + drvdata->gpiods = devm_kcalloc(dev, config->ngpios, + sizeof(struct gpio_desc *), GFP_KERNEL); + if (!drvdata->gpiods) + return -ENOMEM; if (config->input_supply) { drvdata->desc.supply_name = devm_kstrdup(&pdev->dev, @@ -274,8 +276,6 @@ static int gpio_regulator_probe(struct platform_device *pdev) } } - if (!drvdata->gpiods) - return -ENOMEM; for (i = 0; i < config->ngpios; i++) { drvdata->gpiods[i] = devm_gpiod_get_index(dev, NULL, diff --git a/drivers/regulator/mp886x.c b/drivers/regulator/mp886x.c index 48dcee5287f3..9ad16b04c913 100644 --- a/drivers/regulator/mp886x.c +++ b/drivers/regulator/mp886x.c @@ -348,7 +348,8 @@ static const struct of_device_id mp886x_dt_ids[] = { MODULE_DEVICE_TABLE(of, mp886x_dt_ids); static const struct i2c_device_id mp886x_id[] = { - { "mp886x", (kernel_ulong_t)&mp8869_ci }, + { "mp8867", (kernel_ulong_t)&mp8867_ci }, + { "mp8869", (kernel_ulong_t)&mp8869_ci }, { }, }; MODULE_DEVICE_TABLE(i2c, mp886x_id); diff --git a/drivers/regulator/sy8824x.c b/drivers/regulator/sy8824x.c index c05b67e26ac8..5bec84db25f1 100644 --- a/drivers/regulator/sy8824x.c +++ b/drivers/regulator/sy8824x.c @@ -213,7 +213,10 @@ static const struct of_device_id sy8824_dt_ids[] = { MODULE_DEVICE_TABLE(of, sy8824_dt_ids); static const struct i2c_device_id sy8824_id[] = { - { "sy8824", (kernel_ulong_t)&sy8824c_cfg }, + { "sy8824c", (kernel_ulong_t)&sy8824c_cfg }, + { "sy8824e", (kernel_ulong_t)&sy8824e_cfg }, + { "sy20276", (kernel_ulong_t)&sy20276_cfg }, + { "sy20278", (kernel_ulong_t)&sy20278_cfg }, { } }; MODULE_DEVICE_TABLE(i2c, sy8824_id); diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c index b16b300d7f45..5e67fdc88f49 100644 --- a/drivers/regulator/tps65219-regulator.c +++ b/drivers/regulator/tps65219-regulator.c @@ -436,46 +436,46 @@ static int tps65219_regulator_probe(struct platform_device *pdev) pmic->rdesc[i].name); } - irq_data = devm_kmalloc(tps->dev, pmic->common_irq_size, GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - for (i = 0; i < pmic->common_irq_size; ++i) { irq_type = &pmic->common_irq_types[i]; irq = platform_get_irq_byname(pdev, irq_type->irq_name); if (irq < 0) return -EINVAL; - irq_data[i].dev = tps->dev; - irq_data[i].type = irq_type; + irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; + + irq_data->dev = tps->dev; + irq_data->type = irq_type; error = devm_request_threaded_irq(tps->dev, irq, NULL, tps65219_regulator_irq_handler, IRQF_ONESHOT, irq_type->irq_name, - &irq_data[i]); + irq_data); if (error) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to request %s IRQ %d: %d\n", irq_type->irq_name, irq, error); } - irq_data = devm_kmalloc(tps->dev, pmic->dev_irq_size, GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - for (i = 0; i < pmic->dev_irq_size; ++i) { irq_type = &pmic->irq_types[i]; irq = platform_get_irq_byname(pdev, irq_type->irq_name); if (irq < 0) return -EINVAL; - irq_data[i].dev = tps->dev; - irq_data[i].type = irq_type; + irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; + + irq_data->dev = tps->dev; + irq_data->type = irq_type; error = devm_request_threaded_irq(tps->dev, irq, NULL, tps65219_regulator_irq_handler, IRQF_ONESHOT, irq_type->irq_name, - &irq_data[i]); + irq_data); if (error) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to request %s IRQ %d: %d\n", diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 8172869bd3d7..0743c6acd6e2 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -692,8 +692,12 @@ static irqreturn_t cmos_interrupt(int irq, void *p) { u8 irqstat; u8 rtc_control; + unsigned long flags; - spin_lock(&rtc_lock); + /* We cannot use spin_lock() here, as cmos_interrupt() is also called + * in a non-irq context. + */ + spin_lock_irqsave(&rtc_lock, flags); /* When the HPET interrupt handler calls us, the interrupt * status is passed as arg1 instead of the irq number. But @@ -727,7 +731,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p) hpet_mask_rtc_irq_bit(RTC_AIE); CMOS_READ(RTC_INTR_FLAGS); } - spin_unlock(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); if (is_intr(irqstat)) { rtc_update_irq(p, 1, irqstat); @@ -1295,9 +1299,7 @@ static void cmos_check_wkalrm(struct device *dev) * ACK the rtc irq here */ if (t_now >= cmos->alarm_expires && cmos_use_acpi_alarm()) { - local_irq_disable(); cmos_interrupt(0, (void *)cmos->rtc); - local_irq_enable(); return; } diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 31c7dca8f469..2e1ac0c42e93 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -1538,7 +1538,12 @@ static int pcf2127_spi_probe(struct spi_device *spi) variant = &pcf21xx_cfg[type]; } - config.max_register = variant->max_register, + if (variant->type == PCF2131) { + config.read_flag_mask = 0x0; + config.write_flag_mask = 0x0; + } + + config.max_register = variant->max_register; regmap = devm_regmap_init_spi(spi, &config); if (IS_ERR(regmap)) { diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index db5c9b641277..a7220b4d0e8d 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/i2c.h> #include <linux/bcd.h> +#include <linux/reboot.h> #include <linux/regmap.h> #include <linux/rtc.h> #include <linux/platform_device.h> @@ -53,6 +54,7 @@ enum { * Device | Write time | Read time | Write alarm * ================================================= * S5M8767 | UDR + TIME | | UDR + * S2MPG10 | WUDR | RUDR | AUDR * S2MPS11/14 | WUDR | RUDR | WUDR + RUDR * S2MPS13 | WUDR | RUDR | WUDR + AUDR * S2MPS15 | WUDR | RUDR | AUDR @@ -99,6 +101,20 @@ static const struct s5m_rtc_reg_config s5m_rtc_regs = { .write_alarm_udr_mask = S5M_RTC_UDR_MASK, }; +/* Register map for S2MPG10 */ +static const struct s5m_rtc_reg_config s2mpg10_rtc_regs = { + .regs_count = 7, + .time = S2MPG10_RTC_SEC, + .ctrl = S2MPG10_RTC_CTRL, + .alarm0 = S2MPG10_RTC_A0SEC, + .alarm1 = S2MPG10_RTC_A1SEC, + .udr_update = S2MPG10_RTC_UPDATE, + .autoclear_udr_mask = S2MPS15_RTC_WUDR_MASK | S2MPS15_RTC_AUDR_MASK, + .read_time_udr_mask = S2MPS_RTC_RUDR_MASK, + .write_time_udr_mask = S2MPS15_RTC_WUDR_MASK, + .write_alarm_udr_mask = S2MPS15_RTC_AUDR_MASK, +}; + /* Register map for S2MPS13 */ static const struct s5m_rtc_reg_config s2mps13_rtc_regs = { .regs_count = 7, @@ -227,8 +243,8 @@ static int s5m8767_wait_for_udr_update(struct s5m_rtc_info *info) return ret; } -static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info, - struct rtc_wkalrm *alarm) +static int s5m_check_pending_alarm_interrupt(struct s5m_rtc_info *info, + struct rtc_wkalrm *alarm) { int ret; unsigned int val; @@ -238,6 +254,7 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info, ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val); val &= S5M_ALARM0_STATUS; break; + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -262,17 +279,9 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info, static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info) { int ret; - unsigned int data; - ret = regmap_read(info->regmap, info->regs->udr_update, &data); - if (ret < 0) { - dev_err(info->dev, "failed to read update reg(%d)\n", ret); - return ret; - } - - data |= info->regs->write_time_udr_mask; - - ret = regmap_write(info->regmap, info->regs->udr_update, data); + ret = regmap_set_bits(info->regmap, info->regs->udr_update, + info->regs->write_time_udr_mask); if (ret < 0) { dev_err(info->dev, "failed to write update reg(%d)\n", ret); return ret; @@ -286,20 +295,14 @@ static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info) static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info) { int ret; - unsigned int data; + unsigned int udr_mask; - ret = regmap_read(info->regmap, info->regs->udr_update, &data); - if (ret < 0) { - dev_err(info->dev, "%s: fail to read update reg(%d)\n", - __func__, ret); - return ret; - } - - data |= info->regs->write_alarm_udr_mask; + udr_mask = info->regs->write_alarm_udr_mask; switch (info->device_type) { case S5M8767X: - data &= ~S5M_RTC_TIME_EN_MASK; + udr_mask |= S5M_RTC_TIME_EN_MASK; break; + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -309,7 +312,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info) return -EINVAL; } - ret = regmap_write(info->regmap, info->regs->udr_update, data); + ret = regmap_update_bits(info->regmap, info->regs->udr_update, + udr_mask, info->regs->write_alarm_udr_mask); if (ret < 0) { dev_err(info->dev, "%s: fail to write update reg(%d)\n", __func__, ret); @@ -320,8 +324,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info) /* On S2MPS13 the AUDR is not auto-cleared */ if (info->device_type == S2MPS13X) - regmap_update_bits(info->regmap, info->regs->udr_update, - S2MPS13_RTC_AUDR_MASK, 0); + regmap_clear_bits(info->regmap, info->regs->udr_update, + S2MPS13_RTC_AUDR_MASK); return ret; } @@ -333,10 +337,8 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm) int ret; if (info->regs->read_time_udr_mask) { - ret = regmap_update_bits(info->regmap, - info->regs->udr_update, - info->regs->read_time_udr_mask, - info->regs->read_time_udr_mask); + ret = regmap_set_bits(info->regmap, info->regs->udr_update, + info->regs->read_time_udr_mask); if (ret) { dev_err(dev, "Failed to prepare registers for time reading: %d\n", @@ -351,6 +353,7 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -374,6 +377,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -411,6 +415,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -430,7 +435,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) dev_dbg(dev, "%s: %ptR(%d)\n", __func__, &alrm->time, alrm->time.tm_wday); - return s5m_check_peding_alarm_interrupt(info, alrm); + return s5m_check_pending_alarm_interrupt(info, alrm); } static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info) @@ -449,6 +454,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -487,6 +493,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -524,6 +531,7 @@ static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -604,6 +612,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info) ret = regmap_raw_write(info->regmap, S5M_ALARM0_CONF, data, 2); break; + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -634,59 +643,92 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info) return ret; } +static int s5m_rtc_restart_s2mpg10(struct sys_off_data *data) +{ + struct s5m_rtc_info *info = data->cb_data; + int ret; + + if (data->mode != REBOOT_COLD && data->mode != REBOOT_HARD) + return NOTIFY_DONE; + + /* + * Arm watchdog with maximum timeout (2 seconds), and perform full reset + * on expiry. + */ + ret = regmap_set_bits(info->regmap, S2MPG10_RTC_WTSR, + (S2MPG10_WTSR_COLDTIMER | S2MPG10_WTSR_COLDRST + | S2MPG10_WTSR_WTSRT | S2MPG10_WTSR_WTSR_EN)); + + return ret ? NOTIFY_BAD : NOTIFY_DONE; +} + static int s5m_rtc_probe(struct platform_device *pdev) { struct sec_pmic_dev *s5m87xx = dev_get_drvdata(pdev->dev.parent); + enum sec_device_type device_type = + platform_get_device_id(pdev)->driver_data; struct s5m_rtc_info *info; - struct i2c_client *i2c; - const struct regmap_config *regmap_cfg; int ret, alarm_irq; info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - switch (platform_get_device_id(pdev)->driver_data) { - case S2MPS15X: - regmap_cfg = &s2mps14_rtc_regmap_config; - info->regs = &s2mps15_rtc_regs; - alarm_irq = S2MPS14_IRQ_RTCA0; - break; - case S2MPS14X: - regmap_cfg = &s2mps14_rtc_regmap_config; - info->regs = &s2mps14_rtc_regs; - alarm_irq = S2MPS14_IRQ_RTCA0; - break; - case S2MPS13X: - regmap_cfg = &s2mps14_rtc_regmap_config; - info->regs = &s2mps13_rtc_regs; - alarm_irq = S2MPS14_IRQ_RTCA0; - break; - case S5M8767X: - regmap_cfg = &s5m_rtc_regmap_config; - info->regs = &s5m_rtc_regs; - alarm_irq = S5M8767_IRQ_RTCA1; - break; - default: - return dev_err_probe(&pdev->dev, -ENODEV, - "Device type %lu is not supported by RTC driver\n", - platform_get_device_id(pdev)->driver_data); - } + info->regmap = dev_get_regmap(pdev->dev.parent, "rtc"); + if (!info->regmap) { + const struct regmap_config *regmap_cfg; + struct i2c_client *i2c; - i2c = devm_i2c_new_dummy_device(&pdev->dev, s5m87xx->i2c->adapter, - RTC_I2C_ADDR); - if (IS_ERR(i2c)) - return dev_err_probe(&pdev->dev, PTR_ERR(i2c), - "Failed to allocate I2C for RTC\n"); + switch (device_type) { + case S2MPS15X: + regmap_cfg = &s2mps14_rtc_regmap_config; + info->regs = &s2mps15_rtc_regs; + alarm_irq = S2MPS14_IRQ_RTCA0; + break; + case S2MPS14X: + regmap_cfg = &s2mps14_rtc_regmap_config; + info->regs = &s2mps14_rtc_regs; + alarm_irq = S2MPS14_IRQ_RTCA0; + break; + case S2MPS13X: + regmap_cfg = &s2mps14_rtc_regmap_config; + info->regs = &s2mps13_rtc_regs; + alarm_irq = S2MPS14_IRQ_RTCA0; + break; + case S5M8767X: + regmap_cfg = &s5m_rtc_regmap_config; + info->regs = &s5m_rtc_regs; + alarm_irq = S5M8767_IRQ_RTCA1; + break; + default: + return dev_err_probe(&pdev->dev, -ENODEV, + "Unsupported device type %d\n", + device_type); + } - info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg); - if (IS_ERR(info->regmap)) - return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap), - "Failed to allocate RTC register map\n"); + i2c = devm_i2c_new_dummy_device(&pdev->dev, + s5m87xx->i2c->adapter, + RTC_I2C_ADDR); + if (IS_ERR(i2c)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c), + "Failed to allocate I2C\n"); + + info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg); + if (IS_ERR(info->regmap)) + return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap), + "Failed to allocate regmap\n"); + } else if (device_type == S2MPG10) { + info->regs = &s2mpg10_rtc_regs; + alarm_irq = S2MPG10_IRQ_RTCA0; + } else { + return dev_err_probe(&pdev->dev, -ENODEV, + "Unsupported device type %d\n", + device_type); + } info->dev = &pdev->dev; info->s5m87xx = s5m87xx; - info->device_type = platform_get_device_id(pdev)->driver_data; + info->device_type = device_type; if (s5m87xx->irq_data) { info->irq = regmap_irq_get_virq(s5m87xx->irq_data, alarm_irq); @@ -721,7 +763,23 @@ static int s5m_rtc_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, ret, "Failed to request alarm IRQ %d\n", info->irq); - device_init_wakeup(&pdev->dev, true); + + ret = devm_device_init_wakeup(&pdev->dev); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Failed to init wakeup\n"); + } + + if (of_device_is_system_power_controller(pdev->dev.parent->of_node) && + info->device_type == S2MPG10) { + ret = devm_register_sys_off_handler(&pdev->dev, + SYS_OFF_MODE_RESTART, + SYS_OFF_PRIO_HIGH + 1, + s5m_rtc_restart_s2mpg10, + info); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to register restart handler\n"); } return devm_rtc_register_device(info->rtc_dev); @@ -755,6 +813,7 @@ static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume); static const struct platform_device_id s5m_rtc_id[] = { { "s5m-rtc", S5M8767X }, + { "s2mpg10-rtc", S2MPG10 }, { "s2mps13-rtc", S2MPS13X }, { "s2mps14-rtc", S2MPS14X }, { "s2mps15-rtc", S2MPS15X }, diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index cef60770f68b..b3fcdcae379e 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -86,7 +86,7 @@ static void *_copy_apqns_from_user(void __user *uapqns, size_t nr_apqns) if (!uapqns || nr_apqns == 0) return NULL; - return memdup_user(uapqns, nr_apqns * sizeof(struct pkey_apqn)); + return memdup_array_user(uapqns, nr_apqns, sizeof(struct pkey_apqn)); } static int pkey_ioctl_genseck(struct pkey_genseck __user *ugs) diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c index f8ab69c51dab..ae37f85f618b 100644 --- a/drivers/scsi/fnic/fdls_disc.c +++ b/drivers/scsi/fnic/fdls_disc.c @@ -763,50 +763,86 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport) iport->fabric.timer_pending = 1; } -static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) +static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport, + uint16_t oxid) { - uint8_t *frame; + struct fc_frame_header *pfdmi_abts; uint8_t d_id[3]; + uint8_t *frame; struct fnic *fnic = iport->fnic; - struct fc_frame_header *pfabric_abts; - unsigned long fdmi_tov; - uint16_t oxid; - uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET + - sizeof(struct fc_frame_header); frame = fdls_alloc_frame(iport); if (frame == NULL) { FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num, "Failed to allocate frame to send FDMI ABTS"); - return; + return NULL; } - pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET); + pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET); fdls_init_fabric_abts_frame(frame, iport); hton24(d_id, FC_FID_MGMT_SERV); - FNIC_STD_SET_D_ID(*pfabric_abts, d_id); + FNIC_STD_SET_D_ID(*pfdmi_abts, d_id); + FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid); + + return frame; +} + +static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) +{ + uint8_t *frame; + struct fnic *fnic = iport->fnic; + unsigned long fdmi_tov; + uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET + + sizeof(struct fc_frame_header); if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) { - oxid = iport->active_oxid_fdmi_plogi; - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); + frame = fdls_alloc_init_fdmi_abts_frame(iport, + iport->active_oxid_fdmi_plogi); + if (frame == NULL) + return; + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: FDLS send FDMI PLOGI abts. iport->fabric.state: %d oxid: 0x%x", + iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_plogi); fnic_send_fcoe_frame(iport, frame, frame_size); } else { if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) { - oxid = iport->active_oxid_fdmi_rhba; - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); + frame = fdls_alloc_init_fdmi_abts_frame(iport, + iport->active_oxid_fdmi_rhba); + if (frame == NULL) + return; + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: FDLS send FDMI RHBA abts. iport->fabric.state: %d oxid: 0x%x", + iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rhba); fnic_send_fcoe_frame(iport, frame, frame_size); } if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) { - oxid = iport->active_oxid_fdmi_rpa; - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); + frame = fdls_alloc_init_fdmi_abts_frame(iport, + iport->active_oxid_fdmi_rpa); + if (frame == NULL) { + if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) + goto arm_timer; + else + return; + } + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: FDLS send FDMI RPA abts. iport->fabric.state: %d oxid: 0x%x", + iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rpa); fnic_send_fcoe_frame(iport, frame, frame_size); } } +arm_timer: fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov); mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov)); iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING; + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); } static void fdls_send_fabric_flogi(struct fnic_iport_s *iport) @@ -2245,6 +2281,21 @@ void fdls_fabric_timer_callback(struct timer_list *t) spin_unlock_irqrestore(&fnic->fnic_lock, flags); } +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport) +{ + struct fnic *fnic = iport->fnic; + + iport->fabric.fdmi_pending = 0; + /* If max retries not exhausted, start over from fdmi plogi */ + if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) { + iport->fabric.fdmi_retry++; + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Retry FDMI PLOGI. FDMI retry: %d", + iport->fabric.fdmi_retry); + fdls_send_fdmi_plogi(iport); + } +} + void fdls_fdmi_timer_callback(struct timer_list *t) { struct fnic_fdls_fabric_s *fabric = timer_container_of(fabric, t, @@ -2257,7 +2308,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t) spin_lock_irqsave(&fnic->fnic_lock, flags); FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); if (!iport->fabric.fdmi_pending) { /* timer expired after fdmi responses received. */ @@ -2265,7 +2316,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t) return; } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); /* if not abort pending, send an abort */ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) { @@ -2274,33 +2325,37 @@ void fdls_fdmi_timer_callback(struct timer_list *t) return; } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); /* ABTS pending for an active fdmi request that is pending. * That means FDMI ABTS timed out * Schedule to free the OXID after 2*r_a_tov and proceed */ if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) { + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "FDMI PLOGI ABTS timed out. Schedule oxid free: 0x%x\n", + iport->active_oxid_fdmi_plogi); fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_plogi); } else { - if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) + if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) { + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "FDMI RHBA ABTS timed out. Schedule oxid free: 0x%x\n", + iport->active_oxid_fdmi_rhba); fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rhba); - if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) + } + if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) { + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "FDMI RPA ABTS timed out. Schedule oxid free: 0x%x\n", + iport->active_oxid_fdmi_rpa); fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rpa); + } } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); - iport->fabric.fdmi_pending = 0; - /* If max retries not exhaused, start over from fdmi plogi */ - if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) { - iport->fabric.fdmi_retry++; - FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "retry fdmi timer %d", iport->fabric.fdmi_retry); - fdls_send_fdmi_plogi(iport); - } + fdls_fdmi_retry_plogi(iport); FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); spin_unlock_irqrestore(&fnic->fnic_lock, flags); } @@ -3715,13 +3770,60 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport, switch (FNIC_FRAME_TYPE(oxid)) { case FNIC_FRAME_TYPE_FDMI_PLOGI: + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Received FDMI PLOGI ABTS rsp with oxid: 0x%x", oxid); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi); + + iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING; + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); break; case FNIC_FRAME_TYPE_FDMI_RHBA: + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Received FDMI RHBA ABTS rsp with oxid: 0x%x", oxid); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); + + iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING; + + /* If RPA is still pending, don't turn off ABORT PENDING. + * We count on the timer to detect the ABTS timeout and take + * corrective action. + */ + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING)) + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); break; case FNIC_FRAME_TYPE_FDMI_RPA: + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Received FDMI RPA ABTS rsp with oxid: 0x%x", oxid); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); + + iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING; + + /* If RHBA is still pending, don't turn off ABORT PENDING. + * We count on the timer to detect the ABTS timeout and take + * corrective action. + */ + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)) + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); break; default: FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, @@ -3730,10 +3832,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport, break; } - timer_delete_sync(&iport->fabric.fdmi_timer); - iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; - - fdls_send_fdmi_plogi(iport); + /* + * Only if ABORT PENDING is off, delete the timer, and if no other + * operations are pending, retry FDMI. + * Otherwise, let the timer pop and take the appropriate action. + */ + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) { + timer_delete_sync(&iport->fabric.fdmi_timer); + if (!iport->fabric.fdmi_pending) + fdls_fdmi_retry_plogi(iport); + } } static void @@ -4972,9 +5080,12 @@ void fnic_fdls_link_down(struct fnic_iport_s *iport) fdls_delete_tport(iport, tport); } - if ((fnic_fdmi_support == 1) && (iport->fabric.fdmi_pending > 0)) { - timer_delete_sync(&iport->fabric.fdmi_timer); - iport->fabric.fdmi_pending = 0; + if (fnic_fdmi_support == 1) { + if (iport->fabric.fdmi_pending > 0) { + timer_delete_sync(&iport->fabric.fdmi_timer); + iport->fabric.fdmi_pending = 0; + } + iport->flags &= ~FNIC_FDMI_ACTIVE; } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 6c5f6046b1f5..c2fdc6553e62 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -30,7 +30,7 @@ #define DRV_NAME "fnic" #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" -#define DRV_VERSION "1.8.0.0" +#define DRV_VERSION "1.8.0.2" #define PFX DRV_NAME ": " #define DFX DRV_NAME "%d: " diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 1e8cd64f9a5c..103ab6f1f7cd 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -636,6 +636,8 @@ static int fnic_send_frame(struct fnic *fnic, void *frame, int frame_len) unsigned long flags; pa = dma_map_single(&fnic->pdev->dev, frame, frame_len, DMA_TO_DEVICE); + if (dma_mapping_error(&fnic->pdev->dev, pa)) + return -ENOMEM; if ((fnic_fc_trace_set_data(fnic->fnic_num, FNIC_FC_SEND | 0x80, (char *) frame, diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h index 8e610b65ad57..531d0b37e450 100644 --- a/drivers/scsi/fnic/fnic_fdls.h +++ b/drivers/scsi/fnic/fnic_fdls.h @@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport, bool fdls_delete_tport(struct fnic_iport_s *iport, struct fnic_tport_s *tport); void fdls_fdmi_timer_callback(struct timer_list *t); +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport); /* fnic_fcs.c */ void fnic_fdls_init(struct fnic *fnic, int usefip); diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 7133b254cbe4..75b29a018d1f 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -1046,7 +1046,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, unsigned int cq_ind if (icmnd_cmpl->scsi_status == SAM_STAT_TASK_SET_FULL) atomic64_inc(&fnic_stats->misc_stats.queue_fulls); - FNIC_SCSI_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + FNIC_SCSI_DBG(KERN_DEBUG, fnic->host, fnic->fnic_num, "xfer_len: %llu", xfer_len); break; diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index e021f1106bea..cc5d05dc395c 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -473,10 +473,17 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv else shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS; - if (sht->max_segment_size) - shost->max_segment_size = sht->max_segment_size; - else - shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + shost->virt_boundary_mask = sht->virt_boundary_mask; + if (shost->virt_boundary_mask) { + WARN_ON_ONCE(sht->max_segment_size && + sht->max_segment_size != UINT_MAX); + shost->max_segment_size = UINT_MAX; + } else { + if (sht->max_segment_size) + shost->max_segment_size = sht->max_segment_size; + else + shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + } /* 32-byte (dword) is a common minimum for HBAs. */ if (sht->dma_alignment) @@ -492,9 +499,6 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv else shost->dma_boundary = 0xffffffff; - if (sht->virt_boundary_mask) - shost->virt_boundary_mask = sht->virt_boundary_mask; - device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 3aac0e17cb00..9179f8aee964 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5910,7 +5910,11 @@ megasas_set_high_iops_queue_affinity_and_hint(struct megasas_instance *instance) const struct cpumask *mask; if (instance->perf_mode == MR_BALANCED_PERF_MODE) { - mask = cpumask_of_node(dev_to_node(&instance->pdev->dev)); + int nid = dev_to_node(&instance->pdev->dev); + + if (nid == NUMA_NO_NODE) + nid = 0; + mask = cpumask_of_node(nid); for (i = 0; i < instance->low_latency_index_start; i++) { irq = pci_irq_vector(instance->pdev, i); diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 0cd6f3e14882..13b6cb1b93ac 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2147,7 +2147,7 @@ qla24xx_get_port_database(scsi_qla_host_t *vha, u16 nport_handle, pdb_dma = dma_map_single(&vha->hw->pdev->dev, pdb, sizeof(*pdb), DMA_FROM_DEVICE); - if (!pdb_dma) { + if (dma_mapping_error(&vha->hw->pdev->dev, pdb_dma)) { ql_log(ql_log_warn, vha, 0x1116, "Failed to map dma buffer.\n"); return QLA_MEMORY_ALLOC_FAILED; } diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index d4141656b204..a39f1da4ce47 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -3420,6 +3420,8 @@ static int qla4xxx_alloc_pdu(struct iscsi_task *task, uint8_t opcode) task_data->data_dma = dma_map_single(&ha->pdev->dev, task->data, task->data_count, DMA_TO_DEVICE); + if (dma_mapping_error(&ha->pdev->dev, task_data->data_dma)) + return -ENOMEM; } DEBUG2(ql4_printk(KERN_INFO, ha, "%s: MaxRecvLen %u, iscsi hrd %d\n", diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3f6e87705b62..eeaa6af294b8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3384,7 +3384,7 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb7); - if (vpd && vpd->len >= 2) + if (vpd && vpd->len >= 6) sdkp->rscs = vpd->data[5] & 1; rcu_read_unlock(); } diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index fe0f122f07b0..d3c78f59b22c 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1958,12 +1958,7 @@ static int cqspi_probe(struct platform_device *pdev) goto probe_setup_failed; } - ret = devm_pm_runtime_enable(dev); - if (ret) { - if (cqspi->rx_chan) - dma_release_channel(cqspi->rx_chan); - goto probe_setup_failed; - } + pm_runtime_enable(dev); pm_runtime_set_autosuspend_delay(dev, CQSPI_AUTOSUSPEND_TIMEOUT); pm_runtime_use_autosuspend(dev); @@ -1981,6 +1976,7 @@ static int cqspi_probe(struct platform_device *pdev) return 0; probe_setup_failed: cqspi_controller_enable(cqspi, 0); + pm_runtime_disable(dev); probe_reset_failed: if (cqspi->is_jh7110) cqspi_jh7110_disable_clk(pdev, cqspi); @@ -1999,7 +1995,8 @@ static void cqspi_remove(struct platform_device *pdev) if (cqspi->rx_chan) dma_release_channel(cqspi->rx_chan); - clk_disable_unprepare(cqspi->clk); + if (pm_runtime_get_sync(&pdev->dev) >= 0) + clk_disable(cqspi->clk); if (cqspi->is_jh7110) cqspi_jh7110_disable_clk(pdev, cqspi); diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 863781ba6c16..0dcd49114095 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -983,11 +983,20 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) { status = dspi_dma_xfer(dspi); } else { + /* + * Reinitialize the completion before transferring data + * to avoid the case where it might remain in the done + * state due to a spurious interrupt from a previous + * transfer. This could falsely signal that the current + * transfer has completed. + */ + if (dspi->irq) + reinit_completion(&dspi->xfer_done); + dspi_fifo_write(dspi); if (dspi->irq) { wait_for_completion(&dspi->xfer_done); - reinit_completion(&dspi->xfer_done); } else { do { status = dspi_poll(dspi); diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 77d9cc65477a..f2e1a27b410d 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -315,6 +315,22 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) mtd_set_ooblayout(mtd, &qcom_spi_ooblayout); + /* + * Free the temporary BAM transaction allocated initially by + * qcom_nandc_alloc(), and allocate a new one based on the + * updated max_cwperpage value. + */ + qcom_free_bam_transaction(snandc); + + snandc->max_cwperpage = cwperpage; + + snandc->bam_txn = qcom_alloc_bam_transaction(snandc); + if (!snandc->bam_txn) { + dev_err(snandc->dev, "failed to allocate BAM transaction\n"); + ret = -ENOMEM; + goto err_free_ecc_cfg; + } + ecc_cfg->cfg0 = FIELD_PREP(CW_PER_PAGE_MASK, (cwperpage - 1)) | FIELD_PREP(UD_SIZE_BYTES_MASK, ecc_cfg->cw_data) | FIELD_PREP(DISABLE_STATUS_AFTER_WRITE, 1) | diff --git a/drivers/staging/rtl8723bs/core/rtw_security.c b/drivers/staging/rtl8723bs/core/rtw_security.c index 1e9eff01b1aa..e9f382c280d9 100644 --- a/drivers/staging/rtl8723bs/core/rtw_security.c +++ b/drivers/staging/rtl8723bs/core/rtw_security.c @@ -868,29 +868,21 @@ static signed int aes_cipher(u8 *key, uint hdrlen, num_blocks, payload_index; u8 pn_vector[6]; - u8 mic_iv[16]; - u8 mic_header1[16]; - u8 mic_header2[16]; - u8 ctr_preload[16]; + u8 mic_iv[16] = {}; + u8 mic_header1[16] = {}; + u8 mic_header2[16] = {}; + u8 ctr_preload[16] = {}; /* Intermediate Buffers */ - u8 chain_buffer[16]; - u8 aes_out[16]; - u8 padded_buffer[16]; + u8 chain_buffer[16] = {}; + u8 aes_out[16] = {}; + u8 padded_buffer[16] = {}; u8 mic[8]; uint frtype = GetFrameType(pframe); uint frsubtype = GetFrameSubType(pframe); frsubtype = frsubtype>>4; - memset((void *)mic_iv, 0, 16); - memset((void *)mic_header1, 0, 16); - memset((void *)mic_header2, 0, 16); - memset((void *)ctr_preload, 0, 16); - memset((void *)chain_buffer, 0, 16); - memset((void *)aes_out, 0, 16); - memset((void *)padded_buffer, 0, 16); - if ((hdrlen == WLAN_HDR_A3_LEN) || (hdrlen == WLAN_HDR_A3_QOS_LEN)) a4_exists = 0; else @@ -1080,15 +1072,15 @@ static signed int aes_decipher(u8 *key, uint hdrlen, num_blocks, payload_index; signed int res = _SUCCESS; u8 pn_vector[6]; - u8 mic_iv[16]; - u8 mic_header1[16]; - u8 mic_header2[16]; - u8 ctr_preload[16]; + u8 mic_iv[16] = {}; + u8 mic_header1[16] = {}; + u8 mic_header2[16] = {}; + u8 ctr_preload[16] = {}; /* Intermediate Buffers */ - u8 chain_buffer[16]; - u8 aes_out[16]; - u8 padded_buffer[16]; + u8 chain_buffer[16] = {}; + u8 aes_out[16] = {}; + u8 padded_buffer[16] = {}; u8 mic[8]; uint frtype = GetFrameType(pframe); @@ -1096,14 +1088,6 @@ static signed int aes_decipher(u8 *key, uint hdrlen, frsubtype = frsubtype>>4; - memset((void *)mic_iv, 0, 16); - memset((void *)mic_header1, 0, 16); - memset((void *)mic_header2, 0, 16); - memset((void *)ctr_preload, 0, 16); - memset((void *)chain_buffer, 0, 16); - memset((void *)aes_out, 0, 16); - memset((void *)padded_buffer, 0, 16); - /* start to decrypt the payload */ num_blocks = (plen-8) / 16; /* plen including LLC, payload_length and mic) */ diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index f3af5666bb11..f9ef7d94cebd 100644 --- a/drivers/tee/optee/ffa_abi.c +++ b/drivers/tee/optee/ffa_abi.c @@ -728,12 +728,21 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev, return true; } +static void notif_work_fn(struct work_struct *work) +{ + struct optee_ffa *optee_ffa = container_of(work, struct optee_ffa, + notif_work); + struct optee *optee = container_of(optee_ffa, struct optee, ffa); + + optee_do_bottom_half(optee->ctx); +} + static void notif_callback(int notify_id, void *cb_data) { struct optee *optee = cb_data; if (notify_id == optee->ffa.bottom_half_value) - optee_do_bottom_half(optee->ctx); + queue_work(optee->ffa.notif_wq, &optee->ffa.notif_work); else optee_notif_send(optee, notify_id); } @@ -817,9 +826,11 @@ static void optee_ffa_remove(struct ffa_device *ffa_dev) struct optee *optee = ffa_dev_get_drvdata(ffa_dev); u32 bottom_half_id = optee->ffa.bottom_half_value; - if (bottom_half_id != U32_MAX) + if (bottom_half_id != U32_MAX) { ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, bottom_half_id); + destroy_workqueue(optee->ffa.notif_wq); + } optee_remove_common(optee); mutex_destroy(&optee->ffa.mutex); @@ -835,6 +846,13 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev, u32 notif_id = 0; int rc; + INIT_WORK(&optee->ffa.notif_work, notif_work_fn); + optee->ffa.notif_wq = create_workqueue("optee_notification"); + if (!optee->ffa.notif_wq) { + rc = -EINVAL; + goto err; + } + while (true) { rc = ffa_dev->ops->notifier_ops->notify_request(ffa_dev, is_per_vcpu, @@ -851,19 +869,24 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev, * notifications in that case. */ if (rc != -EACCES) - return rc; + goto err_wq; notif_id++; if (notif_id >= OPTEE_FFA_MAX_ASYNC_NOTIF_VALUE) - return rc; + goto err_wq; } optee->ffa.bottom_half_value = notif_id; rc = enable_async_notif(optee); - if (rc < 0) { - ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, - notif_id); - optee->ffa.bottom_half_value = U32_MAX; - } + if (rc < 0) + goto err_rel; + + return 0; +err_rel: + ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, notif_id); +err_wq: + destroy_workqueue(optee->ffa.notif_wq); +err: + optee->ffa.bottom_half_value = U32_MAX; return rc; } diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index dc0f355ef72a..9526087f0e68 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -165,6 +165,8 @@ struct optee_ffa { /* Serializes access to @global_ids */ struct mutex mutex; struct rhashtable global_ids; + struct workqueue_struct *notif_wq; + struct work_struct notif_work; }; struct optee; diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index bd02ee898f5d..500dfc009d03 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -235,6 +235,7 @@ struct imx_port { enum imx_tx_state tx_state; struct hrtimer trigger_start_tx; struct hrtimer trigger_stop_tx; + unsigned int rxtl; }; struct imx_port_ucrs { @@ -1339,6 +1340,7 @@ static void imx_uart_clear_rx_errors(struct imx_port *sport) #define TXTL_DEFAULT 8 #define RXTL_DEFAULT 8 /* 8 characters or aging timer */ +#define RXTL_CONSOLE_DEFAULT 1 #define TXTL_DMA 8 /* DMA burst setting */ #define RXTL_DMA 9 /* DMA burst setting */ @@ -1457,7 +1459,7 @@ static void imx_uart_disable_dma(struct imx_port *sport) ucr1 &= ~(UCR1_RXDMAEN | UCR1_TXDMAEN | UCR1_ATDMAEN); imx_uart_writel(sport, ucr1, UCR1); - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); sport->dma_is_enabled = 0; } @@ -1482,7 +1484,12 @@ static int imx_uart_startup(struct uart_port *port) return retval; } - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + if (uart_console(&sport->port)) + sport->rxtl = RXTL_CONSOLE_DEFAULT; + else + sport->rxtl = RXTL_DEFAULT; + + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); /* disable the DREN bit (Data Ready interrupt enable) before * requesting IRQs @@ -1948,7 +1955,7 @@ static int imx_uart_poll_init(struct uart_port *port) if (retval) clk_disable_unprepare(sport->clk_ipg); - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); uart_port_lock_irqsave(&sport->port, &flags); @@ -2040,7 +2047,7 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio /* If the receiver trigger is 0, set it to a default value */ ufcr = imx_uart_readl(sport, UFCR); if ((ufcr & UFCR_RXTL_MASK) == 0) - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); imx_uart_start_rx(port); } @@ -2302,7 +2309,7 @@ imx_uart_console_setup(struct console *co, char *options) else imx_uart_console_get_options(sport, &baud, &parity, &bits); - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); retval = uart_set_options(&sport->port, co, baud, parity, bits, flow); diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index 5d1677f1b651..cb3b127b06b6 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -72,6 +72,7 @@ static int serial_base_device_init(struct uart_port *port, dev->parent = parent_dev; dev->bus = &serial_base_bus_type; dev->release = release; + device_set_of_node_from_dev(dev, parent_dev); if (!serial_base_initialized) { dev_dbg(port->dev, "uart_add_one_port() called before arch_initcall()?\n"); diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index 6ead622b7713..03877485dfb7 100644 --- a/drivers/tty/vt/ucs.c +++ b/drivers/tty/vt/ucs.c @@ -206,7 +206,7 @@ static int ucs_page_entry_cmp(const void *key, const void *element) /** * ucs_get_fallback() - Get a substitution for the provided Unicode character - * @base: Base Unicode code point (UCS-4) + * @cp: Unicode code point (UCS-4) * * Get a simpler fallback character for the provided Unicode character. * This is used for terminal display when corresponding glyph is unavailable. diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index ed39d9cb4432..62049ceb34de 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4650,6 +4650,7 @@ void do_unblank_screen(int leaving_gfx) set_palette(vc); set_cursor(vc); vt_event_post(VT_EVENT_UNBLANK, vc->vc_num, vc->vc_num); + notify_update(vc); } EXPORT_SYMBOL(do_unblank_screen); diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index de8b6acd4058..fcb4b14a710f 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1808,7 +1808,7 @@ UFS_UNIT_DESC_PARAM(logical_block_size, _LOGICAL_BLK_SIZE, 1); UFS_UNIT_DESC_PARAM(logical_block_count, _LOGICAL_BLK_COUNT, 8); UFS_UNIT_DESC_PARAM(erase_block_size, _ERASE_BLK_SIZE, 4); UFS_UNIT_DESC_PARAM(provisioning_type, _PROVISIONING_TYPE, 1); -UFS_UNIT_DESC_PARAM(physical_memory_resourse_count, _PHY_MEM_RSRC_CNT, 8); +UFS_UNIT_DESC_PARAM(physical_memory_resource_count, _PHY_MEM_RSRC_CNT, 8); UFS_UNIT_DESC_PARAM(context_capabilities, _CTX_CAPABILITIES, 2); UFS_UNIT_DESC_PARAM(large_unit_granularity, _LARGE_UNIT_SIZE_M1, 1); UFS_UNIT_DESC_PARAM(wb_buf_alloc_units, _WB_BUF_ALLOC_UNITS, 4); @@ -1825,7 +1825,7 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = { &dev_attr_logical_block_count.attr, &dev_attr_erase_block_size.attr, &dev_attr_provisioning_type.attr, - &dev_attr_physical_memory_resourse_count.attr, + &dev_attr_physical_memory_resource_count.attr, &dev_attr_context_capabilities.attr, &dev_attr_large_unit_granularity.attr, &dev_attr_wb_buf_alloc_units.attr, diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f62d89c8e580..50adfb8b335b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7807,7 +7807,8 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) hba->silence_err_logs = false; /* scale up clocks to max frequency before full reinitialization */ - ufshcd_scale_clks(hba, ULONG_MAX, true); + if (ufshcd_is_clkscaling_supported(hba)) + ufshcd_scale_clks(hba, ULONG_MAX, true); err = ufshcd_hba_enable(hba); diff --git a/drivers/usb/cdns3/cdnsp-debug.h b/drivers/usb/cdns3/cdnsp-debug.h index cd138acdcce1..86860686d836 100644 --- a/drivers/usb/cdns3/cdnsp-debug.h +++ b/drivers/usb/cdns3/cdnsp-debug.h @@ -327,12 +327,13 @@ static inline const char *cdnsp_decode_trb(char *str, size_t size, u32 field0, case TRB_RESET_EP: case TRB_HALT_ENDPOINT: ret = scnprintf(str, size, - "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c", + "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c %c", cdnsp_trb_type_string(type), ep_num, ep_id % 2 ? "out" : "in", TRB_TO_EP_INDEX(field3), field1, field0, TRB_TO_SLOT_ID(field3), - field3 & TRB_CYCLE ? 'C' : 'c'); + field3 & TRB_CYCLE ? 'C' : 'c', + field3 & TRB_ESP ? 'P' : 'p'); break; case TRB_STOP_RING: ret = scnprintf(str, size, diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c index f317d3c84781..5cd9b898ce97 100644 --- a/drivers/usb/cdns3/cdnsp-ep0.c +++ b/drivers/usb/cdns3/cdnsp-ep0.c @@ -414,6 +414,7 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev, void cdnsp_setup_analyze(struct cdnsp_device *pdev) { struct usb_ctrlrequest *ctrl = &pdev->setup; + struct cdnsp_ep *pep; int ret = -EINVAL; u16 len; @@ -427,10 +428,21 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev) goto out; } + pep = &pdev->eps[0]; + /* Restore the ep0 to Stopped/Running state. */ - if (pdev->eps[0].ep_state & EP_HALTED) { - trace_cdnsp_ep0_halted("Restore to normal state"); - cdnsp_halt_endpoint(pdev, &pdev->eps[0], 0); + if (pep->ep_state & EP_HALTED) { + if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_HALTED) + cdnsp_halt_endpoint(pdev, pep, 0); + + /* + * Halt Endpoint Command for SSP2 for ep0 preserve current + * endpoint state and driver has to synchronize the + * software endpoint state with endpoint output context + * state. + */ + pep->ep_state &= ~EP_HALTED; + pep->ep_state |= EP_STOPPED; } /* diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 2afa3e558f85..a91cca509db0 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -987,6 +987,12 @@ enum cdnsp_setup_dev { #define STREAM_ID_FOR_TRB(p) ((((p)) << 16) & GENMASK(31, 16)) #define SCT_FOR_TRB(p) (((p) << 1) & 0x7) +/* + * Halt Endpoint Command TRB field. + * The ESP bit only exists in the SSP2 controller. + */ +#define TRB_ESP BIT(9) + /* Link TRB specific fields. */ #define TRB_TC BIT(1) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index fd06cb85c4ea..0758f171f73e 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -772,7 +772,9 @@ static int cdnsp_update_port_id(struct cdnsp_device *pdev, u32 port_id) } if (port_id != old_port) { - cdnsp_disable_slot(pdev); + if (pdev->slot_id) + cdnsp_disable_slot(pdev); + pdev->active_port = port; cdnsp_enable_slot(pdev); } @@ -2483,7 +2485,8 @@ void cdnsp_queue_halt_endpoint(struct cdnsp_device *pdev, unsigned int ep_index) { cdnsp_queue_command(pdev, 0, 0, 0, TRB_TYPE(TRB_HALT_ENDPOINT) | SLOT_ID_FOR_TRB(pdev->slot_id) | - EP_ID_FOR_TRB(ep_index)); + EP_ID_FOR_TRB(ep_index) | + (!ep_index ? TRB_ESP : 0)); } void cdnsp_force_header_wakeup(struct cdnsp_device *pdev, int intf_num) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 8a9b31fd5c89..1a48e6440e6c 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -2374,6 +2374,10 @@ static void udc_suspend(struct ci_hdrc *ci) */ if (hw_read(ci, OP_ENDPTLISTADDR, ~0) == 0) hw_write(ci, OP_ENDPTLISTADDR, ~0, ~0); + + if (ci->gadget.connected && + (!ci->suspended || !device_may_wakeup(ci->dev))) + usb_gadget_disconnect(&ci->gadget); } static void udc_resume(struct ci_hdrc *ci, bool power_lost) @@ -2384,6 +2388,9 @@ static void udc_resume(struct ci_hdrc *ci, bool power_lost) OTGSC_BSVIS | OTGSC_BSVIE); if (ci->vbus_active) usb_gadget_vbus_disconnect(&ci->gadget); + } else if (ci->vbus_active && ci->driver && + !ci->gadget.connected) { + usb_gadget_connect(&ci->gadget); } /* Restore value 0 if it was set for power lost check */ diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 770d1e91183c..3e1215f7a9a0 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -68,6 +68,12 @@ */ #define USB_SHORT_SET_ADDRESS_REQ_TIMEOUT 500 /* ms */ +/* + * Give SS hubs 200ms time after wake to train downstream links before + * assuming no port activity and allowing hub to runtime suspend back. + */ +#define USB_SS_PORT_U0_WAKE_TIME 200 /* ms */ + /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */ @@ -1095,6 +1101,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth @@ -1343,6 +1350,17 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } + if (type == HUB_RESUME && hub_is_superspeed(hub->hdev)) { + /* give usb3 downstream links training time after hub resume */ + usb_autopm_get_interface_no_resume( + to_usb_interface(hub->intfdev)); + + queue_delayed_work(system_power_efficient_wq, + &hub->post_resume_work, + msecs_to_jiffies(USB_SS_PORT_U0_WAKE_TIME)); + return; + } + hub_put(hub); } @@ -1361,6 +1379,14 @@ static void hub_init_func3(struct work_struct *ws) hub_activate(hub, HUB_INIT3); } +static void hub_post_resume(struct work_struct *ws) +{ + struct usb_hub *hub = container_of(ws, struct usb_hub, post_resume_work.work); + + usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); + hub_put(hub); +} + enum hub_quiescing_type { HUB_DISCONNECT, HUB_PRE_RESET, HUB_SUSPEND }; @@ -1386,6 +1412,7 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) /* Stop hub_wq and related activity */ timer_delete_sync(&hub->irq_urb_retry); + flush_delayed_work(&hub->post_resume_work); usb_kill_urb(hub->urb); if (hub->has_indicators) cancel_delayed_work_sync(&hub->leds); @@ -1944,6 +1971,7 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) hub->hdev = hdev; INIT_DELAYED_WORK(&hub->leds, led_work); INIT_DELAYED_WORK(&hub->init_work, NULL); + INIT_DELAYED_WORK(&hub->post_resume_work, hub_post_resume); INIT_WORK(&hub->events, hub_event); INIT_LIST_HEAD(&hub->onboard_devs); spin_lock_init(&hub->irq_urb_lock); @@ -2337,6 +2365,9 @@ void usb_disconnect(struct usb_device **pdev) usb_remove_ep_devs(&udev->ep0); usb_unlock_device(udev); + if (udev->usb4_link) + device_link_del(udev->usb4_link); + /* Unregister the device. The device driver is responsible * for de-configuring the device and invoking the remove-device * notifier chain (used by usbfs and possibly others). diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index e6ae73f8a95d..9ebc5ef54a32 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -70,6 +70,7 @@ struct usb_hub { u8 indicator[USB_MAXCHILDREN]; struct delayed_work leds; struct delayed_work init_work; + struct delayed_work post_resume_work; struct work_struct events; spinlock_t irq_urb_lock; struct timer_list irq_urb_retry; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 53d68d20fb62..0cf94c7a2c9c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -227,7 +227,8 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech HD Webcam C270 */ - { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME | + USB_QUIRK_NO_LPM}, /* Logitech HD Pro Webcams C920, C920-C, C922, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index ea1ce8beb0cb..489dbdc96f94 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(usb_acpi_set_power_state); */ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) { - const struct device_link *link; + struct device_link *link; struct usb_port *port_dev; struct usb_hub *hub; @@ -188,6 +188,8 @@ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) dev_dbg(&port_dev->dev, "Created device link from %s to %s\n", dev_name(&port_dev->child->dev), dev_name(nhi_fwnode->dev)); + udev->usb4_link = link; + return 0; } diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 2bc775a747f2..8002c23a5a02 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -2422,6 +2422,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) { u32 reg; int i; + int ret; if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) { dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) & @@ -2440,7 +2441,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) case DWC3_GCTL_PRTCAP_DEVICE: if (pm_runtime_suspended(dwc->dev)) break; - dwc3_gadget_suspend(dwc); + ret = dwc3_gadget_suspend(dwc); + if (ret) + return ret; synchronize_irq(dwc->irq_gadget); dwc3_core_exit(dwc); break; @@ -2475,7 +2478,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) break; if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) { - dwc3_gadget_suspend(dwc); + ret = dwc3_gadget_suspend(dwc); + if (ret) + return ret; synchronize_irq(dwc->irq_gadget); } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 321361288935..74968f93d4a3 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3516,7 +3516,7 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep, * We're going to do that here to avoid problems of HW trying * to use bogus TRBs for transfers. */ - if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO)) + if (trb->ctrl & DWC3_TRB_CTRL_HWO) trb->ctrl &= ~DWC3_TRB_CTRL_HWO; /* @@ -4821,26 +4821,22 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) int ret; ret = dwc3_gadget_soft_disconnect(dwc); - if (ret) - goto err; - - spin_lock_irqsave(&dwc->lock, flags); - if (dwc->gadget_driver) - dwc3_disconnect_gadget(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); - - return 0; - -err: /* * Attempt to reset the controller's state. Likely no * communication can be established until the host * performs a port reset. */ - if (dwc->softconnect) + if (ret && dwc->softconnect) { dwc3_gadget_soft_connect(dwc); + return -EAGAIN; + } - return ret; + spin_lock_irqsave(&dwc->lock, flags); + if (dwc->gadget_driver) + dwc3_disconnect_gadget(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + + return 0; } int dwc3_gadget_resume(struct dwc3 *dwc) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index ab544f6824be..540dc5ab96fc 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -295,8 +295,8 @@ __acquires(&port->port_lock) break; } - if (do_tty_wake && port->port.tty) - tty_wakeup(port->port.tty); + if (do_tty_wake) + tty_port_tty_wakeup(&port->port); return status; } @@ -544,20 +544,16 @@ static int gs_alloc_requests(struct usb_ep *ep, struct list_head *head, static int gs_start_io(struct gs_port *port) { struct list_head *head = &port->read_pool; - struct usb_ep *ep; + struct usb_ep *ep = port->port_usb->out; int status; unsigned started; - if (!port->port_usb || !port->port.tty) - return -EIO; - /* Allocate RX and TX I/O buffers. We can't easily do this much * earlier (with GFP_KERNEL) because the requests are coupled to * endpoints, as are the packet sizes we'll be using. Different * configurations may use different endpoints with a given port; * and high speed vs full speed changes packet sizes too. */ - ep = port->port_usb->out; status = gs_alloc_requests(ep, head, gs_read_complete, &port->read_allocated); if (status) @@ -578,7 +574,7 @@ static int gs_start_io(struct gs_port *port) gs_start_tx(port); /* Unblock any pending writes into our circular buffer, in case * we didn't in gs_start_tx() */ - tty_wakeup(port->port.tty); + tty_port_tty_wakeup(&port->port); } else { /* Free reqs only if we are still connected */ if (port->port_usb) { diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 0d4ce5734165..06a2edb9e86e 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -652,6 +652,10 @@ static void xhci_dbc_stop(struct xhci_dbc *dbc) case DS_DISABLED: return; case DS_CONFIGURED: + spin_lock(&dbc->lock); + xhci_dbc_flush_requests(dbc); + spin_unlock(&dbc->lock); + if (dbc->driver->disconnect) dbc->driver->disconnect(dbc); break; diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index 60ed753c85bb..d894081d8d15 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -617,6 +617,7 @@ int dbc_tty_init(void) dbc_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; dbc_tty_driver->subtype = SERIAL_TYPE_NORMAL; dbc_tty_driver->init_termios = tty_std_termios; + dbc_tty_driver->init_termios.c_lflag &= ~ECHO; dbc_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; dbc_tty_driver->init_termios.c_ispeed = 9600; diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index bd745a0f2f78..6680afa4f596 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1449,6 +1449,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Periodic endpoint bInterval limit quirk */ if (usb_endpoint_xfer_int(&ep->desc) || usb_endpoint_xfer_isoc(&ep->desc)) { + if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_9) && + interval >= 9) { + interval = 8; + } if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_7) && udev->speed >= USB_SPEED_HIGH && interval >= 7) { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 0c481cbc8f08..00fac8b233d2 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -71,12 +71,22 @@ #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI 0x15ec #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI 0x15f0 +#define PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI 0x13ed +#define PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI 0x13ee +#define PCI_DEVICE_ID_AMD_STARSHIP_XHCI 0x148c +#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI 0x15d4 +#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI 0x15d5 +#define PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI 0x15e0 +#define PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI 0x15e1 +#define PCI_DEVICE_ID_AMD_RAVEN2_XHCI 0x15e5 #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba #define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb #define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc +#define PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI 0x7316 + #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242 @@ -280,6 +290,21 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_STARSHIP_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_RAVEN2_XHCI)) + xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9; + + if (pdev->vendor == PCI_VENDOR_ID_ATI && + pdev->device == PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI) + xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9; + if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96) xhci->quirks |= XHCI_AMD_0x96_HOST; diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 6dab142e7278..c79d5ed48a08 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -328,7 +328,8 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s } usb3_hcd = xhci_get_usb3_hcd(xhci); - if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4) + if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4 && + !(xhci->quirks & XHCI_BROKEN_STREAMS)) usb3_hcd->can_do_streams = 1; if (xhci->shared_hcd) { diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index e038ad3375dc..94c9c9271658 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -518,9 +518,8 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) * In the future we should distinguish between -ENODEV and -ETIMEDOUT * and try to recover a -ETIMEDOUT with a host controller reset. */ - ret = xhci_handshake_check_state(xhci, &xhci->op_regs->cmd_ring, - CMD_RING_RUNNING, 0, 5 * 1000 * 1000, - XHCI_STATE_REMOVING); + ret = xhci_handshake(&xhci->op_regs->cmd_ring, + CMD_RING_RUNNING, 0, 5 * 1000 * 1000); if (ret < 0) { xhci_err(xhci, "Abort failed to stop command ring: %d\n", ret); xhci_halt(xhci); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 4e6dbd2375c3..8a819e853288 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -85,29 +85,6 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us) } /* - * xhci_handshake_check_state - same as xhci_handshake but takes an additional - * exit_state parameter, and bails out with an error immediately when xhc_state - * has exit_state flag set. - */ -int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr, - u32 mask, u32 done, int usec, unsigned int exit_state) -{ - u32 result; - int ret; - - ret = readl_poll_timeout_atomic(ptr, result, - (result & mask) == done || - result == U32_MAX || - xhci->xhc_state & exit_state, - 1, usec); - - if (result == U32_MAX || xhci->xhc_state & exit_state) - return -ENODEV; - - return ret; -} - -/* * Disable interrupts and begin the xHCI halting process. */ void xhci_quiesce(struct xhci_hcd *xhci) @@ -227,8 +204,7 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us) if (xhci->quirks & XHCI_INTEL_HOST) udelay(1000); - ret = xhci_handshake_check_state(xhci, &xhci->op_regs->command, - CMD_RESET, 0, timeout_us, XHCI_STATE_REMOVING); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us); if (ret) return ret; @@ -1182,7 +1158,10 @@ int xhci_resume(struct xhci_hcd *xhci, bool power_lost, bool is_auto_resume) xhci_dbg(xhci, "Stop HCD\n"); xhci_halt(xhci); xhci_zero_64b_regs(xhci); - retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); + if (xhci->xhc_state & XHCI_STATE_REMOVING) + retval = -ENODEV; + else + retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); spin_unlock_irq(&xhci->lock); if (retval) return retval; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 49887a303e43..a20f4e7cd43a 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1643,6 +1643,7 @@ struct xhci_hcd { #define XHCI_WRITE_64_HI_LO BIT_ULL(47) #define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48) #define XHCI_ETRON_HOST BIT_ULL(49) +#define XHCI_LIMIT_ENDPOINT_INTERVAL_9 BIT_ULL(50) unsigned int num_active_eps; unsigned int limit_active_eps; @@ -1868,8 +1869,6 @@ void xhci_skip_sec_intr_events(struct xhci_hcd *xhci, /* xHCI host controller glue */ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us); -int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr, - u32 mask, u32 done, int usec, unsigned int exit_state); void xhci_quiesce(struct xhci_hcd *xhci); int xhci_halt(struct xhci_hcd *xhci); int xhci_start(struct xhci_hcd *xhci); diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index b09b58d7311d..d8b906ec4d1c 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -394,8 +394,7 @@ static int dp_altmode_vdm(struct typec_altmode *alt, case CMDT_RSP_NAK: switch (cmd) { case DP_CMD_STATUS_UPDATE: - if (typec_altmode_exit(alt)) - dev_err(&dp->alt->dev, "Exit Mode Failed!\n"); + dp->state = DP_STATE_EXIT; break; case DP_CMD_CONFIGURE: dp->data.conf = 0; @@ -677,7 +676,7 @@ static ssize_t pin_assignment_show(struct device *dev, assignments = get_current_pin_assignments(dp); - for (i = 0; assignments; assignments >>= 1, i++) { + for (i = 0; assignments && i < DP_PIN_ASSIGN_MAX; assignments >>= 1, i++) { if (assignments & 1) { if (i == cur) len += sprintf(buf + len, "[%s] ", diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 1a1f9e1f8e4e..1f6fdfaa34bf 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4410,17 +4410,6 @@ static int tcpm_src_attach(struct tcpm_port *port) tcpm_enable_auto_vbus_discharge(port, true); - ret = tcpm_set_roles(port, true, TYPEC_STATE_USB, - TYPEC_SOURCE, tcpm_data_role_for_source(port)); - if (ret < 0) - return ret; - - if (port->pd_supported) { - ret = port->tcpc->set_pd_rx(port->tcpc, true); - if (ret < 0) - goto out_disable_mux; - } - /* * USB Type-C specification, version 1.2, * chapter 4.5.2.2.8.1 (Attached.SRC Requirements) @@ -4430,13 +4419,24 @@ static int tcpm_src_attach(struct tcpm_port *port) (polarity == TYPEC_POLARITY_CC2 && port->cc1 == TYPEC_CC_RA)) { ret = tcpm_set_vconn(port, true); if (ret < 0) - goto out_disable_pd; + return ret; } ret = tcpm_set_vbus(port, true); if (ret < 0) goto out_disable_vconn; + ret = tcpm_set_roles(port, true, TYPEC_STATE_USB, TYPEC_SOURCE, + tcpm_data_role_for_source(port)); + if (ret < 0) + goto out_disable_vbus; + + if (port->pd_supported) { + ret = port->tcpc->set_pd_rx(port->tcpc, true); + if (ret < 0) + goto out_disable_mux; + } + port->pd_capable = false; port->partner = NULL; @@ -4447,14 +4447,14 @@ static int tcpm_src_attach(struct tcpm_port *port) return 0; -out_disable_vconn: - tcpm_set_vconn(port, false); -out_disable_pd: - if (port->pd_supported) - port->tcpc->set_pd_rx(port->tcpc, false); out_disable_mux: tcpm_mux_set(port, TYPEC_STATE_SAFE, USB_ROLE_NONE, TYPEC_ORIENTATION_NONE); +out_disable_vbus: + tcpm_set_vbus(port, false); +out_disable_vconn: + tcpm_set_vconn(port, false); + return ret; } diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b784aab66867..4397392bfef0 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2797,7 +2797,7 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, void (*recycle_done)(struct virtqueue *vq)) { struct vring_virtqueue *vq = to_vvq(_vq); - int err; + int err, err_reset; if (num > vq->vq.num_max) return -E2BIG; @@ -2819,7 +2819,11 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, else err = virtqueue_resize_split(_vq, num); - return virtqueue_enable_after_reset(_vq); + err_reset = virtqueue_enable_after_reset(_vq); + if (err_reset) + return err_reset; + + return err; } EXPORT_SYMBOL_GPL(virtqueue_resize); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index e51e7d88980a..1d847a939f29 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = { .kill_sb = kill_anon_super, }; -static struct inode *anon_inode_make_secure_inode( - const char *name, - const struct inode *context_inode) +/** + * anon_inode_make_secure_inode - allocate an anonymous inode with security context + * @sb: [in] Superblock to allocate from + * @name: [in] Name of the class of the newfile (e.g., "secretmem") + * @context_inode: + * [in] Optional parent inode for security inheritance + * + * The function ensures proper security initialization through the LSM hook + * security_inode_init_security_anon(). + * + * Return: Pointer to new inode on success, ERR_PTR on failure. + */ +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode) { struct inode *inode; int error; - inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + inode = alloc_anon_inode(sb); if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; @@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode( } return inode; } +EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, @@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name, return ERR_PTR(-ENOENT); if (make_inode) { - inode = anon_inode_make_secure_inode(name, context_inode); + inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb, + name, context_inode); if (IS_ERR(inode)) { file = ERR_CAST(inode); goto err; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index b228a5a64479..66de46318620 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1406,6 +1406,9 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite : BCH_DATA_free; struct printbuf buf = PRINTBUF; + unsigned fsck_flags = (async_repair ? FSCK_ERR_NO_LOG : 0)| + FSCK_CAN_FIX|FSCK_CAN_IGNORE; + struct bpos bucket = iter->pos; bucket.offset &= ~(~0ULL << 56); u64 genbits = iter->pos.offset & (~0ULL << 56); @@ -1419,9 +1422,10 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite return ret; if (!bch2_dev_bucket_exists(c, bucket)) { - if (fsck_err(trans, need_discard_freespace_key_to_invalid_dev_bucket, - "entry in %s btree for nonexistant dev:bucket %llu:%llu", - bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) + if (__fsck_err(trans, fsck_flags, + need_discard_freespace_key_to_invalid_dev_bucket, + "entry in %s btree for nonexistant dev:bucket %llu:%llu", + bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) goto delete; ret = 1; goto out; @@ -1433,7 +1437,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite if (a->data_type != state || (state == BCH_DATA_free && genbits != alloc_freespace_genbits(*a))) { - if (fsck_err(trans, need_discard_freespace_key_bad, + if (__fsck_err(trans, fsck_flags, + need_discard_freespace_key_bad, "%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), bch2_btree_id_str(iter->btree_id), diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index e76809e71858..77d93beb3c8f 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -353,7 +353,7 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, return ret ? bkey_s_c_err(ret) : bkey_s_c_null; } else { struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit); - if (b == ERR_PTR(bch_err_throw(c, backpointer_to_overwritten_btree_node))) + if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node)) return bkey_s_c_null; if (IS_ERR_OR_NULL(b)) return ((struct bkey_s_c) { .k = ERR_CAST(b) }); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5a1cede2febf..ddfacad0f70c 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -767,7 +767,8 @@ struct btree_trans_buf { x(sysfs) \ x(btree_write_buffer) \ x(btree_node_scrub) \ - x(async_recovery_passes) + x(async_recovery_passes) \ + x(ioctl_data) enum bch_write_ref { #define x(n) BCH_WRITE_REF_##n, @@ -862,9 +863,7 @@ struct bch_fs { DARRAY(enum bcachefs_metadata_version) incompat_versions_requested; -#ifdef CONFIG_UNICODE struct unicode_map *cf_encoding; -#endif struct bch_sb_handle disk_sb; @@ -1284,4 +1283,13 @@ static inline bool bch2_discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca : ca->mi.discard; } +static inline bool bch2_fs_casefold_enabled(struct bch_fs *c) +{ +#ifdef CONFIG_UNICODE + return !c->opts.casefold_disabled; +#else + return false; +#endif +} + #endif /* _BCACHEFS_H */ diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 91e0aa796e6b..83c9860e6b82 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -85,7 +85,7 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) six_unlock_intent(&b->c.lock); } -static void __btree_node_data_free(struct btree_cache *bc, struct btree *b) +void __btree_node_data_free(struct btree *b) { BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); @@ -112,16 +112,17 @@ static void __btree_node_data_free(struct btree_cache *bc, struct btree *b) munmap(b->aux_data, btree_aux_data_bytes(b)); #endif b->aux_data = NULL; - - btree_node_to_freedlist(bc, b); } static void btree_node_data_free(struct btree_cache *bc, struct btree *b) { BUG_ON(list_empty(&b->list)); list_del_init(&b->list); + + __btree_node_data_free(b); + --bc->nr_freeable; - __btree_node_data_free(bc, b); + btree_node_to_freedlist(bc, b); } static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, @@ -185,10 +186,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) { - struct btree_cache *bc = &c->btree_cache; - struct btree *b; - - b = __btree_node_mem_alloc(c, GFP_KERNEL); + struct btree *b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) return NULL; @@ -198,8 +196,6 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) } bch2_btree_lock_init(&b->c, 0, GFP_KERNEL); - - __bch2_btree_node_to_freelist(bc, b); return b; } @@ -524,7 +520,8 @@ restart: --touched;; } else if (!btree_node_reclaim(c, b)) { __bch2_btree_node_hash_remove(bc, b); - __btree_node_data_free(bc, b); + __btree_node_data_free(b); + btree_node_to_freedlist(bc, b); freed++; bc->nr_freed++; @@ -652,9 +649,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bch2_recalc_btree_reserve(c); - for (i = 0; i < bc->nr_reserve; i++) - if (!__bch2_btree_node_mem_alloc(c)) + for (i = 0; i < bc->nr_reserve; i++) { + struct btree *b = __bch2_btree_node_mem_alloc(c); + if (!b) goto err; + __bch2_btree_node_to_freelist(bc, b); + } list_splice_init(&bc->live[0].list, &bc->freeable); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index ca3c1b145330..be275f87a60e 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -30,6 +30,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsig void bch2_btree_cache_cannibalize_unlock(struct btree_trans *); int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *); +void __btree_node_data_free(struct btree *); struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index e92cf3928c63..bac108e93823 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -503,8 +503,14 @@ again: prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + /* + * XXX: we're not passing the trans object here because we're not set up + * to handle a transaction restart - this code needs to be rewritten + * when we start doing online topology repair + */ + bch2_trans_unlock_long(trans); if (mustfix_fsck_err_on(!have_child, - trans, btree_node_topology_interior_node_empty, + c, btree_node_topology_interior_node_empty, "empty interior btree node at %s", buf.buf)) ret = DROP_THIS_NODE; err: @@ -528,32 +534,39 @@ fsck_err: return ret; } -static int bch2_check_root(struct btree_trans *trans, enum btree_id i, +static int bch2_check_root(struct btree_trans *trans, enum btree_id btree, bool *reconstructed_root) { struct bch_fs *c = trans->c; - struct btree_root *r = bch2_btree_id_root(c, i); + struct btree_root *r = bch2_btree_id_root(c, btree); struct printbuf buf = PRINTBUF; int ret = 0; - bch2_btree_id_to_text(&buf, i); + bch2_btree_id_to_text(&buf, btree); if (r->error) { bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); - r->alive = false; - r->error = 0; + ret = bch2_btree_has_scanned_nodes(c, btree); + if (ret < 0) + goto err; - if (!bch2_btree_has_scanned_nodes(c, i)) { + if (!ret) { __fsck_err(trans, - FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), + FSCK_CAN_FIX|(!btree_id_important(btree) ? FSCK_AUTOFIX : 0), btree_root_unreadable_and_scan_found_nothing, "no nodes found for btree %s, continue?", buf.buf); - bch2_btree_root_alloc_fake_trans(trans, i, 0); + + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 0); } else { - bch2_btree_root_alloc_fake_trans(trans, i, 1); - bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 1); + + bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX); if (ret) goto err; } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index d8f3c4c65e90..a4cc72986e36 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -557,7 +557,9 @@ static int __btree_err(int ret, const char *fmt, ...) { if (c->recovery.curr_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes) - return bch_err_throw(c, fsck_fix); + return ret == -BCH_ERR_btree_node_read_err_fixable + ? bch_err_throw(c, fsck_fix) + : ret; bool have_retry = false; int ret2; @@ -566,9 +568,9 @@ static int __btree_err(int ret, bch2_mark_btree_validate_failure(failed, ca->dev_idx); struct extent_ptr_decoded pick; - have_retry = !bch2_bkey_pick_read_device(c, + have_retry = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), - failed, &pick, -1); + failed, &pick, -1) == 1; } if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) @@ -613,7 +615,6 @@ static int __btree_err(int ret, goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); - ret = __bch2_topology_error(c, &out); break; } @@ -642,7 +643,6 @@ static int __btree_err(int ret, goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); - ret = __bch2_topology_error(c, &out); break; } print: @@ -723,12 +723,11 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, - unsigned offset, unsigned sectors, int write, + unsigned offset, int write, struct bch_io_failures *failed, struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); - unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; int ret = 0; @@ -778,15 +777,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_node_unsupported_version, "BSET_SEPARATE_WHITEOUTS no longer supported"); - if (!write && - btree_err_on(offset + sectors > (ptr_written ?: btree_sectors(c)), - -BCH_ERR_btree_node_read_err_fixable, - c, ca, b, i, NULL, - bset_past_end_of_btree_node, - "bset past end of btree node (offset %u len %u but written %zu)", - offset, sectors, ptr_written ?: btree_sectors(c))) - i->u64s = 0; - btree_err_on(offset && !i->u64s, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, @@ -1151,6 +1141,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, "unknown checksum type %llu", BSET_CSUM_TYPE(i)); if (first) { + sectors = vstruct_sectors(b->data, c->block_bits); + if (btree_err_on(b->written + sectors > (ptr_written ?: btree_sectors(c)), + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, NULL, + bset_past_end_of_btree_node, + "bset past end of btree node (offset %u len %u but written %zu)", + b->written, sectors, ptr_written ?: btree_sectors(c))) + i->u64s = 0; if (good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); bool csum_bad = bch2_crc_cmp(b->data->csum, csum); @@ -1178,9 +1176,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, c, NULL, b, NULL, NULL, btree_node_unsupported_version, "btree node does not have NEW_EXTENT_OVERWRITE set"); - - sectors = vstruct_sectors(b->data, c->block_bits); } else { + sectors = vstruct_sectors(bne, c->block_bits); + if (btree_err_on(b->written + sectors > (ptr_written ?: btree_sectors(c)), + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, NULL, + bset_past_end_of_btree_node, + "bset past end of btree node (offset %u len %u but written %zu)", + b->written, sectors, ptr_written ?: btree_sectors(c))) + i->u64s = 0; if (good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); bool csum_bad = bch2_crc_cmp(bne->csum, csum); @@ -1201,14 +1205,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, "decrypting btree node: %s", bch2_err_str(ret))) goto fsck_err; } - - sectors = vstruct_sectors(bne, c->block_bits); } b->version_ondisk = min(b->version_ondisk, le16_to_cpu(i->version)); - ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg); + ret = validate_bset(c, ca, b, i, b->written, READ, failed, err_msg); if (ret) goto fsck_err; @@ -1333,15 +1335,42 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); - scoped_guard(rcu) - bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { - struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); - - if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { - set_btree_node_need_rewrite(b); - set_btree_node_need_rewrite_degraded(b); + /* + * XXX: + * + * We deadlock if too many btree updates require node rewrites while + * we're still in journal replay. + * + * This is because btree node rewrites generate more updates for the + * interior updates (alloc, backpointers), and if those updates touch + * new nodes and generate more rewrites - well, you see the problem. + * + * The biggest cause is that we don't use the btree write buffer (for + * the backpointer updates - this needs some real thought on locking in + * order to fix. + * + * The problem with this workaround (not doing the rewrite for degraded + * nodes in journal replay) is that those degraded nodes persist, and we + * don't want that (this is a real bug when a btree node write completes + * with fewer replicas than we wanted and leaves a degraded node due to + * device _removal_, i.e. the device went away mid write). + * + * It's less of a bug here, but still a problem because we don't yet + * have a way of tracking degraded data - we another index (all + * extents/btree nodes, by replicas entry) in order to fix properly + * (re-replicate degraded data at the earliest possible time). + */ + if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) { + scoped_guard(rcu) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { + struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); + + if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { + set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_degraded(b); + } } - } + } if (!ptr_written) { set_btree_node_need_rewrite(b); @@ -1377,7 +1406,7 @@ static void btree_node_read_work(struct work_struct *work) ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), &failed, &rb->pick, -1); - if (ret) { + if (ret <= 0) { set_btree_node_read_error(b); break; } @@ -1982,28 +2011,12 @@ static void btree_node_scrub_work(struct work_struct *work) prt_newline(&err); if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) { - struct btree_trans *trans = bch2_trans_get(c); - - struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, scrub->btree, - scrub->key.k->k.p, 0, scrub->level - 1, 0); - - struct btree *b; - int ret = lockrestart_do(trans, - PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(trans, &iter))); - if (ret) - goto err; - - if (bkey_i_to_btree_ptr_v2(&b->key)->v.seq == scrub->seq) { - bch_err(c, "error validating btree node during scrub on %s at btree %s", - scrub->ca->name, err.buf); - - ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0); - } -err: - bch2_trans_iter_exit(trans, &iter); - bch2_trans_begin(trans); - bch2_trans_put(trans); + int ret = bch2_trans_do(c, + bch2_btree_node_rewrite_key(trans, scrub->btree, scrub->level - 1, + scrub->key.k, 0)); + if (!bch2_err_matches(ret, ENOENT) && + !bch2_err_matches(ret, EROFS)) + bch_err_fn_ratelimited(c, ret); } printbuf_exit(&err); @@ -2267,7 +2280,7 @@ static void btree_node_write_endio(struct bio *bio) } static int validate_bset_for_write(struct bch_fs *c, struct btree *b, - struct bset *i, unsigned sectors) + struct bset *i) { int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), (struct bkey_validate_context) { @@ -2282,7 +2295,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, } ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?: - validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL); + validate_bset(c, NULL, b, i, b->written, WRITE, NULL, NULL); if (ret) { bch2_inconsistent_error(c); dump_stack(); @@ -2475,7 +2488,7 @@ do_write: /* if we're going to be encrypting, check metadata validity first: */ if (validate_before_checksum && - validate_bset_for_write(c, b, i, sectors_to_write)) + validate_bset_for_write(c, b, i)) goto err; ret = bset_encrypt(c, i, b->written << 9); @@ -2492,7 +2505,7 @@ do_write: /* if we're not encrypting, check metadata after checksumming: */ if (!validate_before_checksum && - validate_bset_for_write(c, b, i, sectors_to_write)) + validate_bset_for_write(c, b, i)) goto err; /* diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index b78403376c07..f8829b667ad3 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2076,14 +2076,14 @@ inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter static noinline void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c *k) + struct bpos search_key, struct bkey_s_c *k) { struct bpos end = path_l(btree_iter_path(trans, iter))->b->data->min_key; trans_for_each_update(trans, i) if (!i->key_cache_already_flushed && i->btree_id == iter->btree_id && - bpos_le(i->k->k.p, iter->pos) && + bpos_le(i->k->k.p, search_key) && bpos_ge(i->k->k.p, k->k ? k->k->p : end)) { iter->k = i->k->k; *k = bkey_i_to_s_c(i->k); @@ -2092,6 +2092,7 @@ void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_ static noinline void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); @@ -2100,7 +2101,7 @@ void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter trans_for_each_update(trans, i) if (!i->key_cache_already_flushed && i->btree_id == iter->btree_id && - bpos_ge(i->k->k.p, path->pos) && + bpos_ge(i->k->k.p, search_key) && bpos_le(i->k->k.p, k->k ? k->k->p : end)) { iter->k = i->k->k; *k = bkey_i_to_s_c(i->k); @@ -2122,13 +2123,14 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_pos, struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); return bch2_journal_keys_peek_max(trans->c, iter->btree_id, path->level, - path->pos, + search_pos, end_pos, &iter->journal_idx); } @@ -2138,7 +2140,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, struct btree_iter *iter) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos); + struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos); if (k) { iter->k = k->k; @@ -2151,11 +2153,12 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, static noinline void btree_trans_peek_journal(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); struct bkey_i *next_journal = - bch2_btree_journal_peek(trans, iter, + bch2_btree_journal_peek(trans, iter, search_key, k->k ? k->k->p : path_l(path)->b->key.k.p); if (next_journal) { iter->k = next_journal->k; @@ -2165,13 +2168,14 @@ void btree_trans_peek_journal(struct btree_trans *trans, static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); return bch2_journal_keys_peek_prev_min(trans->c, iter->btree_id, path->level, - path->pos, + search_key, end_pos, &iter->journal_idx); } @@ -2179,12 +2183,13 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, static noinline void btree_trans_peek_prev_journal(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); struct bkey_i *next_journal = - bch2_btree_journal_peek_prev(trans, iter, - k->k ? k->k->p : path_l(path)->b->key.k.p); + bch2_btree_journal_peek_prev(trans, iter, search_key, + k->k ? k->k->p : path_l(path)->b->data->min_key); if (next_journal) { iter->k = next_journal->k; @@ -2292,11 +2297,11 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct } if (unlikely(iter->flags & BTREE_ITER_with_journal)) - btree_trans_peek_journal(trans, iter, &k); + btree_trans_peek_journal(trans, iter, search_key, &k); if (unlikely((iter->flags & BTREE_ITER_with_updates) && trans->nr_updates)) - bch2_btree_trans_peek_updates(trans, iter, &k); + bch2_btree_trans_peek_updates(trans, iter, search_key, &k); if (k.k && bkey_deleted(k.k)) { /* @@ -2326,6 +2331,20 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct } bch2_btree_iter_verify(trans, iter); + + if (trace___btree_iter_peek_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace___btree_iter_peek(trans->c, buf.buf); + } + return k; } @@ -2484,6 +2503,19 @@ out_no_locked: bch2_btree_iter_verify_entry_exit(iter); + if (trace_btree_iter_peek_max_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace_btree_iter_peek_max(trans->c, buf.buf); + } + return k; end: bch2_btree_iter_set_pos(trans, iter, end); @@ -2557,11 +2589,11 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st } if (unlikely(iter->flags & BTREE_ITER_with_journal)) - btree_trans_peek_prev_journal(trans, iter, &k); + btree_trans_peek_prev_journal(trans, iter, search_key, &k); if (unlikely((iter->flags & BTREE_ITER_with_updates) && trans->nr_updates)) - bch2_btree_trans_peek_prev_updates(trans, iter, &k); + bch2_btree_trans_peek_prev_updates(trans, iter, search_key, &k); if (likely(k.k && !bkey_deleted(k.k))) { break; @@ -2724,6 +2756,19 @@ out_no_locked: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(trans, iter); + + if (trace_btree_iter_peek_prev_min_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace_btree_iter_peek_prev_min(trans->c, buf.buf); + } return k; end: bch2_btree_iter_set_pos(trans, iter, end); @@ -2767,8 +2812,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_is_extents) && unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { - if (iter->pos.inode == KEY_INODE_MAX) - return bkey_s_c_null; + if (iter->pos.inode == KEY_INODE_MAX) { + k = bkey_s_c_null; + goto out2; + } bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos)); } @@ -2785,8 +2832,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre } struct btree_path *path = btree_iter_path(trans, iter); - if (unlikely(!btree_path_node(path, path->level))) - return bkey_s_c_null; + if (unlikely(!btree_path_node(path, path->level))) { + k = bkey_s_c_null; + goto out2; + } btree_path_set_should_be_locked(trans, path); @@ -2879,7 +2928,20 @@ out: bch2_btree_iter_verify(trans, iter); ret = bch2_btree_iter_verify_ret(trans, iter, k); if (unlikely(ret)) - return bkey_s_c_err(ret); + k = bkey_s_c_err(ret); +out2: + if (trace_btree_iter_peek_slot_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace_btree_iter_peek_slot(trans->c, buf.buf); + } return k; } @@ -3132,6 +3194,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) { #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n", + BTREE_TRANS_MEM_MAX); + bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace); bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); @@ -3159,46 +3225,32 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long mutex_unlock(&s->lock); } - if (trans->used_mempool) { - if (trans->mem_bytes >= new_bytes) - goto out_change_top; - - /* No more space from mempool item, need malloc new one */ - new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); - if (unlikely(!new_mem)) { - bch2_trans_unlock(trans); - - new_mem = kmalloc(new_bytes, GFP_KERNEL); - if (!new_mem) - return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) { + EBUG_ON(trans->mem_bytes >= new_bytes); + return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + } - ret = bch2_trans_relock(trans); - if (ret) { - kfree(new_mem); - return ERR_PTR(ret); - } - } - memcpy(new_mem, trans->mem, trans->mem_top); - trans->used_mempool = false; - mempool_free(trans->mem, &c->btree_trans_mem_pool); - goto out_new_mem; + if (old_bytes) { + trans->realloc_bytes_required = new_bytes; + trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); + return ERR_PTR(btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); } - new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); + EBUG_ON(trans->mem); + + new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); if (unlikely(!new_mem)) { bch2_trans_unlock(trans); - new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL); + new_mem = kmalloc(new_bytes, GFP_KERNEL); if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); new_bytes = BTREE_TRANS_MEM_MAX; - memcpy(new_mem, trans->mem, trans->mem_top); trans->used_mempool = true; - kfree(trans->mem); } - if (!new_mem) - return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + EBUG_ON(!new_mem); trans->mem = new_mem; trans->mem_bytes = new_bytes; @@ -3207,18 +3259,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long if (ret) return ERR_PTR(ret); } -out_new_mem: + trans->mem = new_mem; trans->mem_bytes = new_bytes; - if (old_bytes) { - trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); - return ERR_PTR(btree_trans_restart_ip(trans, - BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); - } -out_change_top: - bch2_trans_kmalloc_trace(trans, size, ip); - p = trans->mem + trans->mem_top; trans->mem_top += size; memset(p, 0, size); @@ -3279,6 +3323,27 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->restart_count++; trans->mem_top = 0; + if (trans->restarted == BCH_ERR_transaction_restart_mem_realloced) { + EBUG_ON(!trans->mem || !trans->mem_bytes); + unsigned new_bytes = trans->realloc_bytes_required; + void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); + if (unlikely(!new_mem)) { + bch2_trans_unlock(trans); + new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL); + + EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX); + + if (!new_mem) { + new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); + new_bytes = BTREE_TRANS_MEM_MAX; + trans->used_mempool = true; + kfree(trans->mem); + } + } + trans->mem = new_mem; + trans->mem_bytes = new_bytes; + } + trans_for_each_path(trans, path, i) { path->should_be_locked = false; diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index cf7398751644..ea839560a136 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -137,12 +137,15 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b struct journal_key *k; BUG_ON(*idx > keys->nr); + + if (!keys->nr) + return NULL; search: if (!*idx) *idx = __bch2_journal_key_search(keys, btree_id, level, pos); - while (*idx && - __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { + while (*idx < keys->nr && + __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) { (*idx)++; iters++; if (iters == 10) { @@ -151,18 +154,23 @@ search: } } + if (*idx == keys->nr) + --(*idx); + struct bkey_i *ret = NULL; rcu_read_lock(); /* for overwritten_ranges */ - while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { + while (true) { + k = idx_to_key(keys, *idx); if (__journal_key_cmp(btree_id, level, end_pos, k) > 0) break; if (k->overwritten) { if (k->overwritten_range) - *idx = rcu_dereference(k->overwritten_range)->start - 1; - else - *idx -= 1; + *idx = rcu_dereference(k->overwritten_range)->start; + if (!*idx) + break; + --(*idx); continue; } @@ -171,6 +179,8 @@ search: break; } + if (!*idx) + break; --(*idx); iters++; if (iters == 10) { @@ -641,10 +651,11 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) { const struct journal_key *l = _l; const struct journal_key *r = _r; + int rewind = l->rewind && r->rewind ? -1 : 1; return journal_key_cmp(l, r) ?: - cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->journal_offset, r->journal_offset); + ((cmp_int(l->journal_seq, r->journal_seq) ?: + cmp_int(l->journal_offset, r->journal_offset)) * rewind); } void bch2_journal_keys_put(struct bch_fs *c) @@ -713,6 +724,8 @@ int bch2_journal_keys_sort(struct bch_fs *c) struct journal_keys *keys = &c->journal_keys; size_t nr_read = 0; + u64 rewind_seq = c->opts.journal_rewind ?: U64_MAX; + genradix_for_each(&c->journal_entries, iter, _i) { i = *_i; @@ -721,28 +734,43 @@ int bch2_journal_keys_sort(struct bch_fs *c) cond_resched(); - for_each_jset_key(k, entry, &i->j) { - struct journal_key n = (struct journal_key) { - .btree_id = entry->btree_id, - .level = entry->level, - .k = k, - .journal_seq = le64_to_cpu(i->j.seq), - .journal_offset = k->_data - i->j._data, - }; - - if (darray_push(keys, n)) { - __journal_keys_sort(keys); - - if (keys->nr * 8 > keys->size * 7) { - bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu", - keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq)); - return bch_err_throw(c, ENOMEM_journal_keys_sort); + vstruct_for_each(&i->j, entry) { + bool rewind = !entry->level && + !btree_id_is_alloc(entry->btree_id) && + le64_to_cpu(i->j.seq) >= rewind_seq; + + if (entry->type != (rewind + ? BCH_JSET_ENTRY_overwrite + : BCH_JSET_ENTRY_btree_keys)) + continue; + + if (!rewind && le64_to_cpu(i->j.seq) < c->journal_replay_seq_start) + continue; + + jset_entry_for_each_key(entry, k) { + struct journal_key n = (struct journal_key) { + .btree_id = entry->btree_id, + .level = entry->level, + .rewind = rewind, + .k = k, + .journal_seq = le64_to_cpu(i->j.seq), + .journal_offset = k->_data - i->j._data, + }; + + if (darray_push(keys, n)) { + __journal_keys_sort(keys); + + if (keys->nr * 8 > keys->size * 7) { + bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu", + keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq)); + return bch_err_throw(c, ENOMEM_journal_keys_sort); + } + + BUG_ON(darray_push(keys, n)); } - BUG_ON(darray_push(keys, n)); + nr_read++; } - - nr_read++; } } diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h index 8b773823704f..86aacb254fb2 100644 --- a/fs/bcachefs/btree_journal_iter_types.h +++ b/fs/bcachefs/btree_journal_iter_types.h @@ -11,8 +11,9 @@ struct journal_key { u32 journal_offset; enum btree_id btree_id:8; unsigned level:8; - bool allocated; - bool overwritten; + bool allocated:1; + bool overwritten:1; + bool rewind:1; struct journal_key_range_overwritten __rcu * overwritten_range; struct bkey_i *k; diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 91a51aef82f1..bed2b4b6ffb9 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -771,7 +771,7 @@ static inline void __bch2_trans_unlock(struct btree_trans *trans) } static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, struct btree_path *path, - struct get_locks_fail *f, bool trace) + struct get_locks_fail *f, bool trace, ulong ip) { if (!trace) goto out; @@ -796,7 +796,7 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); } - trace_trans_restart_relock(trans, _RET_IP_, buf.buf); + trace_trans_restart_relock(trans, ip, buf.buf); printbuf_exit(&buf); } @@ -806,7 +806,7 @@ out: bch2_trans_verify_locks(trans); } -static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) +static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace, ulong ip) { bch2_trans_verify_locks(trans); @@ -825,7 +825,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) if (path->should_be_locked && (ret = btree_path_get_locks(trans, path, false, &f, BCH_ERR_transaction_restart_relock))) { - bch2_trans_relock_fail(trans, path, &f, trace); + bch2_trans_relock_fail(trans, path, &f, trace, ip); return ret; } } @@ -838,12 +838,12 @@ out: int bch2_trans_relock(struct btree_trans *trans) { - return __bch2_trans_relock(trans, true); + return __bch2_trans_relock(trans, true, _RET_IP_); } int bch2_trans_relock_notrace(struct btree_trans *trans) { - return __bch2_trans_relock(trans, false); + return __bch2_trans_relock(trans, false, _RET_IP_); } void bch2_trans_unlock(struct btree_trans *trans) diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index a35847734a60..42c9eb2c786e 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -75,39 +75,6 @@ static inline u64 bkey_journal_seq(struct bkey_s_c k) } } -static bool found_btree_node_is_readable(struct btree_trans *trans, - struct found_btree_node *f) -{ - struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp; - - found_btree_node_to_key(&tmp.k, f); - - struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false); - bool ret = !IS_ERR_OR_NULL(b); - if (!ret) - return ret; - - f->sectors_written = b->written; - f->journal_seq = le64_to_cpu(b->data->keys.journal_seq); - - struct bkey_s_c k; - struct bkey unpacked; - struct btree_node_iter iter; - for_each_btree_node_key_unpack(b, k, &iter, &unpacked) - f->journal_seq = max(f->journal_seq, bkey_journal_seq(k)); - - six_unlock_read(&b->c.lock); - - /* - * We might update this node's range; if that happens, we need the node - * to be re-read so the read path can trim keys that are no longer in - * this node - */ - if (b != btree_node_root(trans->c, b)) - bch2_btree_node_evict(trans, &tmp.k); - return ret; -} - static int found_btree_node_cmp_cookie(const void *_l, const void *_r) { const struct found_btree_node *l = _l; @@ -159,17 +126,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = { }; static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, - struct bio *bio, struct btree_node *bn, u64 offset) + struct btree *b, struct bio *bio, u64 offset) { struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes); + struct btree_node *bn = b->data; bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); bio->bi_iter.bi_sector = offset; - bch2_bio_map(bio, bn, PAGE_SIZE); + bch2_bio_map(bio, b->data, c->opts.block_size); u64 submit_time = local_clock(); submit_bio_wait(bio); - bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); if (bio->bi_status) { @@ -201,6 +168,14 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX) return; + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = offset; + bch2_bio_map(bio, b->data, c->opts.btree_node_size); + + submit_time = local_clock(); + submit_bio_wait(bio); + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); + rcu_read_lock(); struct found_btree_node n = { .btree_id = BTREE_NODE_ID(bn), @@ -217,7 +192,20 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, }; rcu_read_unlock(); - if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) { + found_btree_node_to_key(&b->key, &n); + + CLASS(printbuf, buf)(); + if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) { + /* read_done will swap out b->data for another buffer */ + bn = b->data; + /* + * Grab journal_seq here because we want the max journal_seq of + * any bset; read_done sorts down to a single set and picks the + * max journal_seq + */ + n.journal_seq = le64_to_cpu(bn->keys.journal_seq), + n.sectors_written = b->written; + mutex_lock(&f->lock); if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) { bch_err(c, "try_read_btree_node() can't handle endian conversion"); @@ -237,12 +225,20 @@ static int read_btree_nodes_worker(void *p) struct find_btree_nodes_worker *w = p; struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes); struct bch_dev *ca = w->ca; - void *buf = (void *) __get_free_page(GFP_KERNEL); - struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL); unsigned long last_print = jiffies; + struct btree *b = NULL; + struct bio *bio = NULL; + + b = __bch2_btree_node_mem_alloc(c); + if (!b) { + bch_err(c, "read_btree_nodes_worker: error allocating buf"); + w->f->ret = -ENOMEM; + goto err; + } - if (!buf || !bio) { - bch_err(c, "read_btree_nodes_worker: error allocating bio/buf"); + bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL); + if (!bio) { + bch_err(c, "read_btree_nodes_worker: error allocating bio"); w->f->ret = -ENOMEM; goto err; } @@ -266,11 +262,13 @@ static int read_btree_nodes_worker(void *p) !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c))) continue; - try_read_btree_node(w->f, ca, bio, buf, sector); + try_read_btree_node(w->f, ca, b, bio, sector); } err: + if (b) + __btree_node_data_free(b); + kfree(b); bio_put(bio); - free_page((unsigned long) buf); enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); closure_put(w->cl); kfree(w); @@ -521,8 +519,12 @@ bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b) return false; } -bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) +int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) { + int ret = bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); + if (ret) + return ret; + struct found_btree_node search = { .btree_id = btree, .level = 0, diff --git a/fs/bcachefs/btree_node_scan.h b/fs/bcachefs/btree_node_scan.h index 08687b209787..66e6f9ed19d0 100644 --- a/fs/bcachefs/btree_node_scan.h +++ b/fs/bcachefs/btree_node_scan.h @@ -4,7 +4,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *); bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *); -bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id); +int bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id); int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos); void bch2_find_btree_nodes_exit(struct find_btree_nodes *); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index d9710801e3ee..639ef75b3dbd 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -595,12 +595,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, int ret = 0; bch2_trans_verify_not_unlocked_or_in_restart(trans); - +#if 0 + /* todo: bring back dynamic fault injection */ if (race_fault()) { trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); } - +#endif /* * Check if the insert will fit in the leaf node with the write lock * held, otherwise another thread could write the node changing the @@ -757,6 +758,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, btree_trans_journal_entries_start(trans), trans->journal_entries.u64s); + EBUG_ON(trans->journal_res.u64s < trans->journal_entries.u64s); + trans->journal_res.offset += trans->journal_entries.u64s; trans->journal_res.u64s -= trans->journal_entries.u64s; @@ -1003,6 +1006,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) { struct btree_insert_entry *errored_at = NULL; struct bch_fs *c = trans->c; + unsigned journal_u64s = 0; int ret = 0; bch2_trans_verify_not_unlocked_or_in_restart(trans); @@ -1031,10 +1035,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); - trans->journal_u64s = trans->journal_entries.u64s + jset_u64s(trans->accounting.u64s); + journal_u64s = jset_u64s(trans->accounting.u64s); trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); if (trans->journal_transaction_names) - trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); + journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); trans_for_each_update(trans, i) { struct btree_path *path = trans->paths + i->path; @@ -1054,11 +1058,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) continue; /* we're going to journal the key being updated: */ - trans->journal_u64s += jset_u64s(i->k->k.u64s); + journal_u64s += jset_u64s(i->k->k.u64s); /* and we're also going to log the overwrite: */ if (trans->journal_transaction_names) - trans->journal_u64s += jset_u64s(i->old_k.u64s); + journal_u64s += jset_u64s(i->old_k.u64s); } if (trans->extra_disk_res) { @@ -1076,6 +1080,8 @@ retry: memset(&trans->journal_res, 0, sizeof(trans->journal_res)); memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); + trans->journal_u64s = journal_u64s + trans->journal_entries.u64s; + ret = do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_); /* make sure we didn't drop or screw up locks: */ diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 3aa4a602bd02..112170fd9c8f 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -497,6 +497,7 @@ struct btree_trans { void *mem; unsigned mem_top; unsigned mem_bytes; + unsigned realloc_bytes_required; #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE darray_trans_kmalloc_trace trans_kmalloc_trace; #endif diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index e97e78c10f49..ee657b9f4b96 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -549,20 +549,26 @@ void *__bch2_trans_subbuf_alloc(struct btree_trans *trans, unsigned u64s) { unsigned new_top = buf->u64s + u64s; - unsigned old_size = buf->size; + unsigned new_size = buf->size; - if (new_top > buf->size) - buf->size = roundup_pow_of_two(new_top); + BUG_ON(roundup_pow_of_two(new_top) > U16_MAX); - void *n = bch2_trans_kmalloc_nomemzero(trans, buf->size * sizeof(u64)); + if (new_top > new_size) + new_size = roundup_pow_of_two(new_top); + + void *n = bch2_trans_kmalloc_nomemzero(trans, new_size * sizeof(u64)); if (IS_ERR(n)) return n; + unsigned offset = (u64 *) n - (u64 *) trans->mem; + BUG_ON(offset > U16_MAX); + if (buf->u64s) memcpy(n, btree_trans_subbuf_base(trans, buf), - old_size * sizeof(u64)); + buf->size * sizeof(u64)); buf->base = (u64 *) n - (u64 *) trans->mem; + buf->size = new_size; void *p = btree_trans_subbuf_top(trans, buf); buf->u64s = new_top; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 9feef1dc4de5..0b98ab959719 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -170,8 +170,7 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *); -int bch2_btree_write_buffer_insert_err(struct btree_trans *, - enum btree_id, struct bkey_i *); +int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bkey_i *); static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, @@ -182,7 +181,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); if (unlikely(!btree_type_uses_write_buffer(btree))) { - int ret = bch2_btree_write_buffer_insert_err(trans, btree, k); + int ret = bch2_btree_write_buffer_insert_err(trans->c, btree, k); dump_stack(); return ret; } diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index e77584607f0d..553059b33bfd 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1287,10 +1287,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, do { ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl); - + if (!bch2_err_matches(ret, BCH_ERR_operation_blocked)) + break; bch2_trans_unlock(trans); bch2_wait_on_allocator(c, &cl); - } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); + } while (1); } if (ret) { @@ -2293,9 +2294,9 @@ err: goto out; } -static int bch2_btree_node_rewrite_key(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_i *k, unsigned flags) +int bch2_btree_node_rewrite_key(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_i *k, unsigned flags) { struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, @@ -2367,9 +2368,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work) int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans, a->btree_id, a->level, a->key.k, 0)); - if (ret != -ENOENT && - !bch2_err_matches(ret, EROFS) && - ret != -BCH_ERR_journal_shutdown) + if (!bch2_err_matches(ret, ENOENT) && + !bch2_err_matches(ret, EROFS)) bch_err_fn_ratelimited(c, ret); spin_lock(&c->btree_node_rewrites_lock); diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index b649c36c3fbb..ac04e45a8515 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -176,6 +176,9 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, struct btree *, unsigned, unsigned); +int bch2_btree_node_rewrite_key(struct btree_trans *, + enum btree_id, unsigned, + struct bkey_i *, unsigned); int bch2_btree_node_rewrite_pos(struct btree_trans *, enum btree_id, unsigned, struct bpos, unsigned, unsigned); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 90b21e61d2b6..4b095235a0d2 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -267,10 +267,9 @@ out: BUG_ON(wb->sorted.size < wb->flushing.keys.nr); } -int bch2_btree_write_buffer_insert_err(struct btree_trans *trans, +int bch2_btree_write_buffer_insert_err(struct bch_fs *c, enum btree_id btree, struct bkey_i *k) { - struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; prt_printf(&buf, "attempting to do write buffer update on non wb btree="); @@ -332,7 +331,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; if (unlikely(!btree_type_uses_write_buffer(k->btree))) { - ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k); + ret = bch2_btree_write_buffer_insert_err(trans->c, k->btree, &k->k); goto err; } @@ -676,6 +675,9 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, goto err; bch2_bkey_buf_copy(last_flushed, c, tmp.k); + + /* can we avoid the unconditional restart? */ + trace_and_count(c, trans_restart_write_buffer_flush, trans, _RET_IP_); ret = bch_err_throw(c, transaction_restart_write_buffer_flush); } err: diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index 05f56fd1eed0..c351d21aca0b 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -89,6 +89,12 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c, struct journal_keys_to_wb *dst, enum btree_id btree, struct bkey_i *k) { + if (unlikely(!btree_type_uses_write_buffer(btree))) { + int ret = bch2_btree_write_buffer_insert_err(c, btree, k); + dump_stack(); + return ret; + } + EBUG_ON(!dst->seq); return k->k.type == KEY_TYPE_accounting diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index fde3c2380e28..5ea89aa2b0c4 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -319,6 +319,7 @@ static int bch2_data_thread(void *arg) ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done; ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done; } + enumerated_ref_put(&ctx->c->writes, BCH_WRITE_REF_ioctl_data); return 0; } @@ -378,15 +379,24 @@ static long bch2_ioctl_data(struct bch_fs *c, struct bch_data_ctx *ctx; int ret; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_ioctl_data)) + return -EROFS; - if (arg.op >= BCH_DATA_OP_NR || arg.flags) - return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) { + ret = -EPERM; + goto put_ref; + } + + if (arg.op >= BCH_DATA_OP_NR || arg.flags) { + ret = -EINVAL; + goto put_ref; + } ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + if (!ctx) { + ret = -ENOMEM; + goto put_ref; + } ctx->c = c; ctx->arg = arg; @@ -395,7 +405,12 @@ static long bch2_ioctl_data(struct bch_fs *c, &bcachefs_data_ops, bch2_data_thread); if (ret < 0) - kfree(ctx); + goto cleanup; + return ret; +cleanup: + kfree(ctx); +put_ref: + enumerated_ref_put(&c->writes, BCH_WRITE_REF_ioctl_data); return ret; } diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 5f1174348974..e848e210a9bf 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -249,6 +249,7 @@ static int data_update_invalid_bkey(struct data_update *m, bch2_bkey_val_to_text(&buf, c, k); prt_str(&buf, "\nnew: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + prt_newline(&buf); bch2_fs_emergency_read_only2(c, &buf); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 901f643ead83..07c2a0f73cc2 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -153,8 +153,6 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) c->verify_data = __bch2_btree_node_mem_alloc(c); if (!c->verify_data) goto out; - - list_del_init(&c->verify_data->list); } BUG_ON(b->nsets != 1); @@ -586,6 +584,8 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, i->ubuf = buf; i->size = size; i->ret = 0; + + int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); restart: seqmutex_lock(&c->btree_trans_lock); list_sort(&c->btree_trans_list, list_ptr_order_cmp); @@ -599,6 +599,11 @@ restart: if (!closure_get_not_zero(&trans->ref)) continue; + if (!trans->srcu_held) { + closure_put(&trans->ref); + continue; + } + u32 seq = seqmutex_unlock(&c->btree_trans_lock); bch2_btree_trans_to_text(&i->buf, trans); @@ -620,6 +625,8 @@ restart: } seqmutex_unlock(&c->btree_trans_lock); unlocked: + srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + if (i->buf.allocation_failure) ret = -ENOMEM; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 300f7cc8abdf..a18d0f78704d 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -18,7 +18,9 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, { *out_cf = (struct qstr) QSTR_INIT(NULL, 0); -#ifdef CONFIG_UNICODE + if (!bch2_fs_casefold_enabled(trans->c)) + return -EOPNOTSUPP; + unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1); int ret = PTR_ERR_OR_ZERO(buf); if (ret) @@ -30,9 +32,6 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, *out_cf = (struct qstr) QSTR_INIT(buf, ret); return 0; -#else - return -EOPNOTSUPP; -#endif } static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) @@ -231,7 +230,8 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type)); } -int bch2_dirent_init_name(struct bkey_i_dirent *dirent, +int bch2_dirent_init_name(struct bch_fs *c, + struct bkey_i_dirent *dirent, const struct bch_hash_info *hash_info, const struct qstr *name, const struct qstr *cf_name) @@ -251,7 +251,9 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent, offsetof(struct bch_dirent, d_name) - name->len); } else { -#ifdef CONFIG_UNICODE + if (!bch2_fs_casefold_enabled(c)) + return -EOPNOTSUPP; + memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len]; @@ -277,9 +279,6 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent, dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len); EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len); -#else - return -EOPNOTSUPP; -#endif } unsigned u64s = dirent_val_u64s(name->len, cf_len); @@ -313,7 +312,7 @@ struct bkey_i_dirent *bch2_dirent_create_key(struct btree_trans *trans, dirent->v.d_type = type; dirent->v.d_unused = 0; - int ret = bch2_dirent_init_name(dirent, hash_info, name, cf_name); + int ret = bch2_dirent_init_name(trans->c, dirent, hash_info, name, cf_name); if (ret) return ERR_PTR(ret); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 70fb0b581221..1e17199cc5c7 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -59,7 +59,8 @@ static inline void dirent_copy_target(struct bkey_i_dirent *dst, dst->v.d_type = src.v->d_type; } -int bch2_dirent_init_name(struct bkey_i_dirent *, +int bch2_dirent_init_name(struct bch_fs *, + struct bkey_i_dirent *, const struct bch_hash_info *, const struct qstr *, const struct qstr *); diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index ac3264134a15..acc3b7b67704 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -137,7 +137,6 @@ x(BCH_ERR_transaction_restart, transaction_restart_relock) \ x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \ x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \ - x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \ x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \ x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \ x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \ @@ -148,11 +147,8 @@ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\ x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\ x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \ - x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \ - x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\ - x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \ x(BCH_ERR_transaction_restart, transaction_restart_split_race) \ x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \ x(BCH_ERR_transaction_restart, transaction_restart_nested) \ @@ -241,7 +237,6 @@ x(BCH_ERR_journal_res_blocked, journal_buf_enomem) \ x(BCH_ERR_journal_res_blocked, journal_stuck) \ x(BCH_ERR_journal_res_blocked, journal_retry_open) \ - x(BCH_ERR_journal_res_blocked, journal_preres_get_blocked) \ x(BCH_ERR_journal_res_blocked, bucket_alloc_blocked) \ x(BCH_ERR_journal_res_blocked, stripe_alloc_blocked) \ x(BCH_ERR_invalid, invalid_sb) \ @@ -287,7 +282,6 @@ x(EIO, sb_not_downgraded) \ x(EIO, btree_node_write_all_failed) \ x(EIO, btree_node_read_error) \ - x(EIO, btree_node_read_validate_error) \ x(EIO, btree_need_topology_repair) \ x(EIO, bucket_ref_update) \ x(EIO, trigger_alloc) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index a8ec6aae5738..267e73d9d7e6 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -103,7 +103,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) return bch_err_throw(c, btree_need_topology_repair); } else { return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?: - bch_err_throw(c, btree_node_read_validate_error); + bch_err_throw(c, btree_need_topology_repair); } } @@ -621,7 +621,9 @@ print: if (s) s->ret = ret; - if (trans) + if (trans && + !(flags & FSCK_ERR_NO_LOG) && + ret == -BCH_ERR_fsck_fix) ret = bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret; err_unlock: mutex_unlock(&c->fsck_error_msgs_lock); @@ -631,7 +633,9 @@ err: * log_fsck_err()s: that would require us to track for every error type * which recovery pass corrects it, to get the fsck exit status correct: */ - if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + /* nothing */ + } else if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) { set_bit(BCH_FS_errors_fixed, &c->flags); } else { set_bit(BCH_FS_errors_not_fixed, &c->flags); diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index b899ee75f5b9..e76e58a568bf 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -139,6 +139,17 @@ int bch2_extent_trim_atomic(struct btree_trans *trans, if (ret) return ret; - bch2_cut_back(end, k); + /* tracepoint */ + + if (bpos_lt(end, k->k.p)) { + if (trace_extent_trim_atomic_enabled()) { + CLASS(printbuf, buf)(); + bch2_bpos_to_text(&buf, end); + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k)); + trace_extent_trim_atomic(trans->c, buf.buf); + } + bch2_cut_back(end, k); + } return 0; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 036e4ad95987..83cbd77dcb9c 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -50,19 +50,17 @@ void bch2_io_failures_to_text(struct printbuf *out, struct bch_io_failures *failed) { static const char * const error_types[] = { - "io", "checksum", "ec reconstruct", NULL + "btree validate", "io", "checksum", "ec reconstruct", NULL }; for (struct bch_dev_io_failures *f = failed->devs; f < failed->devs + failed->nr; f++) { unsigned errflags = - ((!!f->failed_io) << 0) | - ((!!f->failed_csum_nr) << 1) | - ((!!f->failed_ec) << 2); - - if (!errflags) - continue; + ((!!f->failed_btree_validate) << 0) | + ((!!f->failed_io) << 1) | + ((!!f->failed_csum_nr) << 2) | + ((!!f->failed_ec) << 3); bch2_printbuf_make_room(out, 1024); out->atomic++; @@ -77,7 +75,9 @@ void bch2_io_failures_to_text(struct printbuf *out, prt_char(out, ' '); - if (is_power_of_2(errflags)) { + if (!errflags) { + prt_str(out, "no error - confused"); + } else if (is_power_of_2(errflags)) { prt_bitflags(out, error_types, errflags); prt_str(out, " error"); } else { diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3063a8ddc2df..e54e4f255b22 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -722,7 +722,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, if (IS_ERR(inode)) inode = NULL; -#ifdef CONFIG_UNICODE if (!inode && IS_CASEFOLDED(vdir)) { /* * Do not cache a negative dentry in casefolded directories @@ -737,7 +736,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, */ return NULL; } -#endif return d_splice_alias(&inode->v, dentry); } @@ -1732,7 +1730,8 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); - return ret; + + return bch2_err_class(ret); } static const struct file_operations bch_file_operations = { @@ -2565,9 +2564,10 @@ got_sb: sb->s_shrink->seeks = 0; #ifdef CONFIG_UNICODE - sb->s_encoding = c->cf_encoding; -#endif + if (bch2_fs_casefold_enabled(c)) + sb->s_encoding = c->cf_encoding; generic_set_sb_d_ops(sb); +#endif vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 68ed69a255e1..856eb2b41896 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -12,6 +12,7 @@ #include "fs.h" #include "fsck.h" #include "inode.h" +#include "io_misc.h" #include "keylist.h" #include "namei.h" #include "recovery_passes.h" @@ -327,7 +328,8 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode) (inode->bi_flags & BCH_INODE_has_child_snapshot)) return false; - return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked); + return !bch2_inode_has_backpointer(inode) && + !(inode->bi_flags & BCH_INODE_unlinked); } static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 snapshot) @@ -372,6 +374,18 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (inode->bi_subvol) { inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; + struct btree_iter subvol_iter; + struct bkey_i_subvolume *subvol = + bch2_bkey_get_mut_typed(trans, &subvol_iter, + BTREE_ID_subvolumes, POS(0, inode->bi_subvol), + 0, subvolume); + ret = PTR_ERR_OR_ZERO(subvol); + if (ret) + return ret; + + subvol->v.fs_path_parent = BCACHEFS_ROOT_SUBVOL; + bch2_trans_iter_exit(trans, &subvol_iter); + u64 root_inum; ret = subvol_lookup(trans, inode->bi_parent_subvol, &dirent_snapshot, &root_inum); @@ -387,6 +401,8 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (ret) return ret; + bch_verbose(c, "got lostfound inum %llu", lostfound.bi_inum); + lostfound.bi_nlink += S_ISDIR(inode->bi_mode); /* ensure lost+found inode is also present in inode snapshot */ @@ -423,6 +439,16 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (ret) return ret; + { + CLASS(printbuf, buf)(); + ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, + inode->bi_snapshot, NULL, &buf); + if (ret) + return ret; + + bch_info(c, "reattached at %s", buf.buf); + } + /* * Fix up inodes in child snapshots: if they should also be reattached * update the backpointer field, if they should not be we need to emit @@ -490,13 +516,21 @@ static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, static int remove_backpointer(struct btree_trans *trans, struct bch_inode_unpacked *inode) { - if (!inode->bi_dir) + if (!bch2_inode_has_backpointer(inode)) return 0; + u32 snapshot = inode->bi_snapshot; + + if (inode->bi_parent_subvol) { + int ret = bch2_subvolume_get_snapshot(trans, inode->bi_parent_subvol, &snapshot); + if (ret) + return ret; + } + struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter, - SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot)); + SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot)); int ret = bkey_err(d) ?: dirent_points_to_inode(c, d, inode) ?: bch2_fsck_remove_dirent(trans, d.k->p); @@ -695,14 +729,8 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, u32 id, u32 ancestor) { - ssize_t i; - EBUG_ON(id > ancestor); - /* @ancestor should be the snapshot most recently added to @seen */ - EBUG_ON(ancestor != seen->pos.snapshot); - EBUG_ON(ancestor != darray_last(seen->ids)); - if (id == ancestor) return true; @@ -718,11 +746,8 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see * numerically, since snapshot ID lists are kept sorted, so if we find * an id that's an ancestor of @id we're done: */ - - for (i = seen->ids.nr - 2; - i >= 0 && seen->ids.data[i] >= id; - --i) - if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i])) + darray_for_each_reverse(seen->ids, i) + if (*i != ancestor && bch2_snapshot_is_ancestor(c, id, *i)) return false; return true; @@ -806,7 +831,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w, if (!n->whiteout) { return bch2_inode_unpack(inode, &n->inode); } else { - n->inode.bi_inum = inode.k->p.inode; + n->inode.bi_inum = inode.k->p.offset; n->inode.bi_snapshot = inode.k->p.snapshot; return 0; } @@ -903,17 +928,15 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - struct bch_inode_unpacked new = i->inode; - struct bkey_i whiteout; - - new.bi_snapshot = k.k->p.snapshot; - if (!i->whiteout) { + struct bch_inode_unpacked new = i->inode; + new.bi_snapshot = k.k->p.snapshot; ret = __bch2_fsck_write_inode(trans, &new); } else { + struct bkey_i whiteout; bkey_init(&whiteout.k); whiteout.k.type = KEY_TYPE_whiteout; - whiteout.k.p = SPOS(0, i->inode.bi_inum, i->inode.bi_snapshot); + whiteout.k.p = SPOS(0, i->inode.bi_inum, k.k->p.snapshot); ret = bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, &whiteout, BTREE_UPDATE_internal_snapshot_node); @@ -1135,13 +1158,14 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; - if (u.bi_dir || u.bi_dir_offset) { + if (bch2_inode_has_backpointer(&u)) { ret = check_inode_dirent_inode(trans, &u, &do_update); if (ret) goto err; } - if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked), + if (fsck_err_on(bch2_inode_has_backpointer(&u) && + (u.bi_flags & BCH_INODE_unlinked), trans, inode_unlinked_but_has_dirent, "inode unlinked but has dirent\n%s", (printbuf_reset(&buf), @@ -1438,6 +1462,7 @@ static int check_key_has_inode(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; + struct btree_iter iter2 = {}; int ret = PTR_ERR_OR_ZERO(i); if (ret) return ret; @@ -1447,40 +1472,105 @@ static int check_key_has_inode(struct btree_trans *trans, bool have_inode = i && !i->whiteout; - if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { - ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?: - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - if (ret) - goto err; + if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) + goto reconstruct; - inode->last_pos.inode--; - ret = bch_err_throw(c, transaction_restart_nested); - goto err; + if (have_inode && btree_matches_i_mode(iter->btree_id, i->inode.bi_mode)) + goto out; + + prt_printf(&buf, ", "); + + bool have_old_inode = false; + darray_for_each(inode->inodes, i2) + if (!i2->whiteout && + bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i2->inode.bi_snapshot) && + btree_matches_i_mode(iter->btree_id, i2->inode.bi_mode)) { + prt_printf(&buf, "but found good inode in older snapshot\n"); + bch2_inode_unpacked_to_text(&buf, &i2->inode); + prt_newline(&buf); + have_old_inode = true; + break; + } + + struct bkey_s_c k2; + unsigned nr_keys = 0; + + prt_printf(&buf, "found keys:\n"); + + for_each_btree_key_max_norestart(trans, iter2, iter->btree_id, + SPOS(k.k->p.inode, 0, k.k->p.snapshot), + POS(k.k->p.inode, U64_MAX), + 0, k2, ret) { + nr_keys++; + if (nr_keys <= 10) { + bch2_bkey_val_to_text(&buf, c, k2); + prt_newline(&buf); + } + if (nr_keys >= 100) + break; } - if (fsck_err_on(!have_inode, - trans, key_in_missing_inode, - "key in missing inode:\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - goto delete; + if (ret) + goto err; - if (fsck_err_on(have_inode && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), - trans, key_in_wrong_inode_type, - "key for wrong inode mode %o:\n%s", - i->inode.bi_mode, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - goto delete; + if (nr_keys > 100) + prt_printf(&buf, "found > %u keys for this missing inode\n", nr_keys); + else if (nr_keys > 10) + prt_printf(&buf, "found %u keys for this missing inode\n", nr_keys); + + if (!have_inode) { + if (fsck_err_on(!have_inode, + trans, key_in_missing_inode, + "key in missing inode%s", buf.buf)) { + /* + * Maybe a deletion that raced with data move, or something + * weird like that? But if we know the inode was deleted, or + * it's just a few keys, we can safely delete them. + * + * If it's many keys, we should probably recreate the inode + */ + if (have_old_inode || nr_keys <= 2) + goto delete; + else + goto reconstruct; + } + } else { + /* + * not autofix, this one would be a giant wtf - bit error in the + * inode corrupting i_mode? + * + * may want to try repairing inode instead of deleting + */ + if (fsck_err_on(!btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), + trans, key_in_wrong_inode_type, + "key for wrong inode mode %o%s", + i->inode.bi_mode, buf.buf)) + goto delete; + } out: err: fsck_err: + bch2_trans_iter_exit(trans, &iter2); printbuf_exit(&buf); bch_err_fn(c, ret); return ret; delete: + /* + * XXX: print out more info + * count up extents for this inode, check if we have different inode in + * an older snapshot version, perhaps decide if we want to reconstitute + */ ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node); goto out; +reconstruct: + ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto err; + + inode->last_pos.inode--; + ret = bch_err_throw(c, transaction_restart_nested); + goto out; } static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) @@ -1822,20 +1912,19 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot)) continue; - if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && + u64 last_block = round_up(i->inode.bi_size, block_bytes(c)) >> 9; + + if (fsck_err_on(k.k->p.offset > last_block && !bkey_extent_is_reservation(k), trans, extent_past_end_of_inode, "extent type past end of inode %llu:%u, i_size %llu\n%s", i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - struct btree_iter iter2; - - bch2_trans_copy_iter(trans, &iter2, iter); - bch2_btree_iter_set_snapshot(trans, &iter2, i->inode.bi_snapshot); - ret = bch2_btree_iter_traverse(trans, &iter2) ?: - bch2_btree_delete_at(trans, &iter2, - BTREE_UPDATE_internal_snapshot_node); - bch2_trans_iter_exit(trans, &iter2); + ret = bch2_fpunch_snapshot(trans, + SPOS(i->inode.bi_inum, + last_block, + i->inode.bi_snapshot), + POS(i->inode.bi_inum, U64_MAX)); if (ret) goto err; @@ -1949,14 +2038,22 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ continue; } - if (fsck_err_on(i->inode.bi_nlink != i->count, - trans, inode_dir_wrong_nlink, - "directory %llu:%u with wrong i_nlink: got %u, should be %llu", - w->last_pos.inode, i->inode.bi_snapshot, i->inode.bi_nlink, i->count)) { - i->inode.bi_nlink = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode); - if (ret) - break; + if (i->inode.bi_nlink != i->count) { + CLASS(printbuf, buf)(); + + lockrestart_do(trans, + bch2_inum_snapshot_to_path(trans, w->last_pos.inode, + i->inode.bi_snapshot, NULL, &buf)); + + if (fsck_err_on(i->inode.bi_nlink != i->count, + trans, inode_dir_wrong_nlink, + "directory with wrong i_nlink: got %u, should be %llu\n%s", + i->inode.bi_nlink, i->count, buf.buf)) { + i->inode.bi_nlink = i->count; + ret = bch2_fsck_write_inode(trans, &i->inode); + if (ret) + break; + } } } fsck_err: @@ -2184,9 +2281,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &i->inode); dir->first_this_inode = false; -#ifdef CONFIG_UNICODE hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL; -#endif ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k, need_second_pass); @@ -2493,6 +2588,11 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, if (k.k->type != KEY_TYPE_subvolume) return 0; + subvol_inum start = { + .subvol = k.k->p.offset, + .inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode), + }; + while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { ret = darray_push(&subvol_path, k.k->p.offset); if (ret) @@ -2511,11 +2611,11 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, if (darray_u32_has(&subvol_path, parent)) { printbuf_reset(&buf); - prt_printf(&buf, "subvolume loop:\n"); + prt_printf(&buf, "subvolume loop: "); - darray_for_each_reverse(subvol_path, i) - prt_printf(&buf, "%u ", *i); - prt_printf(&buf, "%u", parent); + ret = bch2_inum_to_path(trans, start, &buf); + if (ret) + goto err; if (fsck_err(trans, subvol_loop, "%s", buf.buf)) ret = reattach_subvol(trans, s); @@ -2559,19 +2659,13 @@ int bch2_check_subvolume_structure(struct bch_fs *c) return ret; } -struct pathbuf_entry { - u64 inum; - u32 snapshot; -}; - -typedef DARRAY(struct pathbuf_entry) pathbuf; - -static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p, +static int bch2_bi_depth_renumber_one(struct btree_trans *trans, + u64 inum, u32 snapshot, u32 new_depth) { struct btree_iter iter; struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, - SPOS(0, p->inum, p->snapshot), 0); + SPOS(0, inum, snapshot), 0); struct bch_inode_unpacked inode; int ret = bkey_err(k) ?: @@ -2590,14 +2684,15 @@ err: return ret; } -static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth) +static int bch2_bi_depth_renumber(struct btree_trans *trans, darray_u64 *path, + u32 snapshot, u32 new_bi_depth) { u32 restart_count = trans->restart_count; int ret = 0; darray_for_each_reverse(*path, i) { ret = nested_lockrestart_do(trans, - bch2_bi_depth_renumber_one(trans, i, new_bi_depth)); + bch2_bi_depth_renumber_one(trans, *i, snapshot, new_bi_depth)); bch_err_fn(trans->c, ret); if (ret) break; @@ -2608,37 +2703,36 @@ static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 return ret ?: trans_was_restarted(trans, restart_count); } -static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) -{ - darray_for_each(*p, i) - if (i->inum == inum && - i->snapshot == snapshot) - return true; - return false; -} - static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) { struct bch_fs *c = trans->c; struct btree_iter inode_iter = {}; - pathbuf path = {}; + darray_u64 path = {}; struct printbuf buf = PRINTBUF; u32 snapshot = inode_k.k->p.snapshot; bool redo_bi_depth = false; u32 min_bi_depth = U32_MAX; int ret = 0; + struct bpos start = inode_k.k->p; + struct bch_inode_unpacked inode; ret = bch2_inode_unpack(inode_k, &inode); if (ret) return ret; - while (!inode.bi_subvol) { + /* + * If we're running full fsck, check_dirents() will have already ran, + * and we shouldn't see any missing backpointers here - otherwise that's + * handled separately, by check_unreachable_inodes + */ + while (!inode.bi_subvol && + bch2_inode_has_backpointer(&inode)) { struct btree_iter dirent_iter; struct bkey_s_c_dirent d; - u32 parent_snapshot = snapshot; - d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); + d = dirent_get_by_pos(trans, &dirent_iter, + SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot)); ret = bkey_err(d.s_c); if (ret && !bch2_err_matches(ret, ENOENT)) goto out; @@ -2656,15 +2750,10 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) bch2_trans_iter_exit(trans, &dirent_iter); - ret = darray_push(&path, ((struct pathbuf_entry) { - .inum = inode.bi_inum, - .snapshot = snapshot, - })); + ret = darray_push(&path, inode.bi_inum); if (ret) return ret; - snapshot = parent_snapshot; - bch2_trans_iter_exit(trans, &inode_iter); inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, SPOS(0, inode.bi_dir, snapshot), 0); @@ -2686,21 +2775,28 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) break; inode = parent_inode; - snapshot = inode_k.k->p.snapshot; redo_bi_depth = true; - if (path_is_dup(&path, inode.bi_inum, snapshot)) { + if (darray_find(path, inode.bi_inum)) { printbuf_reset(&buf); - prt_printf(&buf, "directory structure loop:\n"); - darray_for_each_reverse(path, i) - prt_printf(&buf, "%llu:%u ", i->inum, i->snapshot); - prt_printf(&buf, "%llu:%u", inode.bi_inum, snapshot); + prt_printf(&buf, "directory structure loop in snapshot %u: ", + snapshot); + + ret = bch2_inum_snapshot_to_path(trans, start.offset, start.snapshot, NULL, &buf); + if (ret) + goto out; + + if (c->opts.verbose) { + prt_newline(&buf); + darray_for_each(path, i) + prt_printf(&buf, "%llu ", *i); + } if (fsck_err(trans, dir_loop, "%s", buf.buf)) { ret = remove_backpointer(trans, &inode); bch_err_msg(c, ret, "removing dirent"); if (ret) - break; + goto out; ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); @@ -2714,7 +2810,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) min_bi_depth = 0; if (redo_bi_depth) - ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth); + ret = bch2_bi_depth_renumber(trans, &path, snapshot, min_bi_depth); out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); @@ -2731,7 +2827,7 @@ fsck_err: int bch2_check_directory_structure(struct bch_fs *c) { int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, + for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_intent| BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 53e5dc1f6ac1..ef4cc7395b86 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1265,7 +1265,14 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, { struct bch_fs *c = trans->c; -#ifdef CONFIG_UNICODE +#ifndef CONFIG_UNICODE + bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE"); + return -EOPNOTSUPP; +#endif + + if (c->opts.casefold_disabled) + return -EOPNOTSUPP; + int ret = 0; /* Not supported on individual files. */ if (!S_ISDIR(bi->bi_mode)) @@ -1289,10 +1296,6 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, bi->bi_fields_set |= BIT(Inode_opt_casefold); return bch2_maybe_propagate_has_case_insensitive(trans, inum, bi); -#else - bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE"); - return -EOPNOTSUPP; -#endif } static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 82cec2836cbd..b8ec3e628d90 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -254,6 +254,11 @@ static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_ : c->opts.casefold; } +static inline bool bch2_inode_has_backpointer(const struct bch_inode_unpacked *bi) +{ + return bi->bi_dir || bi->bi_dir_offset; +} + /* i_nlink: */ static inline unsigned nlink_bias(umode_t mode) diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index bf72b1d2e2cb..07023667a475 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -135,6 +135,33 @@ err_noprint: return ret; } +/* For fsck */ +int bch2_fpunch_snapshot(struct btree_trans *trans, struct bpos start, struct bpos end) +{ + u32 restart_count = trans->restart_count; + struct bch_fs *c = trans->c; + struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); + unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); + struct bkey_i delete; + + int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents, + start, end, 0, k, + &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + bkey_init(&delete.k); + delete.k.p = iter.pos; + + /* create the biggest key we can */ + bch2_key_resize(&delete.k, max_sectors); + bch2_cut_back(end, &delete); + + bch2_extent_trim_atomic(trans, &iter, &delete) ?: + bch2_trans_update(trans, &iter, &delete, 0); + })); + + bch2_disk_reservation_put(c, &disk_res); + return ret ?: trans_was_restarted(trans, restart_count); +} + /* * Returns -BCH_ERR_transacton_restart if we had to drop locks: */ diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h index 9cb44a7c43c1..b93e4d4b3c0c 100644 --- a/fs/bcachefs/io_misc.h +++ b/fs/bcachefs/io_misc.h @@ -5,6 +5,8 @@ int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, u64, struct bch_io_opts, s64 *, struct write_point_specifier); + +int bch2_fpunch_snapshot(struct btree_trans *, struct bpos, struct bpos); int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, subvol_inum, u64, s64 *); int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 04bbdcf58e40..cd184b219a65 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -1491,7 +1491,12 @@ void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) prt_printf(out, "have_ioref:\t%u\n", rbio->have_ioref); prt_printf(out, "narrow_crcs:\t%u\n", rbio->narrow_crcs); prt_printf(out, "context:\t%u\n", rbio->context); - prt_printf(out, "ret:\t%s\n", bch2_err_str(rbio->ret)); + + int ret = READ_ONCE(rbio->ret); + if (ret < 0) + prt_printf(out, "ret:\t%s\n", bch2_err_str(ret)); + else + prt_printf(out, "ret:\t%i\n", ret); prt_printf(out, "flags:\t"); bch2_prt_bitflags(out, bch2_read_bio_flags, rbio->flags); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index dda802a656cf..f22b05e02c1e 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1283,7 +1283,7 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca ret = 0; /* wait and retry */ bch2_disk_reservation_put(c, &disk_res); - closure_sync(&cl); + bch2_wait_on_allocator(c, &cl); } return ret; @@ -1474,14 +1474,13 @@ void bch2_fs_journal_stop(struct journal *j) clear_bit(JOURNAL_running, &j->flags); } -int bch2_fs_journal_start(struct journal *j, u64 cur_seq) +int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_entry_pin_list *p; struct journal_replay *i, **_i; struct genradix_iter iter; bool had_entries = false; - u64 last_seq = cur_seq, nr, seq; /* * @@ -1495,17 +1494,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) return -EINVAL; } - genradix_for_each_reverse(&c->journal_entries, iter, _i) { - i = *_i; - - if (journal_replay_ignore(i)) - continue; + /* Clean filesystem? */ + if (!last_seq) + last_seq = cur_seq; - last_seq = le64_to_cpu(i->j.last_seq); - break; - } - - nr = cur_seq - last_seq; + u64 nr = cur_seq - last_seq; /* * Extra fudge factor, in case we crashed when the journal pin fifo was @@ -1532,6 +1525,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->pin.back = cur_seq; atomic64_set(&j->seq, cur_seq - 1); + u64 seq; fifo_for_each_entry_ptr(p, &j->pin, seq) journal_pin_list_init(p, 1); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 83734fe4331f..977907038d98 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -453,7 +453,7 @@ int bch2_fs_journal_alloc(struct bch_fs *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); void bch2_fs_journal_stop(struct journal *); -int bch2_fs_journal_start(struct journal *, u64); +int bch2_fs_journal_start(struct journal *, u64, u64); void bch2_journal_set_replay_done(struct journal *); void bch2_dev_journal_exit(struct bch_dev *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 0b15d71a8d2d..dd3f3434c1b0 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -160,6 +160,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, struct printbuf buf = PRINTBUF; int ret = JOURNAL_ENTRY_ADD_OK; + if (last_seq && c->opts.journal_rewind) + last_seq = min(last_seq, c->opts.journal_rewind); + if (!c->journal.oldest_seq_found_ondisk || le64_to_cpu(j->seq) < c->journal.oldest_seq_found_ondisk) c->journal.oldest_seq_found_ondisk = le64_to_cpu(j->seq); @@ -1430,11 +1433,21 @@ int bch2_journal_read(struct bch_fs *c, printbuf_reset(&buf); prt_printf(&buf, "journal read done, replaying entries %llu-%llu", *last_seq, *blacklist_seq - 1); + + /* + * Drop blacklisted entries and entries older than last_seq (or start of + * journal rewind: + */ + u64 drop_before = *last_seq; + if (c->opts.journal_rewind) { + drop_before = min(drop_before, c->opts.journal_rewind); + prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); + } + + *last_seq = drop_before; if (*start_seq != *blacklist_seq) prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); bch_info(c, "%s", buf.buf); - - /* Drop blacklisted entries and entries older than last_seq: */ genradix_for_each(&c->journal_entries, radix_iter, _i) { i = *_i; @@ -1442,7 +1455,7 @@ int bch2_journal_read(struct bch_fs *c, continue; seq = le64_to_cpu(i->j.seq); - if (seq < *last_seq) { + if (seq < drop_before) { journal_replay_free(c, i, false); continue; } @@ -1455,7 +1468,7 @@ int bch2_journal_read(struct bch_fs *c, } } - ret = bch2_journal_check_for_missing(c, *last_seq, *blacklist_seq - 1); + ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1); if (ret) goto err; @@ -1703,9 +1716,10 @@ static CLOSURE_CALLBACK(journal_write_done) bch2_log_msg_start(c, &buf); if (err == -BCH_ERR_journal_write_err) - prt_printf(&buf, "unable to write journal to sufficient devices"); + prt_printf(&buf, "unable to write journal to sufficient devices\n"); else - prt_printf(&buf, "journal write error marking replicas: %s", bch2_err_str(err)); + prt_printf(&buf, "journal write error marking replicas: %s\n", + bch2_err_str(err)); bch2_fs_emergency_read_only2(c, &buf); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index cd6201741c59..0042d43b8e57 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -170,6 +170,12 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne return (struct journal_space) { 0, 0 }; /* + * It's possible for bucket size to be misaligned w.r.t. the filesystem + * block size: + */ + min_bucket_size = round_down(min_bucket_size, block_sectors(c)); + + /* * We sorted largest to smallest, and we want the smallest out of the * @nr_devs_want largest devices: */ diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 779c22eb3979..c3f87c59922d 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -625,14 +625,26 @@ static int __bch2_inum_to_path(struct btree_trans *trans, { unsigned orig_pos = path->pos; int ret = 0; + DARRAY(subvol_inum) inums = {}; + + if (!snapshot) { + ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot); + if (ret) + goto disconnected; + } while (true) { - if (!snapshot) { - ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot); - if (ret) - goto disconnected; + subvol_inum n = (subvol_inum) { subvol ?: snapshot, inum }; + + if (darray_find_p(inums, i, i->subvol == n.subvol && i->inum == n.inum)) { + prt_str_reversed(path, "(loop)"); + break; } + ret = darray_push(&inums, n); + if (ret) + goto err; + struct bch_inode_unpacked inode; ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0); if (ret) @@ -650,7 +662,9 @@ static int __bch2_inum_to_path(struct btree_trans *trans, inum = inode.bi_dir; if (inode.bi_parent_subvol) { subvol = inode.bi_parent_subvol; - snapshot = 0; + ret = bch2_subvolume_get_snapshot(trans, inode.bi_parent_subvol, &snapshot); + if (ret) + goto disconnected; } struct btree_iter d_iter; @@ -662,6 +676,7 @@ static int __bch2_inum_to_path(struct btree_trans *trans, goto disconnected; struct qstr dirent_name = bch2_dirent_get_name(d); + prt_bytes_reversed(path, dirent_name.name, dirent_name.len); prt_char(path, '/'); @@ -677,8 +692,10 @@ out: goto err; reverse_bytes(path->buf + orig_pos, path->pos - orig_pos); + darray_exit(&inums); return 0; err: + darray_exit(&inums); return ret; disconnected: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -717,8 +734,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, if (inode_points_to_dirent(target, d)) return 0; - if (!target->bi_dir && - !target->bi_dir_offset) { + if (!bch2_inode_has_backpointer(target)) { fsck_err_on(S_ISDIR(target->bi_mode), trans, inode_dir_missing_backpointer, "directory with missing backpointer\n%s", diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 2a02606254b3..63f8e254495c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -234,6 +234,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_CASEFOLD, false, \ NULL, "Dirent lookups are casefolded") \ + x(casefold_disabled, u8, \ + OPT_FS|OPT_MOUNT, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, false, \ + NULL, "Disable casefolding filesystem wide") \ x(inodes_32bit, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ @@ -379,6 +384,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Exit recovery immediately prior to journal replay")\ + x(journal_rewind, u64, \ + OPT_FS|OPT_MOUNT, \ + OPT_UINT(0, U64_MAX), \ + BCH2_NO_SB_OPT, 0, \ + NULL, "Rewind journal") \ x(recovery_passes, u64, \ OPT_FS|OPT_MOUNT, \ OPT_BITFIELD(bch2_recovery_passes), \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0b21fa6ff062..c94debb12d2f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -273,24 +273,35 @@ static int bch2_journal_replay_key(struct btree_trans *trans, goto out; struct btree_path *path = btree_iter_path(trans, &iter); - if (unlikely(!btree_path_node(path, k->level) && - !k->allocated)) { + if (unlikely(!btree_path_node(path, k->level))) { struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + prt_str(&buf, "btree="); + bch2_btree_id_to_text(&buf, k->btree_id); + prt_printf(&buf, " level=%u ", k->level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k)); + if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { - bch_err(c, "have key in journal replay for btree depth that does not exist, confused"); + bch_err(c, "have key in journal replay for btree depth that does not exist, confused\n%s", + buf.buf); ret = -EINVAL; } -#if 0 + + if (!k->allocated) { + bch_notice(c, "dropping key in journal replay for depth that does not exist because we're recovering from scan\n%s", + buf.buf); + k->overwritten = true; + goto out; + } + bch2_trans_iter_exit(trans, &iter); bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: -BCH_ERR_transaction_restart_nested; -#endif - k->overwritten = true; goto out; } @@ -607,6 +618,7 @@ static int read_btree_roots(struct bch_fs *c) buf.buf, bch2_err_str(ret))) { if (btree_id_is_alloc(i)) r->error = 0; + ret = 0; } } @@ -692,7 +704,7 @@ static bool check_version_upgrade(struct bch_fs *c) ret = true; } - if (new_version > c->sb.version_incompat && + if (new_version > c->sb.version_incompat_allowed && c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { struct printbuf buf = PRINTBUF; @@ -757,6 +769,21 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.nochanges) c->opts.read_only = true; + if (c->opts.journal_rewind) { + bch_info(c, "rewinding journal, fsck required"); + c->opts.fsck = true; + } + + if (go_rw_in_recovery(c)) { + /* + * start workqueues/kworkers early - kthread creation checks for + * pending signals, which is _very_ annoying + */ + ret = bch2_fs_init_rw(c); + if (ret) + goto err; + } + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; @@ -965,7 +992,7 @@ use_clean: ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu", journal_seq, last_seq, blacklist_seq - 1) ?: - bch2_fs_journal_start(&c->journal, journal_seq); + bch2_fs_journal_start(&c->journal, last_seq, journal_seq); if (ret) goto err; @@ -1126,7 +1153,7 @@ fsck_err: struct printbuf buf = PRINTBUF; bch2_log_msg_start(c, &buf); - prt_printf(&buf, "error in recovery: %s", bch2_err_str(ret)); + prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret)); bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); @@ -1181,7 +1208,7 @@ int bch2_fs_initialize(struct bch_fs *c) * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: */ - ret = bch2_fs_journal_start(&c->journal, 1); + ret = bch2_fs_journal_start(&c->journal, 1, 1); if (ret) goto err; diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 35ac0d64d73a..6a039e011064 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -217,11 +217,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c) set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr || - !c->opts.read_only || - !c->sb.clean || - c->opts.recovery_passes || - (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))) { + if (go_rw_in_recovery(c)) { if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) { bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate"); bch2_reconstruct_alloc(c); @@ -317,6 +313,9 @@ static bool recovery_pass_needs_set(struct bch_fs *c, */ bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent); + bool rewind = in_recovery && + r->curr_pass > pass && + !(r->passes_complete & BIT_ULL(pass)); if (persistent ? !(c->sb.recovery_passes_required & BIT_ULL(pass)) @@ -327,6 +326,9 @@ static bool recovery_pass_needs_set(struct bch_fs *c, (r->passes_ratelimiting & BIT_ULL(pass))) return true; + if (rewind) + return true; + return false; } @@ -341,7 +343,6 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, struct bch_fs_recovery *r = &c->recovery; int ret = 0; - lockdep_assert_held(&c->sb_lock); bch2_printbuf_make_room(out, 1024); @@ -359,7 +360,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, !(r->passes_complete & BIT_ULL(pass)); bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit; - if (!(in_recovery && (flags & RUN_RECOVERY_PASS_nopersistent))) { + if (!(flags & RUN_RECOVERY_PASS_nopersistent)) { struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); } @@ -412,10 +413,8 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, { int ret = 0; - scoped_guard(mutex, &c->sb_lock) { - if (!recovery_pass_needs_set(c, pass, &flags)) - return 0; - + if (recovery_pass_needs_set(c, pass, &flags)) { + guard(mutex)(&c->sb_lock); ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags); bch2_write_super(c); } diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 260571c7105e..2117f0ce1922 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -17,6 +17,15 @@ enum bch_run_recovery_pass_flags { RUN_RECOVERY_PASS_ratelimit = BIT(1), }; +static inline bool go_rw_in_recovery(struct bch_fs *c) +{ + return (c->journal_keys.nr || + !c->opts.read_only || + !c->sb.clean || + c->opts.recovery_passes || + (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))); +} + int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index a535abd44df3..92b90cfe622b 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -64,6 +64,9 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, REFLINK_P_IDX(p.v), le32_to_cpu(p.v->front_pad), le32_to_cpu(p.v->back_pad)); + + if (REFLINK_P_ERROR(p.v)) + prt_str(out, " error"); } bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) @@ -269,13 +272,12 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, return k; if (unlikely(!bkey_extent_is_reflink_data(k.k))) { - unsigned size = min((u64) k.k->size, - REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) - - reflink_offset); - bch2_key_resize(&iter->k, size); + u64 missing_end = min(k.k->p.offset, + REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad)); + BUG_ON(reflink_offset == missing_end); int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, - k.k->p.offset, should_commit); + missing_end, should_commit); if (ret) { bch2_trans_iter_exit(trans, iter); return bkey_s_c_err(ret); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d06e73884871..d154b7651d28 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -3,9 +3,10 @@ #define _BCACHEFS_SB_ERRORS_FORMAT_H enum bch_fsck_flags { - FSCK_CAN_FIX = 1 << 0, - FSCK_CAN_IGNORE = 1 << 1, - FSCK_AUTOFIX = 1 << 2, + FSCK_CAN_FIX = BIT(0), + FSCK_CAN_IGNORE = BIT(1), + FSCK_AUTOFIX = BIT(2), + FSCK_ERR_NO_LOG = BIT(3), }; #define BCH_SB_ERRS() \ @@ -217,7 +218,7 @@ enum bch_fsck_flags { x(inode_str_hash_invalid, 194, 0) \ x(inode_v3_fields_start_bad, 195, 0) \ x(inode_snapshot_mismatch, 196, 0) \ - x(snapshot_key_missing_inode_snapshot, 314, 0) \ + x(snapshot_key_missing_inode_snapshot, 314, FSCK_AUTOFIX) \ x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ @@ -251,20 +252,20 @@ enum bch_fsck_flags { x(deleted_inode_not_unlinked, 214, FSCK_AUTOFIX) \ x(deleted_inode_has_child_snapshots, 288, FSCK_AUTOFIX) \ x(extent_overlapping, 215, 0) \ - x(key_in_missing_inode, 216, 0) \ + x(key_in_missing_inode, 216, FSCK_AUTOFIX) \ x(key_in_wrong_inode_type, 217, 0) \ - x(extent_past_end_of_inode, 218, 0) \ + x(extent_past_end_of_inode, 218, FSCK_AUTOFIX) \ x(dirent_empty_name, 219, 0) \ x(dirent_val_too_big, 220, 0) \ x(dirent_name_too_long, 221, 0) \ x(dirent_name_embedded_nul, 222, 0) \ x(dirent_name_dot_or_dotdot, 223, 0) \ x(dirent_name_has_slash, 224, 0) \ - x(dirent_d_type_wrong, 225, 0) \ + x(dirent_d_type_wrong, 225, FSCK_AUTOFIX) \ x(inode_bi_parent_wrong, 226, 0) \ x(dirent_in_missing_dir_inode, 227, 0) \ x(dirent_in_non_dir_inode, 228, 0) \ - x(dirent_to_missing_inode, 229, 0) \ + x(dirent_to_missing_inode, 229, FSCK_AUTOFIX) \ x(dirent_to_overwritten_inode, 302, 0) \ x(dirent_to_missing_subvol, 230, 0) \ x(dirent_to_itself, 231, 0) \ @@ -300,7 +301,7 @@ enum bch_fsck_flags { x(btree_node_bkey_bad_u64s, 260, 0) \ x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ - x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \ + x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ x(snapshot_node_missing, 264, FSCK_AUTOFIX) \ x(dup_backpointer_to_bad_csum_extent, 265, 0) \ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ @@ -313,7 +314,7 @@ enum bch_fsck_flags { x(accounting_mismatch, 272, FSCK_AUTOFIX) \ x(accounting_replicas_not_marked, 273, 0) \ x(accounting_to_invalid_device, 289, 0) \ - x(invalid_btree_id, 274, 0) \ + x(invalid_btree_id, 274, FSCK_AUTOFIX) \ x(alloc_key_io_time_bad, 275, 0) \ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 23a332d76b32..4c43d2a2c1f5 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -135,7 +135,9 @@ static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor) bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { - bool ret; +#ifdef CONFIG_BCACHEFS_DEBUG + u32 orig_id = id; +#endif guard(rcu)(); struct snapshot_table *t = rcu_dereference(c->snapshots); @@ -147,11 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) while (id && id < ancestor - IS_ANCESTOR_BITMAP) id = get_ancestor_below(t, id, ancestor); - ret = id && id < ancestor + bool ret = id && id < ancestor ? test_ancestor_bitmap(t, id, ancestor) : id == ancestor; - EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); + EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, orig_id, ancestor)); return ret; } @@ -869,7 +871,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, 0, k, ret) { - if (le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) { + if (k.k->type == KEY_TYPE_snapshot_tree && + le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) { tree_id = k.k->p.offset; break; } @@ -897,7 +900,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { - if (le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) { + if (k.k->type == KEY_TYPE_subvolume && + le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) { snapshot->v.subvol = cpu_to_le32(k.k->p.offset); SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true); break; diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c index 71b735a85026..3e9f59226bdf 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c @@ -38,6 +38,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans, struct bkey_s_c_dirent old, bool *updated_before_k_pos) { + struct bch_fs *c = trans->c; struct qstr old_name = bch2_dirent_get_name(old); struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64)); int ret = PTR_ERR_OR_ZERO(new); @@ -60,7 +61,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans, sprintf(renamed_buf, "%.*s.fsck_renamed-%u", old_name.len, old_name.name, i)); - ret = bch2_dirent_init_name(new, hash_info, &renamed_name, NULL); + ret = bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL); if (ret) return ret; @@ -79,7 +80,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans, } ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); - bch_err_fn(trans->c, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 79d51aef70aa..8979ac2d7a3b 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -48,9 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) struct bch_hash_info info = { .inum_snapshot = bi->bi_snapshot, .type = INODE_STR_HASH(bi), -#ifdef CONFIG_UNICODE .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL, -#endif .siphash_key = { .k0 = bi->bi_hash_seed } }; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index a5b97c9c5163..c46b1053a02c 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -210,7 +210,6 @@ static int bch2_dev_alloc(struct bch_fs *, unsigned); static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); static void bch2_dev_io_ref_stop(struct bch_dev *, int); static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); -static int bch2_fs_init_rw(struct bch_fs *); struct bch_fs *bch2_dev_to_fs(dev_t dev) { @@ -794,7 +793,7 @@ err: return ret; } -static int bch2_fs_init_rw(struct bch_fs *c) +int bch2_fs_init_rw(struct bch_fs *c) { if (test_bit(BCH_FS_rw_init_done, &c->flags)) return 0; @@ -1015,16 +1014,28 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, if (ret) goto err; + if (go_rw_in_recovery(c)) { + /* + * start workqueues/kworkers early - kthread creation checks for + * pending signals, which is _very_ annoying + */ + ret = bch2_fs_init_rw(c); + if (ret) + goto err; + } + #ifdef CONFIG_UNICODE - /* Default encoding until we can potentially have more as an option. */ - c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); - if (IS_ERR(c->cf_encoding)) { - printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); - ret = -EINVAL; - goto err; + if (bch2_fs_casefold_enabled(c)) { + /* Default encoding until we can potentially have more as an option. */ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { + printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + ret = -EINVAL; + goto err; + } } #else if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { @@ -1151,12 +1162,11 @@ int bch2_fs_start(struct bch_fs *c) print_mount_opts(c); -#ifdef CONFIG_UNICODE - bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); -#endif + if (c->cf_encoding) + bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); if (!bch2_fs_may_start(c)) return bch_err_throw(c, insufficient_devices_to_start); diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index dc52f06cb2b9..e90bab9afe78 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -46,6 +46,7 @@ void __bch2_fs_stop(struct bch_fs *); void bch2_fs_free(struct bch_fs *); void bch2_fs_stop(struct bch_fs *); +int bch2_fs_init_rw(struct bch_fs *); int bch2_fs_start(struct bch_fs *); struct bch_fs *bch2_fs_open(darray_const_str *, struct bch_opts *); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index dc09532796af..9c5a9c551f03 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1080,34 +1080,14 @@ TRACE_EVENT(trans_blocked_journal_reclaim, __entry->must_wait) ); -TRACE_EVENT(trans_restart_journal_preres_get, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - unsigned flags), - TP_ARGS(trans, caller_ip, flags), - - TP_STRUCT__entry( - __array(char, trans_fn, 32 ) - __field(unsigned long, caller_ip ) - __field(unsigned, flags ) - ), - - TP_fast_assign( - strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); - __entry->caller_ip = caller_ip; - __entry->flags = flags; - ), - - TP_printk("%s %pS %x", __entry->trans_fn, - (void *) __entry->caller_ip, - __entry->flags) -); - +#if 0 +/* todo: bring back dynamic fault injection */ DEFINE_EVENT(transaction_event, trans_restart_fault_inject, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), TP_ARGS(trans, caller_ip) ); +#endif DEFINE_EVENT(transaction_event, trans_traverse_all, TP_PROTO(struct btree_trans *trans, @@ -1195,19 +1175,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path), - TP_ARGS(trans, caller_ip, path) -); - -DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) -); - DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, @@ -1229,13 +1196,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path), - TP_ARGS(trans, caller_ip, path) -); - DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, @@ -1294,44 +1254,6 @@ TRACE_EVENT(trans_restart_mem_realloced, __entry->bytes) ); -TRACE_EVENT(trans_restart_key_cache_key_realloced, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path, - unsigned old_u64s, - unsigned new_u64s), - TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s), - - TP_STRUCT__entry( - __array(char, trans_fn, 32 ) - __field(unsigned long, caller_ip ) - __field(enum btree_id, btree_id ) - TRACE_BPOS_entries(pos) - __field(u32, old_u64s ) - __field(u32, new_u64s ) - ), - - TP_fast_assign( - strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); - __entry->caller_ip = caller_ip; - - __entry->btree_id = path->btree_id; - TRACE_BPOS_assign(pos, path->pos); - __entry->old_u64s = old_u64s; - __entry->new_u64s = new_u64s; - ), - - TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u", - __entry->trans_fn, - (void *) __entry->caller_ip, - bch2_btree_id_str(__entry->btree_id), - __entry->pos_inode, - __entry->pos_offset, - __entry->pos_snapshot, - __entry->old_u64s, - __entry->new_u64s) -); - DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), @@ -1490,6 +1412,31 @@ DEFINE_EVENT(fs_str, io_move_evacuate_bucket, TP_ARGS(c, str) ); +DEFINE_EVENT(fs_str, extent_trim_atomic, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, btree_iter_peek_slot, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, __btree_iter_peek, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, btree_iter_peek_max, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, btree_iter_peek_prev_min, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + #ifdef CONFIG_BCACHEFS_PATH_TRACEPOINTS TRACE_EVENT(update_by_path, @@ -1902,21 +1849,6 @@ TRACE_EVENT(btree_path_free, __entry->dup_locked) ); -TRACE_EVENT(btree_path_free_trans_begin, - TP_PROTO(btree_path_idx_t path), - TP_ARGS(path), - - TP_STRUCT__entry( - __field(btree_path_idx_t, idx ) - ), - - TP_fast_assign( - __entry->idx = path; - ), - - TP_printk(" path %3u", __entry->idx) -); - #else /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */ #ifndef _TRACE_BCACHEFS_H @@ -1934,7 +1866,6 @@ static inline void trace_btree_path_traverse_start(struct btree_trans *trans, st static inline void trace_btree_path_traverse_end(struct btree_trans *trans, struct btree_path *path) {} static inline void trace_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos *new_pos) {} static inline void trace_btree_path_free(struct btree_trans *trans, btree_path_idx_t path, struct btree_path *dup) {} -static inline void trace_btree_path_free_trans_begin(btree_path_idx_t path) {} #endif #endif /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */ diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 9de356bcb411..aa176cc9a324 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -83,6 +83,8 @@ enum btrfs_block_group_flags { BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, /* Does the block group need to be added to the free space tree? */ BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, + /* Set after we add a new block group to the free space tree. */ + BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, /* Indicate that the block group is placed on a sequential zone */ BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, /* diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c7cc24a5dd5e..8c597fa60523 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1377,7 +1377,10 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info) { - WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root)); + struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root); + + if (WARN_ON(node)) + refcount_dec(&node->refs); } static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1beb9458f622..0d6ad7512f21 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1835,6 +1835,8 @@ void btrfs_put_root(struct btrfs_root *root) if (refcount_dec_and_test(&root->refs)) { if (WARN_ON(!xa_empty(&root->inodes))) xa_destroy(&root->inodes); + if (WARN_ON(!xa_empty(&root->delayed_nodes))) + xa_destroy(&root->delayed_nodes); WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); if (root->anon_dev) free_anon_bdev(root->anon_dev); @@ -2156,8 +2158,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root, found = true; root = read_tree_root_path(tree_root, path, &key); if (IS_ERR(root)) { - if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) - ret = PTR_ERR(root); + ret = PTR_ERR(root); break; } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); @@ -4310,8 +4311,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) * * So wait for all ongoing ordered extents to complete and then run * delayed iputs. This works because once we reach this point no one - * can either create new ordered extents nor create delayed iputs - * through some other means. + * can create new ordered extents, but delayed iputs can still be added + * by a reclaim worker (see comments further below). * * Also note that btrfs_wait_ordered_roots() is not safe here, because * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent, @@ -4322,15 +4323,29 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_flush_workqueue(fs_info->endio_write_workers); /* Ordered extents for free space inodes. */ btrfs_flush_workqueue(fs_info->endio_freespace_worker); + /* + * Run delayed iputs in case an async reclaim worker is waiting for them + * to be run as mentioned above. + */ btrfs_run_delayed_iputs(fs_info); - /* There should be no more workload to generate new delayed iputs. */ - set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); cancel_work_sync(&fs_info->async_reclaim_work); cancel_work_sync(&fs_info->async_data_reclaim_work); cancel_work_sync(&fs_info->preempt_reclaim_work); cancel_work_sync(&fs_info->em_shrinker_work); + /* + * Run delayed iputs again because an async reclaim worker may have + * added new ones if it was flushing delalloc: + * + * shrink_delalloc() -> btrfs_start_delalloc_roots() -> + * start_delalloc_inodes() -> btrfs_add_delayed_iput() + */ + btrfs_run_delayed_iputs(fs_info); + + /* There should be no more workload to generate new delayed iputs. */ + set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); + /* Cancel or finish ongoing discard work */ btrfs_discard_cleanup(fs_info); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 849199768664..1dc931c4937f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4312,7 +4312,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio) spin_unlock(&eb->refs_lock); continue; } - xa_unlock_irq(&fs_info->buffer_tree); /* * If tree ref isn't set then we know the ref on this eb is a @@ -4329,6 +4328,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio) * check the folio private at the end. And * release_extent_buffer() will release the refs_lock. */ + xa_unlock_irq(&fs_info->buffer_tree); release_extent_buffer(eb); xa_lock_irq(&fs_info->buffer_tree); } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 0c573d46639a..a83c268f7f87 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1115,11 +1115,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); if (ret < 0) goto out_locked; - ASSERT(ret == 0); + /* + * If ret is 1 (no key found), it means this is an empty block group, + * without any extents allocated from it and there's no block group + * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree + * because we are using the block group tree feature, so block group + * items are stored in the block group tree. It also means there are no + * extents allocated for block groups with a start offset beyond this + * block group's end offset (this is the last, highest, block group). + */ + if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE)) + ASSERT(ret == 0); start = block_group->start; end = block_group->start + block_group->length; - while (1) { + while (ret == 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.type == BTRFS_EXTENT_ITEM_KEY || @@ -1149,8 +1159,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_next_item(extent_root, path); if (ret < 0) goto out_locked; - if (ret) - break; } if (start < end) { ret = __add_to_free_space_tree(trans, block_group, path2, @@ -1233,6 +1241,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, { BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; + struct rb_node *node; int nr; int ret; @@ -1261,6 +1270,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, btrfs_release_path(path); } + node = rb_first_cached(&trans->fs_info->block_group_cache_tree); + while (node) { + struct btrfs_block_group *bg; + + bg = rb_entry(node, struct btrfs_block_group, cache_node); + clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); + node = rb_next(node); + cond_resched(); + } + return 0; } @@ -1350,12 +1369,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) block_group = rb_entry(node, struct btrfs_block_group, cache_node); + + if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, + &block_group->runtime_flags)) + goto next; + ret = populate_free_space_tree(trans, block_group); if (ret) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); return ret; } +next: if (btrfs_should_end_transaction(trans)) { btrfs_end_transaction(trans); trans = btrfs_start_transaction(free_space_root, 1); @@ -1382,6 +1407,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags); + /* + * While rebuilding the free space tree we may allocate new metadata + * block groups while modifying the free space tree. + * + * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we + * can use multiple transactions, every time btrfs_end_transaction() is + * called at btrfs_rebuild_free_space_tree() we finish the creation of + * new block groups by calling btrfs_create_pending_block_groups(), and + * that in turn calls us, through add_block_group_free_space(), to add + * a free space info item and a free space extent item for the block + * group. + * + * Then later btrfs_rebuild_free_space_tree() may find such new block + * groups and processes them with populate_free_space_tree(), which can + * fail with EEXIST since there are already items for the block group in + * the free space tree. Notice that we say "may find" because a new + * block group may be added to the block groups rbtree in a node before + * or after the block group currently being processed by the rebuild + * process. So signal the rebuild process to skip such new block groups + * if it finds them. + */ + set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); + ret = add_new_free_space_info(trans, block_group, path); if (ret) return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0c778243bf1..fc66872b4c74 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4250,9 +4250,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index); if (ret) { - btrfs_info(fs_info, - "failed to delete reference to %.*s, inode %llu parent %llu", - name->len, name->name, ino, dir_ino); + btrfs_crit(fs_info, + "failed to delete reference to %.*s, root %llu inode %llu parent %llu", + name->len, name->name, btrfs_root_id(root), ino, dir_ino); btrfs_abort_transaction(trans, ret); goto err; } @@ -4710,7 +4710,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; int ret = 0; struct btrfs_trans_handle *trans; - u64 last_unlink_trans; struct fscrypt_name fname; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) @@ -4736,6 +4735,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out_notrans; } + /* + * Propagate the last_unlink_trans value of the deleted dir to its + * parent directory. This is to prevent an unrecoverable log tree in the + * case we do something like this: + * 1) create dir foo + * 2) create snapshot under dir foo + * 3) delete the snapshot + * 4) rmdir foo + * 5) mkdir foo + * 6) fsync foo or some file inside foo + * + * This is because we can't unlink other roots when replaying the dir + * deletes for directory foo. + */ + if (BTRFS_I(inode)->last_unlink_trans >= trans->transid) + btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); + if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry); goto out; @@ -4745,27 +4761,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (ret) goto out; - last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; - /* now the directory is empty */ ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), &fname.disk_name); - if (!ret) { + if (!ret) btrfs_i_size_write(BTRFS_I(inode), 0); - /* - * Propagate the last_unlink_trans value of the deleted dir to - * its parent directory. This is to prevent an unrecoverable - * log tree in the case we do something like this: - * 1) create dir foo - * 2) create snapshot under dir foo - * 3) delete the snapshot - * 4) rmdir foo - * 5) mkdir foo - * 6) fsync foo or some file inside foo - */ - if (last_unlink_trans >= trans->transid) - BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; - } out: btrfs_end_transaction(trans); out_notrans: @@ -8059,6 +8059,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, int ret; int ret2; bool need_abort = false; + bool logs_pinned = false; struct fscrypt_name old_fname, new_fname; struct fscrypt_str *old_name, *new_name; @@ -8182,6 +8183,31 @@ static int btrfs_rename_exchange(struct inode *old_dir, inode_inc_iversion(new_inode); simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + if (old_ino != BTRFS_FIRST_FREE_OBJECTID && + new_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not for + * root entries) pin the log early to prevent any concurrent + * task from logging the directory after we removed the old + * entries and before we add the new entries, otherwise that + * task can sync a log without any entry for the inodes we are + * renaming and therefore replaying that log, if a power failure + * happens after syncing the log, would result in deleting the + * inodes. + * + * If the rename affects two different directories, we want to + * make sure the that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). + */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) { btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), true); @@ -8253,30 +8279,23 @@ static int btrfs_rename_exchange(struct inode *old_dir, BTRFS_I(new_inode)->dir_index = new_idx; /* - * Now pin the logs of the roots. We do it to ensure that no other task - * can sync the logs while we are in progress with the rename, because - * that could result in an inconsistency in case any of the inodes that - * are part of this rename operation were logged before. + * Do the log updates for all inodes. + * + * If either entry is for a root we don't need to update the logs since + * we've called btrfs_set_log_full_commit() before. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(dest); - - /* Do the log updates for all inodes. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) { btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), old_rename_ctx.index, new_dentry->d_parent); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir), new_rename_ctx.index, old_dentry->d_parent); + } - /* Now unpin the logs. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) +out_fail: + if (logs_pinned) { btrfs_end_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_end_log_trans(dest); -out_fail: + } ret2 = btrfs_end_transaction(trans); ret = ret ? ret : ret2; out_notrans: @@ -8326,6 +8345,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, int ret2; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); struct fscrypt_name old_fname, new_fname; + bool logs_pinned = false; if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; @@ -8460,6 +8480,29 @@ static int btrfs_rename(struct mnt_idmap *idmap, inode_inc_iversion(old_inode); simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not a + * root entry) pin the log to prevent any concurrent task from + * logging the directory after we removed the old entry and + * before we add the new entry, otherwise that task can sync + * a log without any entry for the inode we are renaming and + * therefore replaying that log, if a power failure happens + * after syncing the log, would result in deleting the inode. + * + * If the rename affects two different directories, we want to + * make sure the that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). + */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), true); @@ -8524,7 +8567,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, if (old_inode->i_nlink == 1) BTRFS_I(old_inode)->dir_index = index; - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), rename_ctx.index, new_dentry->d_parent); @@ -8540,6 +8583,10 @@ static int btrfs_rename(struct mnt_idmap *idmap, } } out_fail: + if (logs_pinned) { + btrfs_end_log_trans(root); + btrfs_end_log_trans(dest); + } ret2 = btrfs_end_transaction(trans); ret = ret ? ret : ret2; out_notrans: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 913acef3f0a9..8a60983a697c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap, goto out; } + btrfs_record_new_subvolume(trans, BTRFS_I(dir)); + ret = btrfs_create_new_inode(trans, &new_inode_args); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } - btrfs_record_new_subvolume(trans, BTRFS_I(dir)); - d_instantiate_new(dentry, new_inode_args.inode); new_inode_args.inode = NULL; @@ -3139,7 +3139,7 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg) return -EPERM; if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { - btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet"); + btrfs_err(fs_info, "scrub: extent tree v2 not yet supported"); return -EINVAL; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ce36fafc771e..7cd5e76a783c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -557,7 +557,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) btrfs_warn_in_rcu(fs_info, -"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)", +"scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu length %u links %u (path: %s)", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, @@ -571,7 +571,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, err: btrfs_warn_in_rcu(fs_info, - "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", + "scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu: path resolving failed with ret=%d", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, @@ -596,7 +596,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * /* Super block error, no need to search extent tree. */ if (is_super) { - btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu", + btrfs_warn_in_rcu(fs_info, "scrub: %s on device %s, physical %llu", errstr, btrfs_dev_name(dev), physical); return; } @@ -631,14 +631,14 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * &ref_level); if (ret < 0) { btrfs_warn(fs_info, - "failed to resolve tree backref for logical %llu: %d", - swarn.logical, ret); + "scrub: failed to resolve tree backref for logical %llu: %d", + swarn.logical, ret); break; } if (ret > 0) break; btrfs_warn_in_rcu(fs_info, -"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", +"scrub: %s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", errstr, swarn.logical, btrfs_dev_name(dev), swarn.physical, (ref_level ? "node" : "leaf"), ref_level, ref_root); @@ -718,7 +718,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad bytenr, has %llu want %llu", + "scrub: tree block %llu mirror %u has bad bytenr, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_bytenr(header), logical); return; @@ -728,7 +728,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad fsid, has %pU want %pU", + "scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU", logical, stripe->mirror_num, header->fsid, fs_info->fs_devices->fsid); return; @@ -738,7 +738,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", + "scrub: tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", logical, stripe->mirror_num, header->chunk_tree_uuid, fs_info->chunk_tree_uuid); return; @@ -760,7 +760,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, +"scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, logical, stripe->mirror_num, CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum), CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum)); @@ -771,7 +771,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_gen_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad generation, has %llu want %llu", + "scrub: tree block %llu mirror %u has bad generation, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_generation(header), stripe->sectors[sector_nr].generation); @@ -814,7 +814,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) */ if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) { btrfs_warn_rl(fs_info, - "tree block at %llu crosses stripe boundary %llu", + "scrub: tree block at %llu crosses stripe boundary %llu", stripe->logical + (sector_nr << fs_info->sectorsize_bits), stripe->logical); @@ -1046,12 +1046,12 @@ skip: if (repaired) { if (dev) { btrfs_err_rl_in_rcu(fs_info, - "fixed up error at logical %llu on dev %s physical %llu", + "scrub: fixed up error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, - "fixed up error at logical %llu on mirror %u", + "scrub: fixed up error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } continue; @@ -1060,12 +1060,12 @@ skip: /* The remaining are all for unrepaired. */ if (dev) { btrfs_err_rl_in_rcu(fs_info, - "unable to fixup (regular) error at logical %llu on dev %s physical %llu", +"scrub: unable to fixup (regular) error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, - "unable to fixup (regular) error at logical %llu on mirror %u", + "scrub: unable to fixup (regular) error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } @@ -1593,8 +1593,7 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical, physical, sctx->write_pointer); if (ret) - btrfs_err(fs_info, - "zoned: failed to recover write pointer"); + btrfs_err(fs_info, "scrub: zoned: failed to recover write pointer"); } mutex_unlock(&sctx->wr_lock); btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical); @@ -1658,7 +1657,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, int ret; if (unlikely(!extent_root || !csum_root)) { - btrfs_err(fs_info, "no valid extent or csum root for scrub"); + btrfs_err(fs_info, "scrub: no valid extent or csum root found"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * @@ -1907,7 +1906,7 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe) struct btrfs_fs_info *fs_info = stripe->bg->fs_info; btrfs_err(fs_info, - "stripe %llu has unrepaired metadata sector at %llu", + "scrub: stripe %llu has unrepaired metadata sector at logical %llu", stripe->logical, stripe->logical + (i << fs_info->sectorsize_bits)); return true; @@ -2167,7 +2166,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, bitmap_and(&error, &error, &has_extent, stripe->nr_sectors); if (!bitmap_empty(&error, stripe->nr_sectors)) { btrfs_err(fs_info, -"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", +"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", full_stripe_start, i, stripe->nr_sectors, &error); ret = -EIO; @@ -2789,14 +2788,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, ro_set = 0; } else if (ret == -ETXTBSY) { btrfs_warn(fs_info, - "skipping scrub of block group %llu due to active swapfile", + "scrub: skipping scrub of block group %llu due to active swapfile", cache->start); scrub_pause_off(fs_info); ret = 0; goto skip_unfreeze; } else { - btrfs_warn(fs_info, - "failed setting block group ro: %d", ret); + btrfs_warn(fs_info, "scrub: failed setting block group ro: %d", + ret); btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); scrub_pause_off(fs_info); @@ -2892,13 +2891,13 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev, ret = btrfs_check_super_csum(fs_info, sb); if (ret != 0) { btrfs_err_rl(fs_info, - "super block at physical %llu devid %llu has bad csum", + "scrub: super block at physical %llu devid %llu has bad csum", physical, dev->devid); return -EIO; } if (btrfs_super_generation(sb) != generation) { btrfs_err_rl(fs_info, -"super block at physical %llu devid %llu has bad generation %llu expect %llu", +"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu", physical, dev->devid, btrfs_super_generation(sb), generation); return -EUCLEAN; @@ -3059,7 +3058,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); btrfs_err_in_rcu(fs_info, - "scrub on devid %llu: filesystem on %s is not writable", + "scrub: devid %llu: filesystem on %s is not writable", devid, btrfs_dev_name(dev)); ret = -EROFS; goto out; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 97e933113b82..cea8a7e9d6d3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r unsigned int nofs_flag; struct btrfs_inode *inode; + /* Only meant to be called for subvolume roots and not for log roots. */ + ASSERT(is_fstree(btrfs_root_id(root))); + /* * We're holding a transaction handle whether we are logging or * replaying a log tree, so we must make sure NOFS semantics apply @@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len, return 0; } -/* - * simple helper to read an inode off the disk from a given root - * This can only be called for subvolume roots and not for the log - */ -static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root, - u64 objectid) -{ - struct btrfs_inode *inode; - - inode = btrfs_iget_logging(objectid, root); - if (IS_ERR(inode)) - return NULL; - return inode; -} - /* replays a single extent in 'eb' at 'slot' with 'key' into the * subvolume 'root'. path is released on entry and should be released * on exit. @@ -668,15 +656,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, extent_end = ALIGN(start + size, fs_info->sectorsize); } else { - ret = 0; - goto out; + btrfs_err(fs_info, + "unexpected extent type=%d root=%llu inode=%llu offset=%llu", + found_type, btrfs_root_id(root), key->objectid, key->offset); + return -EUCLEAN; } - inode = read_one_inode(root, key->objectid); - if (!inode) { - ret = -EIO; - goto out; - } + inode = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); /* * first check to see if we already have this extent in the @@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, btrfs_release_path(path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -961,7 +950,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, ret = unlink_inode_for_log_replay(trans, dir, inode, &name); out: kfree(name.name); - iput(&inode->vfs_inode); + if (inode) + iput(&inode->vfs_inode); return ret; } @@ -1072,7 +1062,9 @@ again: search_key.type = BTRFS_INODE_REF_KEY; search_key.offset = parent_objectid; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); - if (ret == 0) { + if (ret < 0) { + return ret; + } else if (ret == 0) { struct btrfs_inode_ref *victim_ref; unsigned long ptr; unsigned long ptr_end; @@ -1145,13 +1137,13 @@ again: struct fscrypt_str victim_name; extref = (struct btrfs_inode_extref *)(base + cur_offset); + victim_name.len = btrfs_inode_extref_name_len(leaf, extref); if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) goto next; ret = read_alloc_one_name(leaf, &extref->name, - btrfs_inode_extref_name_len(leaf, extref), - &victim_name); + victim_name.len, &victim_name); if (ret) return ret; @@ -1166,18 +1158,18 @@ again: kfree(victim_name.name); return ret; } else if (!ret) { - ret = -ENOENT; - victim_parent = read_one_inode(root, - parent_objectid); - if (victim_parent) { + victim_parent = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(victim_parent)) { + ret = PTR_ERR(victim_parent); + } else { inc_nlink(&inode->vfs_inode); btrfs_release_path(path); ret = unlink_inode_for_log_replay(trans, victim_parent, inode, &victim_name); + iput(&victim_parent->vfs_inode); } - iput(&victim_parent->vfs_inode); kfree(victim_name.name); if (ret) return ret; @@ -1314,9 +1306,9 @@ again: struct btrfs_inode *dir; btrfs_release_path(path); - dir = read_one_inode(root, parent_id); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_id, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); kfree(name.name); goto out; } @@ -1388,15 +1380,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * copy the back ref in. The link count fixup code will take * care of the rest */ - dir = read_one_inode(root, parent_objectid); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; goto out; } - inode = read_one_inode(root, inode_objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(inode_objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -1408,11 +1402,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * parent object can change from one array * item to another. */ - if (!dir) - dir = read_one_inode(root, parent_objectid); if (!dir) { - ret = -ENOENT; - goto out; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; + goto out; + } } } else { ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); @@ -1681,9 +1677,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, break; btrfs_release_path(path); - inode = read_one_inode(root, key.offset); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.offset, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } @@ -1719,9 +1715,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_inode *inode; struct inode *vfs_inode; - inode = read_one_inode(root, objectid); - if (!inode) - return -EIO; + inode = btrfs_iget_logging(objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); vfs_inode = &inode->vfs_inode; key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; @@ -1760,14 +1756,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, struct btrfs_inode *dir; int ret; - inode = read_one_inode(root, location->objectid); - if (!inode) - return -ENOENT; + inode = btrfs_iget_logging(location->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); - dir = read_one_inode(root, dirid); - if (!dir) { + dir = btrfs_iget_logging(dirid, root); + if (IS_ERR(dir)) { iput(&inode->vfs_inode); - return -EIO; + return PTR_ERR(dir); } ret = btrfs_add_link(trans, dir, inode, name, 1, index); @@ -1844,9 +1840,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, bool update_size = true; bool name_added = false; - dir = read_one_inode(root, key->objectid); - if (!dir) - return -EIO; + dir = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(dir)) + return PTR_ERR(dir); ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); if (ret) @@ -2146,9 +2142,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -2300,14 +2297,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, if (!log_path) return -ENOMEM; - dir = read_one_inode(root, dirid); - /* it isn't an error if the inode isn't there, that can happen - * because we replay the deletes before we copy in the inode item - * from the log + dir = btrfs_iget_logging(dirid, root); + /* + * It isn't an error if the inode isn't there, that can happen because + * we replay the deletes before we copy in the inode item from the log. */ - if (!dir) { + if (IS_ERR(dir)) { btrfs_free_path(log_path); - return 0; + ret = PTR_ERR(dir); + if (ret == -ENOENT) + ret = 0; + return ret; } range_start = 0; @@ -2466,9 +2466,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, struct btrfs_inode *inode; u64 from; - inode = read_one_inode(root, key.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } from = ALIGN(i_size_read(&inode->vfs_inode), @@ -7447,6 +7447,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, * full log sync. * Also we don't need to worry with renames, since btrfs_rename() marks the log * for full commit when renaming a subvolume. + * + * Must be called before creating the subvolume entry in its parent directory. */ void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans, struct btrfs_inode *dir) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 89835071cfea..f475b4b7c457 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3282,6 +3282,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) device->bytes_used - dev_extent_len); atomic64_add(dev_extent_len, &fs_info->free_chunk_space); btrfs_clear_space_info_full(fs_info); + + if (list_empty(&device->post_commit_list)) { + list_add_tail(&device->post_commit_list, + &trans->transaction->dev_update_list); + } + mutex_unlock(&fs_info->chunk_mutex); } } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b5b0156d5b95..9430b34d3cbb 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1403,7 +1403,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg, static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1426,6 +1427,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, zone_info[1].physical); return -EIO; } + + if (zone_info[0].alloc_offset == WP_CONVENTIONAL) + zone_info[0].alloc_offset = last_alloc; + + if (zone_info[1].alloc_offset == WP_CONVENTIONAL) + zone_info[1].alloc_offset = last_alloc; + if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { btrfs_err(bg->fs_info, "zoned: write pointer offset mismatch of zones in DUP profile"); @@ -1446,7 +1454,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; int i; @@ -1461,10 +1470,12 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); for (i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) + zone_info[i].alloc_offset = last_alloc; + if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) && !btrfs_test_opt(fs_info, DEGRADED)) { btrfs_err(fs_info, @@ -1494,7 +1505,8 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1505,10 +1517,29 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, } for (int i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { + u64 stripe_nr, full_stripe_nr; + u64 stripe_offset; + int stripe_index; + + stripe_nr = div64_u64(last_alloc, map->stripe_size); + stripe_offset = stripe_nr * map->stripe_size; + full_stripe_nr = div_u64(stripe_nr, map->num_stripes); + div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); + + zone_info[i].alloc_offset = + full_stripe_nr * map->stripe_size; + + if (stripe_index > i) + zone_info[i].alloc_offset += map->stripe_size; + else if (stripe_index == i) + zone_info[i].alloc_offset += + (last_alloc - stripe_offset); + } + if (test_bit(0, active) != test_bit(i, active)) { if (!btrfs_zone_activate(bg)) return -EIO; @@ -1526,7 +1557,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1537,8 +1569,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, } for (int i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; if (test_bit(0, active) != test_bit(i, active)) { @@ -1549,6 +1580,29 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { + u64 stripe_nr, full_stripe_nr; + u64 stripe_offset; + int stripe_index; + + stripe_nr = div64_u64(last_alloc, map->stripe_size); + stripe_offset = stripe_nr * map->stripe_size; + full_stripe_nr = div_u64(stripe_nr, + map->num_stripes / map->sub_stripes); + div_u64_rem(stripe_nr, + (map->num_stripes / map->sub_stripes), + &stripe_index); + + zone_info[i].alloc_offset = + full_stripe_nr * map->stripe_size; + + if (stripe_index > (i / map->sub_stripes)) + zone_info[i].alloc_offset += map->stripe_size; + else if (stripe_index == (i / map->sub_stripes)) + zone_info[i].alloc_offset += + (last_alloc - stripe_offset); + } + if ((i % map->sub_stripes) == 0) { bg->zone_capacity += zone_info[i].capacity; bg->alloc_offset += zone_info[i].alloc_offset; @@ -1637,18 +1691,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) ret = btrfs_load_block_group_single(cache, &zone_info[0], active); break; case BTRFS_BLOCK_GROUP_DUP: - ret = btrfs_load_block_group_dup(cache, map, zone_info, active); + ret = btrfs_load_block_group_dup(cache, map, zone_info, active, + last_alloc); break; case BTRFS_BLOCK_GROUP_RAID1: case BTRFS_BLOCK_GROUP_RAID1C3: case BTRFS_BLOCK_GROUP_RAID1C4: - ret = btrfs_load_block_group_raid1(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid1(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID0: - ret = btrfs_load_block_group_raid0(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid0(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID10: - ret = btrfs_load_block_group_raid10(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid10(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID5: case BTRFS_BLOCK_GROUP_RAID6: diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 6a329c329f43..16e4a6bd9b97 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -214,9 +214,11 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) /* * bit 30: I/O error occurred on this folio + * bit 29: CPU has dirty data in D-cache (needs aliasing handling); * bit 0 - 29: remaining parts to complete this folio */ -#define EROFS_ONLINEFOLIO_EIO (1 << 30) +#define EROFS_ONLINEFOLIO_EIO 30 +#define EROFS_ONLINEFOLIO_DIRTY 29 void erofs_onlinefolio_init(struct folio *folio) { @@ -233,19 +235,23 @@ void erofs_onlinefolio_split(struct folio *folio) atomic_inc((atomic_t *)&folio->private); } -void erofs_onlinefolio_end(struct folio *folio, int err) +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty) { int orig, v; do { orig = atomic_read((atomic_t *)&folio->private); - v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0); + DBG_BUGON(orig <= 0); + v = dirty << EROFS_ONLINEFOLIO_DIRTY; + v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO); } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); - if (v & ~EROFS_ONLINEFOLIO_EIO) + if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1)) return; folio->private = 0; - folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO)); + if (v & BIT(EROFS_ONLINEFOLIO_DIRTY)) + flush_dcache_folio(folio); + folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO))); } static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, @@ -351,11 +357,16 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, */ static int erofs_read_folio(struct file *file, struct folio *folio) { + trace_erofs_read_folio(folio, true); + return iomap_read_folio(folio, &erofs_iomap_ops); } static void erofs_readahead(struct readahead_control *rac) { + trace_erofs_readahead(rac->mapping->host, readahead_index(rac), + readahead_count(rac), true); + return iomap_readahead(rac, &erofs_iomap_ops); } diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index bf62e2836b60..358061d7b660 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -301,13 +301,11 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, cur = min(cur, rq->outputsize); if (cur && rq->out[0]) { kin = kmap_local_page(rq->in[nrpages_in - 1]); - if (rq->out[0] == rq->in[nrpages_in - 1]) { + if (rq->out[0] == rq->in[nrpages_in - 1]) memmove(kin + rq->pageofs_out, kin + pi, cur); - flush_dcache_page(rq->out[0]); - } else { + else memcpy_to_page(rq->out[0], rq->pageofs_out, kin + pi, cur); - } kunmap_local(kin); } rq->outputsize -= cur; @@ -325,14 +323,12 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK; DBG_BUGON(no >= nrpages_out); cnt = min(insz - pi, PAGE_SIZE - po); - if (rq->out[no] == rq->in[ni]) { + if (rq->out[no] == rq->in[ni]) memmove(kin + po, kin + rq->pageofs_in + pi, cnt); - flush_dcache_page(rq->out[no]); - } else if (rq->out[no]) { + else if (rq->out[no]) memcpy_to_page(rq->out[no], po, kin + rq->pageofs_in + pi, cnt); - } pi += cnt; } while (pi < insz); kunmap_local(kin); diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2fae209d0274..3e4b38bec0aa 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -58,6 +58,11 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_dirent *de; unsigned int nameoff, maxsize; + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + de = erofs_bread(&buf, dbstart, true); if (IS_ERR(de)) { erofs_err(sb, "failed to readdir of logical block %llu of nid %llu", @@ -88,6 +93,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; ctx->pos = dbstart + maxsize; ofs = 0; + cond_resched(); } erofs_put_metabuf(&buf); if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) { diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index df5cc63f2c01..91781718199e 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -38,7 +38,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) } else { bio_for_each_folio_all(fi, &rq->bio) { DBG_BUGON(folio_test_uptodate(fi.folio)); - erofs_onlinefolio_end(fi.folio, ret); + erofs_onlinefolio_end(fi.folio, ret, false); } } bio_uninit(&rq->bio); @@ -96,8 +96,6 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) struct erofs_map_blocks *map = &io->map; unsigned int cur = 0, end = folio_size(folio), len, attached = 0; loff_t pos = folio_pos(folio), ofs; - struct iov_iter iter; - struct bio_vec bv; int err = 0; erofs_onlinefolio_init(folio); @@ -122,13 +120,7 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) err = PTR_ERR(src); break; } - bvec_set_folio(&bv, folio, len, cur); - iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len); - if (copy_to_iter(src, len, &iter) != len) { - erofs_put_metabuf(&buf); - err = -EIO; - break; - } + memcpy_to_folio(folio, cur, src, len); erofs_put_metabuf(&buf); } else if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, cur + len); @@ -162,7 +154,7 @@ io_retry: } cur += len; } - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index a32c03a80c70..06b867d2fc3b 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -315,10 +315,12 @@ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as, /* The length of extent is full */ #define EROFS_MAP_FULL_MAPPED 0x0008 /* Located in the special packed inode */ -#define EROFS_MAP_FRAGMENT 0x0010 +#define __EROFS_MAP_FRAGMENT 0x0010 /* The extent refers to partial decompressed data */ #define EROFS_MAP_PARTIAL_REF 0x0020 +#define EROFS_MAP_FRAGMENT (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT) + struct erofs_map_blocks { struct erofs_buf buf; @@ -390,7 +392,7 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map); void erofs_onlinefolio_init(struct folio *folio); void erofs_onlinefolio_split(struct folio *folio); -void erofs_onlinefolio_end(struct folio *folio, int err); +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty); struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index fe8071844724..e3f28a1bb945 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1034,7 +1034,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, end); tight = false; - } else if (map->m_flags & EROFS_MAP_FRAGMENT) { + } else if (map->m_flags & __EROFS_MAP_FRAGMENT) { erofs_off_t fpos = offset + cur - map->m_la; err = z_erofs_read_fragment(inode->i_sb, folio, cur, @@ -1091,7 +1091,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, tight = (bs == PAGE_SIZE); } } while ((end = cur) > 0); - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } @@ -1196,7 +1196,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err) cur += len; } kunmap_local(dst); - erofs_onlinefolio_end(page_folio(bvi->bvec.page), err); + erofs_onlinefolio_end(page_folio(bvi->bvec.page), err, true); list_del(p); kfree(bvi); } @@ -1355,7 +1355,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) DBG_BUGON(z_erofs_page_is_invalidated(page)); if (!z_erofs_is_shortlived_page(page)) { - erofs_onlinefolio_end(page_folio(page), err); + erofs_onlinefolio_end(page_folio(page), err, true); continue; } if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) { diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0bebc6e3a4d7..f1a15ff22147 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -413,8 +413,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, !vi->z_tailextent_headlcn) { map->m_la = 0; map->m_llen = inode->i_size; - map->m_flags = EROFS_MAP_MAPPED | - EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; return 0; } initial_lcn = ofs >> lclusterbits; @@ -489,7 +488,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, goto unmap_out; } } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { - map->m_flags |= EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; } else { map->m_pa = erofs_pos(sb, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); @@ -617,7 +616,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (lstart < lend) { map->m_la = lstart; if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { - map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; vi->z_fragmentoff = map->m_plen; if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; @@ -797,7 +796,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, iomap->length = map.m_llen; if (map.m_flags & EROFS_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; - iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ? + iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ? IOMAP_NULL_ADDR : map.m_pa; } else { iomap->type = IOMAP_HOLE; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d4dbffdedd08..0fbf5dfedb24 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -883,7 +883,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) kfree_rcu(epi, rcu); percpu_counter_dec(&ep->user->epoll_watches); - return ep_refcount_dec_and_test(ep); + return true; } /* @@ -891,14 +891,14 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) */ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) { - WARN_ON_ONCE(__ep_remove(ep, epi, false)); + if (__ep_remove(ep, epi, false)) + WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); } static void ep_clear_and_put(struct eventpoll *ep) { struct rb_node *rbp, *next; struct epitem *epi; - bool dispose; /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) @@ -931,10 +931,8 @@ static void ep_clear_and_put(struct eventpoll *ep) cond_resched(); } - dispose = ep_refcount_dec_and_test(ep); mutex_unlock(&ep->mtx); - - if (dispose) + if (ep_refcount_dec_and_test(ep)) ep_free(ep); } @@ -1137,7 +1135,7 @@ again: dispose = __ep_remove(ep, epi, true); mutex_unlock(&ep->mtx); - if (dispose) + if (dispose && ep_refcount_dec_and_test(ep)) ep_free(ep); goto again; } diff --git a/fs/exec.c b/fs/exec.c index 1f5fdd2e096e..ba400aafd640 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -114,6 +114,9 @@ static inline void put_binfmt(struct linux_binfmt * fmt) bool path_noexec(const struct path *path) { + /* If it's an anonymous inode make sure that we catch any shenanigans. */ + VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) && + !(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC)); return (path->mnt->mnt_flags & MNT_NOEXEC) || (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); } @@ -781,13 +784,15 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (IS_ERR(file)) return file; + if (path_noexec(&file->f_path)) + return ERR_PTR(-EACCES); + /* * In the past the regular type check was here. It moved to may_open() in * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is * an invariant that all non-regular files error out before we get here. */ - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || - path_noexec(&file->f_path)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode))) return ERR_PTR(-EACCES); err = exe_file_deny_write_access(file); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6bd3de64f2a8..696131e655ed 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,6 +35,17 @@ #include <trace/events/f2fs.h> #include <uapi/linux/f2fs.h> +static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size) +{ + loff_t old_size = i_size_read(inode); + + if (old_size >= new_size) + return; + + /* zero or drop pages only in range of [old_size, new_size] */ + truncate_pagecache(inode, old_size); +} + static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); @@ -103,8 +114,13 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT); + filemap_invalidate_unlock(inode->i_mapping); + file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); + folio_lock(folio); if (unlikely(folio->mapping != inode->i_mapping || folio_pos(folio) > i_size_read(inode) || @@ -1109,6 +1125,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + if (attr->ia_size > old_size) + f2fs_zero_post_eof_page(inode, attr->ia_size); truncate_setsize(inode, attr->ia_size); if (attr->ia_size <= old_size) @@ -1227,6 +1245,10 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1510,6 +1532,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + f2fs_lock_op(sbi); f2fs_drop_extent_tree(inode); truncate_pagecache(inode, offset); @@ -1631,6 +1655,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; + filemap_invalidate_lock(mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1762,6 +1790,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) /* avoid gc operation during block exchange */ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); + + f2fs_zero_post_eof_page(inode, offset + len); truncate_pagecache(inode, offset); while (!ret && idx > pg_start) { @@ -1819,6 +1849,10 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, if (err) return err; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + f2fs_balance_fs(sbi, true); pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; @@ -4860,6 +4894,10 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) err = file_modified(file); if (err) return err; + + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from)); + filemap_invalidate_unlock(inode->i_mapping); return count; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1cb4cba7f961..bfe104db284e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2078,7 +2078,6 @@ write_node: if (!__write_node_folio(folio, false, &submitted, wbc, do_balance, io_type, NULL)) { - folio_unlock(folio); folio_batch_release(&fbatch); ret = -EIO; goto out; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f102afc03359..47006d0753f1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1147,7 +1147,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, struct address_space *mapping, struct iov_iter *ii, loff_t pos, - unsigned int max_pages) + unsigned int max_folios) { struct fuse_args_pages *ap = &ia->ap; struct fuse_conn *fc = get_fuse_conn(mapping->host); @@ -1157,12 +1157,11 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, int err = 0; num = min(iov_iter_count(ii), fc->max_write); - num = min(num, max_pages << PAGE_SHIFT); ap->args.in_pages = true; ap->descs[0].offset = offset; - while (num) { + while (num && ap->num_folios < max_folios) { size_t tmp; struct folio *folio; pgoff_t index = pos >> PAGE_SHIFT; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index bfe8d8af46f3..9572bdef49ee 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -9,6 +9,7 @@ #include "fuse_i.h" #include "dev_uring_i.h" +#include <linux/dax.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/file.h> @@ -162,6 +163,9 @@ static void fuse_evict_inode(struct inode *inode) /* Will write inode on close/munmap and in all other dirtiers */ WARN_ON(inode->i_state & I_DIRTY_INODE); + if (FUSE_IS_DAX(inode)) + dax_break_layout_final(inode); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_sb->s_flags & SB_ACTIVE) { diff --git a/fs/libfs.c b/fs/libfs.c index 9ea0ecc325a8..6f487fc6be34 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1649,12 +1649,10 @@ struct inode *alloc_anon_inode(struct super_block *s) */ inode->i_state = I_DIRTY; /* - * Historically anonymous inodes didn't have a type at all and - * userspace has come to rely on this. Internally they're just - * regular files but S_IFREG is masked off when reporting - * information to userspace. + * Historically anonymous inodes don't have a type at all and + * userspace has come to rely on this. */ - inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; + inode->i_mode = S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE | S_ANON_INODE; diff --git a/fs/namei.c b/fs/namei.c index f761cafaeaad..c26a7ee42184 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3480,7 +3480,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path, return -EACCES; break; default: - VFS_BUG_ON_INODE(1, inode); + VFS_BUG_ON_INODE(!IS_ANON_FILE(inode), inode); } error = inode_permission(idmap, inode, MAY_OPEN | acc_mode); diff --git a/fs/namespace.c b/fs/namespace.c index e13d9ab4f564..54c59e091919 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2310,21 +2310,62 @@ out: return dst_mnt; } -/* Caller should check returned pointer for errors */ +static inline bool extend_array(struct path **res, struct path **to_free, + unsigned n, unsigned *count, unsigned new_count) +{ + struct path *p; -struct vfsmount *collect_mounts(const struct path *path) + if (likely(n < *count)) + return true; + p = kmalloc_array(new_count, sizeof(struct path), GFP_KERNEL); + if (p && *count) + memcpy(p, *res, *count * sizeof(struct path)); + *count = new_count; + kfree(*to_free); + *to_free = *res = p; + return p; +} + +struct path *collect_paths(const struct path *path, + struct path *prealloc, unsigned count) { - struct mount *tree; - namespace_lock(); - if (!check_mnt(real_mount(path->mnt))) - tree = ERR_PTR(-EINVAL); - else - tree = copy_tree(real_mount(path->mnt), path->dentry, - CL_COPY_ALL | CL_PRIVATE); - namespace_unlock(); - if (IS_ERR(tree)) - return ERR_CAST(tree); - return &tree->mnt; + struct mount *root = real_mount(path->mnt); + struct mount *child; + struct path *res = prealloc, *to_free = NULL; + unsigned n = 0; + + guard(rwsem_read)(&namespace_sem); + + if (!check_mnt(root)) + return ERR_PTR(-EINVAL); + if (!extend_array(&res, &to_free, 0, &count, 32)) + return ERR_PTR(-ENOMEM); + res[n++] = *path; + list_for_each_entry(child, &root->mnt_mounts, mnt_child) { + if (!is_subdir(child->mnt_mountpoint, path->dentry)) + continue; + for (struct mount *m = child; m; m = next_mnt(m, child)) { + if (!extend_array(&res, &to_free, n, &count, 2 * count)) + return ERR_PTR(-ENOMEM); + res[n].mnt = &m->mnt; + res[n].dentry = m->mnt.mnt_root; + n++; + } + } + if (!extend_array(&res, &to_free, n, &count, count + 1)) + return ERR_PTR(-ENOMEM); + memset(res + n, 0, (count - n) * sizeof(struct path)); + for (struct path *p = res; p->mnt; p++) + path_get(p); + return res; +} + +void drop_collected_paths(struct path *paths, struct path *prealloc) +{ + for (struct path *p = paths; p->mnt; p++) + path_put(p); + if (paths != prealloc) + kfree(paths); } static void free_mnt_ns(struct mnt_namespace *); @@ -2401,15 +2442,6 @@ void dissolve_on_fput(struct vfsmount *mnt) free_mnt_ns(ns); } -void drop_collected_mounts(struct vfsmount *mnt) -{ - namespace_lock(); - lock_mount_hash(); - umount_tree(real_mount(mnt), 0); - unlock_mount_hash(); - namespace_unlock(); -} - static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; @@ -2511,21 +2543,6 @@ struct vfsmount *clone_private_mount(const struct path *path) } EXPORT_SYMBOL_GPL(clone_private_mount); -int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, - struct vfsmount *root) -{ - struct mount *mnt; - int res = f(root, arg); - if (res) - return res; - list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) { - res = f(&mnt->mnt, arg); - if (res) - return res; - } - return 0; -} - static void lock_mnt_tree(struct mount *mnt) { struct mount *p; @@ -2751,14 +2768,14 @@ static int attach_recursive_mnt(struct mount *source_mnt, hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; hlist_del_init(&child->mnt_hash); - q = __lookup_mnt(&child->mnt_parent->mnt, - child->mnt_mountpoint); - if (q) - mnt_change_mountpoint(child, smp, q); /* Notice when we are propagating across user namespaces */ if (child->mnt_parent->mnt_ns->user_ns != user_ns) lock_mnt_tree(child); child->mnt.mnt_flags &= ~MNT_LOCKED; + q = __lookup_mnt(&child->mnt_parent->mnt, + child->mnt_mountpoint); + if (q) + mnt_change_mountpoint(child, smp, q); commit_tree(child); } put_mountpoint(smp); @@ -5290,16 +5307,12 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, kattr.kflags |= MOUNT_KATTR_RECURSE; ret = wants_mount_setattr(uattr, usize, &kattr); - if (ret < 0) - return ret; - - if (ret) { + if (ret > 0) { ret = do_mount_setattr(&file->f_path, &kattr); - if (ret) - return ret; - finish_mount_kattr(&kattr); } + if (ret) + return ret; } fd = get_unused_fd_flags(flags & O_CLOEXEC); @@ -6262,7 +6275,11 @@ void put_mnt_ns(struct mnt_namespace *ns) { if (!refcount_dec_and_test(&ns->ns.count)) return; - drop_collected_mounts(&ns->root->mnt); + namespace_lock(); + lock_mount_hash(); + umount_tree(ns->root, 0); + unlock_mount_hash(); + namespace_unlock(); free_mnt_ns(ns); } diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 72a3e6db2524..f27ea5099a68 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -53,30 +53,40 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping, * data written into the pagecache until we can find out from the server what * the values actually are. */ -static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, - loff_t i_size, loff_t pos, size_t copied) +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied) { + loff_t i_size, end = pos + copied; blkcnt_t add; size_t gap; + if (end <= i_size_read(inode)) + return; + if (ctx->ops->update_i_size) { - ctx->ops->update_i_size(inode, pos); + ctx->ops->update_i_size(inode, end); return; } - i_size_write(inode, pos); + spin_lock(&inode->i_lock); + + i_size = i_size_read(inode); + if (end > i_size) { + i_size_write(inode, end); #if IS_ENABLED(CONFIG_FSCACHE) - fscache_update_cookie(ctx->cache, NULL, &pos); + fscache_update_cookie(ctx->cache, NULL, &end); #endif - gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); - if (copied > gap) { - add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); + gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); + if (copied > gap) { + add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); - inode->i_blocks = min_t(blkcnt_t, - DIV_ROUND_UP(pos, SECTOR_SIZE), - inode->i_blocks + add); + inode->i_blocks = min_t(blkcnt_t, + DIV_ROUND_UP(end, SECTOR_SIZE), + inode->i_blocks + add); + } } + spin_unlock(&inode->i_lock); } /** @@ -111,7 +121,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, struct folio *folio = NULL, *writethrough = NULL; unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; - loff_t i_size, pos = iocb->ki_pos; + loff_t pos = iocb->ki_pos; size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; @@ -344,10 +354,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, flush_dcache_folio(folio); /* Update the inode size if we moved the EOF marker */ + netfs_update_i_size(ctx, inode, pos, copied); pos += copied; - i_size = i_size_read(inode); - if (pos > i_size) - netfs_update_i_size(ctx, inode, i_size, pos, copied); written += copied; if (likely(!wreq)) { diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index fa9a5bf3c6d5..a16660ab7f83 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -9,20 +9,6 @@ #include <linux/uio.h> #include "internal.h" -static void netfs_cleanup_dio_write(struct netfs_io_request *wreq) -{ - struct inode *inode = wreq->inode; - unsigned long long end = wreq->start + wreq->transferred; - - if (!wreq->error && - i_size_read(inode) < end) { - if (wreq->netfs_ops->update_i_size) - wreq->netfs_ops->update_i_size(inode, end); - else - i_size_write(inode, end); - } -} - /* * Perform an unbuffered write where we may have to do an RMW operation on an * encrypted file. This can also be used for direct I/O writes. @@ -98,7 +84,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * if (async) wreq->iocb = iocb; wreq->len = iov_iter_count(&wreq->buffer.iter); - wreq->cleanup = netfs_cleanup_dio_write; ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); if (ret < 0) { _debug("begin = %zd", ret); @@ -106,7 +91,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } if (!async) { - trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); ret = netfs_wait_for_write(wreq); if (ret > 0) iocb->ki_pos += ret; diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index e2ee9183392b..d4f16fefd965 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -28,6 +28,12 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); /* + * buffered_write.c + */ +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied); + +/* * main.c */ extern unsigned int netfs_debug; @@ -267,14 +273,32 @@ static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq, enum netfs_rreq_trace trace) { if (test_bit(rreq_flag, &rreq->flags)) { - trace_netfs_rreq(rreq, trace); clear_bit_unlock(rreq_flag, &rreq->flags); smp_mb__after_atomic(); /* Set flag before task state */ + trace_netfs_rreq(rreq, trace); wake_up(&rreq->waitq); } } /* + * Test the NETFS_RREQ_IN_PROGRESS flag, inserting an appropriate barrier. + */ +static inline bool netfs_check_rreq_in_progress(const struct netfs_io_request *rreq) +{ + /* Order read of flags before read of anything else, such as error. */ + return test_bit_acquire(NETFS_RREQ_IN_PROGRESS, &rreq->flags); +} + +/* + * Test the NETFS_SREQ_IN_PROGRESS flag, inserting an appropriate barrier. + */ +static inline bool netfs_check_subreq_in_progress(const struct netfs_io_subrequest *subreq) +{ + /* Order read of flags before read of anything else, such as error. */ + return test_bit_acquire(NETFS_SREQ_IN_PROGRESS, &subreq->flags); +} + +/* * fscache-cache.c */ #ifdef CONFIG_PROC_FS diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 3db401d269e7..73da6c9f5777 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -58,15 +58,15 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v) if (v == &netfs_io_requests) { seq_puts(m, - "REQUEST OR REF FL ERR OPS COVERAGE\n" - "======== == === == ==== === =========\n" + "REQUEST OR REF FLAG ERR OPS COVERAGE\n" + "======== == === ==== ==== === =========\n" ); return 0; } rreq = list_entry(v, struct netfs_io_request, proc_link); seq_printf(m, - "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx", + "%08x %s %3d %4lx %4ld %3d @%04llx %llx/%llx", rreq->debug_id, netfs_origins[rreq->origin], refcount_read(&rreq->ref), diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 43b67a28a8fa..20748bcfbf59 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -356,22 +356,22 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, DEFINE_WAIT(myself); list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + if (!netfs_check_subreq_in_progress(subreq)) continue; - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce); for (;;) { prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + if (!netfs_check_subreq_in_progress(subreq)) break; trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } } + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce); finish_wait(&rreq->waitq, &myself); } @@ -381,7 +381,12 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, static int netfs_collect_in_app(struct netfs_io_request *rreq, bool (*collector)(struct netfs_io_request *rreq)) { - bool need_collect = false, inactive = true; + bool need_collect = false, inactive = true, done = true; + + if (!netfs_check_rreq_in_progress(rreq)) { + trace_netfs_rreq(rreq, netfs_rreq_trace_recollect); + return 1; /* Done */ + } for (int i = 0; i < NR_IO_STREAMS; i++) { struct netfs_io_subrequest *subreq; @@ -395,14 +400,16 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq, struct netfs_io_subrequest, rreq_link); if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || + (!netfs_check_subreq_in_progress(subreq) || test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { need_collect = true; break; } + if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) + done = false; } - if (!need_collect && !inactive) + if (!need_collect && !inactive && !done) return 0; /* Sleep */ __set_current_state(TASK_RUNNING); @@ -423,14 +430,13 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq, /* * Wait for a request to complete, successfully or otherwise. */ -static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, - bool (*collector)(struct netfs_io_request *rreq)) +static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) { DEFINE_WAIT(myself); ssize_t ret; for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -440,18 +446,22 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, case 1: goto all_collected; case 2: + if (!netfs_check_rreq_in_progress(rreq)) + break; + cond_resched(); continue; } } - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) + if (!netfs_check_rreq_in_progress(rreq)) break; + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip); finish_wait(&rreq->waitq, &myself); ret = rreq->error; @@ -478,12 +488,12 @@ all_collected: ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) { - return netfs_wait_for_request(rreq, netfs_read_collection); + return netfs_wait_for_in_progress(rreq, netfs_read_collection); } ssize_t netfs_wait_for_write(struct netfs_io_request *rreq) { - return netfs_wait_for_request(rreq, netfs_write_collection); + return netfs_wait_for_in_progress(rreq, netfs_write_collection); } /* @@ -494,10 +504,8 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, { DEFINE_WAIT(myself); - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -507,19 +515,23 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, case 1: goto all_collected; case 2: + if (!netfs_check_rreq_in_progress(rreq) || + !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) + break; + cond_resched(); continue; } } - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || + if (!netfs_check_rreq_in_progress(rreq) || !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) break; schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause); finish_wait(&rreq->waitq, &myself); } diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 96ee18af28ef..3e804da1e1eb 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -218,7 +218,7 @@ reassess: stream->collected_to = front->start; } - if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) + if (netfs_check_subreq_in_progress(front)) notes |= HIT_PENDING; smp_rmb(); /* Read counters after IN_PROGRESS flag. */ transferred = READ_ONCE(front->transferred); @@ -293,7 +293,9 @@ reassess: spin_lock(&rreq->lock); remove = front; - trace_netfs_sreq(front, netfs_sreq_trace_discard); + trace_netfs_sreq(front, + notes & ABANDON_SREQ ? + netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed); list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); @@ -353,9 +355,11 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -379,9 +383,11 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -445,7 +451,7 @@ void netfs_read_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_check_rreq_in_progress(rreq)) { if (netfs_read_collection(rreq)) /* Drop the ref from the IN_PROGRESS flag. */ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index e2b102ffb768..0f3a36852a4d 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -240,7 +240,7 @@ reassess_streams: } /* Stall if the front is still undergoing I/O. */ - if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) { + if (netfs_check_subreq_in_progress(front)) { notes |= HIT_PENDING; break; } @@ -393,8 +393,10 @@ bool netfs_write_collection(struct netfs_io_request *wreq) ictx->ops->invalidate_cache(wreq); } - if (wreq->cleanup) - wreq->cleanup(wreq); + if ((wreq->origin == NETFS_UNBUFFERED_WRITE || + wreq->origin == NETFS_DIO_WRITE) && + !wreq->error) + netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred); if (wreq->origin == NETFS_DIO_WRITE && wreq->mapping->nrpages) { @@ -419,9 +421,11 @@ bool netfs_write_collection(struct netfs_io_request *wreq) if (wreq->iocb) { size_t written = min(wreq->transferred, wreq->len); wreq->iocb->ki_pos += written; - if (wreq->iocb->ki_complete) + if (wreq->iocb->ki_complete) { + trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete); wreq->iocb->ki_complete( wreq->iocb, wreq->error ? wreq->error : written); + } wreq->iocb = VFS_PTR_POISON; } @@ -434,7 +438,7 @@ void netfs_write_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_check_rreq_in_progress(rreq)) { if (netfs_write_collection(rreq)) /* Drop the ref from the IN_PROGRESS flag. */ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 9d1d8a8bab72..fc9c3e0d34d8 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -146,14 +146,13 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, subreq = netfs_alloc_subrequest(wreq); subreq->source = to->source; subreq->start = start; - subreq->debug_index = atomic_inc_return(&wreq->subreq_counter); subreq->stream_nr = to->stream_nr; subreq->retry_count = 1; trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), netfs_sreq_trace_new); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); + trace_netfs_sreq(subreq, netfs_sreq_trace_split); list_add(&subreq->rreq_link, &to->rreq_link); to = list_next_entry(to, rreq_link); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index df4807460596..4bea008dbebd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr) } static int ff_layout_async_handle_error_v4(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - switch (task->tk_status) { - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: + switch (op_status) { + case NFS4_OK: + case NFS4ERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFS4ERR_BADSESSION: + case NFS4ERR_BADSLOT: + case NFS4ERR_BAD_HIGH_SLOT: + case NFS4ERR_DEADSESSION: + case NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case NFS4ERR_SEQ_FALSE_RETRY: + case NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session. Exchangeid " "flags 0x%x\n", __func__, task->tk_status, clp->cl_exchange_flags); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - break; - case -NFS4ERR_DELAY: + goto out_retry; + case NFS4ERR_DELAY: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); fallthrough; - case -NFS4ERR_GRACE: + case NFS4ERR_GRACE: rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); - break; - case -NFS4ERR_RETRY_UNCACHED_REP: - break; + goto out_retry; + case NFS4ERR_RETRY_UNCACHED_REP: + goto out_retry; /* Invalidate Layout errors */ - case -NFS4ERR_PNFS_NO_LAYOUT: - case -ESTALE: /* mapped NFS4ERR_STALE */ - case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ - case -EISDIR: /* mapped NFS4ERR_ISDIR */ - case -NFS4ERR_FHEXPIRED: - case -NFS4ERR_WRONG_TYPE: + case NFS4ERR_PNFS_NO_LAYOUT: + case NFS4ERR_STALE: + case NFS4ERR_BADHANDLE: + case NFS4ERR_ISDIR: + case NFS4ERR_FHEXPIRED: + case NFS4ERR_WRONG_TYPE: dprintk("%s Invalid layout error %d\n", __func__, task->tk_status); /* @@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, pnfs_destroy_layout(NFS_I(inode)); rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; + default: + break; + } + + switch (task->tk_status) { /* RPC connection errors */ case -ENETDOWN: case -ENETUNREACH: @@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); - fallthrough; + break; default: - if (ff_layout_avoid_mds_available_ds(lseg)) - return -NFS4ERR_RESET_TO_PNFS; -reset: - dprintk("%s Retry through MDS. Error %d\n", __func__, - task->tk_status); - return -NFS4ERR_RESET_TO_MDS; + break; } + + if (ff_layout_avoid_mds_available_ds(lseg)) + return -NFS4ERR_RESET_TO_PNFS; +reset: + dprintk("%s Retry through MDS. Error %d\n", __func__, + task->tk_status); + return -NFS4ERR_RESET_TO_MDS; + +out_retry: task->tk_status = 0; return -EAGAIN; } /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, + u32 op_status, struct nfs_client *clp, struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + switch (op_status) { + case NFS_OK: + case NFSERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFSERR_ACCES: + case NFSERR_BADHANDLE: + case NFSERR_FBIG: + case NFSERR_IO: + case NFSERR_NOSPC: + case NFSERR_ROFS: + case NFSERR_STALE: + goto out_reset_to_pnfs; + case NFSERR_JUKEBOX: + nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); + goto out_retry; + default: + break; + } + switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); } +out_reset_to_pnfs: /* FIXME: Need to prevent infinite looping here. */ return -NFS4ERR_RESET_TO_PNFS; out_retry: @@ -1228,6 +1272,7 @@ out_retry: } static int ff_layout_async_handle_error(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: - return ff_layout_async_handle_error_v3(task, clp, lseg, idx); - case 4: - return ff_layout_async_handle_error_v4(task, state, clp, + return ff_layout_async_handle_error_v3(task, op_status, clp, lseg, idx); + case 4: + return ff_layout_async_handle_error_v4(task, op_status, state, + clp, lseg, idx); default: /* should never happen */ WARN_ON_ONCE(1); @@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, switch (status) { case NFS4ERR_DELAY: case NFS4ERR_GRACE: + case NFS4ERR_PERM: break; case NFS4ERR_NXIO: ff_layout_mark_ds_unreachable(lseg, idx); @@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, trace_ff_layout_read_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task, trace_ff_layout_write_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, trace_ff_layout_commit_error(data, task->tk_status); } - err = ff_layout_async_handle_error(task, NULL, data->ds_clp, - data->lseg, data->ds_commit_index); + err = ff_layout_async_handle_error(task, data->res.op_status, + NULL, data->ds_clp, data->lseg, + data->ds_commit_index); trace_nfs4_pnfs_commit_ds(data, err); switch (err) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8ab7868807a7..a2fa6bc4d74e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2589,15 +2589,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); + int err; nfs_clients_init(net); if (!rpc_proc_register(net, &nn->rpcstats)) { - nfs_clients_exit(net); - return -ENOMEM; + err = -ENOMEM; + goto err_proc_rpc; } - return nfs_fs_proc_net_init(net); + err = nfs_fs_proc_net_init(net); + if (err) + goto err_proc_nfs; + + return 0; + +err_proc_nfs: + rpc_proc_unregister(net, "nfs"); +err_proc_rpc: + nfs_clients_exit(net); + return err; } static void nfs_net_exit(struct net *net) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3adb7d0dbec7..1a7ec68bde15 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) { if (atomic_dec_and_test(&lo->plh_outstanding) && - test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) + test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) { + smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); + } } static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) diff --git a/fs/pnode.h b/fs/pnode.h index 34b6247af01d..2d026fb98b18 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -28,8 +28,6 @@ #define CL_SHARED_TO_SLAVE 0x20 #define CL_COPY_MNT_NS_FILE 0x40 -#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) - static inline void set_mnt_shared(struct mount *mnt) { mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a3eb3b740f76..3604b616311c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -42,7 +42,7 @@ static void proc_evict_inode(struct inode *inode) head = ei->sysctl; if (head) { - RCU_INIT_POINTER(ei->sysctl, NULL); + WRITE_ONCE(ei->sysctl, NULL); proc_sys_evict_inode(inode, head); } } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index cc9d74a06ff0..08b78150cdde 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -918,17 +918,21 @@ static int proc_sys_compare(const struct dentry *dentry, struct ctl_table_header *head; struct inode *inode; - /* Although proc doesn't have negative dentries, rcu-walk means - * that inode here can be NULL */ - /* AV: can it, indeed? */ - inode = d_inode_rcu(dentry); - if (!inode) - return 1; if (name->len != len) return 1; if (memcmp(name->name, str, len)) return 1; - head = rcu_dereference(PROC_I(inode)->sysctl); + + // false positive is fine here - we'll recheck anyway + if (d_in_lookup(dentry)) + return 0; + + inode = d_inode_rcu(dentry); + // we just might have run into dentry in the middle of __dentry_kill() + if (!inode) + return 1; + + head = READ_ONCE(PROC_I(inode)->sysctl); return !head || !sysctl_is_seen(head); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 27972c0749e7..751479eb128f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) unsigned long text, lib, swap, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; - anon = get_mm_counter(mm, MM_ANONPAGES); - file = get_mm_counter(mm, MM_FILEPAGES); - shmem = get_mm_counter(mm, MM_SHMEMPAGES); + anon = get_mm_counter_sum(mm, MM_ANONPAGES); + file = get_mm_counter_sum(mm, MM_FILEPAGES); + shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES); /* * Note: to minimize their overhead, mm maintains hiwater_vm and @@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) text = min(text, mm->exec_vm << PAGE_SHIFT); lib = (mm->exec_vm << PAGE_SHIFT) - text; - swap = get_mm_counter(mm, MM_SWAPENTS); + swap = get_mm_counter_sum(mm, MM_SWAPENTS); SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); @@ -92,12 +92,12 @@ unsigned long task_statm(struct mm_struct *mm, unsigned long *shared, unsigned long *text, unsigned long *data, unsigned long *resident) { - *shared = get_mm_counter(mm, MM_FILEPAGES) + - get_mm_counter(mm, MM_SHMEMPAGES); + *shared = get_mm_counter_sum(mm, MM_FILEPAGES) + + get_mm_counter_sum(mm, MM_SHMEMPAGES); *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; *data = mm->data_vm + mm->stack_vm; - *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); + *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES); return mm->total_vm; } @@ -2182,7 +2182,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, categories |= PAGE_IS_FILE; } - if (is_zero_pfn(pmd_pfn(pmd))) + if (is_huge_zero_pmd(pmd)) categories |= PAGE_IS_PFNZERO; if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 45e94e18f4d5..89160bc34d35 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count) struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head smb_ses_list; + struct list_head rlist; /* reconnect list */ spinlock_t srv_lock; /* protect anything here that is not protected */ __u64 conn_id; /* connection identifier (useful for debugging) */ int srv_count; /* reference counter */ @@ -776,6 +777,7 @@ struct TCP_Server_Info { __le32 session_key_id; /* retrieved from negotiate response and send in session setup request */ struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ + unsigned long neg_start; /* when negotiate started (jiffies) */ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ #define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ #define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ @@ -1302,6 +1304,7 @@ struct cifs_tcon { bool use_persistent:1; /* use persistent instead of durable handles */ bool no_lease:1; /* Do not request leases on files or directories */ bool use_witness:1; /* use witness protocol */ + bool dummy:1; /* dummy tcon used for reconnecting channels */ __le32 capabilities; __u32 share_flags; __u32 maximal_access; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 66093fa78aed..045227ed4efc 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -136,6 +136,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *out_buf, int *bytes_returned); +void smb2_query_server_interfaces(struct work_struct *work); void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels); diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 7216fcec79e8..75142f49d65d 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1334,7 +1334,12 @@ cifs_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: + __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) /* reset bytes number since we can not check a sign */ @@ -1343,8 +1348,14 @@ cifs_readv_callback(struct mid_q_entry *mid) task_io_account_read(rdata->got_bytes); cifs_stats_bytes_read(tcon, rdata->got_bytes); break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + break; } if (rdata->result == -ENODATA) { @@ -1713,10 +1724,21 @@ cifs_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index c48869c29e15..205f547ca49e 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -97,7 +97,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) return rc; } -static void smb2_query_server_interfaces(struct work_struct *work) +void smb2_query_server_interfaces(struct work_struct *work) { int rc; int xid; @@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work) (SMB_INTERFACE_POLL_INTERVAL * HZ)); } +#define set_need_reco(server) \ +do { \ + spin_lock(&server->srv_lock); \ + if (server->tcpStatus != CifsExiting) \ + server->tcpStatus = CifsNeedReconnect; \ + spin_unlock(&server->srv_lock); \ +} while (0) + /* * Update the tcpStatus for the server. * This is used to signal the cifsd thread to call cifs_reconnect @@ -137,39 +145,45 @@ void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels) { - struct TCP_Server_Info *pserver; + struct TCP_Server_Info *nserver; struct cifs_ses *ses; + LIST_HEAD(reco); int i; - /* If server is a channel, select the primary channel */ - pserver = SERVER_IS_CHAN(server) ? server->primary_server : server; - /* if we need to signal just this channel */ if (!all_channels) { - spin_lock(&server->srv_lock); - if (server->tcpStatus != CifsExiting) - server->tcpStatus = CifsNeedReconnect; - spin_unlock(&server->srv_lock); + set_need_reco(server); return; } - spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { - if (cifs_ses_exiting(ses)) - continue; - spin_lock(&ses->chan_lock); - for (i = 0; i < ses->chan_count; i++) { - if (!ses->chans[i].server) + if (SERVER_IS_CHAN(server)) + server = server->primary_server; + scoped_guard(spinlock, &cifs_tcp_ses_lock) { + set_need_reco(server); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_EXITING) { + spin_unlock(&ses->ses_lock); continue; - - spin_lock(&ses->chans[i].server->srv_lock); - if (ses->chans[i].server->tcpStatus != CifsExiting) - ses->chans[i].server->tcpStatus = CifsNeedReconnect; - spin_unlock(&ses->chans[i].server->srv_lock); + } + spin_lock(&ses->chan_lock); + for (i = 1; i < ses->chan_count; i++) { + nserver = ses->chans[i].server; + if (!nserver) + continue; + nserver->srv_count++; + list_add(&nserver->rlist, &reco); + } + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); } - spin_unlock(&ses->chan_lock); } - spin_unlock(&cifs_tcp_ses_lock); + + list_for_each_entry_safe(server, nserver, &reco, rlist) { + list_del_init(&server->rlist); + set_need_reco(server); + cifs_put_tcp_session(server, 0); + } } /* @@ -665,12 +679,12 @@ server_unresponsive(struct TCP_Server_Info *server) /* * If we're in the process of mounting a share or reconnecting a session * and the server abruptly shut down (e.g. socket wasn't closed, packet - * had been ACK'ed but no SMB response), don't wait longer than 20s to - * negotiate protocol. + * had been ACK'ed but no SMB response), don't wait longer than 20s from + * when negotiate actually started. */ spin_lock(&server->srv_lock); if (server->tcpStatus == CifsInNegotiate && - time_after(jiffies, server->lstrp + 20 * HZ)) { + time_after(jiffies, server->neg_start + 20 * HZ)) { spin_unlock(&server->srv_lock); cifs_reconnect(server, false); return true; @@ -2866,20 +2880,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->max_cached_dirs = ctx->max_cached_dirs; tcon->nodelete = ctx->nodelete; tcon->local_lease = ctx->local_lease; - INIT_LIST_HEAD(&tcon->pending_opens); tcon->status = TID_GOOD; - INIT_DELAYED_WORK(&tcon->query_interfaces, - smb2_query_server_interfaces); if (ses->server->dialect >= SMB30_PROT_ID && (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { /* schedule query interfaces poll */ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, (SMB_INTERFACE_POLL_INTERVAL * HZ)); } -#ifdef CONFIG_CIFS_DFS_UPCALL - INIT_DELAYED_WORK(&tcon->dfs_cache_work, dfs_cache_refresh); -#endif spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); spin_unlock(&cifs_tcp_ses_lock); @@ -4201,6 +4209,7 @@ retry: server->lstrp = jiffies; server->tcpStatus = CifsInNegotiate; + server->neg_start = jiffies; spin_unlock(&server->srv_lock); rc = server->ops->negotiate(xid, ses, server); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index a634a34d4086..59ccc2229ab3 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1824,10 +1824,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_errorf(fc, "symlinkroot mount options must be absolute path\n"); goto cifs_parse_mount_err; } - kfree(ctx->symlinkroot); - ctx->symlinkroot = kstrdup(param->string, GFP_KERNEL); - if (!ctx->symlinkroot) + if (strnlen(param->string, PATH_MAX) == PATH_MAX) { + cifs_errorf(fc, "symlinkroot path too long (max path length: %u)\n", + PATH_MAX - 1); goto cifs_parse_mount_err; + } + kfree(ctx->symlinkroot); + ctx->symlinkroot = param->string; + param->string = NULL; break; } /* case Opt_ignore: - is ignored as expected ... */ @@ -1837,13 +1841,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, goto cifs_parse_mount_err; } - /* - * By default resolve all native absolute symlinks relative to "/mnt/". - * Same default has drvfs driver running in WSL for resolving SMB shares. - */ - if (!ctx->symlinkroot) - ctx->symlinkroot = kstrdup("/mnt/", GFP_KERNEL); - return 0; cifs_parse_mount_err: diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index e77017f47084..da23cc12a52c 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -151,6 +151,12 @@ tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace) #ifdef CONFIG_CIFS_DFS_UPCALL INIT_LIST_HEAD(&ret_buf->dfs_ses_list); #endif + INIT_LIST_HEAD(&ret_buf->pending_opens); + INIT_DELAYED_WORK(&ret_buf->query_interfaces, + smb2_query_server_interfaces); +#ifdef CONFIG_CIFS_DFS_UPCALL + INIT_DELAYED_WORK(&ret_buf->dfs_cache_work, dfs_cache_refresh); +#endif return ret_buf; } diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index ba0193cf9033..4e5460206397 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -264,7 +264,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info, /* The Mode field in the response can now include the file type as well */ fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode), fattr->cf_cifsattrs & ATTR_DIRECTORY); - fattr->cf_dtype = S_DT(le32_to_cpu(info->Mode)); + fattr->cf_dtype = S_DT(fattr->cf_mode); switch (fattr->cf_mode & S_IFMT) { case S_IFLNK: diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 511611206dab..5fa29a97ac15 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -57,6 +57,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, struct reparse_symlink_data_buffer *buf = NULL; struct cifs_open_info_data data = {}; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + const char *symroot = cifs_sb->ctx->symlinkroot; struct inode *new; struct kvec iov; __le16 *path = NULL; @@ -82,7 +83,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, .symlink_target = symlink_target, }; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + symroot && symname[0] == '/') { /* * This is a request to create an absolute symlink on the server * which does not support POSIX paths, and expects symlink in @@ -92,7 +94,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, * ensure compatibility of this symlink stored in absolute form * on the SMB server. */ - if (!strstarts(symname, cifs_sb->ctx->symlinkroot)) { + if (!strstarts(symname, symroot)) { /* * If the absolute Linux symlink target path is not * inside "symlinkroot" location then there is no way @@ -101,12 +103,12 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, cifs_dbg(VFS, "absolute symlink '%s' cannot be converted to NT format " "because it is outside of symlinkroot='%s'\n", - symname, cifs_sb->ctx->symlinkroot); + symname, symroot); rc = -EINVAL; goto out; } - len = strlen(cifs_sb->ctx->symlinkroot); - if (cifs_sb->ctx->symlinkroot[len-1] != '/') + len = strlen(symroot); + if (symroot[len - 1] != '/') len++; if (symname[len] >= 'a' && symname[len] <= 'z' && (symname[len+1] == '/' || symname[len+1] == '\0')) { @@ -782,6 +784,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, const char *full_path, struct cifs_sb_info *cifs_sb) { + const char *symroot = cifs_sb->ctx->symlinkroot; char sep = CIFS_DIR_SEP(cifs_sb); char *linux_target = NULL; char *smb_target = NULL; @@ -815,7 +818,8 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, goto out; } - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && !relative) { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + symroot && !relative) { /* * This is an absolute symlink from the server which does not * support POSIX paths, so the symlink is in NT-style path. @@ -875,15 +879,8 @@ globalroot: abs_path += sizeof("\\DosDevices\\")-1; else if (strstarts(abs_path, "\\GLOBAL??\\")) abs_path += sizeof("\\GLOBAL??\\")-1; - else { - /* Unhandled absolute symlink, points outside of DOS/Win32 */ - cifs_dbg(VFS, - "absolute symlink '%s' cannot be converted from NT format " - "because points to unknown target\n", - smb_target); - rc = -EIO; - goto out; - } + else + goto out_unhandled_target; /* Sometimes path separator after \?? is double backslash */ if (abs_path[0] == '\\') @@ -910,25 +907,19 @@ globalroot: abs_path++; abs_path[0] = drive_letter; } else { - /* Unhandled absolute symlink. Report an error. */ - cifs_dbg(VFS, - "absolute symlink '%s' cannot be converted from NT format " - "because points to unknown target\n", - smb_target); - rc = -EIO; - goto out; + goto out_unhandled_target; } abs_path_len = strlen(abs_path)+1; - symlinkroot_len = strlen(cifs_sb->ctx->symlinkroot); - if (cifs_sb->ctx->symlinkroot[symlinkroot_len-1] == '/') + symlinkroot_len = strlen(symroot); + if (symroot[symlinkroot_len - 1] == '/') symlinkroot_len--; linux_target = kmalloc(symlinkroot_len + 1 + abs_path_len, GFP_KERNEL); if (!linux_target) { rc = -ENOMEM; goto out; } - memcpy(linux_target, cifs_sb->ctx->symlinkroot, symlinkroot_len); + memcpy(linux_target, symroot, symlinkroot_len); linux_target[symlinkroot_len] = '/'; memcpy(linux_target + symlinkroot_len + 1, abs_path, abs_path_len); } else if (smb_target[0] == sep && relative) { @@ -966,6 +957,7 @@ globalroot: * These paths have same format as Linux symlinks, so no * conversion is needed. */ +out_unhandled_target: linux_target = smb_target; smb_target = NULL; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a717be1626a3..2df93a75e3b8 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -424,9 +424,9 @@ skip_sess_setup: free_xid(xid); ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; - /* regardless of rc value, setup polling */ - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); + if (!tcon->ipc && !tcon->dummy) + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); mutex_unlock(&ses->session_mutex); @@ -4229,10 +4229,8 @@ void smb2_reconnect_server(struct work_struct *work) } goto done; } - tcon->status = TID_GOOD; - tcon->retry = false; - tcon->need_reconnect = false; + tcon->dummy = true; /* now reconnect sessions for necessary channels */ list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { @@ -4567,7 +4565,11 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) @@ -4578,11 +4580,15 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(shdr->CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + break; } #ifdef CONFIG_CIFS_SMB_DIRECT /* @@ -4835,11 +4841,14 @@ smb2_writev_callback(struct mid_q_entry *mid) switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; result = smb2_check_receive(mid, server, 0); - if (result != 0) + if (result != 0) { + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_bad); break; + } written = le32_to_cpu(rsp->DataLength); /* @@ -4861,14 +4870,23 @@ smb2_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } @@ -4908,7 +4926,6 @@ smb2_writev_callback(struct mid_q_entry *mid) server->credits, server->in_flight, 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; - trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); cifs_write_subrequest_terminated(wdata, result ?: written); release_mid(mid); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index cbc85bca006f..754e94a0e07f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -907,8 +907,10 @@ wait_send_queue: .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; + size_t payload_len = umin(*_remaining_data_length, + sp->max_send_size - sizeof(*packet)); - rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length, + rc = smb_extract_iter_to_rdma(iter, payload_len, &extract); if (rc < 0) goto err_dma; @@ -1013,6 +1015,27 @@ static int smbd_post_send_empty(struct smbd_connection *info) return smbd_post_send_iter(info, NULL, &remaining_data_length); } +static int smbd_post_send_full_iter(struct smbd_connection *info, + struct iov_iter *iter, + int *_remaining_data_length) +{ + int rc = 0; + + /* + * smbd_post_send_iter() respects the + * negotiated max_send_size, so we need to + * loop until the full iter is posted + */ + + while (iov_iter_count(iter) > 0) { + rc = smbd_post_send_iter(info, iter, _remaining_data_length); + if (rc < 0) + break; + } + + return rc; +} + /* * Post a receive request to the transport * The remote peer can only send data when a receive request is posted @@ -1452,6 +1475,9 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) char name[MAX_NAME_LEN]; int rc; + if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) + return -ENOMEM; + scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); info->request_cache = kmem_cache_create( @@ -1469,12 +1495,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) goto out1; scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); + + struct kmem_cache_args response_args = { + .align = __alignof__(struct smbd_response), + .useroffset = (offsetof(struct smbd_response, packet) + + sizeof(struct smbdirect_data_transfer)), + .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), + }; info->response_cache = - kmem_cache_create( - name, - sizeof(struct smbd_response) + - sp->max_recv_size, - 0, SLAB_HWCACHE_ALIGN, NULL); + kmem_cache_create(name, + sizeof(struct smbd_response) + sp->max_recv_size, + &response_args, SLAB_HWCACHE_ALIGN); if (!info->response_cache) goto out2; @@ -1747,35 +1778,39 @@ try_again: } /* - * Receive data from receive reassembly queue + * Receive data from the transport's receive reassembly queue * All the incoming data packets are placed in reassembly queue - * buf: the buffer to read data into + * iter: the buffer to read data into * size: the length of data to read * return value: actual data read - * Note: this implementation copies the data from reassebmly queue to receive + * + * Note: this implementation copies the data from reassembly queue to receive * buffers used by upper layer. This is not the optimal code path. A better way * to do it is to not have upper layer allocate its receive buffers but rather * borrow the buffer from reassembly queue, and return it after data is * consumed. But this will require more changes to upper layer code, and also * need to consider packet boundaries while they still being reassembled. */ -static int smbd_recv_buf(struct smbd_connection *info, char *buf, - unsigned int size) +int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { struct smbdirect_socket *sc = &info->socket; struct smbd_response *response; struct smbdirect_data_transfer *data_transfer; + size_t size = iov_iter_count(&msg->msg_iter); int to_copy, to_read, data_read, offset; u32 data_length, remaining_data_length, data_offset; int rc; + if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE)) + return -EINVAL; /* It's a bug in upper layer to get there */ + again: /* * No need to hold the reassembly queue lock all the time as we are * the only one reading from the front of the queue. The transport * may add more entries to the back of the queue at the same time */ - log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size, + log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size, info->reassembly_data_length); if (info->reassembly_data_length >= size) { int queue_length; @@ -1813,7 +1848,10 @@ again: if (response->first_segment && size == 4) { unsigned int rfc1002_len = data_length + remaining_data_length; - *((__be32 *)buf) = cpu_to_be32(rfc1002_len); + __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); + if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), + &msg->msg_iter) != sizeof(rfc1002_hdr)) + return -EFAULT; data_read = 4; response->first_segment = false; log_read(INFO, "returning rfc1002 length %d\n", @@ -1822,10 +1860,9 @@ again: } to_copy = min_t(int, data_length - offset, to_read); - memcpy( - buf + data_read, - (char *)data_transfer + data_offset + offset, - to_copy); + if (copy_to_iter((char *)data_transfer + data_offset + offset, + to_copy, &msg->msg_iter) != to_copy) + return -EFAULT; /* move on to the next buffer? */ if (to_copy == data_length - offset) { @@ -1891,90 +1928,6 @@ read_rfc1002_done: } /* - * Receive a page from receive reassembly queue - * page: the page to read data into - * to_read: the length of data to read - * return value: actual data read - */ -static int smbd_recv_page(struct smbd_connection *info, - struct page *page, unsigned int page_offset, - unsigned int to_read) -{ - struct smbdirect_socket *sc = &info->socket; - int ret; - char *to_address; - void *page_address; - - /* make sure we have the page ready for read */ - ret = wait_event_interruptible( - info->wait_reassembly_queue, - info->reassembly_data_length >= to_read || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (ret) - return ret; - - /* now we can read from reassembly queue and not sleep */ - page_address = kmap_atomic(page); - to_address = (char *) page_address + page_offset; - - log_read(INFO, "reading from page=%p address=%p to_read=%d\n", - page, to_address, to_read); - - ret = smbd_recv_buf(info, to_address, to_read); - kunmap_atomic(page_address); - - return ret; -} - -/* - * Receive data from transport - * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC - * return: total bytes read, or 0. SMB Direct will not do partial read. - */ -int smbd_recv(struct smbd_connection *info, struct msghdr *msg) -{ - char *buf; - struct page *page; - unsigned int to_read, page_offset; - int rc; - - if (iov_iter_rw(&msg->msg_iter) == WRITE) { - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg iter dir %u\n", - iov_iter_rw(&msg->msg_iter)); - rc = -EINVAL; - goto out; - } - - switch (iov_iter_type(&msg->msg_iter)) { - case ITER_KVEC: - buf = msg->msg_iter.kvec->iov_base; - to_read = msg->msg_iter.kvec->iov_len; - rc = smbd_recv_buf(info, buf, to_read); - break; - - case ITER_BVEC: - page = msg->msg_iter.bvec->bv_page; - page_offset = msg->msg_iter.bvec->bv_offset; - to_read = msg->msg_iter.bvec->bv_len; - rc = smbd_recv_page(info, page, page_offset, to_read); - break; - - default: - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg type %d\n", - iov_iter_type(&msg->msg_iter)); - rc = -EINVAL; - } - -out: - /* SMBDirect will read it all or nothing */ - if (rc > 0) - msg->msg_iter.count = 0; - return rc; -} - -/* * Send data to transport * Each rqst is transported as a SMBDirect payload * rqst: the data to write @@ -2032,14 +1985,14 @@ int smbd_send(struct TCP_Server_Info *server, klen += rqst->rq_iov[i].iov_len; iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); - rc = smbd_post_send_iter(info, &iter, &remaining_data_length); + rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length); if (rc < 0) break; if (iov_iter_count(&rqst->rq_iter) > 0) { /* And then the data pages if there are any */ - rc = smbd_post_send_iter(info, &rqst->rq_iter, - &remaining_data_length); + rc = smbd_post_send_full_iter(info, &rqst->rq_iter, + &remaining_data_length); if (rc < 0) break; } diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 52bcb55d9952..93e5b2bb9f28 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(smb3_rw_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\tR=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", + TP_printk("R=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", __entry->rreq_debug_id, __entry->rreq_debug_index, __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->offset, __entry->len, __entry->rc) @@ -190,7 +190,7 @@ DECLARE_EVENT_CLASS(smb3_other_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->offset, __entry->len, __entry->rc) ) @@ -247,7 +247,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len, __entry->rc) ) @@ -298,7 +298,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_done_class, __entry->target_offset = target_offset; __entry->len = len; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", + TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len) ) @@ -482,7 +482,7 @@ DECLARE_EVENT_CLASS(smb3_fd_class, __entry->tid = tid; __entry->sesid = sesid; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx", __entry->xid, __entry->sesid, __entry->tid, __entry->fid) ) @@ -521,7 +521,7 @@ DECLARE_EVENT_CLASS(smb3_fd_err_class, __entry->sesid = sesid; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->rc) ) @@ -794,7 +794,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_err_class, __entry->status = status; __entry->rc = rc; ), - TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d", + TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d", __entry->sesid, __entry->tid, __entry->cmd, __entry->mid, __entry->status, __entry->rc) ) @@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_done_class, __entry->cmd = cmd; __entry->mid = mid; ), - TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu", + TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu", __entry->sesid, __entry->tid, __entry->cmd, __entry->mid) ) @@ -867,7 +867,7 @@ DECLARE_EVENT_CLASS(smb3_mid_class, __entry->when_sent = when_sent; __entry->when_received = when_received; ), - TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu", + TP_printk("cmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu", __entry->cmd, __entry->mid, __entry->pid, __entry->when_sent, __entry->when_received) ) @@ -898,7 +898,7 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class, __assign_str(func_name); __entry->rc = rc; ), - TP_printk("\t%s: xid=%u rc=%d", + TP_printk("%s: xid=%u rc=%d", __get_str(func_name), __entry->xid, __entry->rc) ) @@ -924,7 +924,7 @@ DECLARE_EVENT_CLASS(smb3_sync_err_class, __entry->ino = ino; __entry->rc = rc; ), - TP_printk("\tino=%lu rc=%d", + TP_printk("ino=%lu rc=%d", __entry->ino, __entry->rc) ) @@ -950,7 +950,7 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class, __entry->xid = xid; __assign_str(func_name); ), - TP_printk("\t%s: xid=%u", + TP_printk("%s: xid=%u", __get_str(func_name), __entry->xid) ) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index fafa86273f12..63d17cea2e95 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -8573,11 +8573,6 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) goto err_out; } - opinfo->op_state = OPLOCK_STATE_NONE; - wake_up_interruptible_all(&opinfo->oplock_q); - opinfo_put(opinfo); - ksmbd_fd_put(work, fp); - rsp->StructureSize = cpu_to_le16(24); rsp->OplockLevel = rsp_oplevel; rsp->Reserved = 0; @@ -8585,16 +8580,15 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) rsp->VolatileFid = volatile_id; rsp->PersistentFid = persistent_id; ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break)); - if (!ret) - return; - + if (ret) { err_out: + smb2_set_err_rsp(work); + } + opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); - opinfo_put(opinfo); ksmbd_fd_put(work, fp); - smb2_set_err_rsp(work); } static int check_lease_state(struct lease *lease, __le32 req_state) @@ -8724,11 +8718,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) } lease_state = lease->state; - opinfo->op_state = OPLOCK_STATE_NONE; - wake_up_interruptible_all(&opinfo->oplock_q); - atomic_dec(&opinfo->breaking_cnt); - wake_up_interruptible_all(&opinfo->oplock_brk); - opinfo_put(opinfo); rsp->StructureSize = cpu_to_le16(36); rsp->Reserved = 0; @@ -8737,16 +8726,16 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) rsp->LeaseState = lease_state; rsp->LeaseDuration = 0; ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack)); - if (!ret) - return; - + if (ret) { err_out: + smb2_set_err_rsp(work); + } + + opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); atomic_dec(&opinfo->breaking_cnt); wake_up_interruptible_all(&opinfo->oplock_brk); - opinfo_put(opinfo); - smb2_set_err_rsp(work); } /** diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 64a428a06ace..c6cbe0d56e32 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -433,7 +433,8 @@ static void free_transport(struct smb_direct_transport *t) if (t->qp) { ib_drain_qp(t->qp); ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); - ib_destroy_qp(t->qp); + t->qp = NULL; + rdma_destroy_qp(t->cm_id); } ksmbd_debug(RDMA, "drain the reassembly queue\n"); @@ -1940,8 +1941,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, return 0; err: if (t->qp) { - ib_destroy_qp(t->qp); t->qp = NULL; + rdma_destroy_qp(t->cm_id); } if (t->recv_cq) { ib_destroy_cq(t->recv_cq); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 0f3aad12e495..d3437f6644e3 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1282,6 +1282,7 @@ out1: err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry); if (err) { + mnt_drop_write(parent_path->mnt); path_put(path); path_put(parent_path); } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 7839efe050bf..000cc7f4a3ce 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3444,16 +3444,41 @@ xfs_alloc_read_agf( set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); } + #ifdef DEBUG - else if (!xfs_is_shutdown(mp)) { - ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); - ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); - ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); - ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); - ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level)); - ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level)); + /* + * It's possible for the AGF to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGF from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. + * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. + */ + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); + ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks); + ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount); + ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest); + ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level); + ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); + xfs_trans_brelse(tp, agfbp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } } -#endif +#endif /* DEBUG */ + if (agfbpp) *agfbpp = agfbp; else diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 0c47b5c6ca7d..750111634d9f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi( set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); } +#ifdef DEBUG /* - * It's possible for these to be out of sync if - * we are in the middle of a forced shutdown. + * It's possible for the AGF to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGF from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. + * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. */ - ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - xfs_is_shutdown(pag_mount(pag))); + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount); + ok &= pag->pagi_count == be32_to_cpu(agi->agi_count); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); + xfs_trans_brelse(tp, agibp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } + } +#endif /* DEBUG */ + if (agibpp) *agibpp = agibp; else diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8af83bd161f9..ba5bd6031ece 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2082,44 +2082,6 @@ xfs_buf_delwri_submit( return error; } -/* - * Push a single buffer on a delwri queue. - * - * The purpose of this function is to submit a single buffer of a delwri queue - * and return with the buffer still on the original queue. - * - * The buffer locking and queue management logic between _delwri_pushbuf() and - * _delwri_queue() guarantee that the buffer cannot be queued to another list - * before returning. - */ -int -xfs_buf_delwri_pushbuf( - struct xfs_buf *bp, - struct list_head *buffer_list) -{ - int error; - - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - - trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_); - - xfs_buf_lock(bp); - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); - bp->b_flags |= XBF_WRITE; - xfs_buf_submit(bp); - - /* - * The buffer is now locked, under I/O but still on the original delwri - * queue. Wait for I/O completion, restore the DELWRI_Q flag and - * return with the buffer unlocked and still on the original queue. - */ - error = xfs_buf_iowait(bp); - bp->b_flags |= _XBF_DELWRI_Q; - xfs_buf_unlock(bp); - - return error; -} - void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 9d2ab567cf81..15fc56948346 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -326,7 +326,6 @@ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); -extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp) { diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 90139e0f3271..7fc54725c5f6 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -32,6 +32,61 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_buf_log_item, bli_item); } +static void +xfs_buf_item_get_format( + struct xfs_buf_log_item *bip, + int count) +{ + ASSERT(bip->bli_formats == NULL); + bip->bli_format_count = count; + + if (count == 1) { + bip->bli_formats = &bip->__bli_format; + return; + } + + bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), + GFP_KERNEL | __GFP_NOFAIL); +} + +static void +xfs_buf_item_free_format( + struct xfs_buf_log_item *bip) +{ + if (bip->bli_formats != &bip->__bli_format) { + kfree(bip->bli_formats); + bip->bli_formats = NULL; + } +} + +static void +xfs_buf_item_free( + struct xfs_buf_log_item *bip) +{ + xfs_buf_item_free_format(bip); + kvfree(bip->bli_item.li_lv_shadow); + kmem_cache_free(xfs_buf_item_cache, bip); +} + +/* + * xfs_buf_item_relse() is called when the buf log item is no longer needed. + */ +static void +xfs_buf_item_relse( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + + trace_xfs_buf_item_relse(bp, _RET_IP_); + + ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + ASSERT(atomic_read(&bip->bli_refcount) == 0); + + bp->b_log_item = NULL; + xfs_buf_rele(bp); + xfs_buf_item_free(bip); +} + /* Is this log iovec plausibly large enough to contain the buffer log format? */ bool xfs_buf_log_check_iovec( @@ -390,6 +445,42 @@ xfs_buf_item_pin( } /* + * For a stale BLI, process all the necessary completions that must be + * performed when the final BLI reference goes away. The buffer will be + * referenced and locked here - we return to the caller with the buffer still + * referenced and locked for them to finalise processing of the buffer. + */ +static void +xfs_buf_item_finish_stale( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + struct xfs_log_item *lip = &bip->bli_item; + + ASSERT(bip->bli_flags & XFS_BLI_STALE); + ASSERT(xfs_buf_islocked(bp)); + ASSERT(bp->b_flags & XBF_STALE); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(list_empty(&lip->li_trans)); + ASSERT(!bp->b_transp); + + if (bip->bli_flags & XFS_BLI_STALE_INODE) { + xfs_buf_item_done(bp); + xfs_buf_inode_iodone(bp); + ASSERT(list_empty(&bp->b_li_list)); + return; + } + + /* + * We may or may not be on the AIL here, xfs_trans_ail_delete() will do + * the right thing regardless of the situation in which we are called. + */ + xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bip); + ASSERT(bp->b_log_item == NULL); +} + +/* * This is called to unpin the buffer associated with the buf log item which was * previously pinned with a call to xfs_buf_item_pin(). We enter this function * with a buffer pin count, a buffer reference and a BLI reference. @@ -438,13 +529,6 @@ xfs_buf_item_unpin( } if (stale) { - ASSERT(bip->bli_flags & XFS_BLI_STALE); - ASSERT(xfs_buf_islocked(bp)); - ASSERT(bp->b_flags & XBF_STALE); - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); - ASSERT(list_empty(&lip->li_trans)); - ASSERT(!bp->b_transp); - trace_xfs_buf_item_unpin_stale(bip); /* @@ -455,22 +539,7 @@ xfs_buf_item_unpin( * processing is complete. */ xfs_buf_rele(bp); - - /* - * If we get called here because of an IO error, we may or may - * not have the item on the AIL. xfs_trans_ail_delete() will - * take care of that situation. xfs_trans_ail_delete() drops - * the AIL lock. - */ - if (bip->bli_flags & XFS_BLI_STALE_INODE) { - xfs_buf_item_done(bp); - xfs_buf_inode_iodone(bp); - ASSERT(list_empty(&bp->b_li_list)); - } else { - xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - ASSERT(bp->b_log_item == NULL); - } + xfs_buf_item_finish_stale(bip); xfs_buf_relse(bp); return; } @@ -543,43 +612,42 @@ xfs_buf_item_push( * Drop the buffer log item refcount and take appropriate action. This helper * determines whether the bli must be freed or not, since a decrement to zero * does not necessarily mean the bli is unused. - * - * Return true if the bli is freed, false otherwise. */ -bool +void xfs_buf_item_put( struct xfs_buf_log_item *bip) { - struct xfs_log_item *lip = &bip->bli_item; - bool aborted; - bool dirty; + + ASSERT(xfs_buf_islocked(bip->bli_buf)); /* drop the bli ref and return if it wasn't the last one */ if (!atomic_dec_and_test(&bip->bli_refcount)) - return false; + return; - /* - * We dropped the last ref and must free the item if clean or aborted. - * If the bli is dirty and non-aborted, the buffer was clean in the - * transaction but still awaiting writeback from previous changes. In - * that case, the bli is freed on buffer writeback completion. - */ - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || - xlog_is_shutdown(lip->li_log); - dirty = bip->bli_flags & XFS_BLI_DIRTY; - if (dirty && !aborted) - return false; + /* If the BLI is in the AIL, then it is still dirty and in use */ + if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) { + ASSERT(bip->bli_flags & XFS_BLI_DIRTY); + return; + } /* - * The bli is aborted or clean. An aborted item may be in the AIL - * regardless of dirty state. For example, consider an aborted - * transaction that invalidated a dirty bli and cleared the dirty - * state. + * In shutdown conditions, we can be asked to free a dirty BLI that + * isn't in the AIL. This can occur due to a checkpoint aborting a BLI + * instead of inserting it into the AIL at checkpoint IO completion. If + * there's another bli reference (e.g. a btree cursor holds a clean + * reference) and it is released via xfs_trans_brelse(), we can get here + * with that aborted, dirty BLI. In this case, it is safe to free the + * dirty BLI immediately, as it is not in the AIL and there are no + * other references to it. + * + * We should never get here with a stale BLI via that path as + * xfs_trans_brelse() specifically holds onto stale buffers rather than + * releasing them. */ - if (aborted) - xfs_trans_ail_delete(lip, 0); - xfs_buf_item_relse(bip->bli_buf); - return true; + ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) || + test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags)); + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + xfs_buf_item_relse(bip); } /* @@ -600,6 +668,15 @@ xfs_buf_item_put( * if necessary but do not unlock the buffer. This is for support of * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't * free the item. + * + * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must* + * perform a completion abort of any objects attached to the buffer for IO + * tracking purposes. This generally only happens in shutdown situations, + * normally xfs_buf_item_unpin() will drop the last BLI reference and perform + * completion processing. However, because transaction completion can race with + * checkpoint completion during a shutdown, this release context may end up + * being the last active reference to the BLI and so needs to perform this + * cleanup. */ STATIC void xfs_buf_item_release( @@ -607,18 +684,19 @@ xfs_buf_item_release( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - bool released; bool hold = bip->bli_flags & XFS_BLI_HOLD; bool stale = bip->bli_flags & XFS_BLI_STALE; -#if defined(DEBUG) || defined(XFS_WARN) - bool ordered = bip->bli_flags & XFS_BLI_ORDERED; - bool dirty = bip->bli_flags & XFS_BLI_DIRTY; bool aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; +#if defined(DEBUG) || defined(XFS_WARN) + bool ordered = bip->bli_flags & XFS_BLI_ORDERED; #endif trace_xfs_buf_item_release(bip); + ASSERT(xfs_buf_islocked(bp)); + /* * The bli dirty state should match whether the blf has logged segments * except for ordered buffers, where only the bli should be dirty. @@ -634,16 +712,56 @@ xfs_buf_item_release( bp->b_transp = NULL; bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); + /* If there are other references, then we have nothing to do. */ + if (!atomic_dec_and_test(&bip->bli_refcount)) + goto out_release; + + /* + * Stale buffer completion frees the BLI, unlocks and releases the + * buffer. Neither the BLI or buffer are safe to reference after this + * call, so there's nothing more we need to do here. + * + * If we get here with a stale buffer and references to the BLI remain, + * we must not unlock the buffer as the last BLI reference owns lock + * context, not us. + */ + if (stale) { + xfs_buf_item_finish_stale(bip); + xfs_buf_relse(bp); + ASSERT(!hold); + return; + } + /* - * Unref the item and unlock the buffer unless held or stale. Stale - * buffers remain locked until final unpin unless the bli is freed by - * the unref call. The latter implies shutdown because buffer - * invalidation dirties the bli and transaction. + * Dirty or clean, aborted items are done and need to be removed from + * the AIL and released. This frees the BLI, but leaves the buffer + * locked and referenced. */ - released = xfs_buf_item_put(bip); - if (hold || (stale && !released)) + if (aborted || xlog_is_shutdown(lip->li_log)) { + ASSERT(list_empty(&bip->bli_buf->b_li_list)); + xfs_buf_item_done(bp); + goto out_release; + } + + /* + * Clean, unreferenced BLIs can be immediately freed, leaving the buffer + * locked and referenced. + * + * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback. + */ + if (!dirty) + xfs_buf_item_relse(bip); + else + ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags)); + + /* Not safe to reference the BLI from here */ +out_release: + /* + * If we get here with a stale buffer, we must not unlock the + * buffer as the last BLI reference owns lock context, not us. + */ + if (stale || hold) return; - ASSERT(!stale || aborted); xfs_buf_relse(bp); } @@ -729,33 +847,6 @@ static const struct xfs_item_ops xfs_buf_item_ops = { .iop_push = xfs_buf_item_push, }; -STATIC void -xfs_buf_item_get_format( - struct xfs_buf_log_item *bip, - int count) -{ - ASSERT(bip->bli_formats == NULL); - bip->bli_format_count = count; - - if (count == 1) { - bip->bli_formats = &bip->__bli_format; - return; - } - - bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), - GFP_KERNEL | __GFP_NOFAIL); -} - -STATIC void -xfs_buf_item_free_format( - struct xfs_buf_log_item *bip) -{ - if (bip->bli_formats != &bip->__bli_format) { - kfree(bip->bli_formats); - bip->bli_formats = NULL; - } -} - /* * Allocate a new buf log item to go with the given buffer. * Set the buffer's b_log_item field to point to the new @@ -976,34 +1067,6 @@ xfs_buf_item_dirty_format( return false; } -STATIC void -xfs_buf_item_free( - struct xfs_buf_log_item *bip) -{ - xfs_buf_item_free_format(bip); - kvfree(bip->bli_item.li_lv_shadow); - kmem_cache_free(xfs_buf_item_cache, bip); -} - -/* - * xfs_buf_item_relse() is called when the buf log item is no longer needed. - */ -void -xfs_buf_item_relse( - struct xfs_buf *bp) -{ - struct xfs_buf_log_item *bip = bp->b_log_item; - - trace_xfs_buf_item_relse(bp, _RET_IP_); - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); - - if (atomic_read(&bip->bli_refcount)) - return; - bp->b_log_item = NULL; - xfs_buf_rele(bp); - xfs_buf_item_free(bip); -} - void xfs_buf_item_done( struct xfs_buf *bp) @@ -1023,5 +1086,5 @@ xfs_buf_item_done( xfs_trans_ail_delete(&bp->b_log_item->bli_item, (bp->b_flags & _XBF_LOGRECOVERY) ? 0 : SHUTDOWN_CORRUPT_INCORE); - xfs_buf_item_relse(bp); + xfs_buf_item_relse(bp->b_log_item); } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index e10e324cd245..416890b84f8c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -49,8 +49,7 @@ struct xfs_buf_log_item { int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_done(struct xfs_buf *bp); -void xfs_buf_item_relse(struct xfs_buf *); -bool xfs_buf_item_put(struct xfs_buf_log_item *); +void xfs_buf_item_put(struct xfs_buf_log_item *bip); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_inode_iodone(struct xfs_buf *); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index b4e32f0860b7..0bd8022e47b4 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1398,11 +1398,9 @@ xfs_qm_dqflush( ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); + ASSERT(atomic_read(&dqp->q_pincount) == 0); trace_xfs_dqflush(dqp); - - xfs_qm_dqunpin_wait(dqp); - fa = xfs_qm_dqflush_check(dqp); if (fa) { xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 48254a72071b..0b41b18debf3 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1335,9 +1335,10 @@ xfs_falloc_allocate_range( } #define XFS_FALLOC_FL_SUPPORTED \ - (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) + (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \ + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) STATIC long __xfs_file_fallocate( diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 726e29b837e6..bbc2f2973dcc 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -979,7 +979,15 @@ xfs_reclaim_inode( */ if (xlog_is_shutdown(ip->i_mount->m_log)) { xfs_iunpin_wait(ip); + /* + * Avoid a ABBA deadlock on the inode cluster buffer vs + * concurrent xfs_ifree_cluster() trying to mark the inode + * stale. We don't need the inode locked to run the flush abort + * code, but the flush abort needs to lock the cluster buffer. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iflush_shutdown_abort(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); goto reclaim; } if (xfs_ipincount(ip)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ee3e0f284287..761a996a857c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1635,7 +1635,7 @@ retry: iip = ip->i_itemp; if (__xfs_iflags_test(ip, XFS_IFLUSHING)) { ASSERT(!list_empty(&iip->ili_item.li_bio_list)); - ASSERT(iip->ili_last_fields); + ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log)); goto out_iunlock; } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c6cb0b6b9e46..285e27ff89e2 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -758,11 +758,14 @@ xfs_inode_item_push( * completed and items removed from the AIL before the next push * attempt. */ + trace_xfs_inode_push_stale(ip, _RET_IP_); return XFS_ITEM_PINNED; } - if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) + if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) { + trace_xfs_inode_push_pinned(ip, _RET_IP_); return XFS_ITEM_PINNED; + } if (xfs_iflags_test(ip, XFS_IFLUSHING)) return XFS_ITEM_FLUSHING; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index f66d2d430e4f..a80cb6b9969a 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -793,8 +793,10 @@ xlog_cil_ail_insert( struct xfs_log_item *lip = lv->lv_item; xfs_lsn_t item_lsn; - if (aborted) + if (aborted) { + trace_xlog_ail_insert_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { lip->li_ops->iop_release(lip); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 08443ceec329..866c71d9fbae 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -320,7 +320,7 @@ xfs_mru_cache_create( xfs_mru_cache_free_func_t free_func) { struct xfs_mru_cache *mru = NULL; - int err = 0, grp; + int grp; unsigned int grp_time; if (mrup) @@ -341,8 +341,8 @@ xfs_mru_cache_create( mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists), GFP_KERNEL | __GFP_NOFAIL); if (!mru->lists) { - err = -ENOMEM; - goto exit; + kfree(mru); + return -ENOMEM; } for (grp = 0; grp < mru->grp_count; grp++) @@ -361,14 +361,7 @@ xfs_mru_cache_create( mru->free_func = free_func; mru->data = data; *mrup = mru; - -exit: - if (err && mru && mru->lists) - kfree(mru->lists); - if (err && mru) - kfree(mru); - - return err; + return 0; } /* @@ -425,10 +418,6 @@ xfs_mru_cache_insert( { int error = -EINVAL; - ASSERT(mru && mru->lists); - if (!mru || !mru->lists) - goto out_free; - error = -ENOMEM; if (radix_tree_preload(GFP_KERNEL)) goto out_free; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 417439b58785..fa135ac26471 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -134,6 +134,7 @@ xfs_qm_dqpurge( dqp->q_flags |= XFS_DQFLAG_FREEING; + xfs_qm_dqunpin_wait(dqp); xfs_dqflock(dqp); /* @@ -465,6 +466,7 @@ xfs_qm_dquot_isolate( struct xfs_dquot *dqp = container_of(item, struct xfs_dquot, q_lru); struct xfs_qm_isolate *isol = arg; + enum lru_status ret = LRU_SKIP; if (!xfs_dqlock_nowait(dqp)) goto out_miss_busy; @@ -478,6 +480,16 @@ xfs_qm_dquot_isolate( goto out_miss_unlock; /* + * If the dquot is pinned or dirty, rotate it to the end of the LRU to + * give some time for it to be cleaned before we try to isolate it + * again. + */ + ret = LRU_ROTATE; + if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) { + goto out_miss_unlock; + } + + /* * This dquot has acquired a reference in the meantime remove it from * the freelist and try again. */ @@ -492,41 +504,14 @@ xfs_qm_dquot_isolate( } /* - * If the dquot is dirty, flush it. If it's already being flushed, just - * skip it so there is time for the IO to complete before we try to - * reclaim it again on the next LRU pass. + * The dquot may still be under IO, in which case the flush lock will be + * held. If we can't get the flush lock now, just skip over the dquot as + * if it was dirty. */ if (!xfs_dqflock_nowait(dqp)) goto out_miss_unlock; - if (XFS_DQ_IS_DIRTY(dqp)) { - struct xfs_buf *bp = NULL; - int error; - - trace_xfs_dqreclaim_dirty(dqp); - - /* we have to drop the LRU lock to flush the dquot */ - spin_unlock(&lru->lock); - - error = xfs_dquot_use_attached_buf(dqp, &bp); - if (!bp || error == -EAGAIN) { - xfs_dqfunlock(dqp); - goto out_unlock_dirty; - } - - /* - * dqflush completes dqflock on error, and the delwri ioend - * does it on success. - */ - error = xfs_qm_dqflush(dqp, bp); - if (error) - goto out_unlock_dirty; - - xfs_buf_delwri_queue(bp, &isol->buffers); - xfs_buf_relse(bp); - goto out_unlock_dirty; - } - + ASSERT(!XFS_DQ_IS_DIRTY(dqp)); xfs_dquot_detach_buf(dqp); xfs_dqfunlock(dqp); @@ -548,13 +533,7 @@ out_miss_unlock: out_miss_busy: trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - return LRU_SKIP; - -out_unlock_dirty: - trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - xfs_dqunlock(dqp); - return LRU_RETRY; + return ret; } static unsigned long @@ -1486,7 +1465,6 @@ xfs_qm_flush_one( struct xfs_dquot *dqp, void *data) { - struct xfs_mount *mp = dqp->q_mount; struct list_head *buffer_list = data; struct xfs_buf *bp = NULL; int error = 0; @@ -1497,34 +1475,8 @@ xfs_qm_flush_one( if (!XFS_DQ_IS_DIRTY(dqp)) goto out_unlock; - /* - * The only way the dquot is already flush locked by the time quotacheck - * gets here is if reclaim flushed it before the dqadjust walk dirtied - * it for the final time. Quotacheck collects all dquot bufs in the - * local delwri queue before dquots are dirtied, so reclaim can't have - * possibly queued it for I/O. The only way out is to push the buffer to - * cycle the flush lock. - */ - if (!xfs_dqflock_nowait(dqp)) { - /* buf is pinned in-core by delwri list */ - error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp); - if (error) - goto out_unlock; - - if (!(bp->b_flags & _XBF_DELWRI_Q)) { - error = -EAGAIN; - xfs_buf_relse(bp); - goto out_unlock; - } - xfs_buf_unlock(bp); - - xfs_buf_delwri_pushbuf(bp, buffer_list); - xfs_buf_rele(bp); - - error = -EAGAIN; - goto out_unlock; - } + xfs_qm_dqunpin_wait(dqp); + xfs_dqflock(dqp); error = xfs_dquot_use_attached_buf(dqp, &bp); if (error) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6484c596ecea..736eb0924573 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1259,6 +1259,8 @@ xfs_growfs_check_rtgeom( kfree(nmp); + trace_xfs_growfs_check_rtgeom(mp, min_logfsbs); + if (min_logfsbs > mp->m_sb.sb_logblocks) return -EINVAL; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0bc4b5489078..bb0a82635a77 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2020,14 +2020,13 @@ xfs_remount_rw( int error; if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp && - bdev_read_only(mp->m_logdev_targp->bt_bdev)) { + xfs_readonly_buftarg(mp->m_logdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only logdev"); return -EACCES; } - if (mp->m_rtdev_targp && - bdev_read_only(mp->m_rtdev_targp->bt_bdev)) { + if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only rtdev"); return -EACCES; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 01d284a1c759..ba45d801df1c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -778,7 +778,6 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); DEFINE_BUF_EVENT(xfs_buf_delwri_queued); DEFINE_BUF_EVENT(xfs_buf_delwri_split); -DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_iodone_async); @@ -1147,6 +1146,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __field(xfs_ino_t, ino) __field(int, count) __field(int, pincount) + __field(unsigned long, iflags) __field(unsigned long, caller_ip) ), TP_fast_assign( @@ -1154,13 +1154,15 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __entry->ino = ip->i_ino; __entry->count = atomic_read(&VFS_I(ip)->i_count); __entry->pincount = atomic_read(&ip->i_pincount); + __entry->iflags = ip->i_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, __entry->pincount, + __entry->iflags, (char *)__entry->caller_ip) ) @@ -1250,6 +1252,8 @@ DEFINE_IREF_EVENT(xfs_irele); DEFINE_IREF_EVENT(xfs_inode_pin); DEFINE_IREF_EVENT(xfs_inode_unpin); DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); +DEFINE_IREF_EVENT(xfs_inode_push_pinned); +DEFINE_IREF_EVENT(xfs_inode_push_stale); DECLARE_EVENT_CLASS(xfs_namespace_class, TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), @@ -1654,6 +1658,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); +DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); +DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c6657072361a..b4a07af513ba 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -742,8 +742,10 @@ xfs_trans_free_items( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); - if (abort) + if (abort) { + trace_xfs_trans_free_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->iop_release) lip->li_ops->iop_release(lip); } diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 80add26c0111..01315ed75502 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -727,7 +727,7 @@ xfs_select_zone( for (;;) { prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE); oz = xfs_select_zone_nowait(mp, write_hint, pack_tight); - if (oz) + if (oz || xfs_is_shutdown(mp)) break; schedule(); } @@ -777,26 +777,6 @@ xfs_mark_rtg_boundary( ioend->io_flags |= IOMAP_IOEND_BOUNDARY; } -static void -xfs_submit_zoned_bio( - struct iomap_ioend *ioend, - struct xfs_open_zone *oz, - bool is_seq) -{ - ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; - ioend->io_private = oz; - atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ - - if (is_seq) { - ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; - ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; - } else { - xfs_mark_rtg_boundary(ioend); - } - - submit_bio(&ioend->io_bio); -} - /* * Cache the last zone written to for an inode so that it is considered first * for subsequent writes. @@ -891,6 +871,26 @@ xfs_zone_cache_create_association( xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); } +static void +xfs_submit_zoned_bio( + struct iomap_ioend *ioend, + struct xfs_open_zone *oz, + bool is_seq) +{ + ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; + ioend->io_private = oz; + atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ + + if (is_seq) { + ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; + ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; + } else { + xfs_mark_rtg_boundary(ioend); + } + + submit_bio(&ioend->io_bio); +} + void xfs_zone_alloc_and_submit( struct iomap_ioend *ioend, diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index b9bccd3ff57f..21a27fd5e198 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -25,7 +25,7 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); -static inline void sha256_choose_blocks( +static __always_inline void sha256_choose_blocks( u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, bool force_generic, bool force_simd) { diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 5c3b2aa3e69d..d344d41e6cfe 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -300,6 +300,9 @@ struct drm_file { * * Mapping of mm object handles to object pointers. Used by the GEM * subsystem. Protected by @table_lock. + * + * Note that allocated entries might be NULL as a transient state when + * creating or deleting a handle. */ struct idr object_idr; diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 668077009fce..38b24fc8978d 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -23,6 +23,7 @@ #ifndef __DRM_FRAMEBUFFER_H__ #define __DRM_FRAMEBUFFER_H__ +#include <linux/bits.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/sched.h> @@ -100,6 +101,8 @@ struct drm_framebuffer_funcs { unsigned num_clips); }; +#define DRM_FRAMEBUFFER_HAS_HANDLE_REF(_i) BIT(0u + (_i)) + /** * struct drm_framebuffer - frame buffer object * @@ -189,6 +192,10 @@ struct drm_framebuffer { */ int flags; /** + * @internal_flags: Framebuffer flags like DRM_FRAMEBUFFER_HAS_HANDLE_REF. + */ + unsigned int internal_flags; + /** * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ struct list_head filp_head; diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b37860f4a895..6d2c08e81101 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -223,6 +223,9 @@ struct mipi_dsi_multi_context { #define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev) +extern const struct bus_type mipi_dsi_bus_type; +#define dev_is_mipi_dsi(dev) ((dev)->bus == &mipi_dsi_bus_type) + /** * mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any * given pixel format defined by the MIPI DSI diff --git a/include/drm/spsc_queue.h b/include/drm/spsc_queue.h index 125f096c88cb..ee9df8cc67b7 100644 --- a/include/drm/spsc_queue.h +++ b/include/drm/spsc_queue.h @@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struct spsc_queue *queue, struct spsc_node *n preempt_disable(); + atomic_inc(&queue->job_count); + smp_mb__after_atomic(); + tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next); WRITE_ONCE(*tail, node); - atomic_inc(&queue->job_count); /* * In case of first element verify new node will be visible to the consumer diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 5bded24dc24f..e1634897e159 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -283,6 +283,7 @@ struct ffa_indirect_msg_hdr { u32 offset; u32 send_recv_id; u32 size; + u32 res1; uuid_t uuid; }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a59880c809c7..181a0deadc9e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -269,11 +269,16 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) * however we constrain this to what we can validate and test. */ #define BLK_MAX_BLOCK_SIZE SZ_64K +#else +#define BLK_MAX_BLOCK_SIZE PAGE_SIZE +#endif + /* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 96a3a0d6a60e..6378370a952f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -82,6 +82,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/fs.h b/include/linux/fs.h index b085f161ed22..040c0036320f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3608,6 +3608,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; diff --git a/include/linux/futex.h b/include/linux/futex.h index 005b040c4791..b37193653e6b 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -89,6 +89,7 @@ void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) { RCU_INIT_POINTER(mm->futex_phash, NULL); + mm->futex_phash_new = NULL; mutex_init(&mm->futex_hash_lock); } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 22f39e5e2ff1..996be3c2cff0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -663,18 +663,6 @@ static inline bool ieee80211_s1g_has_cssid(__le16 fc) } /** - * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon - * @fc: frame control bytes in little-endian byteorder - * Return: whether or not the frame is an S1G short beacon, - * i.e. it is an S1G beacon with 'next TBTT' flag set - */ -static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) -{ - return ieee80211_is_s1g_beacon(fc) && - (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); -} - -/** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an ATIM frame @@ -4901,6 +4889,39 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) return false; } +/** + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon + * @fc: frame control bytes in little-endian byteorder + * @variable: pointer to the beacon frame elements + * @variable_len: length of the frame elements + * Return: whether or not the frame is an S1G short beacon. As per + * IEEE80211-2024 11.1.3.10.1, The S1G beacon compatibility element shall + * always be present as the first element in beacon frames generated at a + * TBTT (Target Beacon Transmission Time), so any frame not containing + * this element must have been generated at a TSBTT (Target Short Beacon + * Transmission Time) that is not a TBTT. Additionally, short beacons are + * prohibited from containing the S1G beacon compatibility element as per + * IEEE80211-2024 9.3.4.3 Table 9-76, so if we have an S1G beacon with + * either no elements or the first element is not the beacon compatibility + * element, we have a short beacon. + */ +static inline bool ieee80211_is_s1g_short_beacon(__le16 fc, const u8 *variable, + size_t variable_len) +{ + if (!ieee80211_is_s1g_beacon(fc)) + return false; + + /* + * If the frame does not contain at least 1 element (this is perfectly + * valid in a short beacon) and is an S1G beacon, we have a short + * beacon. + */ + if (variable_len < 2) + return true; + + return variable[0] != WLAN_EID_S1G_BCN_COMPAT; +} + struct element { u8 id; u8 datalen; diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 2922635986f5..a7efcec2e3d0 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -698,6 +698,8 @@ struct io_kiocb { struct hlist_node hash_node; /* For IOPOLL setup queues, with hybrid polling */ u64 iopoll_start; + /* for private io_kiocb freeing */ + struct rcu_head rcu_head; }; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h index dd8d1d138544..224ac28e88d7 100644 --- a/include/linux/irqchip/irq-msi-lib.h +++ b/include/linux/irqchip/irq-msi-lib.h @@ -17,6 +17,7 @@ #define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) +struct msi_domain_info; int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 93a73c076d16..fbd424b2abb1 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,6 +28,7 @@ extern void kmemleak_update_trace(const void *ptr) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_transient_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; +extern void kmemleak_ignore_percpu(const void __percpu *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, @@ -97,6 +98,9 @@ static inline void kmemleak_not_leak(const void *ptr) static inline void kmemleak_transient_leak(const void *ptr) { } +static inline void kmemleak_ignore_percpu(const void __percpu *ptr) +{ +} static inline void kmemleak_ignore(const void *ptr) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ef2ba0c667a..fa538feaa8d9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2568,6 +2568,11 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return percpu_counter_read_positive(&mm->rss_stat[member]); } +static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member) +{ + return percpu_counter_sum_positive(&mm->rss_stat[member]); +} + void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) diff --git a/include/linux/mount.h b/include/linux/mount.h index 4880f434c021..1a508beba446 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -116,10 +116,8 @@ extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); int do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(const struct path *); -extern void drop_collected_mounts(struct vfsmount *); -extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, - struct vfsmount *); +extern struct path *collect_paths(const struct path *, struct path *, unsigned); +extern void drop_collected_paths(struct path *, struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts); diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index e8462deda6db..f0aa098a395f 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -237,6 +237,9 @@ * @last_data_desc - last DMA desc in data channel (tx/rx). * @last_cmd_desc - last DMA desc in command channel. * @txn_done - completion for NAND transfer. + * @bam_ce_nitems - the number of elements in the @bam_ce array + * @cmd_sgl_nitems - the number of elements in the @cmd_sgl array + * @data_sgl_nitems - the number of elements in the @data_sgl array * @bam_ce_pos - the index in bam_ce which is available for next sgl * @bam_ce_start - the index in bam_ce which marks the start position ce * for current sgl. It will be used for size calculation @@ -255,6 +258,11 @@ struct bam_transaction { struct dma_async_tx_descriptor *last_data_desc; struct dma_async_tx_descriptor *last_cmd_desc; struct completion txn_done; + + unsigned int bam_ce_nitems; + unsigned int cmd_sgl_nitems; + unsigned int data_sgl_nitems; + struct_group(bam_positions, u32 bam_ce_pos; u32 bam_ce_start; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 065c17385e53..f43f075852c0 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -265,21 +265,20 @@ struct netfs_io_request { bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ -#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ -#define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ -#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ -#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ -#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ +#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */ +#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */ +#define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */ +#define NETFS_RREQ_FAILED 3 /* The request failed */ +#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */ +#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */ +#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */ +#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ -#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ -#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; - void (*cleanup)(struct netfs_io_request *req); }; /* diff --git a/arch/x86/include/asm/amd/fch.h b/include/linux/platform_data/x86/amd-fch.h index 2cf5153edbc2..2cf5153edbc2 100644 --- a/arch/x86/include/asm/amd/fch.h +++ b/include/linux/platform_data/x86/amd-fch.h diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0b3a36bdaa90..0f5f94137f6d 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -594,6 +594,7 @@ struct sev_data_snp_addr { * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the * purpose of guest-assisted migration. * @rsvd: reserved + * @desired_tsc_khz: hypervisor desired mean TSC freq in kHz of the guest * @gosvw: guest OS-visible workarounds, as defined by hypervisor */ struct sev_data_snp_launch_start { @@ -603,6 +604,7 @@ struct sev_data_snp_launch_start { u32 ma_en:1; /* In */ u32 imi_en:1; /* In */ u32 rsvd:30; + u32 desired_tsc_khz; /* In */ u8 gosvw[16]; /* In */ } __packed; diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..aa9c5be7a632 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -548,10 +548,6 @@ struct sched_statistics { u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; -#ifdef CONFIG_NUMA_BALANCING - u64 numa_task_migrated; - u64 numa_task_swapped; -#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/soc/amd/isp4_misc.h b/include/linux/soc/amd/isp4_misc.h new file mode 100644 index 000000000000..6738796986a7 --- /dev/null +++ b/include/linux/soc/amd/isp4_misc.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef __SOC_ISP4_MISC_H +#define __SOC_ISP4_MISC_H + +#define AMDISP_I2C_ADAP_NAME "AMDISP DesignWare I2C adapter" + +#endif diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4789f91dae94..e9ea43234d9a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include <uapi/linux/spi/spi.h> /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 16 +#define SPI_CS_CNT_MAX 24 struct dma_chan; struct software_node; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1c76c8f2c82..6a3f92098872 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -446,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); extern void pm_report_hw_sleep_time(u64 t); extern void pm_report_max_hw_sleep(u64 t); +void pm_restrict_gfp_mask(void); +void pm_restore_gfp_mask(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -492,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline void pm_report_hw_sleep_time(u64 t) {}; static inline void pm_report_max_hw_sleep(u64 t) {}; +static inline void pm_restrict_gfp_mask(void) {} +static inline void pm_restore_gfp_mask(void) {} + static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) diff --git a/include/linux/usb.h b/include/linux/usb.h index 1b2545b4363b..92c752f5446f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -614,6 +614,7 @@ struct usb3_lpm_parameters { * FIXME -- complete doc * @authenticated: Crypto authentication passed * @tunnel_mode: Connection native or tunneled over USB4 + * @usb4_link: device link to the USB4 host interface * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -724,6 +725,7 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; enum usb_link_tunnel_mode tunnel_mode; + struct device_link *usb4_link; int slot_id; struct usb2_lpm_parameters l1_params; diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index f2da264d9c14..acb0ad03bdac 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -57,6 +57,7 @@ enum { DP_PIN_ASSIGN_D, DP_PIN_ASSIGN_E, DP_PIN_ASSIGN_F, /* Not supported after v1.0b */ + DP_PIN_ASSIGN_MAX, }; /* DisplayPort alt mode specific commands */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 91a3ce9a2687..9e15a088ba38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d56e6e135158..d40e978126e3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -243,8 +243,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -#ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; +#ifdef CONFIG_BPF_SYSCALL int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init vsock_bpf_build_proto(void); #else diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a760f05fa3fb..0da011fc8146 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -29,6 +29,7 @@ #include <linux/idr.h> #include <linux/leds.h> #include <linux/rculist.h> +#include <linux/srcu.h> #include <net/bluetooth/hci.h> #include <net/bluetooth/hci_drv.h> @@ -347,6 +348,7 @@ struct adv_monitor { struct hci_dev { struct list_head list; + struct srcu_struct srcu; struct mutex lock; struct ida unset_handle_ida; @@ -1348,8 +1350,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || - c->state != state) + if (c->type != BIS_LINK || c->state != state) continue; if (handle == c->iso_qos.bcast.big) { diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d711642e78b5..c003cd194fa2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -370,7 +370,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) { - if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN)) return false; *inner_proto = __nf_flow_pppoe_proto(skb); diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index d7b7b6cd4aa1..8a75c73fc555 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -114,7 +114,6 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq, spinlock_t *root_lock, bool validate); @@ -290,4 +289,28 @@ static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, return true; } +static inline void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) +{ + if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { + pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); + qdisc->flags |= TCQ_F_WARN_NONWC; + } +} + +static inline unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + unsigned int len; + + skb = sch->ops->peek(sch); + if (unlikely(skb == NULL)) { + qdisc_warn_nonwc("qdisc_peek_len", sch); + return 0; + } + len = qdisc_pkt_len(skb); + + return len; +} + #endif diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 333d2e38dd2c..73e96ccbe830 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -50,12 +50,14 @@ #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ - EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_collect, "COLLECT") \ EM(netfs_rreq_trace_complete, "COMPLET") \ + EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ + EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \ @@ -63,13 +65,15 @@ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ + EM(netfs_rreq_trace_unpause, "UNPAUSE") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ - EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ - EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \ + EM(netfs_rreq_trace_wait_pause, "--PAUSED--") \ + EM(netfs_rreq_trace_wait_quiesce, "WAIT-QUIESCE") \ + EM(netfs_rreq_trace_waited_ip, "DONE-IP") \ + EM(netfs_rreq_trace_waited_pause, "--UNPAUSED--") \ + EM(netfs_rreq_trace_waited_quiesce, "DONE-QUIESCE") \ EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \ - EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \ - EM(netfs_rreq_trace_unpause, "UNPAUSE") \ E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ @@ -82,6 +86,7 @@ E_(NETFS_WRITE_TO_CACHE, "WRIT") #define netfs_sreq_traces \ + EM(netfs_sreq_trace_abandoned, "ABNDN") \ EM(netfs_sreq_trace_add_donations, "+DON ") \ EM(netfs_sreq_trace_added, "ADD ") \ EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \ @@ -89,6 +94,7 @@ EM(netfs_sreq_trace_cache_write, "CA-WR") \ EM(netfs_sreq_trace_cancel, "CANCL") \ EM(netfs_sreq_trace_clear, "CLEAR") \ + EM(netfs_sreq_trace_consumed, "CONSM") \ EM(netfs_sreq_trace_discard, "DSCRD") \ EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ EM(netfs_sreq_trace_donate_to_next, "DON-N") \ @@ -96,7 +102,12 @@ EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_free, "FREE ") \ EM(netfs_sreq_trace_hit_eof, "EOF ") \ - EM(netfs_sreq_trace_io_progress, "IO ") \ + EM(netfs_sreq_trace_io_bad, "I-BAD") \ + EM(netfs_sreq_trace_io_malformed, "I-MLF") \ + EM(netfs_sreq_trace_io_unknown, "I-UNK") \ + EM(netfs_sreq_trace_io_progress, "I-OK ") \ + EM(netfs_sreq_trace_io_req_submitted, "I-RSB") \ + EM(netfs_sreq_trace_io_retry_needed, "I-RTR") \ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_need_clear, "N-CLR") \ EM(netfs_sreq_trace_partial_read, "PARTR") \ @@ -142,8 +153,8 @@ #define netfs_sreq_ref_traces \ EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \ - EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \ - EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \ + EM(netfs_sreq_trace_get_resubmit, "GET RESUBMT") \ + EM(netfs_sreq_trace_get_submit, "GET SUBMIT ") \ EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \ @@ -366,7 +377,7 @@ TRACE_EVENT(netfs_sreq, __entry->slot = sreq->io_iter.folioq_slot; ), - TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d", + TP_printk("R=%08x[%x] %s %s f=%03x s=%llx %zx/%zx s=%u e=%d", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __print_symbolic(__entry->what, netfs_sreq_traces), diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37891580d05d..7a4c35ff03fe 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -467,6 +467,10 @@ struct kvm_run { __u64 leaf; __u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; }; } tdx; /* Fix the size of the union. */ diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index 84fa8a21dfd0..6ac84b2f636c 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -27,14 +27,14 @@ * token, rem_id. * @MPTCP_EVENT_SUB_ESTABLISHED: A new subflow has been established. 'error' * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | - * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. + * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_SUB_CLOSED: A subflow has been closed. An error (copy of * sk_err) could be set if an error has been detected for this subflow. * Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - * daddr6, sport, dport, backup, if_idx [, error]. + * daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_SUB_PRIORITY: The priority of a subflow has changed. 'error' * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | - * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. + * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_LISTENER_CREATED: A new PM listener is created. Attributes: * family, sport, saddr4 | saddr6. * @MPTCP_EVENT_LISTENER_CLOSED: A PM listener is closed. Attributes: family, diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 77d9d6af46da..c9751bdfd937 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -135,8 +135,28 @@ #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) /* - * zero copy requires 4k block size, and can remap ublk driver's io - * request into ublksrv's vm space + * ublk server can register data buffers for incoming I/O requests with a sparse + * io_uring buffer table. The request buffer can then be used as the data buffer + * for io_uring operations via the fixed buffer index. + * Note that the ublk server can never directly access the request data memory. + * + * To use this feature, the ublk server must first register a sparse buffer + * table on an io_uring instance. + * When an incoming ublk request is received, the ublk server submits a + * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The + * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register + * and addr is the index in the io_uring's buffer table to install the buffer. + * SQEs can now be submitted to the io_uring to read/write the request's buffer + * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or + * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index. + * Once the last io_uring operation using the request's buffer has completed, + * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag, + * and addr again specifying the request buffer to unregister. + * The ublk request is completed when its buffer is unregistered from all + * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ. + * + * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak + * uninitialized kernel memory by not reading into the full request buffer. */ #define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) @@ -450,10 +470,10 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( __u64 sqe_addr) { struct ublk_auto_buf_reg reg = { - .index = sqe_addr & 0xffff, - .flags = (sqe_addr >> 16) & 0xff, - .reserved0 = (sqe_addr >> 24) & 0xff, - .reserved1 = sqe_addr >> 32, + .index = (__u16)sqe_addr, + .flags = (__u8)(sqe_addr >> 16), + .reserved0 = (__u8)(sqe_addr >> 24), + .reserved1 = (__u32)(sqe_addr >> 32), }; return reg; diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index ed07181d4eff..e05280e41522 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -17,6 +17,10 @@ #ifndef _UAPI_VM_SOCKETS_H #define _UAPI_VM_SOCKETS_H +#ifndef __KERNEL__ +#include <sys/socket.h> /* for struct sockaddr and sa_family_t */ +#endif + #include <linux/socket.h> #include <linux/types.h> diff --git a/init/Kconfig b/init/Kconfig index af4c2f085455..666783eb50ab 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1716,9 +1716,13 @@ config FUTEX_PI depends on FUTEX && RT_MUTEXES default y +# +# marked broken for performance reasons; gives us one more cycle to sort things out. +# config FUTEX_PRIVATE_HASH bool depends on FUTEX && !BASE_SMALL && MMU + depends on BROKEN default y config FUTEX_MPOL diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index ce95e3af44a9..f2d2cc319faa 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -271,6 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, if (len > arg->max_len) { len = arg->max_len; if (!(bl->flags & IOBL_INC)) { + arg->partial_map = 1; if (iov != arg->iovs) break; buf->len = len; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 5d83c7adc739..723d0361898e 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -58,7 +58,8 @@ struct buf_sel_arg { size_t max_len; unsigned short nr_iovs; unsigned short mode; - unsigned buf_group; + unsigned short buf_group; + unsigned short partial_map; }; void __user *io_buffer_select(struct io_kiocb *req, size_t *len, diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 71400d6cefc8..4c2578f2efcb 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -82,7 +82,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw) spin_unlock(&ctx->msg_lock); } if (req) - kmem_cache_free(req_cachep, req); + kfree_rcu(req, rcu_head); percpu_ref_put(&ctx->refs); } @@ -90,7 +90,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, int res, u32 cflags, u64 user_data) { if (!READ_ONCE(ctx->submitter_task)) { - kmem_cache_free(req_cachep, req); + kfree_rcu(req, rcu_head); return -EOWNERDEAD; } req->opcode = IORING_OP_NOP; diff --git a/io_uring/net.c b/io_uring/net.c index 9550d4c8f866..43a43522f406 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -75,12 +75,17 @@ struct io_sr_msg { u16 flags; /* initialised and used only by !msg send variants */ u16 buf_group; - bool retry; + unsigned short retry_flags; void __user *msg_control; /* used only for send zerocopy */ struct io_kiocb *notif; }; +enum sr_retry_flags { + IO_SR_MSG_RETRY = 1, + IO_SR_MSG_PARTIAL_MAP = 2, +}; + /* * Number of times we'll try and do receives if there's more data. If we * exceed this limit, then add us to the back of the queue and retry from @@ -187,7 +192,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req, req->flags &= ~REQ_F_BL_EMPTY; sr->done_io = 0; - sr->retry = false; + sr->retry_flags = 0; sr->len = 0; /* get from the provided buffer */ } @@ -397,7 +402,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); sr->done_io = 0; - sr->retry = false; + sr->retry_flags = 0; sr->len = READ_ONCE(sqe->len); sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~SENDMSG_FLAGS) @@ -751,7 +756,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); sr->done_io = 0; - sr->retry = false; + sr->retry_flags = 0; if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL; @@ -823,7 +828,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret), issue_flags); - if (sr->retry) + if (sr->retry_flags & IO_SR_MSG_RETRY) cflags = req->cqe.flags | (cflags & CQE_F_MASK); /* bundle with no more immediate buffers, we're done */ if (req->flags & REQ_F_BL_EMPTY) @@ -832,12 +837,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, * If more is available AND it was a full transfer, retry and * append to this one */ - if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 && + if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 && !iov_iter_count(&kmsg->msg.msg_iter)) { req->cqe.flags = cflags & ~CQE_F_MASK; sr->len = kmsg->msg.msg_inq; sr->done_io += this_ret; - sr->retry = true; + sr->retry_flags |= IO_SR_MSG_RETRY; return false; } } else { @@ -1077,6 +1082,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg if (unlikely(ret < 0)) return ret; + if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { + kmsg->vec.nr = ret; + kmsg->vec.iovec = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; + } + if (arg.partial_map) + sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP; + /* special case 1 vec, can be a fast path */ if (ret == 1) { sr->buf = arg.iovs[0].iov_base; @@ -1085,11 +1098,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg } iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, arg.out_len); - if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { - kmsg->vec.nr = ret; - kmsg->vec.iovec = arg.iovs; - req->flags |= REQ_F_NEED_CLEANUP; - } } else { void __user *buf; @@ -1275,7 +1283,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) int ret; zc->done_io = 0; - zc->retry = false; + zc->retry_flags = 0; if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) return -EINVAL; diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 6e0882b051f9..6de6229207a8 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -216,6 +216,7 @@ const struct io_issue_def io_issue_defs[] = { }, [IORING_OP_FALLOCATE] = { .needs_file = 1, + .hash_reg_file = 1, .prep = io_fallocate_prep, .issue = io_fallocate, }, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index d724602697e7..f2b31fb68992 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -112,8 +112,11 @@ static void io_release_ubuf(void *priv) struct io_mapped_ubuf *imu = priv; unsigned int i; - for (i = 0; i < imu->nr_bvecs; i++) - unpin_user_page(imu->bvec[i].bv_page); + for (i = 0; i < imu->nr_bvecs; i++) { + struct folio *folio = page_folio(imu->bvec[i].bv_page); + + unpin_user_folio(folio, 1); + } } static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx, @@ -731,6 +734,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages, data->nr_pages_mid = folio_nr_pages(folio); data->folio_shift = folio_shift(folio); + data->first_folio_page_idx = folio_page_idx(folio, page_array[0]); /* * Check if pages are contiguous inside a folio, and all folios have @@ -824,7 +828,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, if (coalesced) imu->folio_shift = data.folio_shift; refcount_set(&imu->refs, 1); - off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1); + + off = (unsigned long)iov->iov_base & ~PAGE_MASK; + if (coalesced) + off += data.first_folio_page_idx << PAGE_SHIFT; + node->buf = imu; ret = 0; @@ -840,8 +848,10 @@ done: if (ret) { if (imu) io_free_imu(ctx, imu); - if (pages) - unpin_user_pages(pages, nr_pages); + if (pages) { + for (i = 0; i < nr_pages; i++) + unpin_user_folio(page_folio(pages[i]), 1); + } io_cache_free(&ctx->node_cache, node); node = ERR_PTR(ret); } @@ -1329,7 +1339,6 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter, { unsigned long folio_size = 1 << imu->folio_shift; unsigned long folio_mask = folio_size - 1; - u64 folio_addr = imu->ubuf & ~folio_mask; struct bio_vec *res_bvec = vec->bvec; size_t total_len = 0; unsigned bvec_idx = 0; @@ -1351,8 +1360,13 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter, if (unlikely(check_add_overflow(total_len, iov_len, &total_len))) return -EOVERFLOW; - /* by using folio address it also accounts for bvec offset */ - offset = buf_addr - folio_addr; + offset = buf_addr - imu->ubuf; + /* + * Only the first bvec can have non zero bv_offset, account it + * here and work with full folios below. + */ + offset += imu->bvec[0].bv_offset; + src_bvec = imu->bvec + (offset >> imu->folio_shift); offset &= folio_mask; diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 0d2138f16322..25e7e998dcfd 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -49,6 +49,7 @@ struct io_imu_folio_data { unsigned int nr_pages_mid; unsigned int folio_shift; unsigned int nr_folios; + unsigned long first_folio_page_idx; }; bool io_rsrc_cache_init(struct io_ring_ctx *ctx); diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 797247a34cb7..00d0064b22a5 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -106,8 +106,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq, for_each_sgtable_dma_sg(mem->sgt, sg, i) total_size += sg_dma_len(sg); - if (total_size < off + len) - return -EINVAL; + if (total_size < off + len) { + ret = -EINVAL; + goto err; + } mem->dmabuf_offset = off; mem->size = len; @@ -861,10 +863,7 @@ static int io_pp_zc_init(struct page_pool *pp) static void io_pp_zc_destroy(struct page_pool *pp) { struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); - struct io_zcrx_area *area = ifq->area; - if (WARN_ON_ONCE(area->free_count != area->nia.num_niovs)) - return; percpu_ref_put(&ifq->ctx->refs); } diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index e64ce21f9a80..2ee603a98813 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -134,6 +134,7 @@ config CRASH_DM_CRYPT depends on KEXEC_FILE depends on CRASH_DUMP depends on DM_CRYPT + depends on KEYS help With this option enabled, user space can intereact with /sys/kernel/config/crash_dm_crypt_keys to make the dm crypt keys diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index f2f38903b2fe..b0eae2a3c895 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -668,12 +668,6 @@ int audit_remove_tree_rule(struct audit_krule *rule) return 0; } -static int compare_root(struct vfsmount *mnt, void *arg) -{ - return inode_to_key(d_backing_inode(mnt->mnt_root)) == - (unsigned long)arg; -} - void audit_trim_trees(void) { struct list_head cursor; @@ -683,8 +677,9 @@ void audit_trim_trees(void) while (cursor.next != &tree_list) { struct audit_tree *tree; struct path path; - struct vfsmount *root_mnt; struct audit_node *node; + struct path *paths; + struct path array[16]; int err; tree = container_of(cursor.next, struct audit_tree, list); @@ -696,9 +691,9 @@ void audit_trim_trees(void) if (err) goto skip_it; - root_mnt = collect_mounts(&path); + paths = collect_paths(&path, array, 16); path_put(&path); - if (IS_ERR(root_mnt)) + if (IS_ERR(paths)) goto skip_it; spin_lock(&hash_lock); @@ -706,14 +701,17 @@ void audit_trim_trees(void) struct audit_chunk *chunk = find_chunk(node); /* this could be NULL if the watch is dying else where... */ node->index |= 1U<<31; - if (iterate_mounts(compare_root, - (void *)(chunk->key), - root_mnt)) - node->index &= ~(1U<<31); + for (struct path *p = paths; p->dentry; p++) { + struct inode *inode = p->dentry->d_inode; + if (inode_to_key(inode) == chunk->key) { + node->index &= ~(1U<<31); + break; + } + } } spin_unlock(&hash_lock); trim_marked(tree); - drop_collected_mounts(root_mnt); + drop_collected_paths(paths, array); skip_it: put_tree(tree); mutex_lock(&audit_filter_mutex); @@ -742,9 +740,14 @@ void audit_put_tree(struct audit_tree *tree) put_tree(tree); } -static int tag_mount(struct vfsmount *mnt, void *arg) +static int tag_mounts(struct path *paths, struct audit_tree *tree) { - return tag_chunk(d_backing_inode(mnt->mnt_root), arg); + for (struct path *p = paths; p->dentry; p++) { + int err = tag_chunk(p->dentry->d_inode, tree); + if (err) + return err; + } + return 0; } /* @@ -801,7 +804,8 @@ int audit_add_tree_rule(struct audit_krule *rule) { struct audit_tree *seed = rule->tree, *tree; struct path path; - struct vfsmount *mnt; + struct path array[16]; + struct path *paths; int err; rule->tree = NULL; @@ -828,16 +832,16 @@ int audit_add_tree_rule(struct audit_krule *rule) err = kern_path(tree->pathname, 0, &path); if (err) goto Err; - mnt = collect_mounts(&path); + paths = collect_paths(&path, array, 16); path_put(&path); - if (IS_ERR(mnt)) { - err = PTR_ERR(mnt); + if (IS_ERR(paths)) { + err = PTR_ERR(paths); goto Err; } get_tree(tree); - err = iterate_mounts(tag_mount, tree, mnt); - drop_collected_mounts(mnt); + err = tag_mounts(paths, tree); + drop_collected_paths(paths, array); if (!err) { struct audit_node *node; @@ -872,20 +876,21 @@ int audit_tag_tree(char *old, char *new) struct list_head cursor, barrier; int failed = 0; struct path path1, path2; - struct vfsmount *tagged; + struct path array[16]; + struct path *paths; int err; err = kern_path(new, 0, &path2); if (err) return err; - tagged = collect_mounts(&path2); + paths = collect_paths(&path2, array, 16); path_put(&path2); - if (IS_ERR(tagged)) - return PTR_ERR(tagged); + if (IS_ERR(paths)) + return PTR_ERR(paths); err = kern_path(old, 0, &path1); if (err) { - drop_collected_mounts(tagged); + drop_collected_paths(paths, array); return err; } @@ -914,7 +919,7 @@ int audit_tag_tree(char *old, char *new) continue; } - failed = iterate_mounts(tag_mount, tree, tagged); + failed = tag_mounts(paths, tree); if (failed) { put_tree(tree); mutex_lock(&audit_filter_mutex); @@ -955,7 +960,7 @@ int audit_tag_tree(char *old, char *new) list_del(&cursor); mutex_unlock(&audit_filter_mutex); path_put(&path1); - drop_collected_mounts(tagged); + drop_collected_paths(paths, array); return failed; } diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index 3dabdd137d10..2d6e1c98d8ad 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -337,12 +337,12 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, list) { __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); - if (++nfree == LOCAL_FREE_TARGET) + if (++nfree == lru->target_free) break; } - if (nfree < LOCAL_FREE_TARGET) - __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, + if (nfree < lru->target_free) + __bpf_lru_list_shrink(lru, l, lru->target_free - nfree, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); @@ -577,6 +577,9 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); buf += elem_size; } + + lru->target_free = clamp((nr_elems / num_possible_cpus()) / 2, + 1, LOCAL_FREE_TARGET); } static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index cbd8d3720c2b..fe2661a58ea9 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -58,6 +58,7 @@ struct bpf_lru { del_from_htab_func del_from_htab; void *del_arg; unsigned int hash_offset; + unsigned int target_free; unsigned int nr_scans; bool percpu; }; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 9122c39870bf..f4885514f007 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -2134,7 +2134,7 @@ static const struct bpf_func_proto bpf_sysctl_get_name_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a7d6e0c5928b..169845710c7e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7027,8 +7027,7 @@ BTF_TYPE_SAFE_TRUSTED(struct file) { struct inode *f_inode; }; -BTF_TYPE_SAFE_TRUSTED(struct dentry) { - /* no negative dentry-s in places where bpf can see it */ +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) { struct inode *d_inode; }; @@ -7066,7 +7065,6 @@ static bool type_is_trusted(struct bpf_verifier_env *env, BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file)); - BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted"); } @@ -7076,6 +7074,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env, const char *field_name, u32 btf_id) { BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted_or_null"); diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 8df0dfaaca18..67af8a55185d 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -222,7 +222,10 @@ void __init dma_contiguous_reserve(phys_addr_t limit) if (size_cmdline != -1) { selected_size = size_cmdline; selected_base = base_cmdline; - selected_limit = min_not_zero(limit_cmdline, limit); + + /* Hornor the user setup dma address limit */ + selected_limit = limit_cmdline ?: limit; + if (base_cmdline + size_cmdline == limit_cmdline) fixed = true; } else { diff --git a/kernel/events/core.c b/kernel/events/core.c index 1f746469fda5..22fdf0c187cd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -951,8 +951,6 @@ static void perf_cgroup_switch(struct task_struct *task) if (READ_ONCE(cpuctx->cgrp) == NULL) return; - WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); - cgrp = perf_cgroup_from_task(task, NULL); if (READ_ONCE(cpuctx->cgrp) == cgrp) return; @@ -964,6 +962,8 @@ static void perf_cgroup_switch(struct task_struct *task) if (READ_ONCE(cpuctx->cgrp) == NULL) return; + WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); + perf_ctx_disable(&cpuctx->ctx, true); ctx_sched_out(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP); @@ -7204,18 +7204,18 @@ void perf_event_wakeup(struct perf_event *event) static void perf_sigtrap(struct perf_event *event) { /* - * We'd expect this to only occur if the irq_work is delayed and either - * ctx->task or current has changed in the meantime. This can be the - * case on architectures that do not implement arch_irq_work_raise(). + * Both perf_pending_task() and perf_pending_irq() can race with the + * task exiting. */ - if (WARN_ON_ONCE(event->ctx->task != current)) + if (current->flags & PF_EXITING) return; /* - * Both perf_pending_task() and perf_pending_irq() can race with the - * task exiting. + * We'd expect this to only occur if the irq_work is delayed and either + * ctx->task or current has changed in the meantime. This can be the + * case on architectures that do not implement arch_irq_work_raise(). */ - if (current->flags & PF_EXITING) + if (WARN_ON_ONCE(event->ctx->task != current)) return; send_sig_perf((void __user *)event->pending_addr, @@ -7251,15 +7251,15 @@ static void __perf_pending_disable(struct perf_event *event) * CPU-A CPU-B * * perf_event_disable_inatomic() - * @pending_disable = CPU-A; + * @pending_disable = 1; * irq_work_queue(); * * sched-out - * @pending_disable = -1; + * @pending_disable = 0; * * sched-in * perf_event_disable_inatomic() - * @pending_disable = CPU-B; + * @pending_disable = 1; * irq_work_queue(); // FAILS * * irq_work_run() @@ -11116,7 +11116,7 @@ static int perf_uprobe_event_init(struct perf_event *event) if (event->attr.type != perf_uprobe.type) return -ENOENT; - if (!perfmon_capable()) + if (!capable(CAP_SYS_ADMIN)) return -EACCES; /* diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index d2aef87c7e9f..aa9a759e824f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -441,7 +441,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * store that will be enabled on successful return */ if (!handle->size) { /* A, matches D */ - event->pending_disable = smp_processor_id(); + perf_event_disable_inatomic(handle->event); perf_output_wakeup(handle); WRITE_ONCE(rb->aux_nest, 0); goto err_put; @@ -526,7 +526,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) if (wakeup) { if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) - handle->event->pending_disable = smp_processor_id(); + perf_event_disable_inatomic(handle->event); perf_output_wakeup(handle); } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 9c59fa480b0b..3a9a9f240dbc 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1136,6 +1136,7 @@ int kernel_kexec(void) Resume_devices: dpm_resume_end(PMSG_RESTORE); Resume_console: + pm_restore_gfp_mask(); console_resume_all(); thaw_processes(); Restore_console: diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 69b953551677..5a21dbe17950 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -164,11 +164,21 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, } /* almost as free_reserved_page(), just don't free the page */ -static void kho_restore_page(struct page *page) +static void kho_restore_page(struct page *page, unsigned int order) { - ClearPageReserved(page); - init_page_count(page); - adjust_managed_page_count(page, 1); + unsigned int nr_pages = (1 << order); + + /* Head page gets refcount of 1. */ + set_page_count(page, 1); + + /* For higher order folios, tail pages get a page count of zero. */ + for (unsigned int i = 1; i < nr_pages; i++) + set_page_count(page + i, 0); + + if (order > 0) + prep_compound_page(page, order); + + adjust_managed_page_count(page, nr_pages); } /** @@ -186,15 +196,10 @@ struct folio *kho_restore_folio(phys_addr_t phys) return NULL; order = page->private; - if (order) { - if (order > MAX_PAGE_ORDER) - return NULL; - - prep_compound_page(page, order); - } else { - kho_restore_page(page); - } + if (order > MAX_PAGE_ORDER) + return NULL; + kho_restore_page(page, order); return page_folio(page); } EXPORT_SYMBOL_GPL(kho_restore_folio); diff --git a/kernel/module/main.c b/kernel/module/main.c index 413ac6ea3702..c2c08007029d 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1573,8 +1573,14 @@ static int apply_relocations(struct module *mod, const struct load_info *info) if (infosec >= info->hdr->e_shnum) continue; - /* Don't bother with non-allocated sections */ - if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC)) + /* + * Don't bother with non-allocated sections. + * An exception is the percpu section, which has separate allocations + * for individual CPUs. We relocate the percpu section in the initial + * ELF template and subsequently copy it to the per-CPU destinations. + */ + if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC) && + (!infosec || infosec != info->index.pcpu)) continue; if (info->sechdrs[i].sh_flags & SHF_RELA_LIVEPATCH) @@ -2696,9 +2702,8 @@ static int find_module_sections(struct module *mod, struct load_info *info) static int move_module(struct module *mod, struct load_info *info) { - int i; - enum mod_mem_type t = 0; - int ret = -ENOMEM; + int i, ret; + enum mod_mem_type t = MOD_MEM_NUM_TYPES; bool codetag_section_found = false; for_each_mod_mem_type(type) { @@ -2776,7 +2781,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_err: module_memory_restore_rox(mod); - for (t--; t >= 0; t--) + while (t--) module_memory_free(mod, t); if (codetag_section_found) codetag_free_module_sections(mod); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 519fb09de5e0..9216e3b91d3b 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -423,7 +423,6 @@ int hibernation_snapshot(int platform_mode) } console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend(PMSG_FREEZE); @@ -559,7 +558,6 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); @@ -571,7 +569,6 @@ int hibernation_restore(int platform_mode) BUG_ON(!error); } dpm_resume_end(PMSG_RECOVER); - pm_restore_gfp_mask(); console_resume_all(); pm_restore_console(); return error; diff --git a/kernel/power/power.h b/kernel/power/power.h index cb1d71562002..7ccd709af93f 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -239,11 +239,6 @@ static inline void suspend_test_finish(const char *label) {} /* kernel/power/main.c */ extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down); extern int pm_notifier_call_chain(unsigned long val); -void pm_restrict_gfp_mask(void); -void pm_restore_gfp_mask(void); -#else -static inline void pm_restrict_gfp_mask(void) {} -static inline void pm_restore_gfp_mask(void) {} #endif #ifdef CONFIG_HIGHMEM diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 76b141b9aac0..bb608b68fb30 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -540,6 +540,7 @@ int suspend_devices_and_enter(suspend_state_t state) return error; Recover_platform: + pm_restore_gfp_mask(); platform_recover(state); goto Resume_devices; } @@ -606,9 +607,7 @@ static int enter_state(suspend_state_t state) trace_suspend_resume(TPS("suspend_enter"), state, false); pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]); - pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); - pm_restore_gfp_mask(); Finish: events_check_enabled = false; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8988d38d46a3..81c6df746df1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3362,10 +3362,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #ifdef CONFIG_NUMA_BALANCING static void __migrate_swap_task(struct task_struct *p, int cpu) { - __schedstat_inc(p->stats.numa_task_swapped); - count_vm_numa_event(NUMA_TASK_SWAP); - count_memcg_event_mm(p->mm, NUMA_TASK_SWAP); - if (task_on_rq_queued(p)) { struct rq *src_rq, *dst_rq; struct rq_flags srf, drf; @@ -3943,6 +3939,11 @@ static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) if (!scx_allow_ttwu_queue(p)) return false; +#ifdef CONFIG_SMP + if (p->sched_class == &stop_sched_class) + return false; +#endif + /* * Do not complicate things with the async wake_list while the CPU is * in hotplug state. @@ -7663,7 +7664,7 @@ const char *preempt_model_str(void) if (IS_ENABLED(CONFIG_PREEMPT_DYNAMIC)) { seq_buf_printf(&s, "(%s)%s", - preempt_dynamic_mode > 0 ? + preempt_dynamic_mode >= 0 ? preempt_modes[preempt_dynamic_mode] : "undef", brace ? "}" : ""); return seq_buf_str(&s); @@ -7934,9 +7935,8 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) return -EINVAL; - __schedstat_inc(p->stats.numa_task_migrated); - count_vm_numa_event(NUMA_TASK_MIGRATE); - count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE); + /* TODO: This is not properly updating schedstats */ + trace_sched_move_numa(p, curr_cpu, target_cpu); return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index ad45a8fea245..89019a140826 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1504,7 +1504,9 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 if (dl_entity_is_special(dl_se)) return; - scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec); + scaled_delta_exec = delta_exec; + if (!dl_server(dl_se)) + scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec); dl_se->runtime -= scaled_delta_exec; @@ -1611,7 +1613,7 @@ throttle: */ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) { - s64 delta_exec, scaled_delta_exec; + s64 delta_exec; if (!rq->fair_server.dl_defer) return; @@ -1624,9 +1626,7 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) if (delta_exec < 0) return; - scaled_delta_exec = dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec); - - rq->fair_server.runtime -= scaled_delta_exec; + rq->fair_server.runtime -= delta_exec; if (rq->fair_server.runtime < 0) { rq->fair_server.dl_defer_running = 0; diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 9d71baf08075..557246880a7e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1210,10 +1210,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_failed_migrations_running); P_SCHEDSTAT(nr_failed_migrations_hot); P_SCHEDSTAT(nr_forced_migrations); -#ifdef CONFIG_NUMA_BALANCING - P_SCHEDSTAT(numa_task_migrated); - P_SCHEDSTAT(numa_task_swapped); -#endif P_SCHEDSTAT(nr_wakeups); P_SCHEDSTAT(nr_wakeups_sync); P_SCHEDSTAT(nr_wakeups_migrate); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 5d2d0562115b..3fe6b0c99f3d 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -82,18 +82,15 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done) } static void __cpu_stop_queue_work(struct cpu_stopper *stopper, - struct cpu_stop_work *work, - struct wake_q_head *wakeq) + struct cpu_stop_work *work) { list_add_tail(&work->list, &stopper->works); - wake_q_add(wakeq, stopper->thread); } /* queue @work to @stopper. if offline, @work is completed immediately */ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); - DEFINE_WAKE_Q(wakeq); unsigned long flags; bool enabled; @@ -101,12 +98,13 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) raw_spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) - __cpu_stop_queue_work(stopper, work, &wakeq); + __cpu_stop_queue_work(stopper, work); else if (work->done) cpu_stop_signal_done(work->done); raw_spin_unlock_irqrestore(&stopper->lock, flags); - wake_up_q(&wakeq); + if (enabled) + wake_up_process(stopper->thread); preempt_enable(); return enabled; @@ -264,7 +262,6 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, { struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); - DEFINE_WAKE_Q(wakeq); int err; retry: @@ -300,8 +297,8 @@ retry: } err = 0; - __cpu_stop_queue_work(stopper1, work1, &wakeq); - __cpu_stop_queue_work(stopper2, work2, &wakeq); + __cpu_stop_queue_work(stopper1, work1); + __cpu_stop_queue_work(stopper2, work2); unlock: raw_spin_unlock(&stopper2->lock); @@ -316,7 +313,10 @@ unlock: goto retry; } - wake_up_q(&wakeq); + if (!err) { + wake_up_process(stopper1->thread); + wake_up_process(stopper2->thread); + } preempt_enable(); return err; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 08141f105c95..3885aadc434d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1436,13 +1436,6 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, INIT_LIST_HEAD(&head->list); - item = kmalloc(sizeof(*item), GFP_KERNEL); - if (!item) - goto free_now; - - item->filter = filter; - list_add_tail(&item->list, &head->list); - list_for_each_entry(file, &tr->events, list) { if (file->system != dir) continue; @@ -1454,6 +1447,13 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, event_clear_filter(file); } + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) + goto free_now; + + item->filter = filter; + list_add_tail(&item->list, &head->list); + delay_free_filter(head); return; free_now: diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index d48b80f3f007..0142bc916f73 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -10,6 +10,7 @@ #include <linux/seq_buf.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> +#include <linux/kmemleak.h> #define ALLOCINFO_FILE_NAME "allocinfo" #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) @@ -134,6 +135,9 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl struct codetag_bytes n; unsigned int i, nr = 0; + if (IS_ERR_OR_NULL(alloc_tag_cttype)) + return 0; + if (can_sleep) codetag_lock_module_list(alloc_tag_cttype, true); else if (!codetag_trylock_module_list(alloc_tag_cttype)) @@ -632,8 +636,13 @@ static int load_module(struct module *mod, struct codetag *start, struct codetag mod->name); return -ENOMEM; } - } + /* + * Avoid a kmemleak false positive. The pointer to the counters is stored + * in the alloc_tag section of the module and cannot be directly accessed. + */ + kmemleak_ignore_percpu(tag->counters); + } return 0; } diff --git a/lib/group_cpus.c b/lib/group_cpus.c index ee272c4cefcc..18d43a406114 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -352,6 +352,9 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) int ret = -ENOMEM; struct cpumask *masks = NULL; + if (numgrps == 0) + return NULL; + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return NULL; @@ -426,8 +429,12 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) #else /* CONFIG_SMP */ struct cpumask *group_cpus_evenly(unsigned int numgrps) { - struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); + struct cpumask *masks; + if (numgrps == 0) + return NULL; + + masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); if (!masks) return NULL; diff --git a/lib/maple_tree.c b/lib/maple_tree.c index affe979bd14d..ef66be963798 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5319,6 +5319,7 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, struct maple_enode *start; if (mte_is_leaf(enode)) { + mte_set_node_dead(enode); node->type = mte_node_type(enode); goto free_leaf; } @@ -5527,8 +5528,9 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) mas->store_type = mas_wr_store_type(&wr_mas); request = mas_prealloc_calc(&wr_mas, entry); if (!request) - return ret; + goto set_flag; + mas->mas_flags &= ~MA_STATE_PREALLOC; mas_node_count_gfp(mas, request, gfp); if (mas_is_err(mas)) { mas_set_alloc_req(mas, 0); @@ -5538,6 +5540,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) return ret; } +set_flag: mas->mas_flags |= MA_STATE_PREALLOC; return ret; } diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c index f0887344b274..7d82efa5b14f 100644 --- a/lib/raid6/rvv.c +++ b/lib/raid6/rvv.c @@ -26,9 +26,9 @@ static int rvv_has_vector(void) static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -36,8 +36,9 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ @@ -99,7 +100,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -108,8 +109,9 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ @@ -195,9 +197,9 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -205,8 +207,9 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -287,7 +290,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -296,8 +299,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -413,9 +417,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -423,8 +427,9 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -539,7 +544,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -548,8 +553,9 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -721,9 +727,9 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -731,8 +737,9 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -915,7 +922,7 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -924,8 +931,9 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* diff --git a/lib/test_objagg.c b/lib/test_objagg.c index d34df4306b87..222b39fc2629 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -899,8 +899,10 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints, int err; stats = objagg_hints_stats_get(objagg_hints); - if (IS_ERR(stats)) + if (IS_ERR(stats)) { + *errmsg = "objagg_hints_stats_get() failed."; return PTR_ERR(stats); + } err = __check_expect_stats(stats, expect_stats, errmsg); objagg_stats_put(stats); return err; diff --git a/mm/damon/core.c b/mm/damon/core.c index b217e0120e09..979b29e16ef4 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1449,6 +1449,7 @@ static unsigned long damon_get_intervals_score(struct damon_ctx *c) } } target_access_events = max_access_events * goal_bp / 10000; + target_access_events = target_access_events ? : 1; return access_events * 10000 / target_access_events; } @@ -2355,9 +2356,8 @@ static void kdamond_usleep(unsigned long usecs) * * If there is a &struct damon_call_control request that registered via * &damon_call() on @ctx, do or cancel the invocation of the function depending - * on @cancel. @cancel is set when the kdamond is deactivated by DAMOS - * watermarks, or the kdamond is already out of the main loop and therefore - * will be terminated. + * on @cancel. @cancel is set when the kdamond is already out of the main loop + * and therefore will be terminated. */ static void kdamond_call(struct damon_ctx *ctx, bool cancel) { @@ -2405,7 +2405,7 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) if (ctx->callback.after_wmarks_check && ctx->callback.after_wmarks_check(ctx)) break; - kdamond_call(ctx, true); + kdamond_call(ctx, false); damos_walk_cancel(ctx); } return -EBUSY; diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 0f6c9e1fec0b..30ae7518ffbf 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -472,6 +472,7 @@ static ssize_t memcg_path_store(struct kobject *kobj, return -ENOMEM; strscpy(path, buf, count + 1); + kfree(filter->memcg_path); filter->memcg_path = path; return count; } @@ -2303,13 +2303,13 @@ static void pofs_unpin(struct pages_or_folios *pofs) /* * Returns the number of collected folios. Return value is always >= 0. */ -static void collect_longterm_unpinnable_folios( +static unsigned long collect_longterm_unpinnable_folios( struct list_head *movable_folio_list, struct pages_or_folios *pofs) { + unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; - unsigned long i; for (i = 0; i < pofs->nr_entries; i++) { struct folio *folio = pofs_get_folio(pofs, i); @@ -2321,6 +2321,8 @@ static void collect_longterm_unpinnable_folios( if (folio_is_longterm_pinnable(folio)) continue; + collected++; + if (folio_is_device_coherent(folio)) continue; @@ -2342,6 +2344,8 @@ static void collect_longterm_unpinnable_folios( NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } + + return collected; } /* @@ -2418,9 +2422,11 @@ static long check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs) { LIST_HEAD(movable_folio_list); + unsigned long collected; - collect_longterm_unpinnable_folios(&movable_folio_list, pofs); - if (list_empty(&movable_folio_list)) + collected = collect_longterm_unpinnable_folios(&movable_folio_list, + pofs); + if (!collected) return 0; return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8746ed2fec13..a0d285d20992 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2340,12 +2340,15 @@ struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, struct folio *folio; spin_lock_irq(&hugetlb_lock); + if (!h->resv_huge_pages) { + spin_unlock_irq(&hugetlb_lock); + return NULL; + } + folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid, nmask); - if (folio) { - VM_BUG_ON(!h->resv_huge_pages); + if (folio) h->resv_huge_pages--; - } spin_unlock_irq(&hugetlb_lock); return folio; @@ -2787,20 +2790,24 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, /* * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve * the old one - * @h: struct hstate old page belongs to * @old_folio: Old folio to dissolve * @list: List to isolate the page in case we need to * Returns 0 on success, otherwise negated error. */ -static int alloc_and_dissolve_hugetlb_folio(struct hstate *h, - struct folio *old_folio, struct list_head *list) +static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio, + struct list_head *list) { - gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; + gfp_t gfp_mask; + struct hstate *h; int nid = folio_nid(old_folio); struct folio *new_folio = NULL; int ret = 0; retry: + /* + * The old_folio might have been dissolved from under our feet, so make sure + * to carefully check the state under the lock. + */ spin_lock_irq(&hugetlb_lock); if (!folio_test_hugetlb(old_folio)) { /* @@ -2829,8 +2836,10 @@ retry: cond_resched(); goto retry; } else { + h = folio_hstate(old_folio); if (!new_folio) { spin_unlock_irq(&hugetlb_lock); + gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL); if (!new_folio) @@ -2874,35 +2883,24 @@ free_new: int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list) { - struct hstate *h; int ret = -EBUSY; - /* - * The page might have been dissolved from under our feet, so make sure - * to carefully check the state under the lock. - * Return success when racing as if we dissolved the page ourselves. - */ - spin_lock_irq(&hugetlb_lock); - if (folio_test_hugetlb(folio)) { - h = folio_hstate(folio); - } else { - spin_unlock_irq(&hugetlb_lock); + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (!folio_test_hugetlb(folio)) return 0; - } - spin_unlock_irq(&hugetlb_lock); /* * Fence off gigantic pages as there is a cyclic dependency between * alloc_contig_range and them. Return -ENOMEM as this has the effect * of bailing out right away without further retrying. */ - if (hstate_is_gigantic(h)) + if (folio_order(folio) > MAX_PAGE_ORDER) return -ENOMEM; if (folio_ref_count(folio) && folio_isolate_hugetlb(folio, list)) ret = 0; else if (!folio_ref_count(folio)) - ret = alloc_and_dissolve_hugetlb_folio(h, folio, list); + ret = alloc_and_dissolve_hugetlb_folio(folio, list); return ret; } @@ -2916,7 +2914,6 @@ int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list) */ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) { - struct hstate *h; struct folio *folio; int ret = 0; @@ -2925,23 +2922,9 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) while (start_pfn < end_pfn) { folio = pfn_folio(start_pfn); - /* - * The folio might have been dissolved from under our feet, so make sure - * to carefully check the state under the lock. - */ - spin_lock_irq(&hugetlb_lock); - if (folio_test_hugetlb(folio)) { - h = folio_hstate(folio); - } else { - spin_unlock_irq(&hugetlb_lock); - start_pfn++; - continue; - } - spin_unlock_irq(&hugetlb_lock); - - if (!folio_ref_count(folio)) { - ret = alloc_and_dissolve_hugetlb_folio(h, folio, - &isolate_list); + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (folio_test_hugetlb(folio) && !folio_ref_count(folio)) { + ret = alloc_and_dissolve_hugetlb_folio(folio, &isolate_list); if (ret) break; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8357e1a33699..b0877035491f 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -370,36 +370,6 @@ static inline bool init_task_stack_addr(const void *addr) sizeof(init_thread_union.stack)); } -/* - * This function is invoked with report_lock (a raw_spinlock) held. A - * PREEMPT_RT kernel cannot call find_vm_area() as it will acquire a sleeping - * rt_spinlock. - * - * For !RT kernel, the PROVE_RAW_LOCK_NESTING config option will print a - * lockdep warning for this raw_spinlock -> spinlock dependency. This config - * option is enabled by default to ensure better test coverage to expose this - * kind of RT kernel problem. This lockdep splat, however, can be suppressed - * by using DEFINE_WAIT_OVERRIDE_MAP() if it serves a useful purpose and the - * invalid PREEMPT_RT case has been taken care of. - */ -static inline struct vm_struct *kasan_find_vm_area(void *addr) -{ - static DEFINE_WAIT_OVERRIDE_MAP(vmalloc_map, LD_WAIT_SLEEP); - struct vm_struct *va; - - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - return NULL; - - /* - * Suppress lockdep warning and fetch vmalloc area of the - * offending address. - */ - lock_map_acquire_try(&vmalloc_map); - va = find_vm_area(addr); - lock_map_release(&vmalloc_map); - return va; -} - static void print_address_description(void *addr, u8 tag, struct kasan_report_info *info) { @@ -429,19 +399,8 @@ static void print_address_description(void *addr, u8 tag, } if (is_vmalloc_addr(addr)) { - struct vm_struct *va = kasan_find_vm_area(addr); - - if (va) { - pr_err("The buggy address belongs to the virtual mapping at\n" - " [%px, %px) created by:\n" - " %pS\n", - va->addr, va->addr + va->size, va->caller); - pr_err("\n"); - - page = vmalloc_to_page(addr); - } else { - pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr); - } + pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr); + page = vmalloc_to_page(addr); } if (page) { diff --git a/mm/kmemleak.c b/mm/kmemleak.c index da9cee34ee1b..8d588e685311 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1247,6 +1247,20 @@ void __ref kmemleak_transient_leak(const void *ptr) EXPORT_SYMBOL(kmemleak_transient_leak); /** + * kmemleak_ignore_percpu - similar to kmemleak_ignore but taking a percpu + * address argument + * @ptr: percpu address of the object + */ +void __ref kmemleak_ignore_percpu(const void __percpu *ptr) +{ + pr_debug("%s(0x%px)\n", __func__, ptr); + + if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr)) + make_black_object((unsigned long)ptr, OBJECT_PERCPU); +} +EXPORT_SYMBOL_GPL(kmemleak_ignore_percpu); + +/** * kmemleak_ignore - ignore an allocated object * @ptr: pointer to beginning of the object * diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 902da8a9c643..70fdeda1120b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -474,8 +474,6 @@ static const unsigned int memcg_vm_event_stat[] = { NUMA_PAGE_MIGRATE, NUMA_PTE_UPDATES, NUMA_HINT_FAULTS, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif }; diff --git a/mm/memory.c b/mm/memory.c index 8eba595056fe..b0cda5aab398 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4315,26 +4315,6 @@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) -{ - struct swap_info_struct *si = swp_swap_info(entry); - pgoff_t offset = swp_offset(entry); - int i; - - /* - * While allocating a large folio and doing swap_read_folio, which is - * the case the being faulted pte doesn't have swapcache. We need to - * ensure all PTEs have no cache as well, otherwise, we might go to - * swap devices while the content is in swapcache. - */ - for (i = 0; i < max_nr; i++) { - if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) - return i; - } - - return i; -} - /* * Check if the PTEs within a range are contiguous swap entries * and have consistent swapcache, zeromap. diff --git a/mm/migrate.c b/mm/migrate.c index 8cf0f9c9599d..2c88f3b33833 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2399,6 +2399,7 @@ set_status: static int get_compat_pages_array(const void __user *chunk_pages[], const void __user * __user *pages, + unsigned long chunk_offset, unsigned long chunk_nr) { compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages; @@ -2406,7 +2407,7 @@ static int get_compat_pages_array(const void __user *chunk_pages[], int i; for (i = 0; i < chunk_nr; i++) { - if (get_user(p, pages32 + i)) + if (get_user(p, pages32 + chunk_offset + i)) return -EFAULT; chunk_pages[i] = compat_ptr(p); } @@ -2425,27 +2426,28 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, #define DO_PAGES_STAT_CHUNK_NR 16UL const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; int chunk_status[DO_PAGES_STAT_CHUNK_NR]; + unsigned long chunk_offset = 0; while (nr_pages) { unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR); if (in_compat_syscall()) { if (get_compat_pages_array(chunk_pages, pages, - chunk_nr)) + chunk_offset, chunk_nr)) break; } else { - if (copy_from_user(chunk_pages, pages, + if (copy_from_user(chunk_pages, pages + chunk_offset, chunk_nr * sizeof(*chunk_pages))) break; } do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); - if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status))) + if (copy_to_user(status + chunk_offset, chunk_status, + chunk_nr * sizeof(*status))) break; - pages += chunk_nr; - status += chunk_nr; + chunk_offset += chunk_nr; nr_pages -= chunk_nr; } return nr_pages ? -EFAULT : 0; diff --git a/mm/rmap.c b/mm/rmap.c index fb63d9256f09..1320b88fab74 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1845,23 +1845,32 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page, #endif } -/* We support batch unmapping of PTEs for lazyfree large folios */ -static inline bool can_batch_unmap_folio_ptes(unsigned long addr, - struct folio *folio, pte_t *ptep) +static inline unsigned int folio_unmap_pte_batch(struct folio *folio, + struct page_vma_mapped_walk *pvmw, + enum ttu_flags flags, pte_t pte) { const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; - int max_nr = folio_nr_pages(folio); - pte_t pte = ptep_get(ptep); + unsigned long end_addr, addr = pvmw->address; + struct vm_area_struct *vma = pvmw->vma; + unsigned int max_nr; + + if (flags & TTU_HWPOISON) + return 1; + if (!folio_test_large(folio)) + return 1; + /* We may only batch within a single VMA and a single page table. */ + end_addr = pmd_addr_end(addr, vma->vm_end); + max_nr = (end_addr - addr) >> PAGE_SHIFT; + + /* We only support lazyfree batching for now ... */ if (!folio_test_anon(folio) || folio_test_swapbacked(folio)) - return false; + return 1; if (pte_unused(pte)) - return false; - if (pte_pfn(pte) != folio_pfn(folio)) - return false; + return 1; - return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL, - NULL, NULL) == max_nr; + return folio_pte_batch(folio, addr, pvmw->pte, pte, max_nr, fpb_flags, + NULL, NULL, NULL); } /* @@ -2024,9 +2033,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, if (pte_dirty(pteval)) folio_mark_dirty(folio); } else if (likely(pte_present(pteval))) { - if (folio_test_large(folio) && !(flags & TTU_HWPOISON) && - can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) - nr_pages = folio_nr_pages(folio); + nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval); end_addr = address + nr_pages * PAGE_SIZE; flush_cache_range(vma, address, end_addr); @@ -2206,13 +2213,16 @@ discard: hugetlb_remove_rmap(folio); } else { folio_remove_rmap_ptes(folio, subpage, nr_pages, vma); - folio_ref_sub(folio, nr_pages - 1); } if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); - folio_put(folio); - /* We have already batched the entire folio */ - if (nr_pages > 1) + folio_put_refs(folio, nr_pages); + + /* + * If we are sure that we batched the entire folio and cleared + * all PTEs, we can just optimize and stop right here. + */ + if (nr_pages == folio_nr_pages(folio)) goto walk_done; continue; walk_abort: diff --git a/mm/secretmem.c b/mm/secretmem.c index 589b26c2d553..9a11a38a6770 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -195,18 +195,11 @@ static struct file *secretmem_file_create(unsigned long flags) struct file *file; struct inode *inode; const char *anon_name = "[secretmem]"; - int err; - inode = alloc_anon_inode(secretmem_mnt->mnt_sb); + inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL); if (IS_ERR(inode)) return ERR_CAST(inode); - err = security_inode_init_security_anon(inode, &QSTR(anon_name), NULL); - if (err) { - file = ERR_PTR(err); - goto err_free_inode; - } - file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", O_RDWR, &secretmem_fops); if (IS_ERR(file)) diff --git a/mm/shmem.c b/mm/shmem.c index 0c5fb4ffa03a..3a5a65b1f41a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2259,6 +2259,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, folio = swap_cache_get_folio(swap, NULL, 0); order = xa_get_order(&mapping->i_pages, index); if (!folio) { + int nr_pages = 1 << order; bool fallback_order0 = false; /* Or update major stats only when swapin succeeds?? */ @@ -2272,9 +2273,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, * If uffd is active for the vma, we need per-page fault * fidelity to maintain the uffd semantics, then fallback * to swapin order-0 folio, as well as for zswap case. + * Any existing sub folio in the swap cache also blocks + * mTHP swapin. */ if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) || - !zswap_never_enabled())) + !zswap_never_enabled() || + non_swapcache_batch(swap, nr_pages) != nr_pages)) fallback_order0 = true; /* Skip swapcache for synchronous device. */ diff --git a/mm/swap.h b/mm/swap.h index 2269eb9df0af..9096082a915e 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -106,6 +106,25 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, return find_next_bit(sis->zeromap, end, start) - start; } +static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +{ + struct swap_info_struct *si = swp_swap_info(entry); + pgoff_t offset = swp_offset(entry); + int i; + + /* + * While allocating a large folio and doing mTHP swapin, we need to + * ensure all entries are not cached, otherwise, the mTHP folio will + * be in conflict with the folio in swap cache. + */ + for (i = 0; i < max_nr; i++) { + if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) + return i; + } + + return i; +} + #else /* CONFIG_SWAP */ struct swap_iocb; static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug) @@ -199,6 +218,10 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, return 0; } +static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +{ + return 0; +} #endif /* CONFIG_SWAP */ /** diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index bc473ad21202..8253978ee0fb 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1084,8 +1084,18 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, pte_t orig_dst_pte, pte_t orig_src_pte, pmd_t *dst_pmd, pmd_t dst_pmdval, spinlock_t *dst_ptl, spinlock_t *src_ptl, - struct folio *src_folio) + struct folio *src_folio, + struct swap_info_struct *si, swp_entry_t entry) { + /* + * Check if the folio still belongs to the target swap entry after + * acquiring the lock. Folio can be freed in the swap cache while + * not locked. + */ + if (src_folio && unlikely(!folio_test_swapcache(src_folio) || + entry.val != src_folio->swap.val)) + return -EAGAIN; + double_pt_lock(dst_ptl, src_ptl); if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte, @@ -1102,6 +1112,25 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, if (src_folio) { folio_move_anon_rmap(src_folio, dst_vma); src_folio->index = linear_page_index(dst_vma, dst_addr); + } else { + /* + * Check if the swap entry is cached after acquiring the src_pte + * lock. Otherwise, we might miss a newly loaded swap cache folio. + * + * Check swap_map directly to minimize overhead, READ_ONCE is sufficient. + * We are trying to catch newly added swap cache, the only possible case is + * when a folio is swapped in and out again staying in swap cache, using the + * same entry before the PTE check above. The PTL is acquired and released + * twice, each time after updating the swap_map's flag. So holding + * the PTL here ensures we see the updated value. False positive is possible, + * e.g. SWP_SYNCHRONOUS_IO swapin may set the flag without touching the + * cache, or during the tiny synchronization window between swap cache and + * swap_map, but it will be gone very quickly, worst result is retry jitters. + */ + if (READ_ONCE(si->swap_map[swp_offset(entry)]) & SWAP_HAS_CACHE) { + double_pt_unlock(dst_ptl, src_ptl); + return -EAGAIN; + } } orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); @@ -1412,7 +1441,7 @@ retry: } err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte, orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, - dst_ptl, src_ptl, src_folio); + dst_ptl, src_ptl, src_folio, si, entry); } out: diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ab986dd09b6a..6dbcdceecae1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -514,6 +514,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr, pgtbl_mod_mask *mask) { + int err = 0; pte_t *pte; /* @@ -530,12 +531,18 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, do { struct page *page = pages[*nr]; - if (WARN_ON(!pte_none(ptep_get(pte)))) - return -EBUSY; - if (WARN_ON(!page)) - return -ENOMEM; - if (WARN_ON(!pfn_valid(page_to_pfn(page)))) - return -EINVAL; + if (WARN_ON(!pte_none(ptep_get(pte)))) { + err = -EBUSY; + break; + } + if (WARN_ON(!page)) { + err = -ENOMEM; + break; + } + if (WARN_ON(!pfn_valid(page_to_pfn(page)))) { + err = -EINVAL; + break; + } set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); (*nr)++; @@ -543,7 +550,8 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, arch_leave_lazy_mmu_mode(); *mask |= PGTBL_PTE_MODIFIED; - return 0; + + return err; } static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr, diff --git a/mm/vmstat.c b/mm/vmstat.c index 429ae5339bfe..a78d70ddeacd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1346,8 +1346,6 @@ const char * const vmstat_text[] = { "numa_hint_faults", "numa_hint_faults_local", "numa_pages_migrated", - "numa_task_migrated", - "numa_task_swapped", #endif #ifdef CONFIG_MIGRATION "pgmigrate_success", diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 73ea7e67f05a..30242fe10341 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -576,6 +576,7 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint) /* Fill in the routing entry */ rt->target = ta->sat_addr; + dev_put(rt->dev); /* Release old device */ dev_hold(devhint); rt->dev = devhint; rt->flags = r->rt_flags; diff --git a/net/atm/clip.c b/net/atm/clip.c index 61b5b700817d..f7a5565e794e 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -45,7 +45,8 @@ #include <net/atmclip.h> static struct net_device *clip_devs; -static struct atm_vcc *atmarpd; +static struct atm_vcc __rcu *atmarpd; +static DEFINE_MUTEX(atmarpd_lock); static struct timer_list idle_timer; static const struct neigh_ops clip_neigh_ops; @@ -53,24 +54,35 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) { struct sock *sk; struct atmarp_ctrl *ctrl; + struct atm_vcc *vcc; struct sk_buff *skb; + int err = 0; pr_debug("(%d)\n", type); - if (!atmarpd) - return -EUNATCH; + + rcu_read_lock(); + vcc = rcu_dereference(atmarpd); + if (!vcc) { + err = -EUNATCH; + goto unlock; + } skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC); - if (!skb) - return -ENOMEM; + if (!skb) { + err = -ENOMEM; + goto unlock; + } ctrl = skb_put(skb, sizeof(struct atmarp_ctrl)); ctrl->type = type; ctrl->itf_num = itf; ctrl->ip = ip; - atm_force_charge(atmarpd, skb->truesize); + atm_force_charge(vcc, skb->truesize); - sk = sk_atm(atmarpd); + sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); - return 0; +unlock: + rcu_read_unlock(); + return err; } static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry) @@ -193,12 +205,6 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("\n"); - if (!clip_devs) { - atm_return(vcc, skb->truesize); - kfree_skb(skb); - return; - } - if (!skb) { pr_debug("removing VCC %p\n", clip_vcc); if (clip_vcc->entry) @@ -208,6 +214,11 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) return; } atm_return(vcc, skb->truesize); + if (!clip_devs) { + kfree_skb(skb); + return; + } + skb->dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : clip_devs; /* clip_vcc->entry == NULL if we don't have an IP address yet */ if (!skb->dev) { @@ -418,6 +429,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) if (!vcc->push) return -EBADFD; + if (vcc->user_back) + return -EINVAL; clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL); if (!clip_vcc) return -ENOMEM; @@ -608,17 +621,27 @@ static void atmarpd_close(struct atm_vcc *vcc) { pr_debug("\n"); - rtnl_lock(); - atmarpd = NULL; + mutex_lock(&atmarpd_lock); + RCU_INIT_POINTER(atmarpd, NULL); + mutex_unlock(&atmarpd_lock); + + synchronize_rcu(); skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); - rtnl_unlock(); pr_debug("(done)\n"); module_put(THIS_MODULE); } +static int atmarpd_send(struct atm_vcc *vcc, struct sk_buff *skb) +{ + atm_return_tx(vcc, skb); + dev_kfree_skb_any(skb); + return 0; +} + static const struct atmdev_ops atmarpd_dev_ops = { - .close = atmarpd_close + .close = atmarpd_close, + .send = atmarpd_send }; @@ -632,15 +655,18 @@ static struct atm_dev atmarpd_dev = { static int atm_init_atmarp(struct atm_vcc *vcc) { - rtnl_lock(); + if (vcc->push == clip_push) + return -EINVAL; + + mutex_lock(&atmarpd_lock); if (atmarpd) { - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return -EADDRINUSE; } mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); - atmarpd = vcc; + rcu_assign_pointer(atmarpd, vcc); set_bit(ATM_VF_META, &vcc->flags); set_bit(ATM_VF_READY, &vcc->flags); /* allow replies and avoid getting closed if signaling dies */ @@ -649,13 +675,14 @@ static int atm_init_atmarp(struct atm_vcc *vcc) vcc->push = NULL; vcc->pop = NULL; /* crash */ vcc->push_oam = NULL; /* crash */ - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return 0; } static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct atm_vcc *vcc = ATM_SD(sock); + struct sock *sk = sock->sk; int err = 0; switch (cmd) { @@ -676,14 +703,18 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = clip_create(arg); break; case ATMARPD_CTRL: + lock_sock(sk); err = atm_init_atmarp(vcc); if (!err) { sock->state = SS_CONNECTED; __module_get(THIS_MODULE); } + release_sock(sk); break; case ATMARP_MKIP: + lock_sock(sk); err = clip_mkip(vcc, arg); + release_sock(sk); break; case ATMARP_SETENTRY: err = clip_setentry(vcc, (__force __be32)arg); diff --git a/net/atm/resources.c b/net/atm/resources.c index 995d29e7fb13..b19d851e1f44 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -146,11 +146,10 @@ void atm_dev_deregister(struct atm_dev *dev) */ mutex_lock(&atm_dev_mutex); list_del(&dev->dev_list); - mutex_unlock(&atm_dev_mutex); - atm_dev_release_vccs(dev); atm_unregister_sysfs(dev); atm_proc_dev_deregister(dev); + mutex_unlock(&atm_dev_mutex); atm_dev_put(dev); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 07a8b4281a39..14d7221b8ac0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -64,7 +64,7 @@ static DEFINE_IDA(hci_index_ida); /* Get HCI device by index. * Device is held on return. */ -struct hci_dev *hci_dev_get(int index) +static struct hci_dev *__hci_dev_get(int index, int *srcu_index) { struct hci_dev *hdev = NULL, *d; @@ -77,6 +77,8 @@ struct hci_dev *hci_dev_get(int index) list_for_each_entry(d, &hci_dev_list, list) { if (d->id == index) { hdev = hci_dev_hold(d); + if (srcu_index) + *srcu_index = srcu_read_lock(&d->srcu); break; } } @@ -84,6 +86,22 @@ struct hci_dev *hci_dev_get(int index) return hdev; } +struct hci_dev *hci_dev_get(int index) +{ + return __hci_dev_get(index, NULL); +} + +static struct hci_dev *hci_dev_get_srcu(int index, int *srcu_index) +{ + return __hci_dev_get(index, srcu_index); +} + +static void hci_dev_put_srcu(struct hci_dev *hdev, int srcu_index) +{ + srcu_read_unlock(&hdev->srcu, srcu_index); + hci_dev_put(hdev); +} + /* ---- Inquiry support ---- */ bool hci_discovery_active(struct hci_dev *hdev) @@ -568,9 +586,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev) int hci_dev_reset(__u16 dev) { struct hci_dev *hdev; - int err; + int err, srcu_index; - hdev = hci_dev_get(dev); + hdev = hci_dev_get_srcu(dev, &srcu_index); if (!hdev) return -ENODEV; @@ -592,7 +610,7 @@ int hci_dev_reset(__u16 dev) err = hci_dev_do_reset(hdev); done: - hci_dev_put(hdev); + hci_dev_put_srcu(hdev, srcu_index); return err; } @@ -2433,6 +2451,11 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) if (!hdev) return NULL; + if (init_srcu_struct(&hdev->srcu)) { + kfree(hdev); + return NULL; + } + hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); @@ -2678,6 +2701,9 @@ void hci_unregister_dev(struct hci_dev *hdev) list_del(&hdev->list); write_unlock(&hci_dev_list_lock); + synchronize_srcu(&hdev->srcu); + cleanup_srcu_struct(&hdev->srcu); + disable_work_sync(&hdev->rx_work); disable_work_sync(&hdev->cmd_work); disable_work_sync(&hdev->tx_work); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 66052d6aaa1d..992131f88a45 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2150,40 +2150,6 @@ static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_le_set_ext_adv_params *rp = data; - struct hci_cp_le_set_ext_adv_params *cp; - struct adv_info *adv_instance; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS); - if (!cp) - return rp->status; - - hci_dev_lock(hdev); - hdev->adv_addr_type = cp->own_addr_type; - if (!cp->handle) { - /* Store in hdev for instance 0 */ - hdev->adv_tx_power = rp->tx_power; - } else { - adv_instance = hci_find_adv_instance(hdev, cp->handle); - if (adv_instance) - adv_instance->tx_power = rp->tx_power; - } - /* Update adv data as tx power is known now */ - hci_update_adv_data(hdev, cp->handle); - - hci_dev_unlock(hdev); - - return rp->status; -} - static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -4164,8 +4130,6 @@ static const struct hci_cc { HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS, hci_cc_le_read_num_adv_sets, sizeof(struct hci_rp_le_read_num_supported_adv_sets)), - HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param, - sizeof(struct hci_rp_le_set_ext_adv_params)), HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE, hci_cc_le_set_ext_adv_enable), HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR, @@ -7002,7 +6966,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu); if (!ev->status) { + bis->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_SYNC, &bis->flags); + hci_debugfs_create_conn(bis); + hci_conn_add_sysfs(bis); hci_iso_setup_path(bis); } } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 6687f2a4d1eb..5f178db8d40d 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1205,9 +1205,126 @@ static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } +static int +hci_set_ext_adv_params_sync(struct hci_dev *hdev, struct adv_info *adv, + const struct hci_cp_le_set_ext_adv_params *cp, + struct hci_rp_le_set_ext_adv_params *rp) +{ + struct sk_buff *skb; + + skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(*cp), + cp, HCI_CMD_TIMEOUT); + + /* If command return a status event, skb will be set to -ENODATA */ + if (skb == ERR_PTR(-ENODATA)) + return 0; + + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", + HCI_OP_LE_SET_EXT_ADV_PARAMS, PTR_ERR(skb)); + return PTR_ERR(skb); + } + + if (skb->len != sizeof(*rp)) { + bt_dev_err(hdev, "Invalid response length for 0x%4.4x: %u", + HCI_OP_LE_SET_EXT_ADV_PARAMS, skb->len); + kfree_skb(skb); + return -EIO; + } + + memcpy(rp, skb->data, sizeof(*rp)); + kfree_skb(skb); + + if (!rp->status) { + hdev->adv_addr_type = cp->own_addr_type; + if (!cp->handle) { + /* Store in hdev for instance 0 */ + hdev->adv_tx_power = rp->tx_power; + } else if (adv) { + adv->tx_power = rp->tx_power; + } + } + + return rp->status; +} + +static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, + HCI_MAX_EXT_AD_LENGTH); + u8 len; + struct adv_info *adv = NULL; + int err; + + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv || !adv->adv_data_changed) + return 0; + } + + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); + + pdu->length = len; + pdu->handle = adv ? adv->handle : instance; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; + + err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, + struct_size(pdu, data, len), pdu, + HCI_CMD_TIMEOUT); + if (err) + return err; + + /* Update data if the command succeed */ + if (adv) { + adv->adv_data_changed = false; + } else { + memcpy(hdev->adv_data, pdu->data, len); + hdev->adv_data_len = len; + } + + return 0; +} + +static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + struct hci_cp_le_set_adv_data cp; + u8 len; + + memset(&cp, 0, sizeof(cp)); + + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); + + /* There's nothing to do if the data hasn't changed */ + if (hdev->adv_data_len == len && + memcmp(cp.data, hdev->adv_data, len) == 0) + return 0; + + memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); + hdev->adv_data_len = len; + + cp.length = len; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + +int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return 0; + + if (ext_adv_capable(hdev)) + return hci_set_ext_adv_data_sync(hdev, instance); + + return hci_set_adv_data_sync(hdev, instance); +} + int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; bool connectable; u32 flags; bdaddr_t random_addr; @@ -1228,7 +1345,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) * Command Disallowed error, so we must first disable the * instance if it is active. */ - if (adv && !adv->pending) { + if (adv) { err = hci_disable_ext_adv_instance_sync(hdev, instance); if (err) return err; @@ -1316,8 +1433,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp.secondary_phy = HCI_ADV_PHY_1M; } - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, adv, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; @@ -1822,79 +1943,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) sizeof(cp), &cp, HCI_CMD_TIMEOUT); } -static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, - HCI_MAX_EXT_AD_LENGTH); - u8 len; - struct adv_info *adv = NULL; - int err; - - if (instance) { - adv = hci_find_adv_instance(hdev, instance); - if (!adv || !adv->adv_data_changed) - return 0; - } - - len = eir_create_adv_data(hdev, instance, pdu->data, - HCI_MAX_EXT_AD_LENGTH); - - pdu->length = len; - pdu->handle = adv ? adv->handle : instance; - pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; - - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, - struct_size(pdu, data, len), pdu, - HCI_CMD_TIMEOUT); - if (err) - return err; - - /* Update data if the command succeed */ - if (adv) { - adv->adv_data_changed = false; - } else { - memcpy(hdev->adv_data, pdu->data, len); - hdev->adv_data_len = len; - } - - return 0; -} - -static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - struct hci_cp_le_set_adv_data cp; - u8 len; - - memset(&cp, 0, sizeof(cp)); - - len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return 0; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); -} - -int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return 0; - - if (ext_adv_capable(hdev)) - return hci_set_ext_adv_data_sync(hdev, instance); - - return hci_set_adv_data_sync(hdev, instance); -} - int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force) { @@ -1970,13 +2018,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk) static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) { struct adv_info *adv, *n; - int err = 0; if (ext_adv_capable(hdev)) /* Remove all existing sets */ - err = hci_clear_adv_sets_sync(hdev, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_clear_adv_sets_sync(hdev, sk); /* This is safe as long as there is no command send while the lock is * held. @@ -2004,13 +2049,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { - int err = 0; + int err; /* If we use extended advertising, instance has to be removed first. */ if (ext_adv_capable(hdev)) - err = hci_remove_ext_adv_instance_sync(hdev, instance, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_remove_ext_adv_instance_sync(hdev, instance, sk); /* This is safe as long as there is no command send while the lock is * held. @@ -2109,16 +2152,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type) int hci_disable_advertising_sync(struct hci_dev *hdev) { u8 enable = 0x00; - int err = 0; /* If controller is not advertising we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; if (ext_adv_capable(hdev)) - err = hci_disable_ext_adv_instance_sync(hdev, 0x00); - if (ext_adv_capable(hdev)) - return err; + return hci_disable_ext_adv_instance_sync(hdev, 0x00); return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); @@ -2481,6 +2521,10 @@ static int hci_pause_advertising_sync(struct hci_dev *hdev) int err; int old_state; + /* If controller is not advertising we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) + return 0; + /* If already been paused there is nothing to do. */ if (hdev->advertising_paused) return 0; @@ -5449,7 +5493,7 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, { struct hci_cp_disconnect cp; - if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { + if (conn->type == BIS_LINK) { /* This is a BIS connection, hci_conn_del will * do the necessary cleanup. */ @@ -6277,6 +6321,7 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; int err; bdaddr_t random_addr; u8 own_addr_type; @@ -6318,8 +6363,12 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, if (err) return err; - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, NULL, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a5bde5db58ef..40daa38276f3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3415,7 +3415,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; struct l2cap_conf_efs efs; u8 remote_efs = 0; - u16 mtu = L2CAP_DEFAULT_MTU; + u16 mtu = 0; u16 result = L2CAP_CONF_SUCCESS; u16 size; @@ -3520,6 +3520,13 @@ done: /* Configure output options and let the other side know * which ones we don't like. */ + /* If MTU is not provided in configure request, use the most recently + * explicitly or implicitly accepted value for the other direction, + * or the default value. + */ + if (mtu == 0) + mtu = chan->imtu ? chan->imtu : L2CAP_DEFAULT_MTU; + if (mtu < L2CAP_DEFAULT_MIN_MTU) result = L2CAP_CONF_UNACCEPT; else { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d540f7b4f75f..1485b455ade4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1080,7 +1080,8 @@ static int mesh_send_done_sync(struct hci_dev *hdev, void *data) struct mgmt_mesh_tx *mesh_tx; hci_dev_clear_flag(hdev, HCI_MESH_SENDING); - hci_disable_advertising_sync(hdev); + if (list_empty(&hdev->adv_instances)) + hci_disable_advertising_sync(hdev); mesh_tx = mgmt_mesh_next(hdev, NULL); if (mesh_tx) @@ -2153,6 +2154,9 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data) else hci_dev_clear_flag(hdev, HCI_MESH); + hdev->le_scan_interval = __le16_to_cpu(cp->period); + hdev->le_scan_window = __le16_to_cpu(cp->window); + len -= sizeof(*cp); /* If filters don't fit, forward all adv pkts */ @@ -2167,6 +2171,7 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_mesh *cp = data; struct mgmt_pending_cmd *cmd; + __u16 period, window; int err = 0; bt_dev_dbg(hdev, "sock %p", sk); @@ -2180,6 +2185,23 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, MGMT_STATUS_INVALID_PARAMS); + /* Keep allowed ranges in sync with set_scan_params() */ + period = __le16_to_cpu(cp->period); + + if (period < 0x0004 || period > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + window = __le16_to_cpu(cp->window); + + if (window < 0x0004 || window > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + if (window > period) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len); @@ -6432,6 +6454,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, MGMT_STATUS_NOT_SUPPORTED); + /* Keep allowed ranges in sync with set_mesh() */ interval = __le16_to_cpu(cp->interval); if (interval < 0x0004 || interval > 0x4000) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 0224ef3dfec0..1377f31b719c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2015,10 +2015,19 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) { + struct net_bridge *br = pmctx->port->br; + bool del = false; + #if IS_ENABLED(CONFIG_IPV6) timer_delete_sync(&pmctx->ip6_mc_router_timer); #endif timer_delete_sync(&pmctx->ip4_mc_router_timer); + + spin_lock_bh(&br->multicast_lock); + del |= br_ip6_multicast_rport_del(pmctx); + del |= br_ip4_multicast_rport_del(pmctx); + br_multicast_rport_del_notify(pmctx, del); + spin_unlock_bh(&br->multicast_lock); } int br_multicast_add_port(struct net_bridge_port *port) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4ddb7490df4b..6ad84d4a2b46 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -432,6 +432,7 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->dest = htons(np->remote_port); udph->len = htons(udp_len); + udph->check = 0; if (np->ipv6) { udph->check = csum_ipv6_magic(&np->local_ip.in6, &np->remote_ip.in6, @@ -460,7 +461,6 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IPV6); } else { - udph->check = 0; udph->check = csum_tcpudp_magic(np->local_ip.ip, np->remote_ip.ip, udp_len, IPPROTO_UDP, diff --git a/net/core/selftests.c b/net/core/selftests.c index 35f807ea9952..406faf8e5f3f 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -160,8 +160,9 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; if (attr->tcp) { - thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, - ihdr->daddr, 0); + int l4len = skb->len - skb_transport_offset(skb); + + thdr->check = ~tcp_v4_check(l4len, ihdr->saddr, ihdr->daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } else { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 30a5e9460d00..5a49eb99e5c4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -319,8 +319,8 @@ static int ip_rcv_finish_core(struct net *net, const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); - int err, drop_reason; struct rtable *rt; + int drop_reason; if (ip_can_use_hint(skb, iph, hint)) { drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr, @@ -345,9 +345,10 @@ static int ip_rcv_finish_core(struct net *net, break; case IPPROTO_UDP: if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { - err = udp_v4_early_demux(skb); - if (unlikely(err)) + drop_reason = udp_v4_early_demux(skb); + if (unlikely(drop_reason)) goto drop_error; + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f64f8276a73c..461a9ab540af 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1176,7 +1176,7 @@ restart: goto do_error; while (msg_data_left(msg)) { - ssize_t copy = 0; + int copy = 0; skb = tcp_write_queue_tail(sk); if (skb) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 12c2e6fc85c6..68bc79eb9019 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5181,7 +5181,9 @@ end: skb_condense(skb); skb_set_owner_r(skb, sk); } - tcp_rcvbuf_grow(sk); + /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ + if (sk->sk_socket) + tcp_rcvbuf_grow(sk); } static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ba2ec7c870cc..870a0bd6c2ba 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3525,11 +3525,9 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); - idev = ipv6_find_idev(dev); - if (IS_ERR(idev)) { - pr_debug("%s: add_dev failed\n", __func__); + idev = addrconf_add_dev(dev); + if (IS_ERR(idev)) return; - } /* Generate the IPv6 link-local address using addrconf_addr_gen(), * unless we have an IPv4 GRE device not bound to an IP address and @@ -3543,9 +3541,6 @@ static void addrconf_gre_config(struct net_device *dev) } add_v4_addrs(idev); - - if (dev->flags & IFF_POINTOPOINT) - addrconf_add_mroute(dev); } #endif diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d9d88f2f2831..954795b0fe48 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1959,6 +1959,20 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, ieee80211_sta_init_nss(link_sta); if (params->opmode_notif_used) { + enum nl80211_chan_width width = link->conf->chanreq.oper.width; + + switch (width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_320: /* not VHT, allowed for HE/EHT */ + break; + default: + return -EINVAL; + } + /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7c27f3cd841c..c01634fdba78 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1150,6 +1150,8 @@ static void ieee80211_sdata_init(struct ieee80211_local *local, { sdata->local = local; + INIT_LIST_HEAD(&sdata->key_list); + /* * Initialize the default link, so we can use link_id 0 for non-MLD, * and that continues to work for non-MLD-aware drivers that use just @@ -2210,8 +2212,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ieee80211_init_frag_cache(&sdata->frags); - INIT_LIST_HEAD(&sdata->key_list); - wiphy_delayed_work_init(&sdata->dec_tailroom_needed_wk, ieee80211_delayed_tailroom_dec); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index d40c2bd3b50b..4f7b7d0f64f2 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -93,9 +93,6 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, if (link_id < 0) link_id = 0; - rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf); - rcu_assign_pointer(sdata->link[link_id], link); - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { struct ieee80211_sub_if_data *ap_bss; struct ieee80211_bss_conf *ap_bss_conf; @@ -145,6 +142,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, ieee80211_link_debugfs_add(link); } + + rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf); + rcu_assign_pointer(sdata->link[link_id], link); } void ieee80211_link_stop(struct ieee80211_link_data *link) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2d46d4af60d7..0ed68182f79b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3934,6 +3934,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, lockdep_assert_wiphy(local->hw.wiphy); + if (frame_buf) + memset(frame_buf, 0, IEEE80211_DEAUTH_FRAME_LEN); + if (WARN_ON(!ap_sta)) return; @@ -7195,6 +7198,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee80211_bss_conf *bss_conf = link->conf; struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg; struct ieee80211_mgmt *mgmt = (void *) hdr; + struct ieee80211_ext *ext = NULL; size_t baselen; struct ieee802_11_elems *elems; struct ieee80211_local *local = sdata->local; @@ -7220,7 +7224,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, /* Process beacon from the current BSS */ bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type); if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - struct ieee80211_ext *ext = (void *) mgmt; + ext = (void *)mgmt; variable = ext->u.s1g_beacon.variable + ieee80211_s1g_optional_len(ext->frame_control); } @@ -7407,7 +7411,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) || - ieee80211_is_s1g_short_beacon(mgmt->frame_control)) + (ext && ieee80211_is_s1g_short_beacon(ext->frame_control, + parse_params.start, + parse_params.len))) goto free; link->u.mgd.beacon_crc = ncrc; link->u.mgd.beacon_crc_valid = true; @@ -10699,8 +10705,8 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata, */ for_each_mle_subelement(sub, (const u8 *)elems->ml_epcs, elems->ml_epcs_len) { + struct ieee802_11_elems *link_elems __free(kfree) = NULL; struct ieee80211_link_data *link; - struct ieee802_11_elems *link_elems __free(kfree); u8 *pos = (void *)sub->data; u16 control; ssize_t len; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 96584b39215e..c5e0f7f46004 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -758,7 +758,6 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, { const struct element *elem, *sub; size_t profile_len = 0; - bool found = false; if (!bss || !bss->transmitted_bss) return profile_len; @@ -809,15 +808,14 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, index[2], new_bssid); if (ether_addr_equal(new_bssid, bss->bssid)) { - found = true; elems->bssid_index_len = index[1]; elems->bssid_index = (void *)&index[2]; - break; + return profile_len; } } } - return found ? profile_len : 0; + return 0; } static void diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 27d414efa3fd..e66da651678a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2144,11 +2144,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); wake_up: - - if (local->virt_monitors > 0 && - local->virt_monitors == local->open_count) - ieee80211_add_virtual_monitor(local); - /* * Clear the WLAN_STA_BLOCK_BA flag so new aggregation * sessions can be established after a resume. @@ -2202,6 +2197,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } + if (local->virt_monitors > 0 && + local->virt_monitors == local->open_count) + ieee80211_add_virtual_monitor(local); + if (!suspended) return 0; @@ -3884,7 +3883,7 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, { u64 tsf = drv_get_tsf(local, sdata); u64 dtim_count = 0; - u16 beacon_int = sdata->vif.bss_conf.beacon_int * 1024; + u32 beacon_int = sdata->vif.bss_conf.beacon_int * 1024; u8 dtim_period = sdata->vif.bss_conf.dtim_period; struct ps_data *ps; u8 bcns_from_dtim; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index e8972a857e51..6332a0e06596 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) WARN_ON(skb->sk != NULL); skb->sk = sk; skb->destructor = netlink_skb_destructor; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); sk_mem_charge(sk, skb->truesize); } @@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk) { + DECLARE_WAITQUEUE(wait, current); struct netlink_sock *nlk; + unsigned int rmem; nlk = nlk_sk(sk); + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state))) { - DECLARE_WAITQUEUE(wait, current); - if (!*timeo) { - if (!ssk || netlink_is_kernel(ssk)) - netlink_overrun(sk); - sock_put(sk); - kfree_skb(skb); - return -EAGAIN; - } - - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&nlk->wait, &wait); + if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) && + !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { + netlink_skb_set_owner_r(skb, sk); + return 0; + } - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && - !sock_flag(sk, SOCK_DEAD)) - *timeo = schedule_timeout(*timeo); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&nlk->wait, &wait); + if (!*timeo) { + if (!ssk || netlink_is_kernel(ssk)) + netlink_overrun(sk); sock_put(sk); + kfree_skb(skb); + return -EAGAIN; + } - if (signal_pending(current)) { - kfree_skb(skb); - return sock_intr_errno(*timeo); - } - return 1; + __set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&nlk->wait, &wait); + rmem = atomic_read(&sk->sk_rmem_alloc); + + if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) || + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && + !sock_flag(sk, SOCK_DEAD)) + *timeo = schedule_timeout(*timeo); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&nlk->wait, &wait); + sock_put(sk); + + if (signal_pending(current)) { + kfree_skb(skb); + return sock_intr_errno(*timeo); } - netlink_skb_set_owner_r(skb, sk); - return 0; + + return 1; } static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) @@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { ret = skb->len; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; netlink_deliver_tap_kernel(sk, ssk, skb); @@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check); static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); + unsigned int rmem, rcvbuf; - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + + if ((rmem == skb->truesize || rmem <= rcvbuf) && !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); - return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); + return rmem > (rcvbuf >> 1); } + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); return -1; } @@ -2245,6 +2258,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; + unsigned int rmem, rcvbuf; size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; @@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken) goto errout_skb; } - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) - goto errout_skb; - /* NLMSG_GOODSIZE is small to avoid high order allocations being * required, but it makes sense to _attempt_ a 32KiB allocation * to reduce number of system calls on dump operations, if user @@ -2283,6 +2294,13 @@ static int netlink_dump(struct sock *sk, bool lock_taken) if (!skb) goto errout_skb; + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + if (rmem != skb->truesize && rmem >= rcvbuf) { + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + goto errout_skb; + } + /* Trim skb to allocated size. User is expected to provide buffer as * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at * netlink_recvmsg())). dump will pack as many smaller messages as diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 2dd6bd3a3011..b72bf8a08d48 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -497,22 +497,15 @@ void rose_rt_device_down(struct net_device *dev) t = rose_node; rose_node = rose_node->next; - for (i = 0; i < t->count; i++) { + for (i = t->count - 1; i >= 0; i--) { if (t->neighbour[i] != s) continue; t->count--; - switch (i) { - case 0: - t->neighbour[0] = t->neighbour[1]; - fallthrough; - case 1: - t->neighbour[1] = t->neighbour[2]; - break; - case 2: - break; - } + memmove(&t->neighbour[i], &t->neighbour[i + 1], + sizeof(t->neighbour[0]) * + (t->count - i)); } if (t->count <= 0) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5bd3922c310d..376e33dce8c1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -361,12 +361,15 @@ struct rxrpc_local { struct list_head new_client_calls; /* Newly created client calls need connection */ spinlock_t client_call_lock; /* Lock for ->new_client_calls */ struct sockaddr_rxrpc srx; /* local address */ - /* Provide a kvec table sufficiently large to manage either a DATA - * packet with a maximum set of jumbo subpackets or a PING ACK padded - * out to 64K with zeropages for PMTUD. - */ - struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? - 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; + union { + /* Provide a kvec table sufficiently large to manage either a + * DATA packet with a maximum set of jumbo subpackets or a PING + * ACK padded out to 64K with zeropages for PMTUD. + */ + struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? + 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; + struct bio_vec bvec[3 + 16]; + }; }; /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a4b363b47cca..49fccee1a726 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -149,6 +149,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, id_in_use: write_unlock(&rx->call_lock); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EBADSLT); rxrpc_cleanup_call(call); _leave(" = -EBADSLT"); return -EBADSLT; @@ -254,6 +255,9 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, unsigned short call_tail, conn_tail, peer_tail; unsigned short call_count, conn_count; + if (!b) + return NULL; + /* #calls >= #conns >= #peers must hold true. */ call_head = smp_load_acquire(&b->call_backlog_head); call_tail = b->call_backlog_tail; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0af19bcdc80a..ef7b3096c95e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -924,7 +924,7 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response { struct rxrpc_skb_priv *sp = rxrpc_skb(response); struct scatterlist sg[16]; - struct bio_vec bvec[16]; + struct bio_vec *bvec = conn->local->bvec; struct msghdr msg; size_t len = sp->resp.len; __be32 wserial; @@ -938,6 +938,9 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response if (ret < 0) goto fail; nr_sg = ret; + ret = -EIO; + if (WARN_ON_ONCE(nr_sg > ARRAY_SIZE(conn->local->bvec))) + goto fail; for (int i = 0; i < nr_sg; i++) bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c5e3673aadbe..d7c767b861a4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -336,17 +336,22 @@ out: return q; } -static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) +static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid, + struct netlink_ext_ack *extack) { unsigned long cl; const struct Qdisc_class_ops *cops = p->ops->cl_ops; - if (cops == NULL) - return NULL; + if (cops == NULL) { + NL_SET_ERR_MSG(extack, "Parent qdisc is not classful"); + return ERR_PTR(-EOPNOTSUPP); + } cl = cops->find(p, classid); - if (cl == 0) - return NULL; + if (cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class not found"); + return ERR_PTR(-ENOENT); + } return cops->leaf(p, cl); } @@ -596,16 +601,6 @@ out: qdisc_skb_cb(skb)->pkt_len = pkt_len; } -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) -{ - if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { - pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", - txt, qdisc->ops->id, qdisc->handle >> 16); - qdisc->flags |= TCQ_F_WARN_NONWC; - } -} -EXPORT_SYMBOL(qdisc_warn_nonwc); - static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, @@ -780,15 +775,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { - bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; bool notify; int drops; - if (n == 0 && len == 0) - return; drops = max_t(int, n, 0); rcu_read_lock(); while ((parentid = sch->parent)) { @@ -797,17 +789,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) if (sch->flags & TCQ_F_NOPARENT) break; - /* Notify parent qdisc only if child qdisc becomes empty. - * - * If child was empty even before update then backlog - * counter is screwed and we skip notification because - * parent class is already passive. - * - * If the original child was offloaded then it is allowed - * to be seem as empty, so the parent is notified anyway. - */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n && - !qdisc_is_offloaded); + /* Notify parent qdisc only if child qdisc becomes empty. */ + notify = !sch->q.qlen; /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { @@ -816,6 +799,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) } cops = sch->ops->cl_ops; if (notify && cops->qlen_notify) { + /* Note that qlen_notify must be idempotent as it may get called + * multiple times. + */ cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); } @@ -1499,7 +1485,7 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); } else if (dev_ingress_queue(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } @@ -1510,6 +1496,8 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); return -ENOENT; } + if (IS_ERR(q)) + return PTR_ERR(q); if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Invalid handle"); @@ -1611,7 +1599,9 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); + if (IS_ERR(q)) + return PTR_ERR(q); } else if (dev_ingress_queue_create(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5a7745170e84..d8fd35da32a7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -835,22 +835,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) } } -static unsigned int -qdisc_peek_len(struct Qdisc *sch) -{ - struct sk_buff *skb; - unsigned int len; - - skb = sch->ops->peek(sch); - if (unlikely(skb == NULL)) { - qdisc_warn_nonwc("qdisc_peek_len", sch); - return 0; - } - len = qdisc_pkt_len(skb); - - return len; -} - static void hfsc_adjust_levels(struct hfsc_class *cl) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bf1282cb22eb..bcce36608871 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -989,7 +989,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ list_del_init(&cl->alist); - else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { + else if (cl->deficit < qdisc_peek_len(cl->qdisc)) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 0fa244f16876..7b943fbafcc3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1724,7 +1724,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr) maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[0], seq, p, len); /* RFC 2203 5.3.3.1 - compute the checksum of each sequence number in the cache */ while (unlikely(maj_stat == GSS_S_BAD_SIG && i < task->tk_rqstp->rq_seqno_count)) - maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i], seq, p, len); + maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i++], seq, p, len); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 8ee0c07d00e9..ffe577bf6b51 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -704,8 +704,10 @@ static void tipc_topsrv_stop(struct net *net) for (id = 0; srv->idr_in_use; id++) { con = idr_find(&srv->conn_idr, id); if (con) { + conn_get(con); spin_unlock_bh(&srv->idr_lock); tipc_conn_close(con); + conn_put(con); spin_lock_bh(&srv->idr_lock); } } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 22e170fb5dda..52b155123985 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -660,6 +660,11 @@ static void unix_sock_destructor(struct sock *sk) #endif } +static unsigned int unix_skb_len(const struct sk_buff *skb) +{ + return skb->len - UNIXCB(skb).consumed; +} + static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); @@ -694,10 +699,16 @@ static void unix_release_sock(struct sock *sk, int embrion) if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb && !unix_skb_len(skb)) + skb = skb_peek_next(skb, &sk->sk_receive_queue); +#endif unix_state_lock(skpair); /* No more writes */ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); - if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion) + if (skb || embrion) WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); @@ -2661,11 +2672,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, return timeo; } -static unsigned int unix_skb_len(const struct sk_buff *skb) -{ - return skb->len - UNIXCB(skb).consumed; -} - struct unix_stream_read_state { int (*recv_actor)(struct sk_buff *, int, int, struct unix_stream_read_state *); @@ -2680,11 +2686,11 @@ struct unix_stream_read_state { #if IS_ENABLED(CONFIG_AF_UNIX_OOB) static int unix_stream_recv_urg(struct unix_stream_read_state *state) { + struct sk_buff *oob_skb, *read_skb = NULL; struct socket *sock = state->socket; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int chunk = 1; - struct sk_buff *oob_skb; mutex_lock(&u->iolock); unix_state_lock(sk); @@ -2699,9 +2705,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) oob_skb = u->oob_skb; - if (!(state->flags & MSG_PEEK)) + if (!(state->flags & MSG_PEEK)) { WRITE_ONCE(u->oob_skb, NULL); + if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue && + !unix_skb_len(oob_skb->prev)) { + read_skb = oob_skb->prev; + __skb_unlink(read_skb, &sk->sk_receive_queue); + } + } + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); @@ -2712,6 +2725,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) mutex_unlock(&u->iolock); + consume_skb(read_skb); + if (chunk < 0) return -EFAULT; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2e7a3034e965..1053662725f8 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -407,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { + lockdep_assert_held(&vsock_register_mutex); + if (!transport_local) return false; @@ -464,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) remote_flags = vsk->remote_addr.svm_flags; + mutex_lock(&vsock_register_mutex); + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; @@ -479,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_h2g; break; default: - return -ESOCKTNOSUPPORT; + ret = -ESOCKTNOSUPPORT; + goto err; } if (vsk->transport) { - if (vsk->transport == new_transport) - return 0; + if (vsk->transport == new_transport) { + ret = 0; + goto err; + } /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we @@ -508,8 +515,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) /* We increase the module refcnt to prevent the transport unloading * while there are open sockets assigned to it. */ - if (!new_transport || !try_module_get(new_transport->module)) - return -ENODEV; + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || @@ -528,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->transport = new_transport; return 0; +err: + mutex_unlock(&vsock_register_mutex); + return ret; } EXPORT_SYMBOL_GPL(vsock_assign_transport); +/* + * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks. + * Otherwise we may race with module removal. Do not use on `vsk->transport`. + */ +static u32 vsock_registered_transport_cid(const struct vsock_transport **transport) +{ + u32 cid = VMADDR_CID_ANY; + + mutex_lock(&vsock_register_mutex); + if (*transport) + cid = (*transport)->get_local_cid(); + mutex_unlock(&vsock_register_mutex); + + return cid; +} + bool vsock_find_cid(unsigned int cid) { - if (transport_g2h && cid == transport_g2h->get_local_cid()) + if (cid == vsock_registered_transport_cid(&transport_g2h)) return true; if (transport_h2g && cid == VMADDR_CID_HOST) @@ -2536,18 +2570,19 @@ static long vsock_dev_do_ioctl(struct file *filp, unsigned int cmd, void __user *ptr) { u32 __user *p = ptr; - u32 cid = VMADDR_CID_ANY; int retval = 0; + u32 cid; switch (cmd) { case IOCTL_VM_SOCKETS_GET_LOCAL_CID: /* To be compatible with the VMCI behavior, we prioritize the * guest CID instead of well-know host CID (VMADDR_CID_HOST). */ - if (transport_g2h) - cid = transport_g2h->get_local_cid(); - else if (transport_h2g) - cid = transport_h2g->get_local_cid(); + cid = vsock_registered_transport_cid(&transport_g2h); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_h2g); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_local); if (put_user(cid, p) != 0) retval = -EFAULT; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b370070194fa..7eccd6708d66 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, u16 proto, struct vmci_handle handle) { + memset(pkt, 0, sizeof(*pkt)); + /* We register the stream control handler as an any cid handle so we * must always send from a source address of VMADDR_CID_ANY */ @@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, pkt->type = type; pkt->src_port = src->svm_port; pkt->dst_port = dst->svm_port; - memset(&pkt->proto, 0, sizeof(pkt->proto)); - memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); switch (pkt->type) { case VMCI_TRANSPORT_PACKET_TYPE_INVALID: diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 85f139016da2..50202d170f3a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -229,6 +229,7 @@ static int validate_beacon_head(const struct nlattr *attr, unsigned int len = nla_len(attr); const struct element *elem; const struct ieee80211_mgmt *mgmt = (void *)data; + const struct ieee80211_ext *ext; unsigned int fixedlen, hdrlen; bool s1g_bcn; @@ -237,8 +238,10 @@ static int validate_beacon_head(const struct nlattr *attr, s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control); if (s1g_bcn) { - fixedlen = offsetof(struct ieee80211_ext, - u.s1g_beacon.variable); + ext = (struct ieee80211_ext *)mgmt; + fixedlen = + offsetof(struct ieee80211_ext, u.s1g_beacon.variable) + + ieee80211_s1g_optional_len(ext->frame_control); hdrlen = offsetof(struct ieee80211_ext, u.s1g_beacon); } else { fixedlen = offsetof(struct ieee80211_mgmt, diff --git a/net/wireless/util.c b/net/wireless/util.c index ed868c0f7ca8..1ad5a6bdfd75 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -820,6 +820,52 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr) } EXPORT_SYMBOL(ieee80211_is_valid_amsdu); + +/* + * Detects if an MSDU frame was maliciously converted into an A-MSDU + * frame by an adversary. This is done by parsing the received frame + * as if it were a regular MSDU, even though the A-MSDU flag is set. + * + * For non-mesh interfaces, detection involves checking whether the + * payload, when interpreted as an MSDU, begins with a valid RFC1042 + * header. This is done by comparing the A-MSDU subheader's destination + * address to the start of the RFC1042 header. + * + * For mesh interfaces, the MSDU includes a 6-byte Mesh Control field + * and an optional variable-length Mesh Address Extension field before + * the RFC1042 header. The position of the RFC1042 header must therefore + * be calculated based on the mesh header length. + * + * Since this function intentionally parses an A-MSDU frame as an MSDU, + * it only assumes that the A-MSDU subframe header is present, and + * beyond this it performs its own bounds checks under the assumption + * that the frame is instead parsed as a non-aggregated MSDU. + */ +static bool +is_amsdu_aggregation_attack(struct ethhdr *eth, struct sk_buff *skb, + enum nl80211_iftype iftype) +{ + int offset; + + /* Non-mesh case can be directly compared */ + if (iftype != NL80211_IFTYPE_MESH_POINT) + return ether_addr_equal(eth->h_dest, rfc1042_header); + + offset = __ieee80211_get_mesh_hdrlen(eth->h_dest[0]); + if (offset == 6) { + /* Mesh case with empty address extension field */ + return ether_addr_equal(eth->h_source, rfc1042_header); + } else if (offset + ETH_ALEN <= skb->len) { + /* Mesh case with non-empty address extension field */ + u8 temp[ETH_ALEN]; + + skb_copy_bits(skb, offset, temp, ETH_ALEN); + return ether_addr_equal(temp, rfc1042_header); + } + + return false; +} + void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, const unsigned int extra_headroom, @@ -861,8 +907,10 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, /* the last MSDU has no padding */ if (subframe_len > remaining) goto purge; - /* mitigate A-MSDU aggregation injection attacks */ - if (ether_addr_equal(hdr.eth.h_dest, rfc1042_header)) + /* mitigate A-MSDU aggregation injection attacks, to be + * checked when processing first subframe (offset == 0). + */ + if (offset == 0 && is_amsdu_aggregation_attack(&hdr.eth, skb, iftype)) goto purge; offset += sizeof(struct ethhdr); diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 624d7a4c83ea..14c1aa402951 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -66,7 +66,7 @@ impl<T: drm::Driver> Device<T> { open: Some(drm::File::<T::File>::open_callback), postclose: Some(drm::File::<T::File>::postclose_callback), unload: None, - release: None, + release: Some(Self::release), master_set: None, master_drop: None, debugfs_init: None, @@ -162,6 +162,16 @@ impl<T: drm::Driver> Device<T> { // SAFETY: `ptr` is valid by the safety requirements of this function. unsafe { &*ptr.cast() } } + + extern "C" fn release(ptr: *mut bindings::drm_device) { + // SAFETY: `ptr` is a valid pointer to a `struct drm_device` and embedded in `Self`. + let this = unsafe { Self::from_drm_device(ptr) }; + + // SAFETY: + // - When `release` runs it is guaranteed that there is no further access to `this`. + // - `this` is valid for dropping. + unsafe { core::ptr::drop_in_place(this) }; + } } impl<T: drm::Driver> Deref for Device<T> { diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs index acb638086131..af93d46d03d3 100644 --- a/rust/kernel/drm/driver.rs +++ b/rust/kernel/drm/driver.rs @@ -10,7 +10,6 @@ use crate::{ drm, error::{to_result, Result}, prelude::*, - str::CStr, types::ARef, }; use macros::vtable; diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c index 36d2cd933f5a..c94254b77fc9 100644 --- a/samples/damon/mtier.c +++ b/samples/damon/mtier.c @@ -164,8 +164,12 @@ static int damon_sample_mtier_enable_store( if (enable == enabled) return 0; - if (enable) - return damon_sample_mtier_start(); + if (enable) { + err = damon_sample_mtier_start(); + if (err) + enable = false; + return err; + } damon_sample_mtier_stop(); return 0; } diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c index 056b1b21a0fe..5597e6a08ab2 100644 --- a/samples/damon/prcl.c +++ b/samples/damon/prcl.c @@ -122,8 +122,12 @@ static int damon_sample_prcl_enable_store( if (enable == enabled) return 0; - if (enable) - return damon_sample_prcl_start(); + if (enable) { + err = damon_sample_prcl_start(); + if (err) + enable = false; + return err; + } damon_sample_prcl_stop(); return 0; } diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c index 11be25803274..e20238a249e7 100644 --- a/samples/damon/wsse.c +++ b/samples/damon/wsse.c @@ -102,8 +102,12 @@ static int damon_sample_wsse_enable_store( if (enable == enabled) return 0; - if (enable) - return damon_sample_wsse_start(); + if (enable) { + err = damon_sample_wsse_start(); + if (err) + enable = false; + return err; + } damon_sample_wsse_stop(); return 0; } diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index fd6bd69c5096..f795302ddfa8 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -20,6 +20,7 @@ #include <linux/of_fdt.h> #include <linux/page_ext.h> #include <linux/radix-tree.h> +#include <linux/maple_tree.h> #include <linux/slab.h> #include <linux/threads.h> #include <linux/vmalloc.h> @@ -93,6 +94,12 @@ LX_GDBPARSED(RADIX_TREE_MAP_SIZE) LX_GDBPARSED(RADIX_TREE_MAP_SHIFT) LX_GDBPARSED(RADIX_TREE_MAP_MASK) +/* linux/maple_tree.h */ +LX_VALUE(MAPLE_NODE_SLOTS) +LX_VALUE(MAPLE_RANGE64_SLOTS) +LX_VALUE(MAPLE_ARANGE64_SLOTS) +LX_GDBPARSED(MAPLE_NODE_MASK) + /* linux/vmalloc.h */ LX_VALUE(VM_IOREMAP) LX_VALUE(VM_ALLOC) diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py index 616a5f26377a..f4f715a8f0e3 100644 --- a/scripts/gdb/linux/interrupts.py +++ b/scripts/gdb/linux/interrupts.py @@ -7,7 +7,7 @@ import gdb from linux import constants from linux import cpus from linux import utils -from linux import radixtree +from linux import mapletree irq_desc_type = utils.CachedType("struct irq_desc") @@ -23,12 +23,12 @@ def irqd_is_level(desc): def show_irq_desc(prec, irq): text = "" - desc = radixtree.lookup(gdb.parse_and_eval("&irq_desc_tree"), irq) + desc = mapletree.mtree_load(gdb.parse_and_eval("&sparse_irqs"), irq) if desc is None: return text - desc = desc.cast(irq_desc_type.get_type()) - if desc is None: + desc = desc.cast(irq_desc_type.get_type().pointer()) + if desc == 0: return text if irq_settings_is_hidden(desc): @@ -110,7 +110,7 @@ def x86_show_mce(prec, var, pfx, desc): pvar = gdb.parse_and_eval(var) text = "%*s: " % (prec, pfx) for cpu in cpus.each_online_cpu(): - text += "%10u " % (cpus.per_cpu(pvar, cpu)) + text += "%10u " % (cpus.per_cpu(pvar, cpu).dereference()) text += " %s\n" % (desc) return text @@ -142,7 +142,7 @@ def x86_show_interupts(prec): if constants.LX_CONFIG_X86_MCE: text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions") - text == x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") + text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") text += show_irq_err_count(prec) @@ -221,8 +221,8 @@ class LxInterruptList(gdb.Command): gdb.write("CPU%-8d" % cpu) gdb.write("\n") - if utils.gdb_eval_or_none("&irq_desc_tree") is None: - return + if utils.gdb_eval_or_none("&sparse_irqs") is None: + raise gdb.GdbError("Unable to find the sparse IRQ tree, is CONFIG_SPARSE_IRQ enabled?") for irq in range(nr_irqs): gdb.write(show_irq_desc(prec, irq)) diff --git a/scripts/gdb/linux/mapletree.py b/scripts/gdb/linux/mapletree.py new file mode 100644 index 000000000000..d52d51c0a03f --- /dev/null +++ b/scripts/gdb/linux/mapletree.py @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Maple tree helpers +# +# Copyright (c) 2025 Broadcom +# +# Authors: +# Florian Fainelli <florian.fainelli@broadcom.com> + +import gdb + +from linux import utils +from linux import constants +from linux import xarray + +maple_tree_root_type = utils.CachedType("struct maple_tree") +maple_node_type = utils.CachedType("struct maple_node") +maple_enode_type = utils.CachedType("void") + +maple_dense = 0 +maple_leaf_64 = 1 +maple_range_64 = 2 +maple_arange_64 = 3 + +class Mas(object): + ma_active = 0 + ma_start = 1 + ma_root = 2 + ma_none = 3 + ma_pause = 4 + ma_overflow = 5 + ma_underflow = 6 + ma_error = 7 + + def __init__(self, mt, first, end): + if mt.type == maple_tree_root_type.get_type().pointer(): + self.tree = mt.dereference() + elif mt.type != maple_tree_root_type.get_type(): + raise gdb.GdbError("must be {} not {}" + .format(maple_tree_root_type.get_type().pointer(), mt.type)) + self.tree = mt + self.index = first + self.last = end + self.node = None + self.status = self.ma_start + self.min = 0 + self.max = -1 + + def is_start(self): + # mas_is_start() + return self.status == self.ma_start + + def is_ptr(self): + # mas_is_ptr() + return self.status == self.ma_root + + def is_none(self): + # mas_is_none() + return self.status == self.ma_none + + def root(self): + # mas_root() + return self.tree['ma_root'].cast(maple_enode_type.get_type().pointer()) + + def start(self): + # mas_start() + if self.is_start() is False: + return None + + self.min = 0 + self.max = ~0 + + while True: + self.depth = 0 + root = self.root() + if xarray.xa_is_node(root): + self.depth = 0 + self.status = self.ma_active + self.node = mte_safe_root(root) + self.offset = 0 + if mte_dead_node(self.node) is True: + continue + + return None + + self.node = None + # Empty tree + if root is None: + self.status = self.ma_none + self.offset = constants.LX_MAPLE_NODE_SLOTS + return None + + # Single entry tree + self.status = self.ma_root + self.offset = constants.LX_MAPLE_NODE_SLOTS + + if self.index != 0: + return None + + return root + + return None + + def reset(self): + # mas_reset() + self.status = self.ma_start + self.node = None + +def mte_safe_root(node): + if node.type != maple_enode_type.get_type().pointer(): + raise gdb.GdbError("{} must be {} not {}" + .format(mte_safe_root.__name__, maple_enode_type.get_type().pointer(), node.type)) + ulong_type = utils.get_ulong_type() + indirect_ptr = node.cast(ulong_type) & ~0x2 + val = indirect_ptr.cast(maple_enode_type.get_type().pointer()) + return val + +def mte_node_type(entry): + ulong_type = utils.get_ulong_type() + val = None + if entry.type == maple_enode_type.get_type().pointer(): + val = entry.cast(ulong_type) + elif entry.type == ulong_type: + val = entry + else: + raise gdb.GdbError("{} must be {} not {}" + .format(mte_node_type.__name__, maple_enode_type.get_type().pointer(), entry.type)) + return (val >> 0x3) & 0xf + +def ma_dead_node(node): + if node.type != maple_node_type.get_type().pointer(): + raise gdb.GdbError("{} must be {} not {}" + .format(ma_dead_node.__name__, maple_node_type.get_type().pointer(), node.type)) + ulong_type = utils.get_ulong_type() + parent = node['parent'] + indirect_ptr = node['parent'].cast(ulong_type) & ~constants.LX_MAPLE_NODE_MASK + return indirect_ptr == node + +def mte_to_node(enode): + ulong_type = utils.get_ulong_type() + if enode.type == maple_enode_type.get_type().pointer(): + indirect_ptr = enode.cast(ulong_type) + elif enode.type == ulong_type: + indirect_ptr = enode + else: + raise gdb.GdbError("{} must be {} not {}" + .format(mte_to_node.__name__, maple_enode_type.get_type().pointer(), enode.type)) + indirect_ptr = indirect_ptr & ~constants.LX_MAPLE_NODE_MASK + return indirect_ptr.cast(maple_node_type.get_type().pointer()) + +def mte_dead_node(enode): + if enode.type != maple_enode_type.get_type().pointer(): + raise gdb.GdbError("{} must be {} not {}" + .format(mte_dead_node.__name__, maple_enode_type.get_type().pointer(), enode.type)) + node = mte_to_node(enode) + return ma_dead_node(node) + +def ma_is_leaf(tp): + result = tp < maple_range_64 + return tp < maple_range_64 + +def mt_pivots(t): + if t == maple_dense: + return 0 + elif t == maple_leaf_64 or t == maple_range_64: + return constants.LX_MAPLE_RANGE64_SLOTS - 1 + elif t == maple_arange_64: + return constants.LX_MAPLE_ARANGE64_SLOTS - 1 + +def ma_pivots(node, t): + if node.type != maple_node_type.get_type().pointer(): + raise gdb.GdbError("{}: must be {} not {}" + .format(ma_pivots.__name__, maple_node_type.get_type().pointer(), node.type)) + if t == maple_arange_64: + return node['ma64']['pivot'] + elif t == maple_leaf_64 or t == maple_range_64: + return node['mr64']['pivot'] + else: + return None + +def ma_slots(node, tp): + if node.type != maple_node_type.get_type().pointer(): + raise gdb.GdbError("{}: must be {} not {}" + .format(ma_slots.__name__, maple_node_type.get_type().pointer(), node.type)) + if tp == maple_arange_64: + return node['ma64']['slot'] + elif tp == maple_range_64 or tp == maple_leaf_64: + return node['mr64']['slot'] + elif tp == maple_dense: + return node['slot'] + else: + return None + +def mt_slot(mt, slots, offset): + ulong_type = utils.get_ulong_type() + return slots[offset].cast(ulong_type) + +def mtree_lookup_walk(mas): + ulong_type = utils.get_ulong_type() + n = mas.node + + while True: + node = mte_to_node(n) + tp = mte_node_type(n) + pivots = ma_pivots(node, tp) + end = mt_pivots(tp) + offset = 0 + while True: + if pivots[offset] >= mas.index: + break + if offset >= end: + break + offset += 1 + + slots = ma_slots(node, tp) + n = mt_slot(mas.tree, slots, offset) + if ma_dead_node(node) is True: + mas.reset() + return None + break + + if ma_is_leaf(tp) is True: + break + + return n + +def mtree_load(mt, index): + ulong_type = utils.get_ulong_type() + # MT_STATE(...) + mas = Mas(mt, index, index) + entry = None + + while True: + entry = mas.start() + if mas.is_none(): + return None + + if mas.is_ptr(): + if index != 0: + entry = None + return entry + + entry = mtree_lookup_walk(mas) + if entry is None and mas.is_start(): + continue + else: + break + + if xarray.xa_is_zero(entry): + return None + + return entry diff --git a/scripts/gdb/linux/vfs.py b/scripts/gdb/linux/vfs.py index c77b9ce75f6d..9e921b645a68 100644 --- a/scripts/gdb/linux/vfs.py +++ b/scripts/gdb/linux/vfs.py @@ -22,7 +22,7 @@ def dentry_name(d): if parent == d or parent == 0: return "" p = dentry_name(d['d_parent']) + "/" - return p + d['d_iname'].string() + return p + d['d_name']['name'].string() class DentryName(gdb.Function): """Return string of the full path of a dentry. diff --git a/scripts/gdb/linux/xarray.py b/scripts/gdb/linux/xarray.py new file mode 100644 index 000000000000..f4477b5def75 --- /dev/null +++ b/scripts/gdb/linux/xarray.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Xarray helpers +# +# Copyright (c) 2025 Broadcom +# +# Authors: +# Florian Fainelli <florian.fainelli@broadcom.com> + +import gdb + +from linux import utils +from linux import constants + +def xa_is_internal(entry): + ulong_type = utils.get_ulong_type() + return ((entry.cast(ulong_type) & 3) == 2) + +def xa_mk_internal(v): + return ((v << 2) | 2) + +def xa_is_zero(entry): + ulong_type = utils.get_ulong_type() + return entry.cast(ulong_type) == xa_mk_internal(257) + +def xa_is_node(entry): + ulong_type = utils.get_ulong_type() + return xa_is_internal(entry) and (entry.cast(ulong_type) > 4096) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 7becf3808818..d185754c2786 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1909,11 +1909,17 @@ retry: goto out_unlock; } /* Obtain the sid for the context. */ - rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); - if (rc == -ESTALE) { - rcu_read_unlock(); - context_destroy(&newcontext); - goto retry; + if (context_equal(scontext, &newcontext)) + *out_sid = ssid; + else if (context_equal(tcontext, &newcontext)) + *out_sid = tsid; + else { + rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + context_destroy(&newcontext); + goto retry; + } } out_unlock: rcu_read_unlock(); diff --git a/sound/isa/ad1816a/ad1816a.c b/sound/isa/ad1816a/ad1816a.c index 99006dc4777e..5c9e2d41d900 100644 --- a/sound/isa/ad1816a/ad1816a.c +++ b/sound/isa/ad1816a/ad1816a.c @@ -98,7 +98,7 @@ static int snd_card_ad1816a_pnp(int dev, struct pnp_card_link *card, pdev = pnp_request_card_device(card, id->devs[1].id, NULL); if (pdev == NULL) { mpu_port[dev] = -1; - dev_warn(&pdev->dev, "MPU401 device busy, skipping.\n"); + pr_warn("MPU401 device busy, skipping.\n"); return 0; } diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 08308231b4ed..9a7793eb16e9 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4551,7 +4551,9 @@ HDA_CODEC_ENTRY(0x10de002e, "Tegra186 HDMI/DP1", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de002f, "Tegra194 HDMI/DP2", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de0030, "Tegra194 HDMI/DP3", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de0031, "Tegra234 HDMI/DP", patch_tegra234_hdmi), +HDA_CODEC_ENTRY(0x10de0033, "SoC 33 HDMI/DP", patch_tegra234_hdmi), HDA_CODEC_ENTRY(0x10de0034, "Tegra264 HDMI/DP", patch_tegra234_hdmi), +HDA_CODEC_ENTRY(0x10de0035, "SoC 35 HDMI/DP", patch_tegra234_hdmi), HDA_CODEC_ENTRY(0x10de0040, "GPU 40 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0041, "GPU 41 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0042, "GPU 42 HDMI/DP", patch_nvhdmi), @@ -4590,15 +4592,32 @@ HDA_CODEC_ENTRY(0x10de0097, "GPU 97 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0098, "GPU 98 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0099, "GPU 99 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009a, "GPU 9a HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009b, "GPU 9b HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009c, "GPU 9c HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009d, "GPU 9d HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009e, "GPU 9e HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009f, "GPU 9f HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a0, "GPU a0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a1, "GPU a1 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a3, "GPU a3 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a4, "GPU a4 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a5, "GPU a5 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a6, "GPU a6 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a7, "GPU a7 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a8, "GPU a8 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a9, "GPU a9 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00aa, "GPU aa HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ab, "GPU ab HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ad, "GPU ad HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ae, "GPU ae HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00af, "GPU af HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00b0, "GPU b0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00b1, "GPU b1 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c0, "GPU c0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c1, "GPU c1 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c3, "GPU c3 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c4, "GPU c4 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c5, "GPU c5 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x67663d82, "Arise 82 HDMI/DP", patch_gf_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2e1618494c20..060db37eab83 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2656,6 +2656,7 @@ static const struct hda_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK(0x1558, 0x3702, "Clevo X370SN[VW]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x5802, "Clevo X58[05]WN[RST]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), @@ -6609,6 +6610,7 @@ static void alc294_fixup_bass_speaker_15(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PRE_PROBE) { static const hda_nid_t conn[] = { 0x02, 0x03 }; snd_hda_override_conn_list(codec, 0x15, ARRAY_SIZE(conn), conn); + snd_hda_gen_add_micmute_led_cdev(codec, NULL); } } @@ -10737,6 +10739,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), + SND_PCI_QUIRK(0x103c, 0x898a, "HP Pavilion 15-eg100", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8991, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), @@ -10878,6 +10881,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d07, "HP Victus 15-fb2xxx (MB 8D07)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED), @@ -10907,7 +10911,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8def, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df0, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df1, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8dfb, "HP EliteBook 6 G1a 14", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8dfc, "HP EliteBook 645 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8dfd, "HP EliteBook 6 G1a 16", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8dfe, "HP EliteBook 665 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e11, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e12, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), @@ -11026,6 +11032,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1df3, "ASUS UM5606WA", ALC294_FIXUP_BASS_SPEAKER_15), SND_PCI_QUIRK(0x1043, 0x1264, "ASUS UM5606KA", ALC294_FIXUP_BASS_SPEAKER_15), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1e10, "ASUS VivoBook X507UAR", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e1f, "ASUS Vivobook 15 X1504VAP", ALC2XX_FIXUP_HEADSET_MIC), @@ -11034,6 +11041,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1e93, "ASUS ExpertBook B9403CVAR", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1eb3, "ASUS Ally RCLA72", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1ed3, "ASUS HN7306W", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), @@ -11135,6 +11143,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x2624, "Clevo L240TU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x28c1, "Clevo V370VND", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x35a1, "Clevo V3[56]0EN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x35b1, "Clevo V3[57]0WN[MNP]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11162,6 +11172,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x51b1, "Clevo NS50AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x51b3, "Clevo NS70AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5700, "Clevo X560WN[RST]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11201,6 +11212,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa741, "Clevo V54x_6x_TNE", ALC245_FIXUP_CLEVO_NOISY_MIC), + SND_PCI_QUIRK(0x1558, 0xa743, "Clevo V54x_6x_TU", ALC245_FIXUP_CLEVO_NOISY_MIC), SND_PCI_QUIRK(0x1558, 0xa763, "Clevo V54x_6x_TU", ALC245_FIXUP_CLEVO_NOISY_MIC), SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11414,6 +11426,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), SND_PCI_QUIRK(0x2782, 0x1407, "Positivo P15X", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC), + SND_PCI_QUIRK(0x2782, 0x1409, "Positivo K116J", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x1701, "Infinix Y4 Max", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1705, "MEDION E15433", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), diff --git a/sound/pci/hda/tas2781_hda.c b/sound/pci/hda/tas2781_hda.c index 5f1d4b3e9688..34217ce9f28e 100644 --- a/sound/pci/hda/tas2781_hda.c +++ b/sound/pci/hda/tas2781_hda.c @@ -44,7 +44,7 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) TASDEVICE_REG(0, 0x13, 0x70), TASDEVICE_REG(0, 0x18, 0x7c), }; - unsigned int crc, oft; + unsigned int crc, oft, node_num; unsigned char *buf; int i, j, k, l; @@ -80,8 +80,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) dev_err(p->dev, "%s: CRC error\n", __func__); return; } + node_num = tmp_val[1]; - for (j = 0, k = 0; j < tmp_val[1]; j++) { + for (j = 0, k = 0; j < node_num; j++) { oft = j * 6 + 3; if (tmp_val[oft] == TASDEV_UEFI_CALI_REG_ADDR_FLG) { for (i = 0; i < TASDEV_CALIB_N; i++) { @@ -99,8 +100,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) } data[l] = k; + oft++; for (i = 0; i < TASDEV_CALIB_N * 4; i++) - data[l + i] = data[4 * oft + i]; + data[l + i + 1] = data[4 * oft + i]; k++; } } diff --git a/sound/soc/amd/ps/acp63.h b/sound/soc/amd/ps/acp63.h index 85feae45c44c..d7c994e26e4d 100644 --- a/sound/soc/amd/ps/acp63.h +++ b/sound/soc/amd/ps/acp63.h @@ -334,6 +334,8 @@ struct acp_hw_ops { * @addr: pci ioremap address * @reg_range: ACP reigister range * @acp_rev: ACP PCI revision id + * @acp_sw_pad_keeper_en: store acp SoundWire pad keeper enable register value + * @acp_pad_pulldown_ctrl: store acp pad pulldown control register value * @acp63_sdw0-dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire * manager-SW0 instance * @acp63_sdw_dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire @@ -367,6 +369,8 @@ struct acp63_dev_data { u32 addr; u32 reg_range; u32 acp_rev; + u32 acp_sw_pad_keeper_en; + u32 acp_pad_pulldown_ctrl; u16 acp63_sdw0_dma_intr_stat[ACP63_SDW0_DMA_MAX_STREAMS]; u16 acp63_sdw1_dma_intr_stat[ACP63_SDW1_DMA_MAX_STREAMS]; u16 acp70_sdw0_dma_intr_stat[ACP70_SDW0_DMA_MAX_STREAMS]; diff --git a/sound/soc/amd/ps/ps-common.c b/sound/soc/amd/ps/ps-common.c index 1c89fb5fe1da..7b4966b75dc6 100644 --- a/sound/soc/amd/ps/ps-common.c +++ b/sound/soc/amd/ps/ps-common.c @@ -160,6 +160,8 @@ static int __maybe_unused snd_acp63_suspend(struct device *dev) adata = dev_get_drvdata(dev); if (adata->is_sdw_dev) { + adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); adata->sdw_en_stat = check_acp_sdw_enable_status(adata); if (adata->sdw_en_stat) { writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL); @@ -197,6 +199,7 @@ static int __maybe_unused snd_acp63_runtime_resume(struct device *dev) static int __maybe_unused snd_acp63_resume(struct device *dev) { struct acp63_dev_data *adata; + u32 acp_sw_pad_keeper_en; int ret; adata = dev_get_drvdata(dev); @@ -209,6 +212,12 @@ static int __maybe_unused snd_acp63_resume(struct device *dev) if (ret) dev_err(dev, "ACP init failed\n"); + acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en); + if (!acp_sw_pad_keeper_en) { + writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); + } return ret; } @@ -408,6 +417,8 @@ static int __maybe_unused snd_acp70_suspend(struct device *dev) adata = dev_get_drvdata(dev); if (adata->is_sdw_dev) { + adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); adata->sdw_en_stat = check_acp_sdw_enable_status(adata); if (adata->sdw_en_stat) { writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL); @@ -445,6 +456,7 @@ static int __maybe_unused snd_acp70_runtime_resume(struct device *dev) static int __maybe_unused snd_acp70_resume(struct device *dev) { struct acp63_dev_data *adata; + u32 acp_sw_pad_keeper_en; int ret; adata = dev_get_drvdata(dev); @@ -459,6 +471,12 @@ static int __maybe_unused snd_acp70_resume(struct device *dev) if (ret) dev_err(dev, "ACP init failed\n"); + acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en); + if (!acp_sw_pad_keeper_en) { + writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); + } return ret; } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 98022e5fd428..97e340140d0c 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -356,6 +356,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "RB"), + DMI_MATCH(DMI_PRODUCT_NAME, "Nitro ANV15-41"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "83J2"), } @@ -363,6 +370,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83J3"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"), } diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index d0831d609584..ba653f6ccfae 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -980,7 +980,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) break; default: dev_err(cs35l56_base->dev, "Unknown device %x\n", devid); - return ret; + return -ENODEV; } cs35l56_base->type = devid & 0xFF; diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c index 1c9f32e405cf..ba080957e933 100644 --- a/sound/soc/codecs/rt721-sdca.c +++ b/sound/soc/codecs/rt721-sdca.c @@ -430,6 +430,7 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, unsigned int read_l, read_r, ctl_l = 0, ctl_r = 0; unsigned int adc_vol_flag = 0; const unsigned int interval_offset = 0xc0; + const unsigned int tendA = 0x200; const unsigned int tendB = 0xa00; if (strstr(ucontrol->id.name, "FU1E Capture Volume") || @@ -439,9 +440,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, regmap_read(rt721->mbq_regmap, mc->reg, &read_l); regmap_read(rt721->mbq_regmap, mc->rreg, &read_r); - if (mc->shift == 8) /* boost gain */ + if (mc->shift == 8) { + /* boost gain */ ctl_l = read_l / tendB; - else { + } else if (mc->shift == 1) { + /* FU33 boost gain */ + if (read_l == 0x8000 || read_l == 0xfe00) + ctl_l = 0; + else + ctl_l = read_l / tendA + 1; + } else { if (adc_vol_flag) ctl_l = mc->max - (((0x1e00 - read_l) & 0xffff) / interval_offset); else @@ -449,9 +457,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, } if (read_l != read_r) { - if (mc->shift == 8) /* boost gain */ + if (mc->shift == 8) { + /* boost gain */ ctl_r = read_r / tendB; - else { /* ADC/DAC gain */ + } else if (mc->shift == 1) { + /* FU33 boost gain */ + if (read_r == 0x8000 || read_r == 0xfe00) + ctl_r = 0; + else + ctl_r = read_r / tendA + 1; + } else { /* ADC/DAC gain */ if (adc_vol_flag) ctl_r = mc->max - (((0x1e00 - read_r) & 0xffff) / interval_offset); else diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 677529916dc0..745532ccbdba 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -517,7 +517,8 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate) regmap_update_bits(asrc->regmap, REG_ASRCTR, ASRCTR_ATSi_MASK(index), ASRCTR_ATS(index)); regmap_update_bits(asrc->regmap, REG_ASRCTR, - ASRCTR_USRi_MASK(index), 0); + ASRCTR_IDRi_MASK(index) | ASRCTR_USRi_MASK(index), + ASRCTR_USR(index)); /* Set the input and output clock sources */ regmap_update_bits(asrc->regmap, REG_ASRCSR, diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index af1a168d35e3..50af6b725670 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -803,13 +803,15 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) * anymore. Add software reset to fix this issue. * This is a hardware bug, and will be fix in the * next sai version. + * + * In consumer mode, this can happen even after a + * single open/close, especially if both tx and rx + * are running concurrently. */ - if (!sai->is_consumer_mode[tx]) { - /* Software Reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); - /* Clear SR bit to finish the reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); - } + /* Software Reset */ + regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); + /* Clear SR bit to finish the reset */ + regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); } static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd, diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 2df7afa2f469..128b6876af83 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -42,6 +42,7 @@ config SND_SOC_INTEL_SOF_NUVOTON_COMMON tristate config SND_SOC_INTEL_SOF_BOARD_HELPERS + select SND_SOC_ACPI_INTEL_MATCH tristate if SND_SOC_INTEL_CATPT diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 81a914bd7ec2..504887505e68 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -783,6 +783,9 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC), SND_PCI_QUIRK(0x1043, 0x1f43, "ASUS Zenbook S16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2347, "Lenovo P16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2348, "Lenovo P16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2349, "Lenovo P1", SOC_SDW_CODEC_MIC), {} }; diff --git a/sound/soc/intel/common/soc-acpi-intel-arl-match.c b/sound/soc/intel/common/soc-acpi-intel-arl-match.c index 73e581e93755..1ad704ca2c5f 100644 --- a/sound/soc/intel/common/soc-acpi-intel-arl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-arl-match.c @@ -468,17 +468,17 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = { .get_function_tplg_files = sof_sdw_get_tplg_files, }, { - .link_mask = BIT(2), - .links = arl_cs42l43_l2, + .link_mask = BIT(2) | BIT(3), + .links = arl_cs42l43_l2_cs35l56_l3, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-cs42l43-l2.tplg", + .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, { - .link_mask = BIT(2) | BIT(3), - .links = arl_cs42l43_l2_cs35l56_l3, + .link_mask = BIT(2), + .links = arl_cs42l43_l2, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg", + .sof_tplg_filename = "sof-arl-cs42l43-l2.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, { diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index e86b4a03dd61..3d9ba13ee1e5 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -186,6 +186,7 @@ config SND_SOC_SM8250 tristate "SoC Machine driver for SM8250 boards" depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK + depends on SND_SOC_QCOM_OFFLOAD_UTILS || !SND_SOC_QCOM_OFFLOAD_UTILS select SND_SOC_QDSP6 select SND_SOC_QCOM_COMMON select SND_SOC_QCOM_SDW diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index bdfe388da198..3b47191ea7a5 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1257,11 +1257,11 @@ static int check_tplg_quirk_mask(struct snd_soc_acpi_mach *mach) return 0; } -static char *remove_file_ext(const char *tplg_filename) +static char *remove_file_ext(struct device *dev, const char *tplg_filename) { char *filename, *tmp; - filename = kstrdup(tplg_filename, GFP_KERNEL); + filename = devm_kstrdup(dev, tplg_filename, GFP_KERNEL); if (!filename) return NULL; @@ -1345,7 +1345,7 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) */ if (!sof_pdata->tplg_filename) { /* remove file extension if it exists */ - tplg_filename = remove_file_ext(mach->sof_tplg_filename); + tplg_filename = remove_file_ext(sdev->dev, mach->sof_tplg_filename); if (!tplg_filename) return NULL; diff --git a/sound/usb/format.c b/sound/usb/format.c index 8cd54f7bf33a..0ee532acbb60 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -310,16 +310,14 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, struct audioformat *fp, unsigned int rate) { - struct usb_interface *iface; struct usb_host_interface *alts; unsigned char *fmt; unsigned int max_rate; - iface = usb_ifnum_to_if(chip->dev, fp->iface); - if (!iface) + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) return true; - alts = &iface->altsetting[fp->altset_idx]; fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_FORMAT_TYPE); if (!fmt) @@ -328,20 +326,20 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, if (fmt[0] == 10) { /* bLength */ max_rate = combine_quad(&fmt[6]); - /* Validate max rate */ - if (max_rate != 48000 && - max_rate != 96000 && - max_rate != 192000 && - max_rate != 384000) { - + switch (max_rate) { + case 48000: + return (rate == 44100 || rate == 48000); + case 96000: + return (rate == 88200 || rate == 96000); + case 192000: + return (rate == 176400 || rate == 192000); + default: usb_audio_info(chip, "%u:%d : unexpected max rate: %u\n", fp->iface, fp->altsetting, max_rate); return true; } - - return rate <= max_rate; } return true; diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c index 5bc27c82e0af..3543b5a53592 100644 --- a/sound/usb/qcom/qc_audio_offload.c +++ b/sound/usb/qcom/qc_audio_offload.c @@ -759,7 +759,7 @@ static void qmi_stop_session(void) subs = find_substream(pcm_card_num, info->pcm_dev_num, info->direction); if (!subs || !chip || atomic_read(&chip->shutdown)) { - dev_err(&subs->dev->dev, + dev_err(&uadev[idx].udev->dev, "no sub for c#%u dev#%u dir%u\n", info->pcm_card_num, info->pcm_dev_num, @@ -1360,20 +1360,21 @@ static int prepare_qmi_response(struct snd_usb_substream *subs, if (!uadev[card_num].ctrl_intf) { dev_err(&subs->dev->dev, "audio ctrl intf info not cached\n"); - ret = -ENODEV; - goto err; + return -ENODEV; } ret = uaudio_populate_uac_desc(subs, resp); if (ret < 0) - goto err; + return ret; resp->slot_id = subs->dev->slot_id; resp->slot_id_valid = 1; data = snd_soc_usb_find_priv_data(uaudio_qdev->auxdev->dev.parent); - if (!data) - goto err; + if (!data) { + dev_err(&subs->dev->dev, "No private data found\n"); + return -ENODEV; + } uaudio_qdev->data = data; @@ -1382,7 +1383,7 @@ static int prepare_qmi_response(struct snd_usb_substream *subs, &resp->xhci_mem_info.tr_data, &resp->std_as_data_ep_desc); if (ret < 0) - goto err; + return ret; resp->std_as_data_ep_desc_valid = 1; @@ -1500,7 +1501,6 @@ drop_data_ep: xhci_sideband_remove_endpoint(uadev[card_num].sb, usb_pipe_endpoint(subs->dev, subs->data_endpoint->pipe)); -err: return ret; } diff --git a/sound/usb/stream.c b/sound/usb/stream.c index c1ea8844a46f..aa91d63749f2 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -987,6 +987,8 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, * and request Cluster Descriptor */ wLength = le16_to_cpu(hc_header.wLength); + if (wLength < sizeof(cluster)) + return NULL; cluster = kzalloc(wLength, GFP_KERNEL); if (!cluster) return ERR_PTR(-ENOMEM); diff --git a/tools/arch/loongarch/include/asm/orc_types.h b/tools/arch/loongarch/include/asm/orc_types.h index caf1f71a1057..d5fa98d1d177 100644 --- a/tools/arch/loongarch/include/asm/orc_types.h +++ b/tools/arch/loongarch/include/asm/orc_types.h @@ -34,7 +34,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -53,6 +53,6 @@ struct orc_entry { unsigned int type:3; unsigned int signal:1; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index b7dded3c8113..5cfb5d74dd5f 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -628,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 5a37ccbec54f..f61a01dd7eb7 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +#ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #include <stdlib.h> static inline void print_ip_sym(const char *loglvl, unsigned long ip) @@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip) free(name); } +#else +static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} +#endif #endif diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 460c3e57fadb..0381f209920a 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -226,6 +226,9 @@ static void btf_dump_free_names(struct hashmap *map) size_t bkt; struct hashmap_entry *cur; + if (!map) + return; + hashmap__for_each_entry(map, cur, bkt) free((void *)cur->pkey); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e9c641a2fb20..52e353368f58 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -597,7 +597,7 @@ struct extern_desc { int sym_idx; int btf_id; int sec_btf_id; - const char *name; + char *name; char *essent_name; bool is_set; bool is_weak; @@ -4259,7 +4259,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return ext->btf_id; } t = btf__type_by_id(obj->btf, ext->btf_id); - ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); + if (!ext->name) + return -ENOMEM; ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; @@ -9138,8 +9140,10 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); - for (i = 0; i < obj->nr_extern; i++) + for (i = 0; i < obj->nr_extern; i++) { + zfree(&obj->externs[i].name); zfree(&obj->externs[i].essent_name); + } zfree(&obj->externs); obj->nr_extern = 0; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f23bdda737aa..d967ac001498 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2318,6 +2318,7 @@ static int read_annotate(struct objtool_file *file, for_each_reloc(sec->rsec, reloc) { type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); + type = bswap_if_needed(file->elf, type); offset = reloc->sym->offset + reloc_addend(reloc); insn = find_insn(file, reloc->sym->sec, offset); diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e2a2c46c008b..3d8378972d26 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -21,7 +21,6 @@ test_lirc_mode2_user flow_dissector_load test_tcpnotify_user test_libbpf -test_sysctl xdping test_cpp *.d diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index cf5ed3bee573..910d8d6402ef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -73,7 +73,7 @@ endif # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \ test_sockmap \ - test_tcpnotify_user test_sysctl \ + test_tcpnotify_user \ test_progs-no_alu32 TEST_INST_SUBDIRS := no_alu32 @@ -220,7 +220,7 @@ ifeq ($(VMLINUX_BTF),) $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") endif -# Define simple and short `make test_progs`, `make test_sysctl`, etc targets +# Define simple and short `make test_progs`, `make test_maps`, etc targets # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. @@ -329,7 +329,6 @@ NETWORK_HELPERS := $(OUTPUT)/network_helpers.o $(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) $(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tag: $(TESTING_HELPERS) $(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) $(OUTPUT)/xdping: $(TESTING_HELPERS) diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c index bcdbd27f22f0..273dd41ca09e 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c @@ -1,22 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <fcntl.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include <linux/filter.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include <bpf/bpf_endian.h> -#include "bpf_util.h" +#include "test_progs.h" #include "cgroup_helpers.h" -#include "testing_helpers.h" #define CG_PATH "/foo" #define MAX_INSNS 512 @@ -1608,26 +1594,19 @@ static int run_tests(int cgfd) return fails ? -1 : 0; } -int main(int argc, char **argv) +void test_sysctl(void) { - int cgfd = -1; - int err = 0; + int cgfd; cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; + if (!ASSERT_OK_FD(cgfd < 0, "create_cgroup")) + goto out; - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (!ASSERT_OK(run_tests(cgfd), "run_tests")) + goto out; - if (run_tests(cgfd)) - goto err; - - goto out; -err: - err = -1; out: close(cgfd); cleanup_cgroup_environment(); - return err; + return; } diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c index a3f220ba7025..ee65bad0436d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c +++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c @@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last"); int percpu_arr[1] SEC(".data.percpu_arr"); +/* at least one extern is included, to ensure that a specific + * regression is tested whereby resizing resulted in a free-after-use + * bug after type information is invalidated by the resize operation. + * + * There isn't a particularly good API to test for this specific condition, + * but by having externs for the resizing tests it will cover this path. + */ +extern int LINUX_KERNEL_VERSION __kconfig; +long version_sink; + SEC("tp/syscalls/sys_enter_getpid") int bss_array_sum(void *ctx) { @@ -44,6 +54,9 @@ int bss_array_sum(void *ctx) for (size_t i = 0; i < bss_array_len; ++i) sum += array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } @@ -59,6 +72,9 @@ int data_array_sum(void *ctx) for (size_t i = 0; i < data_array_len; ++i) sum += my_array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c index a7c0a553aa50..3e2d76ee8050 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. */ #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -82,4 +83,21 @@ int BPF_PROG(path_d_path_from_file_argument, struct file *file) return 0; } +SEC("lsm.s/inode_rename") +__success +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + if (!inode) + return 0; + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c index d6d3f4fcb24c..4b392c6c8fc4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. */ #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <linux/limits.h> @@ -158,4 +159,18 @@ int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f) return 0; } +SEC("lsm.s/inode_rename") +__failure __msg("invalid mem access 'trusted_ptr_or_null_'") +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index fda7589c5023..0921939532c6 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -138,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try) return ret; } +/* Derive target_free from map_size, same as bpf_common_lru_populate */ +static unsigned int __tgt_size(unsigned int map_size) +{ + return (map_size / nr_cpus) / 2; +} + +/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */ +static unsigned int __map_size(unsigned int tgt_free) +{ + return tgt_free * nr_cpus * 2; +} + /* Size of the LRU map is 2 * Add key=1 (+1 key) * Add key=2 (+1 key) @@ -231,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags) printf("Pass\n"); } -/* Size of the LRU map is 1.5*tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Lookup 1 to tgt_free/2 - * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys) - * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU +/* Verify that unreferenced elements are recycled before referenced ones. + * Insert elements. + * Reference a subset of these. + * Insert more, enough to trigger recycling. + * Verify that unreferenced are recycled. */ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) { @@ -257,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -266,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup 1 to tgt_free/2 */ + /* Lookup 1 to batch_size */ end_key = 1 + batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); @@ -280,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) BPF_NOEXIST)); } - /* Insert 1+tgt_free to 2*tgt_free - * => 1+tgt_free/2 to LOCALFREE_TARGET will be + /* Insert another map_size - batch_size keys + * Map will contain 1 to batch_size plus these latest, i.e., + * => previous 1+batch_size to map_size - batch_size will have been * removed by LRU */ - key = 1 + tgt_free; - end_key = key + tgt_free; + key = 1 + __map_size(tgt_free); + end_key = key + __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -301,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map 1.5 * tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Update 1 to tgt_free/2 - * => The original 1 to tgt_free/2 will be removed due to - * the LRU shrink process - * Re-insert 1 to tgt_free/2 again and do a lookup immeidately - * Insert 1+tgt_free to tgt_free*3/2 - * Insert 1+tgt_free*3/2 to tgt_free*5/2 - * => Key 1+tgt_free to tgt_free*3/2 - * will be removed from LRU because it has never - * been lookup and ref bit is not set +/* Verify that insertions exceeding map size will recycle the oldest. + * Verify that unreferenced elements are recycled before referenced. */ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) { @@ -334,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -343,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -357,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) * shrink the inactive list to get tgt_free * number of free nodes. * - * Hence, the oldest key 1 to tgt_free/2 - * are removed from the LRU list. + * Hence, the oldest key is removed from the LRU list. */ key = 1; if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { @@ -370,8 +373,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) BPF_EXIST)); } - /* Re-insert 1 to tgt_free/2 again and do a lookup - * immeidately. + /* Re-insert 1 to batch_size again and do a lookup immediately. */ end_key = 1 + batch_size; value[0] = 4321; @@ -387,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1+tgt_free to tgt_free*3/2 */ - end_key = 1 + tgt_free + batch_size; - for (key = 1 + tgt_free; key < end_key; key++) + /* Insert batch_size new elements */ + key = 1 + __map_size(tgt_free); + end_key = key + batch_size; + for (; key < end_key; key++) /* These newly added but not referenced keys will be * gone during the next LRU shrink. */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */ - end_key = key + tgt_free; + /* Insert map_size - batch_size elements */ + end_key += __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -413,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map is 2*tgt_free - * It is to test the active/inactive list rotation - * Insert 1 to 2*tgt_free (+2*tgt_free keys) - * Lookup key 1 to tgt_free*3/2 - * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys) - * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU +/* Test the active/inactive list rotation + * + * Fill the whole map, deplete the free list. + * Reference all except the last lru->target_free elements. + * Insert lru->target_free new elements. This triggers one shrink. + * Verify that the non-referenced elements are replaced. */ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) { @@ -437,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(sched_next_online(0, &next_cpu) != -1); - batch_size = tgt_free / 2; - assert(batch_size * 2 == tgt_free); + batch_size = __tgt_size(tgt_free); map_size = tgt_free * 2; lru_map_fd = create_map(map_type, map_flags, map_size); @@ -449,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */ - end_key = 1 + (2 * tgt_free); + /* Fill the map */ + end_key = 1 + map_size; for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup key 1 to tgt_free*3/2 */ - end_key = tgt_free + batch_size; + /* Reference all but the last batch_size */ + end_key = 1 + map_size - batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } - /* Add 1+2*tgt_free to tgt_free*5/2 - * (+tgt_free/2 keys) - */ + /* Insert new batch_size: replaces the non-referenced elements */ key = 2 * tgt_free + 1; end_key = key + batch_size; for (; key < end_key; key++) { @@ -500,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free * nr_cpus); else - lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free); + lru_map_fd = create_map(map_type, map_flags, + 3 * __map_size(tgt_free)); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 9984413be9f0..68f8e479ac36 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -461,10 +461,15 @@ TEST_F(coredump, socket_detect_userspace_client) _exit(EXIT_FAILURE); } + ret = read(fd_coredump, &c, 1); + close(fd_coredump); close(fd_server); close(fd_peer_pidfd); close(fd_core_file); + + if (ret < 1) + _exit(EXIT_FAILURE); _exit(EXIT_SUCCESS); } self->pid_coredump_server = pid_coredump_server; diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index f439c434ba36..648ff50bc1c3 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -38,7 +38,7 @@ def test_rss_input_xfrm(cfg, ipver): raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") input_xfrm = cfg.ethnl.rss_get( - {'header': {'dev-name': cfg.ifname}}).get('input_xfrm') + {'header': {'dev-name': cfg.ifname}}).get('input-xfrm') # Check for symmetric xor/or-xor if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2): diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 7b24ae89594a..776ad658f75e 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -11,3 +11,4 @@ futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock futex_waitv +futex_numa diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1a8e85afe9aa..1926ef6b40ab 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -54,6 +54,8 @@ static __attribute__((constructor)) void setup_sizes(void) mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, &mfd); + assert(mfd_buffer != MAP_FAILED); + assert(mfd > 0); } FIXTURE(iommufd) @@ -1746,13 +1748,15 @@ TEST_F(iommufd_mock_domain, all_aligns) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -1798,13 +1802,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -2008,6 +2014,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + size_t mmap_buffer_size; unsigned long size; int mmap_flags; void *vrc; @@ -2022,22 +2029,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking) self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); - rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size); - if (rc || !self->buffer) { - SKIP(return, "Skipping buffer_size=%lu due to errno=%d", - variant->buffer_size, rc); - } - mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + mmap_buffer_size = variant->buffer_size; if (variant->hugepages) { /* * MAP_POPULATE will cause the kernel to fail mmap if THPs are * not available. */ mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + + /* + * Allocation must be aligned to the HUGEPAGE_SIZE, because the + * following mmap() will automatically align the length to be a + * multiple of the underlying huge page size. Failing to do the + * same at this allocation will result in a memory overwrite by + * the mmap(). + */ + if (mmap_buffer_size < HUGEPAGE_SIZE) + mmap_buffer_size = HUGEPAGE_SIZE; + } + + rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size); + if (rc || !self->buffer) { + SKIP(return, "Skipping buffer_size=%lu due to errno=%d", + mmap_buffer_size, rc); } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); - vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, + vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); assert(vrc == self->buffer); @@ -2066,8 +2084,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { - munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); + free(self->buffer); + free(self->bitmap); teardown_iommufd(self->fd, _metadata); } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 72f6636e5d90..6e967b58acfd 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -60,13 +60,18 @@ static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) { int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; int mfd = memfd_create("buffer", mfd_flags); + void *buf = MAP_FAILED; if (mfd <= 0) return MAP_FAILED; if (ftruncate(mfd, length)) - return MAP_FAILED; + goto out; *mfd_p = mfd; - return mmap(0, length, prot, flags, mfd, 0); + buf = mmap(0, length, prot, flags, mfd, 0); +out: + if (buf == MAP_FAILED) + close(mfd); + return buf; } /* diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 390ae2d87493..0eb371c62ab8 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -74,6 +74,7 @@ int main(int argc, char *argv[]) int testcase; char test[80]; + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); ksft_print_header(); diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index a28baa536332..deba93379c80 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -8,3 +8,6 @@ CONFIG_GUP_TEST=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ANON_VMA_NAME=y +CONFIG_FTRACE=y +CONFIG_PROFILING=y +CONFIG_UPROBES=y diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index bbae66fc5038..cc26480098ae 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -470,7 +470,9 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_GE(fd, 0); ASSERT_EQ(ftruncate(fd, page_size), 0); - ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0); + if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) { + SKIP(goto out, "Failed to read uprobe sysfs file, skipping"); + } memset(&attr, 0, attr_sz); attr.size = attr_sz; @@ -491,6 +493,7 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_NE(mremap(ptr2, page_size, page_size, MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED); +out: close(fd); remove(probe_file); } diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index a953c96aa16e..e2206265f67c 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=180 +timeout=900 diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index b380e102b22f..169dbd692bf5 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -77,8 +77,11 @@ static void validate_addr(char *ptr, int high_addr) { unsigned long addr = (unsigned long) ptr; - if (high_addr && addr < HIGH_ADDR_MARK) - ksft_exit_fail_msg("Bad address %lx\n", addr); + if (high_addr) { + if (addr < HIGH_ADDR_MARK) + ksft_exit_fail_msg("Bad address %lx\n", addr); + return; + } if (addr > HIGH_ADDR_MARK) ksft_exit_fail_msg("Bad address %lx\n", addr); diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 3ed3882a93b8..b5f474969917 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, const char *expected_buf, int expected_len, - int buf_len, int flags) + int buf_len, int flags, bool is_sender) { int i, ret[2], recv_errno[2], expected_errno = 0; char recv_buf[2][BUF_SZ] = {}; @@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata, errno = 0; for (i = 0; i < 2; i++) { - ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + int index = is_sender ? i * 2 : i * 2 + 1; + + ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags); recv_errno[i] = errno; } @@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, ASSERT_EQ(answ[0], answ[1]); } +static void __resetpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const FIXTURE_VARIANT(msg_oob) *variant, + bool reset) +{ + int i; + + for (i = 0; i < 2; i++) + close(self->fd[i * 2 + 1]); + + __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1, + variant->peek ? MSG_PEEK : 0, true); +} + #define sendpair(buf, len, flags) \ __sendpair(_metadata, self, buf, len, flags) @@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, if (variant->peek) \ __recvpair(_metadata, self, \ expected_buf, expected_len, \ - buf_len, (flags) | MSG_PEEK); \ + buf_len, (flags) | MSG_PEEK, false); \ __recvpair(_metadata, self, \ - expected_buf, expected_len, buf_len, flags); \ + expected_buf, expected_len, \ + buf_len, flags, false); \ } while (0) #define epollpair(oob_remaining) \ @@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, #define setinlinepair() \ __setinlinepair(_metadata, self) +#define resetpair(reset) \ + __resetpair(_metadata, self, variant, reset) + #define tcp_incompliant \ for (self->tcp_compliant = false; \ self->tcp_compliant == false; \ @@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(true); +} + +TEST_F(msg_oob, non_oob_no_reset) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob) @@ -355,6 +390,19 @@ TEST_F(msg_oob, oob) recvpair("x", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, oob_reset) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + resetpair(true); } TEST_F(msg_oob, oob_drop) @@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead) @@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead) recvpair("hell", 4, 4, 0); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, oob_break) @@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break) recvpair("", -EAGAIN, 1, 0); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead_break) @@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break) recvpair("world", 5, 5, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_break_drop) @@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_break) @@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break) recvpair("ld", 2, 2, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop) @@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop_2) @@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_oob) @@ -546,6 +612,54 @@ TEST_F(msg_oob, ex_oob_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); +} + +TEST_F(msg_oob, ex_oob_ex_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, ex_oob_ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("z", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); } TEST_F(msg_oob, ex_oob_ahead_break) @@ -576,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break) recvpair("d", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, ex_oob_siocatmark) @@ -595,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_F(msg_oob, inline_oob) @@ -612,6 +732,8 @@ TEST_F(msg_oob, inline_oob) recvpair("x", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_break) @@ -633,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break) recvpair("o", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_ahead_break) @@ -661,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break) epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_break) @@ -686,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break) recvpair("rld", 3, 3, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_no_drop) @@ -707,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop) recvpair("y", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_drop) @@ -731,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop) epollpair(false); siocatmarkpair(false); } + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_siocatmark) @@ -752,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh index 5b34f6e1f831..48eb999a3120 100755 --- a/tools/testing/selftests/net/gre_ipv6_lladdr.sh +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -24,7 +24,10 @@ setup_basenet() ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad } -# Check if network device has an IPv6 link-local address assigned. +# Check the IPv6 configuration of a network device. +# +# We currently check the generation of the link-local IPv6 address and the +# creation of the ff00::/8 multicast route. # # Parameters: # @@ -35,7 +38,7 @@ setup_basenet() # a link-local address) # * $4: The user visible name for the scenario being tested # -check_ipv6_ll_addr() +check_ipv6_device_config() { local DEV="$1" local EXTRA_MATCH="$2" @@ -45,7 +48,11 @@ check_ipv6_ll_addr() RET=0 set +e ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" - check_err_fail "${XRET}" $? "" + check_err_fail "${XRET}" $? "IPv6 link-local address generation" + + ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}" + check_err_fail 0 $? "IPv6 multicast route creation" + log_test "${MSG}" set -e } @@ -102,20 +109,20 @@ test_gre_device() ;; esac - # Check that IPv6 link-local address is generated when device goes up + # Check the IPv6 device configuration when it goes up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" ip -netns "${NS0}" link set dev gretest up - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" # Now disable link-local address generation ip -netns "${NS0}" link set dev gretest down ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 ip -netns "${NS0}" link set dev gretest up - # Check that link-local address generation works when re-enabled while - # the device is already up + # Check the IPv6 device configuration when link-local address + # generation is re-enabled while the device is already up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" ip -netns "${NS0}" link del dev gretest } @@ -126,7 +133,7 @@ test_gre4() local MODE for GRE_TYPE in "gre" "gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" @@ -142,7 +149,7 @@ test_gre6() local MODE for GRE_TYPE in "ip6gre" "ip6gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 006fdadcc4b9..86a216e9aca8 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -312,7 +312,7 @@ log_test_result() local test_name=$1; shift local opt_str=$1; shift local result=$1; shift - local retmsg=$1; shift + local retmsg=$1 printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" if [[ $retmsg ]]; then diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt new file mode 100644 index 000000000000..09aabc775e80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + +// Test that a not-yet-accepted socket does not change +// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets. + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + +0 < . 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001> + +0 < . 1001:2001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 101001 + + +0 accept(3, ..., ...) = 4 + + +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }% + + +0 close(4) = 0 + +0 close(3) = 0 + +// Test that ooo packets for accepted sockets do increase sk_rcvbuf + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + + +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }% + diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9aa44d8176d9..5c6851e8d311 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -635,5 +635,42 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "d74b", + "name": "Test use-after-free with DRR/NETEM/BLACKHOLE chain", + "category": [ + "qdisc", + "hfsc", + "drr", + "netem", + "blackhole" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DUMMY parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: netem", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4: blackhole", + "ping -c1 -W0.01 -I $DUMMY 10.10.11.11 || true", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j class ls dev $DUMMY classid 1:1", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr" + ] } ] diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh index 6eef282d569f..3ed4c9b2d8c0 100755 --- a/tools/testing/selftests/ublk/test_stress_03.sh +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -32,22 +32,23 @@ _create_backfile 2 128M ublk_io_and_remove 8G -t null -q 4 -z & ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait if _have_feature "AUTO_BUF_REG"; then ublk_io_and_remove 8G -t null -q 4 --auto_zc & ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & + wait fi -wait if _have_feature "PER_IO_DAEMON"; then ublk_io_and_remove 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks & ublk_io_and_remove 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks & + wait fi -wait _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eec82775c5bf..222f0e894a0c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2572,6 +2572,8 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT); if (r) goto out_unlock; + + cond_resched(); } kvm_handle_gfn_range(kvm, &pre_set_range); @@ -2580,6 +2582,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, r = xa_err(xa_store(&kvm->mem_attr_array, i, entry, GFP_KERNEL_ACCOUNT)); KVM_BUG_ON(r, kvm); + cond_resched(); } kvm_handle_gfn_range(kvm, &post_set_range); |